Skip to content
Snippets Groups Projects
Commit eb056826 authored by Loic Hausammann's avatar Loic Hausammann
Browse files

Merge branch 'hashmap' into 'master'

Implement hashmap for generation of index

See merge request !15
parents 7941d887 c364e2aa
Branches
Tags
1 merge request!15Implement hashmap for generation of index
......@@ -38,13 +38,14 @@ GRAVITY_SRC = gravity/MultiSoftening/csds_gravity.c
# List required headers
include_HEADERS = csds_header.h csds_loader_io.h csds_particle.h csds_time.h csds_tools.h
include_HEADERS += csds_reader.h csds_logfile.h csds_index.h quick_sort.h csds_python_tools.h
include_HEADERS += csds_reader.h csds_logfile.h csds_index.h quick_sort.h csds_python_tools.h
include_HEADERS += csds_interpolation.h csds_parameters.h csds_cosmology.h csds_fields.h
include_HEADERS += csds_hashmap.h
# Common source files
AM_SOURCES = csds_header.c csds_loader_io.c csds_particle.c csds_time.c csds_tools.c csds_reader.c
AM_SOURCES += csds_logfile.c csds_index.c quick_sort.c csds_parameters.c csds_reader_generate_index.c
AM_SOURCES += csds_cosmology.c csds_fields.c
AM_SOURCES += csds_cosmology.c csds_fields.c csds_hashmap.c
if HAVEPYTHON
AM_SOURCES += csds_python_wrapper.c
......
// Copyright 2020 Joshua J Baker. All rights reserved.
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.
#include <stdio.h>
#include <string.h>
/* CSDS headers */
#include "csds_hashmap.h"
/* SWIFT headers */
#include "error.h"
/**
 * @brief returns a new hash map.
 *
 * The csds_hashmap must be freed with csds_hashmap_free().
 *
 * @param cap The default lower capacity of the csds_hashmap. Setting this to
 * zero will default to 16.
 *
 * @return The new map, or NULL on allocation failure.
 */
struct csds_hashmap *csds_hashmap_new(size_t cap) {
  /* Round the capacity up to a power of two (minimum 16) so that
   * "hash & mask" can be used instead of a modulo. */
  size_t ncap = 16;
  if (cap < ncap) {
    cap = ncap;
  } else {
    while (ncap < cap) {
      ncap *= 2;
    }
    cap = ncap;
  }

  /* A bucket is composed of the bucket header + the payload,
   * padded up to pointer alignment. */
  size_t bucketsz = sizeof(struct bucket) + sizeof(struct index_data);
  while (bucketsz & (sizeof(uintptr_t) - 1)) {
    bucketsz++;
  }

  /* Allocate the map plus two scratch buckets (spare and edata).
   * calloc zero-initializes, so no separate memset is needed. */
  size_t size = sizeof(struct csds_hashmap) + 2 * bucketsz;
  struct csds_hashmap *map = calloc(1, size);
  if (!map) {
    return NULL;
  }

  /* Set all the attributes */
  map->bucketsz = bucketsz;
  map->spare = ((char *)map) + sizeof(struct csds_hashmap);
  map->edata = (char *)map->spare + bucketsz;
  map->cap = cap;
  map->nbuckets = cap;
  map->mask = map->nbuckets - 1;

  /* Allocate the array of buckets. calloc checks n*size for overflow
   * and returns zeroed memory, i.e. all buckets start empty (dib == 0). */
  map->buckets = calloc(map->nbuckets, map->bucketsz);
  if (!map->buckets) {
    free(map);
    return NULL;
  }

  /* Set the item counts at which the map is resized. */
  map->growat = map->nbuckets / hashmap_overallocation;
  map->shrinkat = map->nbuckets * 0.10;
  return map;
}
/**
 * @brief quickly clears the map.
 *
 * When update_cap is non-zero, the map's base capacity is updated to match
 * the current number of allocated buckets. This is an optimization to ensure
 * that this operation does not perform any allocations.
 *
 * @param map The hashmap.
 * @param update_cap Whether to adopt the current bucket count as capacity.
 */
void csds_hashmap_clear(struct csds_hashmap *map, int update_cap) {
  map->count = 0;
  if (update_cap) {
    map->cap = map->nbuckets;
  } else if (map->nbuckets != map->cap) {
    /* Shrink the storage back to the base capacity. */
    void *new_buckets = malloc(map->bucketsz * map->cap);
    /* Only commit the new geometry if the allocation succeeded:
     * previously nbuckets was updated even on failure, leaving the
     * bookkeeping inconsistent with the actual buffer size. */
    if (new_buckets) {
      free(map->buckets);
      map->buckets = new_buckets;
      map->nbuckets = map->cap;
    }
  }
  /* Mark every bucket as empty (dib == 0). */
  memset(map->buckets, 0, map->bucketsz * map->nbuckets);
  map->mask = map->nbuckets - 1;
  map->growat = map->nbuckets / hashmap_overallocation;
  map->shrinkat = map->nbuckets * 0.10;
}
/**
 * @brief Resize the hash table.
 *
 * Allocates a fresh table of new_cap buckets, re-inserts every occupied
 * bucket with robin-hood probing, then steals the new bucket array.
 *
 * @param map The hashmap to resize.
 * @param new_cap The requested capacity (rounded up by csds_hashmap_new).
 * @return 1 on success, 0 if the allocation failed (map is untouched).
 */
static int resize(struct csds_hashmap *map, size_t new_cap) {
  /* Allocate a new hashmap */
  struct csds_hashmap *map2 = csds_hashmap_new(new_cap);
  if (!map2) {
    return 0;
  }
  /* Re-insert every occupied bucket of the old table into map2 */
  for (size_t i = 0; i < map->nbuckets; i++) {
    struct bucket *entry = bucket_at(map, i);
    /* Skip empty buckets */
    if (!entry->dib) {
      continue;
    }
    /* Reset the probe distance: the entry restarts at its home bucket. */
    entry->dib = 1;
    size_t j = entry->hash & map2->mask;
    /* Copy the bucket into map2 */
    while (1) {
      struct bucket *bucket = bucket_at(map2, j);
      /* Empty bucket in map2: place the entry here, done. */
      if (bucket->dib == 0) {
        memcpy(bucket, entry, map->bucketsz);
        break;
      }
      /* Occupied bucket that sits closer to its home than the entry:
       * swap them (via the spare scratch bucket) and keep probing with
       * the displaced element — classic robin-hood insertion. */
      if (bucket->dib < entry->dib) {
        memcpy(map2->spare, bucket, map->bucketsz);
        memcpy(bucket, entry, map->bucketsz);
        memcpy(entry, map2->spare, map->bucketsz);
      }
      j = (j + 1) & map2->mask;
      entry->dib += 1;
    }
  }
  /* Free the old bucket array and adopt map2's geometry. */
  free(map->buckets);
  /* Copy the data */
  map->buckets = map2->buckets;
  map->nbuckets = map2->nbuckets;
  map->mask = map2->mask;
  map->growat = map2->growat;
  map->shrinkat = map2->shrinkat;
  /* Only the shell of map2 is freed: its bucket array lives on in map. */
  free(map2);
  return 1;
}
/**
 * @brief inserts or replaces an item in the csds_hash map.
 *
 * If an item with the same id is replaced then the previous item is
 * returned, otherwise NULL is returned. The returned pointer refers to
 * the map's internal spare bucket and is only valid until the next
 * operation on the map. This operation may allocate memory.
 *
 * @param map The hashmap.
 * @param item The item to insert (copied into the map).
 */
void *csds_hashmap_set(struct csds_hashmap *map, struct index_data *item) {
  if (!item) {
    error("item is null");
  }
  /* Grow the table before it gets too full (keeps probe chains short). */
  if (map->count == map->growat) {
    if (!resize(map, map->nbuckets * 2)) {
      error("Failed to reallocate memory");
    }
  }

  /* Build a complete bucket (header + payload) in the edata scratch area. */
  struct bucket *entry = map->edata;
  entry->hash = get_hash(map, item->id);
  entry->dib = 1; /* distance-to-initial-bucket; 1 == home position */
  memcpy(bucket_item(entry), item, sizeof(struct index_data));

  /* Robin-hood insertion starting from the home bucket. */
  size_t i = entry->hash & map->mask;
  while (1) {
    struct bucket *bucket = bucket_at(map, i);
    /* Empty bucket: claim it, done. */
    if (bucket->dib == 0) {
      memcpy(bucket, entry, map->bucketsz);
      map->count++;
      return NULL;
    }
    /* Same key already stored: swap the payloads and return the old one
     * (saved in the spare scratch bucket). */
    if (entry->hash == bucket->hash &&
        bucket_item(entry)->id == bucket_item(bucket)->id) {
      memcpy(map->spare, bucket_item(bucket), sizeof(struct index_data));
      memcpy(bucket_item(bucket), bucket_item(entry),
             sizeof(struct index_data));
      return map->spare;
    }
    /* Occupied by an element closer to its home bucket: swap with it and
     * continue inserting the displaced element. */
    if (bucket->dib < entry->dib) {
      memcpy(map->spare, bucket, map->bucketsz);
      memcpy(bucket, entry, map->bucketsz);
      memcpy(entry, map->spare, map->bucketsz);
    }
    i = (i + 1) & map->mask;
    entry->dib += 1;
  }
}
/**
 * @brief Looks up the item stored under the given key.
 *
 * @param map The hashmap.
 * @param key The particle id to search for.
 * @return Pointer to the stored #index_data, or NULL when absent.
 */
struct index_data *csds_hashmap_get(struct csds_hashmap *map, id_type key) {
  const uint64_t hash = get_hash(map, key);
  /* Linear probing from the key's home bucket; an empty bucket ends
   * the probe sequence, which means the key is not present. */
  for (size_t pos = hash & map->mask;; pos = (pos + 1) & map->mask) {
    struct bucket *cur = bucket_at(map, pos);
    if (cur->dib == 0) {
      return NULL;
    }
    /* Compare the cached hash first, then the actual key. */
    if (cur->hash == hash && bucket_item(cur)->id == key) {
      return bucket_item(cur);
    }
  }
}
/**
 * @brief removes an item from the hash map and returns it. If the
 * item is not found then NULL is returned.
 *
 * The returned pointer refers to the map's internal spare bucket and is
 * only valid until the next operation on the map.
 *
 * @param map The hashmap.
 * @param key The particle id to remove.
 */
void *csds_hashmap_delete(struct csds_hashmap *map, id_type key) {
  uint64_t hash = get_hash(map, key);
  size_t i = hash & map->mask;
  while (1) {
    struct bucket *bucket = bucket_at(map, i);
    /* Empty bucket ends the probe sequence: key not found. */
    if (!bucket->dib) {
      return NULL;
    }
    /* Did we get the correct particle? */
    if (bucket->hash == hash && key == bucket_item(bucket)->id) {
      /* Save the payload so it can be returned to the caller. */
      memcpy(map->spare, bucket_item(bucket), sizeof(struct index_data));
      bucket->dib = 0;
      /* Backward-shift the following elements of the probe chain so no
       * hole is left in the middle of a run. */
      while (1) {
        struct bucket *prev = bucket;
        i = (i + 1) & map->mask;
        bucket = bucket_at(map, i);
        /* dib <= 1 means empty or already in its home bucket: stop. */
        if (bucket->dib <= 1) {
          prev->dib = 0;
          break;
        }
        memcpy(prev, bucket, map->bucketsz);
        prev->dib--;
      }
      map->count--;
      if (map->nbuckets > map->cap && map->count <= map->shrinkat) {
        // Ignore the return value. It's ok for the resize operation to
        // fail to allocate enough memory because a shrink operation
        // does not change the integrity of the data.
        resize(map, map->nbuckets / 2);
      }
      return map->spare;
    }
    /* Move to the next element with the same hash */
    i = (i + 1) & map->mask;
  }
}
/**
 * @brief Returns the number of items currently stored in the hash map.
 */
size_t csds_hashmap_count(struct csds_hashmap *map) {
  const size_t nb_items = map->count;
  return nb_items;
}
/**
 * @brief Frees the hash map and its bucket storage.
 *
 * Accepts NULL (no-op), mirroring the behaviour of free().
 */
void csds_hashmap_free(struct csds_hashmap *map) {
  if (map == NULL) {
    return;
  }
  free(map->buckets);
  free(map);
}
/**
* @brief Writes the hash map
*
* @param map The hashmap.
* @param f The file to use.
*/
void csds_hashmap_write(struct csds_hashmap *map, FILE *f) {
size_t count = 0;
/* Loop over all the buckets */
for (size_t i = 0; i < map->nbuckets; i++) {
struct bucket *bucket = bucket_at(map, i);
/* Do only the non-empty buckets */
if (bucket->dib) {
count += 1;
fwrite(bucket_item(bucket), sizeof(struct index_data), 1, f);
}
}
/* Ensure that the correct number of elements
* have been written. */
if (count != map->count) {
error("Written a wrong number of elements.");
}
}
/**
 * @brief Try to get the item stored in a given bucket.
 * Returns NULL if the bucket is empty.
 *
 * @param map The hashmap.
 * @param i The requested bucket index.
 */
struct index_data *csds_hashmap_get_from_position(struct csds_hashmap *map,
                                                  size_t i) {
  struct bucket *b = bucket_at(map, i);
  /* dib == 0 marks an empty bucket. */
  return b->dib ? bucket_item(b) : NULL;
}
/**
 * @brief Returns the total number of buckets (occupied or not).
 */
size_t csds_hashmap_get_number_buckets(struct csds_hashmap *map) {
  const size_t nbuckets = map->nbuckets;
  return nbuckets;
}
/*
* The file was obtained from https://github.com/tidwall/hashmap.c
* and was slightly adapted.
*/
// Copyright 2020 Joshua J Baker. All rights reserved.
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.
#ifndef CSDS_HASHMAP_H
#define CSDS_HASHMAP_H
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
/* SWIFT headers */
#include "inline.h"
/* Type used for the particle ids. */
#define id_type int64_t
/* Over-allocation factor of the hashmap: the table grows once it is
 * 3/4 full. The value MUST be parenthesized: without the parentheses,
 * `nbuckets / hashmap_overallocation` expanded to `nbuckets / 4. / 3.`
 * (= nbuckets / 12) instead of the intended nbuckets * 3 / 4, making
 * growat far too small and triggering resizes 9x too early. */
#define hashmap_overallocation (4. / 3.)
/* Seeds for the SipHash hash function. */
#define hashmap_seed0 1513
#define hashmap_seed1 7654
/**
 * @brief Data structure contained in the csds files.
 *
 * This is also the payload stored in each #csds_hashmap bucket and the
 * record written to disk by csds_hashmap_write().
 */
struct index_data {
  /* Id of the particle (hash key of the map). */
  id_type id;
  /* Offset of the particle in the file. */
  uint64_t offset;
};
/**
 * hashmap is an open addressed hash map using robinhood hashing.
 *
 * Each bucket consists of a #bucket header immediately followed by one
 * #index_data payload (bucketsz bytes in total, padded to pointer size).
 */
struct csds_hashmap {
  /* The base capacity of the hashmap (bucket count it returns to on
   * csds_hashmap_clear). */
  size_t cap;
  /* The size in bytes of each bucket (header + payload + padding). */
  size_t bucketsz;
  /* The current number of buckets (always a power of two). */
  size_t nbuckets;
  /* The number of stored items. */
  size_t count;
  /* Bit mask (nbuckets - 1) restricting a hash to a valid bucket index. */
  size_t mask;
  /* Item count at which the hashmap doubles its size. */
  size_t growat;
  /* Item count at which the hashmap halves its size. */
  size_t shrinkat;
  /* The array of buckets. */
  void *buckets;
  /* A spare bucket used as scratch space by some operations. */
  void *spare;
  /* A second spare bucket used when setting an item. */
  void *edata;
};
struct bucket {
  /* Lower 48 bits of the item's hash value (see get_hash()). */
  uint64_t hash : 48;
  /* Robin-hood probe distance ("distance to initial bucket" + 1):
   * 0 marks an empty bucket, 1 an item sitting in its home bucket.
   * (The previous comment, "number of elements with this hash value",
   * did not match how set/resize/delete use this field.) */
  uint64_t dib : 16;
};
struct csds_hashmap *csds_hashmap_new(size_t cap);
void csds_hashmap_free(struct csds_hashmap *map);
void csds_hashmap_clear(struct csds_hashmap *map, int update_cap);
size_t csds_hashmap_count(struct csds_hashmap *map);
struct index_data *csds_hashmap_get(struct csds_hashmap *map, id_type key);
void *csds_hashmap_set(struct csds_hashmap *map, struct index_data *item);
void *csds_hashmap_delete(struct csds_hashmap *map, id_type key);
void csds_hashmap_write(struct csds_hashmap *map, FILE *f);
size_t csds_hashmap_get_number_buckets(struct csds_hashmap *map);
struct index_data *csds_hashmap_get_from_position(struct csds_hashmap *map,
size_t i);
/**
 * @brief Returns a pointer to the bucket at a given index.
 */
__attribute__((always_inline)) INLINE static struct bucket *bucket_at(
    struct csds_hashmap *map, size_t index) {
  char *base = (char *)map->buckets;
  return (struct bucket *)(base + index * map->bucketsz);
}
/**
 * @brief Returns the payload stored just after a bucket header.
 */
__attribute__((always_inline)) INLINE static struct index_data *bucket_item(
    struct bucket *entry) {
  return (struct index_data *)((char *)entry + sizeof(struct bucket));
}
//-----------------------------------------------------------------------------
// SipHash reference C implementation
//
// Copyright (c) 2012-2016 Jean-Philippe Aumasson
// <jeanphilippe.aumasson@gmail.com>
// Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
//
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
//
// You should have received a copy of the CC0 Public Domain Dedication along
// with this software. If not, see
// <http://creativecommons.org/publicdomain/zero/1.0/>.
//
// default: SipHash-2-4
//-----------------------------------------------------------------------------
/* Reference SipHash-2-4 of the raw bytes of an id. Left unmodified on
 * purpose: this is a vetted reference implementation. */
static uint64_t SIP64(const id_type *key) {
  const uint8_t *in = (uint8_t *)key;
  uint64_t seed0 = hashmap_seed0;
  uint64_t seed1 = hashmap_seed1;
  const int inlen = sizeof(id_type);
#define U8TO64_LE(p)                                           \
  {                                                            \
    (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) |        \
     ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \
     ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \
     ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))  \
  }
#define U64TO8_LE(p, v)                        \
  {                                            \
    U32TO8_LE((p), (uint32_t)((v)));           \
    U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); \
  }
#define U32TO8_LE(p, v)          \
  {                              \
    (p)[0] = (uint8_t)((v));     \
    (p)[1] = (uint8_t)((v) >> 8);  \
    (p)[2] = (uint8_t)((v) >> 16); \
    (p)[3] = (uint8_t)((v) >> 24); \
  }
#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
#define SIPROUND       \
  {                    \
    v0 += v1;          \
    v1 = ROTL(v1, 13); \
    v1 ^= v0;          \
    v0 = ROTL(v0, 32); \
    v2 += v3;          \
    v3 = ROTL(v3, 16); \
    v3 ^= v2;          \
    v0 += v3;          \
    v3 = ROTL(v3, 21); \
    v3 ^= v0;          \
    v2 += v1;          \
    v1 = ROTL(v1, 17); \
    v1 ^= v2;          \
    v2 = ROTL(v2, 32); \
  }
  /* Initialize the internal state from the two seeds and the standard
   * SipHash constants ("somepseudorandomlygeneratedbytes"). */
  uint64_t k0 = U8TO64_LE((uint8_t *)&seed0);
  uint64_t k1 = U8TO64_LE((uint8_t *)&seed1);
  uint64_t v3 = UINT64_C(0x7465646279746573) ^ k1;
  uint64_t v2 = UINT64_C(0x6c7967656e657261) ^ k0;
  uint64_t v1 = UINT64_C(0x646f72616e646f6d) ^ k1;
  uint64_t v0 = UINT64_C(0x736f6d6570736575) ^ k0;
  /* Compression: absorb the input 8 bytes at a time (2 rounds each). */
  const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
  for (; in != end; in += 8) {
    uint64_t m = U8TO64_LE(in);
    v3 ^= m;
    SIPROUND;
    SIPROUND;
    v0 ^= m;
  }
  /* Pack the remaining (< 8) bytes and the input length into one word. */
  const int left = inlen & 7;
  uint64_t b = ((uint64_t)inlen) << 56;
  switch (left) {
    case 7:
      b |= ((uint64_t)in[6]) << 48;
      break;
    case 6:
      b |= ((uint64_t)in[5]) << 40;
      break;
    case 5:
      b |= ((uint64_t)in[4]) << 32;
      break;
    case 4:
      b |= ((uint64_t)in[3]) << 24;
      break;
    case 3:
      b |= ((uint64_t)in[2]) << 16;
      break;
    case 2:
      b |= ((uint64_t)in[1]) << 8;
      break;
    case 1:
      b |= ((uint64_t)in[0]);
      break;
    case 0:
      break;
  }
  /* Absorb the final block, then run the 4 finalization rounds. */
  v3 ^= b;
  SIPROUND;
  SIPROUND;
  v0 ^= b;
  v2 ^= 0xff;
  SIPROUND;
  SIPROUND;
  SIPROUND;
  SIPROUND;
  b = v0 ^ v1 ^ v2 ^ v3;
  uint64_t out = 0;
  U64TO8_LE((uint8_t *)&out, b);
  return out;
}
// Returns the hash of an id using SipHash-2-4, truncated to the
// 48 bits that fit in bucket::hash.
__attribute__((always_inline)) INLINE static uint64_t get_hash(
    struct csds_hashmap *map, id_type x) {
  /* Use a 64-bit constant for the shift: the previous `1LU << 48` is
   * undefined behaviour on platforms where unsigned long is 32 bits. */
  return SIP64(&x) & ((UINT64_C(1) << 48) - 1);
}
#endif
......@@ -20,23 +20,13 @@
#ifndef CSDS_CSDS_INDEX_H
#define CSDS_CSDS_INDEX_H
#include "csds_hashmap.h"
#include "csds_loader_io.h"
#include "csds_tools.h"
/* predefine the structure */
struct csds_reader;
/**
* @brief Data structure contained in the csds files.
*/
struct index_data {
/* Id of the particle. */
int64_t id;
/* Offset of the particle in the file. */
uint64_t offset;
};
/**
* @brief Structure dealing with the index files.
*
......
......@@ -21,13 +21,10 @@
#include "csds_reader.h"
/* Include local headers */
#include "csds_hashmap.h"
#include "csds_index.h"
#include "csds_logfile.h"
/* This value of offset is used to tag
the particles as being removed */
#define PARTICLE_REMOVED 0
/**
* @brief Structure that contains all the information
* required to write an index file for a single particle type.
......@@ -160,90 +157,6 @@ void index_writer_log(struct index_writer *writer, const int64_t id,
/* Increase the element counter */
writer->size += 1;
// TODO sort
}
/**
* @brief Cleanup the array from the tagged particles if needed.
*
* @param writer The #index_writer.
* @param force Force the removal?
*/
void index_writer_remove_tagged_particles(struct index_writer *writer,
const int force) {
/* No need to clean the empty arrays. */
if (writer->size == 0) {
return;
}
/* Do we need to clean? */
const float frac = (float)writer->number_tag / (float)writer->size;
if (!force && frac < writer->max_frac_tag) return;
/* Clean */
size_t count = 0;
for (size_t i = 0; i < writer->size; i++) {
if (writer->data[i].offset != PARTICLE_REMOVED) continue;
count += 1;
/* Now replace it with the last particle. */
writer->data[i] = writer->data[writer->size - 1];
writer->size--;
/* We need to ensure that the last particle was not
* flagged */
i--;
}
/* Check that we removed all the particles. */
if (count != writer->number_tag) {
error("A tagged particle is missing.");
}
// TODO sort the particles
#ifdef SWIFT_DEBUG_CHECKS
for (size_t i = 0; i < writer->size; i++) {
if (writer->data[i].offset == PARTICLE_REMOVED)
error("Found a tagged particle after cleaning.");
}
#endif
/* Reset the counter */
writer->number_tag = 0;
}
/**
* @brief Remove the particles contained in parts_removed from current_state.
*
* @param current_state The #index_writer where we remove the particle
* @param part_id The ID of the particle.
*/
void index_writer_remove_part(struct index_writer *current_state,
int64_t part_id) {
// TODO use a binary search + ensure ids are sorted
// Do not forget to write in the index file that the files are sorted
for (size_t j = 0; j < current_state->size; j++) {
if (current_state->data[j].offset == PARTICLE_REMOVED ||
part_id != current_state->data[j].id) {
continue;
}
/* We have the particle, now tag it.
* In order to use the binary search, we cannot afford
* to swap the particle with the last one. */
current_state->data[j].offset = PARTICLE_REMOVED;
current_state->number_tag++;
/* Trigger a cleaning if needed */
index_writer_remove_tagged_particles(current_state, /* force */ 0);
return;
}
error("Trying to remove an unknown particle.");
}
/**
......@@ -294,7 +207,7 @@ void index_writer_write_in_index(const struct index_writer *writers, FILE *f) {
* @param file_number The current file number.
*/
void csds_reader_write_index(const struct csds_reader *reader,
struct index_writer *current_state,
struct csds_hashmap **current_state,
struct index_writer *parts_created,
struct index_writer *parts_removed,
const struct time_record *time, int file_number) {
......@@ -303,14 +216,11 @@ void csds_reader_write_index(const struct csds_reader *reader,
char filename[STRING_SIZE + 15];
sprintf(filename, "%s_%04i.index", reader->basename, file_number);
/* Trigger a cleaning of the arrays */
// TODO Remove this and skip the particles when writing?
for (int i = 0; i < swift_type_count; i++) {
index_writer_remove_tagged_particles(&current_state[i], /* force */ 1);
}
/* Check that we have only implemented particles */
index_writer_check_implemented(current_state);
if (csds_hashmap_count(current_state[swift_type_sink]) != 0 ||
csds_hashmap_count(current_state[swift_type_black_hole]) != 0 ||
csds_hashmap_count(current_state[swift_type_neutrino]) != 0)
error("Not implemented");
index_writer_check_implemented(parts_created);
index_writer_check_implemented(parts_removed);
......@@ -330,7 +240,7 @@ void csds_reader_write_index(const struct csds_reader *reader,
/* Write number of particles */
uint64_t N_total[swift_type_count];
for (int type = 0; type < swift_type_count; type++) {
N_total[type] = current_state[type].size;
N_total[type] = csds_hashmap_count(current_state[type]);
}
fwrite(N_total, sizeof(uint64_t), swift_type_count, f);
......@@ -348,12 +258,12 @@ void csds_reader_write_index(const struct csds_reader *reader,
fwrite(&tmp, d_align, 1, f);
}
/* Write the arrays */
/* Write the current state */
for (int type = 0; type < swift_type_count; type++) {
if (N_total[type] == 0) continue;
fwrite(current_state[type].data, sizeof(struct index_data),
current_state[type].size, f);
// TODO memory map the file
csds_hashmap_write(current_state[type], f);
}
/* Now do the same with the particles created / removed */
......@@ -383,7 +293,7 @@ void csds_reader_write_index(const struct csds_reader *reader,
* (the first record that does not correspond to the IC).
*/
size_t csds_reader_get_initial_state(const struct csds_reader *reader,
struct index_writer *current_state,
struct csds_hashmap **current_state,
struct time_record *time_record) {
/* Get a few variables. */
......@@ -443,7 +353,11 @@ size_t csds_reader_get_initial_state(const struct csds_reader *reader,
/* derivative */ 0, &mask, &prev_offset);
/* Log the particle */
index_writer_log(&current_state[part_type], id, offset);
struct index_data item = {id, offset};
void *p = (void *)csds_hashmap_set(current_state[part_type], &item);
if (p != NULL) {
error("Already found a particle with the same ID");
}
/* Increment the offset */
const int record_size = header_get_record_size_from_mask(h, mask);
......@@ -475,6 +389,9 @@ struct update_particle_data {
/* Time when starting the update. */
int init_time;
/* The hashmap to udpate */
struct csds_hashmap *current_state;
};
/**
......@@ -492,18 +409,24 @@ void csds_reader_update_particles_to_next_index_mapper(void *map_data,
void *extra_data) {
/* Get a few pointers */
struct index_data *current_state = (struct index_data *)map_data;
struct update_particle_data *data = (struct update_particle_data *)extra_data;
const struct csds_reader *reader = data->reader;
const struct csds_logfile *log = &reader->log;
const struct header *h = &log->header;
struct csds_hashmap *current_state = data->current_state;
/* Loop over the particles */
for (int i = 0; i < num_elements; i++) {
size_t current_offset = current_state[i].offset;
for (int local = 0; local < num_elements; local++) {
size_t i = (size_t)map_data + local;
struct index_data *index_data =
csds_hashmap_get_from_position(current_state, i);
/* Did we get an item? */
if (index_data == NULL) {
continue;
}
/* Skip the flagged particles. */
if (current_offset == PARTICLE_REMOVED) continue;
size_t current_offset = index_data->offset;
/* Get the full mask */
size_t full_mask = 0;
......@@ -548,12 +471,13 @@ void csds_reader_update_particles_to_next_index_mapper(void *map_data,
}
/* Update the offset */
current_state[i].offset = last_full_offset;
index_data->offset = last_full_offset;
}
if (reader->verbose) {
/* Update the counter */
atomic_add_f(&data->percentage, num_elements / (float)data->number_particles);
atomic_add_f(&data->percentage,
num_elements / (float)data->number_particles);
/* Update the message */
if (lock_trylock(&data->print_lock)) {
......@@ -564,9 +488,9 @@ void csds_reader_update_particles_to_next_index_mapper(void *map_data,
/* Compute the remaining time */
const int current_time =
clocks_diff_ticks(getticks(), clocks_start_ticks) / 1000.0;
clocks_diff_ticks(getticks(), clocks_start_ticks) / 1000.0;
const int remaining_time =
(current_time - data->init_time) * (100. - percent) / percent;
(current_time - data->init_time) * (100. - percent) / percent;
/* Print the message */
tools_print_progress(percent, remaining_time, "Updating offsets");
......@@ -592,7 +516,7 @@ void csds_reader_update_particles_to_next_index_mapper(void *map_data,
*/
size_t csds_reader_update_state_to_next_index(
const struct csds_reader *reader, size_t init_offset,
struct time_record time_record, struct index_writer *current_state,
struct time_record time_record, struct csds_hashmap **current_state,
struct index_writer *parts_created, struct index_writer *parts_removed) {
const struct csds_logfile *log = &reader->log;
const struct header *h = &log->header;
......@@ -677,10 +601,16 @@ size_t csds_reader_update_state_to_next_index(
if (flag == csds_flag_change_type || flag == csds_flag_mpi_exit ||
flag == csds_flag_delete) {
index_writer_log(&parts_removed[part_type], id, old_offset);
index_writer_remove_part(&current_state[part_type], id);
if (csds_hashmap_delete(current_state[part_type], id) == NULL) {
error("Failed to remove a particle");
};
} else if (flag == csds_flag_create || flag == csds_flag_mpi_enter) {
index_writer_log(&parts_created[part_type], id, old_offset);
index_writer_log(&current_state[part_type], id, old_offset);
struct index_data item = {id, old_offset};
void *p = (void *)csds_hashmap_set(current_state[part_type], &item);
if (p != NULL) {
error("Already found a particle with the same ID");
}
}
}
......@@ -707,7 +637,8 @@ size_t csds_reader_update_state_to_next_index(
error("Failed to initialize the lock");
for (int type = 0; type < swift_type_count; type++) {
extra_data.number_particles += current_state[type].size;
extra_data.number_particles +=
csds_hashmap_get_number_buckets(current_state[type]);
}
/* Update the offsets of current_state
......@@ -715,10 +646,10 @@ size_t csds_reader_update_state_to_next_index(
* data about when particles are removed/created*/
for (int type = 0; type < swift_type_count; type++) {
/* Update the offsets */
threadpool_map(&tp, csds_reader_update_particles_to_next_index_mapper,
current_state[type].data, current_state[type].size,
sizeof(struct index_data), threadpool_auto_chunk_size,
&extra_data);
extra_data.current_state = current_state[type];
threadpool_map(&tp, csds_reader_update_particles_to_next_index_mapper, NULL,
csds_hashmap_get_number_buckets(extra_data.current_state), 1,
threadpool_auto_chunk_size, &extra_data);
}
/* Cleanup the output */
......@@ -765,7 +696,7 @@ void csds_reader_generate_index_files(const struct csds_reader *reader,
}
/* Create the different arrays that will store the information */
struct index_writer current_state[swift_type_count];
struct csds_hashmap *current_state[swift_type_count];
struct index_writer parts_created[swift_type_count];
struct index_writer parts_removed[swift_type_count];
const size_t default_size = 1024;
......@@ -791,9 +722,12 @@ void csds_reader_generate_index_files(const struct csds_reader *reader,
/* Allocate the arrays for the current state */
for (int i = 0; i < swift_type_count; i++) {
index_writer_init(&current_state[i],
reader->params.approximate_number_particles[i],
reader->params.arrays_maximal_tagged_fraction);
current_state[i] =
csds_hashmap_new(hashmap_overallocation *
reader->params.approximate_number_particles[i]);
if (current_state[i] == NULL) {
error("Failed to initialize the hashmap");
}
}
/* Get the initial state */
......@@ -822,13 +756,20 @@ void csds_reader_generate_index_files(const struct csds_reader *reader,
/* Loop over all the particle types */
for (int i = 0; i < swift_type_count; i++) {
/* Allocate the array for the current state */
index_writer_init(&current_state[i], index.nparts[i],
reader->params.arrays_maximal_tagged_fraction);
current_state[i] =
csds_hashmap_new(hashmap_overallocation * index.nparts[i]);
if (current_state[i] == NULL) {
error("Failed to initialize the hashmap");
}
/* Copy the index file into the arrays. */
struct index_data *data = csds_index_get_data(&index, i);
memcpy(current_state[i].data, data,
index.nparts[i] * sizeof(struct index_data));
for (size_t p = 0; p < index.nparts[i]; p++) {
struct index_data *data = csds_index_get_data(&index, i);
void *out = (void *)csds_hashmap_set(current_state[i], data + p);
if (out != NULL) {
error("Already found a particle with the same ID");
}
}
}
/* Set the last offset read */
......@@ -878,7 +819,7 @@ void csds_reader_generate_index_files(const struct csds_reader *reader,
/* Free the memory */
for (int type = 0; type < swift_type_count; type++) {
index_writer_free(&current_state[type]);
csds_hashmap_free(current_state[type]);
index_writer_free(&parts_created[type]);
index_writer_free(&parts_removed[type]);
}
......
......@@ -36,7 +36,7 @@ void quick_sort(struct index_data *data, size_t N) {
struct index_data temp;
/* Allocate a stack of operations */
int stack_size = log(N) + 1;
int stack_size = log(N) + 10;
struct qstack *qstack =
(struct qstack *)malloc(sizeof(struct qstack) * stack_size);
......@@ -45,6 +45,7 @@ void quick_sort(struct index_data *data, size_t N) {
qstack[0].hi = N - 1;
qpos = 0;
while (qpos >= 0) {
if (qpos >= stack_size) error("Quick sort stack too small");
lo = qstack[qpos].lo;
hi = qstack[qpos].hi;
qpos -= 1;
......@@ -97,22 +98,26 @@ void quick_sort(struct index_data *data, size_t N) {
if (j > (lo + hi) / 2) {
if (lo < j) {
qpos += 1;
if (qpos >= stack_size) error("Quick sort stack too small");
qstack[qpos].lo = lo;
qstack[qpos].hi = j;
}
if (i < hi) {
qpos += 1;
if (qpos >= stack_size) error("Quick sort stack too small");
qstack[qpos].lo = i;
qstack[qpos].hi = hi;
}
} else {
if (i < hi) {
qpos += 1;
if (qpos >= stack_size) error("Quick sort stack too small");
qstack[qpos].lo = i;
qstack[qpos].hi = hi;
}
if (lo < j) {
qpos += 1;
if (qpos >= stack_size) error("Quick sort stack too small");
qstack[qpos].lo = lo;
qstack[qpos].hi = j;
}
......
......@@ -21,9 +21,11 @@ AM_LDFLAGS = -L../../src/.libs/ ../src/.libs/libcsds.a $(HDF5_LDFLAGS) $(HDF5_LI
# List of programs and scripts to run in the test suite
TESTS = testLogfileHeader testLogfileReader testTimeArray testQuickSort testVirtualReality
TESTS += testHashmap
# List of test programs to compile
check_PROGRAMS = testLogfileHeader testLogfileReader testTimeArray testQuickSort testVirtualReality
check_PROGRAMS += testHashmap
# Rebuild tests when SWIFT is updated.
$(check_PROGRAMS): ../../src/.libs/libswiftsim.a ../src/.libs/libcsds.a
......@@ -34,6 +36,7 @@ testLogfileReader_SOURCES = testLogfileReader.c
testTimeArray_SOURCES = testTimeArray.c
testQuickSort_SOURCES = testQuickSort.c
testVirtualReality_SOURCES = testVirtualReality.c
testHashmap_SOURCES = testHashmap.c
# Files necessary for distribution
EXTRA_DIST = testLogfileHeader.yml testLogfileReader.yml
/*
* The file was obtained from https://github.com/tidwall/hashmap.c
* and was slightly adapted.
*/
// Copyright 2020 Joshua J Baker. All rights reserved.
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file
/* assert.h was missing although the tests below use assert(). */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* CSDS header */
#include "csds_hashmap.h"

/* SWIFT headers */
#include "error.h"
#define N 10000
static void shuffle(struct index_data *array, size_t numels) {
struct index_data tmp;
for (size_t i = 0; i < numels - 1; i++) {
int j = i + rand() / (RAND_MAX / (numels - i) + 1);
tmp = array[j];
array[j] = array[i];
array[i] = tmp;
}
}
/**
 * @brief Counts the occupied buckets by direct inspection of the table.
 *
 * Used to cross-check the map's internal counter.
 */
static size_t deepcount(struct csds_hashmap *map) {
  size_t occupied = 0;
  for (size_t b = 0; b < map->nbuckets; b++) {
    /* dib != 0 marks an occupied bucket. */
    occupied += bucket_at(map, b)->dib ? 1 : 0;
  }
  return occupied;
}
/**
 * @brief Functional test of the hashmap: set/get/delete and file i/o.
 */
static void all(void) {
  /* Build N items with id == offset == i. */
  struct index_data *vals;
  vals = malloc(N * sizeof(struct index_data));
  if (vals == NULL) {
    error("Failed to allocate the index array");
  }
  for (int i = 0; i < N; i++) {
    vals[i].id = i;
    vals[i].offset = i;
  }

  struct csds_hashmap *map;
  map = csds_hashmap_new(0);
  if (map == NULL) error("Failed to allocate the hashmap");

  shuffle(vals, N);

  /* Test addition of particles */
  for (size_t i = 0; i < N; i++) {
    assert(map->count == i);
    assert(map->count == csds_hashmap_count(map));
    assert(map->count == deepcount(map));
    assert(!csds_hashmap_get(map, vals[i].id));
    assert(!csds_hashmap_delete(map, vals[i].id));
    assert(!csds_hashmap_set(map, &vals[i]));
  }

  /* Test getting the particles */
  for (size_t i = 0; i < N; i++) {
    struct index_data *data = csds_hashmap_get(map, vals[i].id);
    assert(data);
    assert(data->offset == vals[i].offset);
    assert(data->id == vals[i].id);
  }

  /* Write the particles inside a file */
  const char *filename = "testHashmap.bin";
  FILE *f = fopen(filename, "wb");
  if (f == NULL) error("Failed to open file %s", filename);
  csds_hashmap_write(map, f);
  fclose(f);

  /* Read the particles from the file */
  f = fopen(filename, "rb");
  if (f == NULL) error("Failed to open file %s", filename);
  struct index_data *test =
      (struct index_data *)malloc(N * sizeof(struct index_data));
  if (test == NULL) error("Failed to allocate array");
  size_t count = fread(test, sizeof(struct index_data), N, f);
  assert(count == N);
  fclose(f);

  /* Test the i/o */
  for (size_t i = 0; i < N; i++) {
    struct index_data *data = csds_hashmap_get(map, test[i].id);
    assert(data);
    assert(data->id == test[i].id);
    assert(data->offset == test[i].offset);
  }

  /* Test deleting the particles */
  for (size_t i = 0; i < N; i++) {
    assert(csds_hashmap_delete(map, vals[i].id));
  }

  free(test);
  /* vals was previously leaked. */
  free(vals);
  csds_hashmap_free(map);
}
#define bench(name, N, code) \
{ \
{ \
if (strlen(name) > 0) { \
printf("%-14s ", name); \
} \
clock_t begin = clock(); \
for (int i = 0; i < N; i++) { \
(code); \
} \
clock_t end = clock(); \
double elapsed_secs = (double)(end - begin) / CLOCKS_PER_SEC; \
printf("%d ops in %.3f secs, %.0f ns/op, %.0f op/sec", N, elapsed_secs, \
elapsed_secs / (double)N * 1e9, (double)N / elapsed_secs); \
printf("\n"); \
} \
}
static void benchmarks(void) {
struct index_data *vals = malloc(N * sizeof(struct index_data));
for (int i = 0; i < N; i++) {
vals[i].id = i;
vals[i].offset = i;
}
struct csds_hashmap *map;
map = csds_hashmap_new(0);
if (map == NULL) {
error("Failed to initialize the hashmap");
}
shuffle(vals, N);
bench("set", N, {
struct index_data *v = csds_hashmap_set(map, &vals[i]);
assert(!v);
});
shuffle(vals, N);
bench("get", N, {
struct index_data *v = csds_hashmap_get(map, vals[i].id);
assert(v && v->id == vals[i].id);
});
shuffle(vals, N);
bench("delete", N, {
struct index_data *v = csds_hashmap_delete(map, vals[i].id);
assert(v && v->id == vals[i].id);
});
csds_hashmap_free(map);
map = csds_hashmap_new(N);
shuffle(vals, N);
bench("set (cap)", N, {
struct index_data *v = csds_hashmap_set(map, &vals[i]);
assert(!v);
});
shuffle(vals, N);
bench("get (cap)", N, {
struct index_data *v = csds_hashmap_get(map, vals[i].id);
assert(v && v->id == vals[i].id);
});
shuffle(vals, N);
bench("delete (cap)", N, {
struct index_data *v = csds_hashmap_delete(map, vals[i].id);
assert(v && v->id == vals[i].id);
});
csds_hashmap_free(map);
free(vals);
}
/**
 * @brief Runs the correctness tests, then the benchmarks.
 */
int main(void) {
  all();
  benchmarks();
  /* Explicit success status (implicit only since C99). */
  return 0;
}
......@@ -242,7 +242,8 @@ int main(int argc, char *argv[]) {
strcat(basename, "_0000");
csds_reader_init(&reader, basename, /* verbose */ 1,
/* number_threads */ 1,
/* number_index*/ 5);
/* number_index*/ 5,
/* restart */ 0);
/*
Finally check everything.
......
......@@ -67,7 +67,8 @@ int main(int argc, char *argv[]) {
strcat(basename, "_0000");
csds_reader_init(&reader, basename,
/* Verbose */ 2, /* number_threads */ 1,
/* number_index */ 5);
/* number_index */ 5,
/* restart */ 0);
/* Read the time limits */
double begin = csds_reader_get_time_begin(&reader);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment