Skip to content
Snippets Groups Projects
Commit eb056826 authored by Loic Hausammann's avatar Loic Hausammann
Browse files

Merge branch 'hashmap' into 'master'

Implement hashmap for generation of index

See merge request !15
parents 7941d887 c364e2aa
Branches
Tags
1 merge request!15Implement hashmap for generation of index
......@@ -38,13 +38,14 @@ GRAVITY_SRC = gravity/MultiSoftening/csds_gravity.c
# List required headers
include_HEADERS = csds_header.h csds_loader_io.h csds_particle.h csds_time.h csds_tools.h
include_HEADERS += csds_reader.h csds_logfile.h csds_index.h quick_sort.h csds_python_tools.h
include_HEADERS += csds_reader.h csds_logfile.h csds_index.h quick_sort.h csds_python_tools.h
include_HEADERS += csds_interpolation.h csds_parameters.h csds_cosmology.h csds_fields.h
include_HEADERS += csds_hashmap.h
# Common source files
AM_SOURCES = csds_header.c csds_loader_io.c csds_particle.c csds_time.c csds_tools.c csds_reader.c
AM_SOURCES += csds_logfile.c csds_index.c quick_sort.c csds_parameters.c csds_reader_generate_index.c
AM_SOURCES += csds_cosmology.c csds_fields.c
AM_SOURCES += csds_cosmology.c csds_fields.c csds_hashmap.c
if HAVEPYTHON
AM_SOURCES += csds_python_wrapper.c
......
// Copyright 2020 Joshua J Baker. All rights reserved.
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.
#include <stdio.h>
#include <string.h>
/* CSDS headers */
#include "csds_hashmap.h"
/* SWIFT headers */
#include "error.h"
/**
 * @brief returns a new hash map.
 *
 * The csds_hashmap must be freed with csds_hashmap_free().
 *
 * @param cap The default lower capacity of the csds_hashmap. Setting this to
 * zero will default to 16.
 *
 * @return The new map, or NULL on allocation failure.
 */
struct csds_hashmap *csds_hashmap_new(size_t cap) {
  /* Round the capacity up to a power of two (minimum 16) so that
   * "hash & mask" can be used instead of a modulo. */
  size_t ncap = 16;
  if (cap < ncap) {
    cap = ncap;
  } else {
    while (ncap < cap) {
      ncap *= 2;
    }
    cap = ncap;
  }

  /* A bucket is composed of the bucket header + the payload,
   * padded up to pointer alignment. */
  size_t bucketsz = sizeof(struct bucket) + sizeof(struct index_data);
  while (bucketsz & (sizeof(uintptr_t) - 1)) {
    bucketsz++;
  }

  /* Allocate the map plus two scratch buckets (spare and edata).
   * calloc zero-initializes, so no separate memset is needed. */
  size_t size = sizeof(struct csds_hashmap) + 2 * bucketsz;
  struct csds_hashmap *map = calloc(1, size);
  if (!map) {
    return NULL;
  }

  /* Set all the attributes */
  map->bucketsz = bucketsz;
  map->spare = ((char *)map) + sizeof(struct csds_hashmap);
  map->edata = (char *)map->spare + bucketsz;
  map->cap = cap;
  map->nbuckets = cap;
  map->mask = map->nbuckets - 1;

  /* Allocate the array of buckets. calloc checks n*size for overflow
   * and returns zeroed memory, i.e. all buckets start empty (dib == 0). */
  map->buckets = calloc(map->nbuckets, map->bucketsz);
  if (!map->buckets) {
    free(map);
    return NULL;
  }

  /* Set the item counts at which the map is resized. */
  map->growat = map->nbuckets / hashmap_overallocation;
  map->shrinkat = map->nbuckets * 0.10;
  return map;
}
/**
 * @brief quickly clears the map.
 *
 * When update_cap is non-zero, the map's base capacity is updated to match
 * the current number of allocated buckets. This is an optimization to ensure
 * that this operation does not perform any allocations.
 *
 * @param map The hashmap.
 * @param update_cap Whether to adopt the current bucket count as capacity.
 */
void csds_hashmap_clear(struct csds_hashmap *map, int update_cap) {
  map->count = 0;
  if (update_cap) {
    map->cap = map->nbuckets;
  } else if (map->nbuckets != map->cap) {
    /* Shrink the storage back to the base capacity. */
    void *new_buckets = malloc(map->bucketsz * map->cap);
    /* Only commit the new geometry if the allocation succeeded:
     * previously nbuckets was updated even on failure, leaving the
     * bookkeeping inconsistent with the actual buffer size. */
    if (new_buckets) {
      free(map->buckets);
      map->buckets = new_buckets;
      map->nbuckets = map->cap;
    }
  }
  /* Mark every bucket as empty (dib == 0). */
  memset(map->buckets, 0, map->bucketsz * map->nbuckets);
  map->mask = map->nbuckets - 1;
  map->growat = map->nbuckets / hashmap_overallocation;
  map->shrinkat = map->nbuckets * 0.10;
}
/**
 * @brief Resize the hash table.
 *
 * Allocates a fresh table of new_cap buckets, re-inserts every occupied
 * bucket with robin-hood probing, then steals the new bucket array.
 *
 * @param map The hashmap to resize.
 * @param new_cap The requested capacity (rounded up by csds_hashmap_new).
 * @return 1 on success, 0 if the allocation failed (map is untouched).
 */
static int resize(struct csds_hashmap *map, size_t new_cap) {
  /* Allocate a new hashmap */
  struct csds_hashmap *map2 = csds_hashmap_new(new_cap);
  if (!map2) {
    return 0;
  }
  /* Re-insert every occupied bucket of the old table into map2 */
  for (size_t i = 0; i < map->nbuckets; i++) {
    struct bucket *entry = bucket_at(map, i);
    /* Skip empty buckets */
    if (!entry->dib) {
      continue;
    }
    /* Reset the probe distance: the entry restarts at its home bucket. */
    entry->dib = 1;
    size_t j = entry->hash & map2->mask;
    /* Copy the bucket into map2 */
    while (1) {
      struct bucket *bucket = bucket_at(map2, j);
      /* Empty bucket in map2: place the entry here, done. */
      if (bucket->dib == 0) {
        memcpy(bucket, entry, map->bucketsz);
        break;
      }
      /* Occupied bucket that sits closer to its home than the entry:
       * swap them (via the spare scratch bucket) and keep probing with
       * the displaced element — classic robin-hood insertion. */
      if (bucket->dib < entry->dib) {
        memcpy(map2->spare, bucket, map->bucketsz);
        memcpy(bucket, entry, map->bucketsz);
        memcpy(entry, map2->spare, map->bucketsz);
      }
      j = (j + 1) & map2->mask;
      entry->dib += 1;
    }
  }
  /* Free the old bucket array and adopt map2's geometry. */
  free(map->buckets);
  /* Copy the data */
  map->buckets = map2->buckets;
  map->nbuckets = map2->nbuckets;
  map->mask = map2->mask;
  map->growat = map2->growat;
  map->shrinkat = map2->shrinkat;
  /* Only the shell of map2 is freed: its bucket array lives on in map. */
  free(map2);
  return 1;
}
/**
 * @brief inserts or replaces an item in the csds_hash map.
 *
 * If an item with the same id is replaced then the previous item is
 * returned, otherwise NULL is returned. The returned pointer refers to
 * the map's internal spare bucket and is only valid until the next
 * operation on the map. This operation may allocate memory.
 *
 * @param map The hashmap.
 * @param item The item to insert (copied into the map).
 */
void *csds_hashmap_set(struct csds_hashmap *map, struct index_data *item) {
  if (!item) {
    error("item is null");
  }
  /* Grow the table before it gets too full (keeps probe chains short). */
  if (map->count == map->growat) {
    if (!resize(map, map->nbuckets * 2)) {
      error("Failed to reallocate memory");
    }
  }

  /* Build a complete bucket (header + payload) in the edata scratch area. */
  struct bucket *entry = map->edata;
  entry->hash = get_hash(map, item->id);
  entry->dib = 1; /* distance-to-initial-bucket; 1 == home position */
  memcpy(bucket_item(entry), item, sizeof(struct index_data));

  /* Robin-hood insertion starting from the home bucket. */
  size_t i = entry->hash & map->mask;
  while (1) {
    struct bucket *bucket = bucket_at(map, i);
    /* Empty bucket: claim it, done. */
    if (bucket->dib == 0) {
      memcpy(bucket, entry, map->bucketsz);
      map->count++;
      return NULL;
    }
    /* Same key already stored: swap the payloads and return the old one
     * (saved in the spare scratch bucket). */
    if (entry->hash == bucket->hash &&
        bucket_item(entry)->id == bucket_item(bucket)->id) {
      memcpy(map->spare, bucket_item(bucket), sizeof(struct index_data));
      memcpy(bucket_item(bucket), bucket_item(entry),
             sizeof(struct index_data));
      return map->spare;
    }
    /* Occupied by an element closer to its home bucket: swap with it and
     * continue inserting the displaced element. */
    if (bucket->dib < entry->dib) {
      memcpy(map->spare, bucket, map->bucketsz);
      memcpy(bucket, entry, map->bucketsz);
      memcpy(entry, map->spare, map->bucketsz);
    }
    i = (i + 1) & map->mask;
    entry->dib += 1;
  }
}
/**
 * @brief Looks up the item stored under the given key.
 *
 * @param map The hashmap.
 * @param key The particle id to search for.
 * @return Pointer to the stored #index_data, or NULL when absent.
 */
struct index_data *csds_hashmap_get(struct csds_hashmap *map, id_type key) {
  const uint64_t hash = get_hash(map, key);
  /* Linear probing from the key's home bucket; an empty bucket ends
   * the probe sequence, which means the key is not present. */
  for (size_t pos = hash & map->mask;; pos = (pos + 1) & map->mask) {
    struct bucket *cur = bucket_at(map, pos);
    if (cur->dib == 0) {
      return NULL;
    }
    /* Compare the cached hash first, then the actual key. */
    if (cur->hash == hash && bucket_item(cur)->id == key) {
      return bucket_item(cur);
    }
  }
}
/**
 * @brief removes an item from the hash map and returns it. If the
 * item is not found then NULL is returned.
 *
 * The returned pointer refers to the map's internal spare bucket and is
 * only valid until the next operation on the map.
 *
 * @param map The hashmap.
 * @param key The particle id to remove.
 */
void *csds_hashmap_delete(struct csds_hashmap *map, id_type key) {
  uint64_t hash = get_hash(map, key);
  size_t i = hash & map->mask;
  while (1) {
    struct bucket *bucket = bucket_at(map, i);
    /* Empty bucket ends the probe sequence: key not found. */
    if (!bucket->dib) {
      return NULL;
    }
    /* Did we get the correct particle? */
    if (bucket->hash == hash && key == bucket_item(bucket)->id) {
      /* Save the payload so it can be returned to the caller. */
      memcpy(map->spare, bucket_item(bucket), sizeof(struct index_data));
      bucket->dib = 0;
      /* Backward-shift the following elements of the probe chain so no
       * hole is left in the middle of a run. */
      while (1) {
        struct bucket *prev = bucket;
        i = (i + 1) & map->mask;
        bucket = bucket_at(map, i);
        /* dib <= 1 means empty or already in its home bucket: stop. */
        if (bucket->dib <= 1) {
          prev->dib = 0;
          break;
        }
        memcpy(prev, bucket, map->bucketsz);
        prev->dib--;
      }
      map->count--;
      if (map->nbuckets > map->cap && map->count <= map->shrinkat) {
        // Ignore the return value. It's ok for the resize operation to
        // fail to allocate enough memory because a shrink operation
        // does not change the integrity of the data.
        resize(map, map->nbuckets / 2);
      }
      return map->spare;
    }
    /* Move to the next element with the same hash */
    i = (i + 1) & map->mask;
  }
}
/**
 * @brief Returns the number of items currently stored in the hash map.
 */
size_t csds_hashmap_count(struct csds_hashmap *map) {
  const size_t nb_items = map->count;
  return nb_items;
}
/**
 * @brief Frees the hash map and its bucket storage.
 *
 * Accepts NULL (no-op), mirroring the behaviour of free().
 */
void csds_hashmap_free(struct csds_hashmap *map) {
  if (map == NULL) {
    return;
  }
  free(map->buckets);
  free(map);
}
/**
* @brief Writes the hash map
*
* @param map The hashmap.
* @param f The file to use.
*/
void csds_hashmap_write(struct csds_hashmap *map, FILE *f) {
size_t count = 0;
/* Loop over all the buckets */
for (size_t i = 0; i < map->nbuckets; i++) {
struct bucket *bucket = bucket_at(map, i);
/* Do only the non-empty buckets */
if (bucket->dib) {
count += 1;
fwrite(bucket_item(bucket), sizeof(struct index_data), 1, f);
}
}
/* Ensure that the correct number of elements
* have been written. */
if (count != map->count) {
error("Written a wrong number of elements.");
}
}
/**
 * @brief Try to get the item stored in a given bucket.
 * Returns NULL if the bucket is empty.
 *
 * @param map The hashmap.
 * @param i The requested bucket index.
 */
struct index_data *csds_hashmap_get_from_position(struct csds_hashmap *map,
                                                  size_t i) {
  struct bucket *b = bucket_at(map, i);
  /* dib == 0 marks an empty bucket. */
  return b->dib ? bucket_item(b) : NULL;
}
/**
 * @brief Returns the total number of buckets (occupied or not).
 */
size_t csds_hashmap_get_number_buckets(struct csds_hashmap *map) {
  const size_t nbuckets = map->nbuckets;
  return nbuckets;
}
/*
* The file was obtained from https://github.com/tidwall/hashmap.c
* and was slightly adapted.
*/
// Copyright 2020 Joshua J Baker. All rights reserved.
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.
#ifndef CSDS_HASHMAP_H
#define CSDS_HASHMAP_H
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
/* SWIFT headers */
#include "inline.h"
/* Type used for the particle ids. */
#define id_type int64_t
/* Over-allocation factor of the hashmap: the table grows once it is
 * 3/4 full. The value MUST be parenthesized: without the parentheses,
 * `nbuckets / hashmap_overallocation` expanded to `nbuckets / 4. / 3.`
 * (= nbuckets / 12) instead of the intended nbuckets * 3 / 4, making
 * growat far too small and triggering resizes 9x too early. */
#define hashmap_overallocation (4. / 3.)
/* Seeds for the SipHash hash function. */
#define hashmap_seed0 1513
#define hashmap_seed1 7654
/**
 * @brief Data structure contained in the csds files.
 *
 * This is also the payload stored in each #csds_hashmap bucket and the
 * record written to disk by csds_hashmap_write().
 */
struct index_data {
  /* Id of the particle (hash key of the map). */
  id_type id;
  /* Offset of the particle in the file. */
  uint64_t offset;
};
/**
 * hashmap is an open addressed hash map using robinhood hashing.
 *
 * Each bucket consists of a #bucket header immediately followed by one
 * #index_data payload (bucketsz bytes in total, padded to pointer size).
 */
struct csds_hashmap {
  /* The base capacity of the hashmap (bucket count it returns to on
   * csds_hashmap_clear). */
  size_t cap;
  /* The size in bytes of each bucket (header + payload + padding). */
  size_t bucketsz;
  /* The current number of buckets (always a power of two). */
  size_t nbuckets;
  /* The number of stored items. */
  size_t count;
  /* Bit mask (nbuckets - 1) restricting a hash to a valid bucket index. */
  size_t mask;
  /* Item count at which the hashmap doubles its size. */
  size_t growat;
  /* Item count at which the hashmap halves its size. */
  size_t shrinkat;
  /* The array of buckets. */
  void *buckets;
  /* A spare bucket used as scratch space by some operations. */
  void *spare;
  /* A second spare bucket used when setting an item. */
  void *edata;
};
struct bucket {
  /* Lower 48 bits of the item's hash value (see get_hash()). */
  uint64_t hash : 48;
  /* Robin-hood probe distance ("distance to initial bucket" + 1):
   * 0 marks an empty bucket, 1 an item sitting in its home bucket.
   * (The previous comment, "number of elements with this hash value",
   * did not match how set/resize/delete use this field.) */
  uint64_t dib : 16;
};
struct csds_hashmap *csds_hashmap_new(size_t cap);
void csds_hashmap_free(struct csds_hashmap *map);
void csds_hashmap_clear(struct csds_hashmap *map, int update_cap);
size_t csds_hashmap_count(struct csds_hashmap *map);
struct index_data *csds_hashmap_get(struct csds_hashmap *map, id_type key);
void *csds_hashmap_set(struct csds_hashmap *map, struct index_data *item);
void *csds_hashmap_delete(struct csds_hashmap *map, id_type key);
void csds_hashmap_write(struct csds_hashmap *map, FILE *f);
size_t csds_hashmap_get_number_buckets(struct csds_hashmap *map);
struct index_data *csds_hashmap_get_from_position(struct csds_hashmap *map,
size_t i);
/**
 * @brief Returns a pointer to the bucket at a given index.
 */
__attribute__((always_inline)) INLINE static struct bucket *bucket_at(
    struct csds_hashmap *map, size_t index) {
  char *base = (char *)map->buckets;
  return (struct bucket *)(base + index * map->bucketsz);
}
/**
 * @brief Returns the payload stored just after a bucket header.
 */
__attribute__((always_inline)) INLINE static struct index_data *bucket_item(
    struct bucket *entry) {
  return (struct index_data *)((char *)entry + sizeof(struct bucket));
}
//-----------------------------------------------------------------------------
// SipHash reference C implementation
//
// Copyright (c) 2012-2016 Jean-Philippe Aumasson
// <jeanphilippe.aumasson@gmail.com>
// Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
//
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
//
// You should have received a copy of the CC0 Public Domain Dedication along
// with this software. If not, see
// <http://creativecommons.org/publicdomain/zero/1.0/>.
//
// default: SipHash-2-4
//-----------------------------------------------------------------------------
/* Reference SipHash-2-4 of the raw bytes of an id. Left unmodified on
 * purpose: this is a vetted reference implementation. */
static uint64_t SIP64(const id_type *key) {
  const uint8_t *in = (uint8_t *)key;
  uint64_t seed0 = hashmap_seed0;
  uint64_t seed1 = hashmap_seed1;
  const int inlen = sizeof(id_type);
#define U8TO64_LE(p)                                           \
  {                                                            \
    (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) |        \
     ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \
     ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \
     ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))  \
  }
#define U64TO8_LE(p, v)                        \
  {                                            \
    U32TO8_LE((p), (uint32_t)((v)));           \
    U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); \
  }
#define U32TO8_LE(p, v)          \
  {                              \
    (p)[0] = (uint8_t)((v));     \
    (p)[1] = (uint8_t)((v) >> 8);  \
    (p)[2] = (uint8_t)((v) >> 16); \
    (p)[3] = (uint8_t)((v) >> 24); \
  }
#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
#define SIPROUND       \
  {                    \
    v0 += v1;          \
    v1 = ROTL(v1, 13); \
    v1 ^= v0;          \
    v0 = ROTL(v0, 32); \
    v2 += v3;          \
    v3 = ROTL(v3, 16); \
    v3 ^= v2;          \
    v0 += v3;          \
    v3 = ROTL(v3, 21); \
    v3 ^= v0;          \
    v2 += v1;          \
    v1 = ROTL(v1, 17); \
    v1 ^= v2;          \
    v2 = ROTL(v2, 32); \
  }
  /* Initialize the internal state from the two seeds and the standard
   * SipHash constants ("somepseudorandomlygeneratedbytes"). */
  uint64_t k0 = U8TO64_LE((uint8_t *)&seed0);
  uint64_t k1 = U8TO64_LE((uint8_t *)&seed1);
  uint64_t v3 = UINT64_C(0x7465646279746573) ^ k1;
  uint64_t v2 = UINT64_C(0x6c7967656e657261) ^ k0;
  uint64_t v1 = UINT64_C(0x646f72616e646f6d) ^ k1;
  uint64_t v0 = UINT64_C(0x736f6d6570736575) ^ k0;
  /* Compression: absorb the input 8 bytes at a time (2 rounds each). */
  const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
  for (; in != end; in += 8) {
    uint64_t m = U8TO64_LE(in);
    v3 ^= m;
    SIPROUND;
    SIPROUND;
    v0 ^= m;
  }
  /* Pack the remaining (< 8) bytes and the input length into one word. */
  const int left = inlen & 7;
  uint64_t b = ((uint64_t)inlen) << 56;
  switch (left) {
    case 7:
      b |= ((uint64_t)in[6]) << 48;
      break;
    case 6:
      b |= ((uint64_t)in[5]) << 40;
      break;
    case 5:
      b |= ((uint64_t)in[4]) << 32;
      break;
    case 4:
      b |= ((uint64_t)in[3]) << 24;
      break;
    case 3:
      b |= ((uint64_t)in[2]) << 16;
      break;
    case 2:
      b |= ((uint64_t)in[1]) << 8;
      break;
    case 1:
      b |= ((uint64_t)in[0]);
      break;
    case 0:
      break;
  }
  /* Absorb the final block, then run the 4 finalization rounds. */
  v3 ^= b;
  SIPROUND;
  SIPROUND;
  v0 ^= b;
  v2 ^= 0xff;
  SIPROUND;
  SIPROUND;
  SIPROUND;
  SIPROUND;
  b = v0 ^ v1 ^ v2 ^ v3;
  uint64_t out = 0;
  U64TO8_LE((uint8_t *)&out, b);
  return out;
}
// Returns the hash of an id using SipHash-2-4, truncated to the
// 48 bits that fit in bucket::hash.
__attribute__((always_inline)) INLINE static uint64_t get_hash(
    struct csds_hashmap *map, id_type x) {
  /* Use a 64-bit constant for the shift: the previous `1LU << 48` is
   * undefined behaviour on platforms where unsigned long is 32 bits. */
  return SIP64(&x) & ((UINT64_C(1) << 48) - 1);
}
#endif
......@@ -20,23 +20,13 @@
#ifndef CSDS_CSDS_INDEX_H
#define CSDS_CSDS_INDEX_H
#include "csds_hashmap.h"
#include "csds_loader_io.h"
#include "csds_tools.h"
/* predefine the structure */
struct csds_reader;
/**
* @brief Data structure contained in the csds files.
*/
struct index_data {
/* Id of the particle. */
int64_t id;
/* Offset of the particle in the file. */
uint64_t offset;
};
/**
* @brief Structure dealing with the index files.
*
......
......@@ -21,13 +21,10 @@
#include "csds_reader.h"
/* Include local headers */
#include "csds_hashmap.h"
#include "csds_index.h"
#include "csds_logfile.h"
/* This value of offset is used to tag
the particles as being removed */
#define PARTICLE_REMOVED 0
/**
* @brief Structure that contains all the information
* required to write an index file for a single particle type.
......@@ -160,90 +157,6 @@ void index_writer_log(struct index_writer *writer, const int64_t id,
/* Increase the element counter */
writer->size += 1;
// TODO sort
}
/**
* @brief Cleanup the array from the tagged particles if needed.
*
* @param writer The #index_writer.
* @param force Force the removal?
*/
void index_writer_remove_tagged_particles(struct index_writer *writer,
const int force) {
/* No need to clean the empty arrays. */
if (writer->size == 0) {
return;
}
/* Do we need to clean? */
const float frac = (float)writer->number_tag / (float)writer->size;
if (!force && frac < writer->max_frac_tag) return;
/* Clean */
size_t count = 0;
for (size_t i = 0; i < writer->size; i++) {
if (writer->data[i].offset != PARTICLE_REMOVED) continue;
count += 1;
/* Now replace it with the last particle. */
writer->data[i] = writer->data[writer->size - 1];
writer->size--;
/* We need to ensure that the last particle was not
* flagged */
i--;
}
/* Check that we removed all the particles. */
if (count != writer->number_tag) {
error("A tagged particle is missing.");
}
// TODO sort the particles
#ifdef SWIFT_DEBUG_CHECKS
for (size_t i = 0; i < writer->size; i++) {
if (writer->data[i].offset == PARTICLE_REMOVED)
error("Found a tagged particle after cleaning.");
}
#endif
/* Reset the counter */
writer->number_tag = 0;
}
/**
* @brief Remove the particles contained in parts_removed from current_state.
*
* @param current_state The #index_writer where we remove the particle
* @param part_id The ID of the particle.
*/
void index_writer_remove_part(struct index_writer *current_state,
int64_t part_id) {
// TODO use a binary search + ensure ids are sorted
// Do not forget to write in the index file that the files are sorted
for (size_t j = 0; j < current_state->size; j++) {
if (current_state->data[j].offset == PARTICLE_REMOVED ||
part_id != current_state->data[j].id) {
continue;
}
/* We have the particle, now tag it.
* In order to use the binary search, we cannot afford
* to swap the particle with the last one. */
current_state->data[j].offset = PARTICLE_REMOVED;
current_state->number_tag++;
/* Trigger a cleaning if needed */
index_writer_remove_tagged_particles(current_state, /* force */ 0);
return;
}
error("Trying to remove an unknown particle.");
}
/**
......@@ -294,7 +207,7 @@ void index_writer_write_in_index(const struct index_writer *writers, FILE *f) {
* @param file_number The current file number.
*/
void csds_reader_write_index(const struct csds_reader *reader,
struct index_writer *current_state,
struct csds_hashmap **current_state,
struct index_writer *parts_created,
struct index_writer *parts_removed,
const struct time_record *time, int file_number) {
......@@ -303,14 +216,11 @@ void csds_reader_write_index(const struct csds_reader *reader,
char filename[STRING_SIZE + 15];
sprintf(filename, "%s_%04i.index", reader->basename, file_number);
/* Trigger a cleaning of the arrays */
// TODO Remove this and skip the particles when writing?
for (int i = 0; i < swift_type_count; i++) {
index_writer_remove_tagged_particles(&current_state[i], /* force */ 1);
}
/* Check that we have only implemented particles */
index_writer_check_implemented(current_state);
if (csds_hashmap_count(current_state[swift_type_sink]) != 0 ||
csds_hashmap_count(current_state[swift_type_black_hole]) != 0 ||
csds_hashmap_count(current_state[swift_type_neutrino]) != 0)
error("Not implemented");
index_writer_check_implemented(parts_created);
index_writer_check_implemented(parts_removed);
......@@ -330,7 +240,7 @@ void csds_reader_write_index(const struct csds_reader *reader,
/* Write number of particles */
uint64_t N_total[swift_type_count];
for (int type = 0; type < swift_type_count; type++) {
N_total[type] = current_state[type].size;
N_total[type] = csds_hashmap_count(current_state[type]);
}
fwrite(N_total, sizeof(uint64_t), swift_type_count, f);
......@@ -348,12 +258,12 @@ void csds_reader_write_index(const struct csds_reader *reader,
fwrite(&tmp, d_align, 1, f);
}
/* Write the arrays */
/* Write the current state */
for (int type = 0; type < swift_type_count; type++) {
if (N_total[type] == 0) continue;
fwrite(current_state[type].data, sizeof(struct index_data),
current_state[type].size, f);
// TODO memory map the file
csds_hashmap_write(current_state[type], f);
}
/* Now do the same with the particles created / removed */
......@@ -383,7 +293,7 @@ void csds_reader_write_index(const struct csds_reader *reader,
* (the first record that does not correspond to the IC).
*/
size_t csds_reader_get_initial_state(const struct csds_reader *reader,
struct index_writer *current_state,
struct csds_hashmap **current_state,
struct time_record *time_record) {
/* Get a few variables. */
......@@ -443,7 +353,11 @@ size_t csds_reader_get_initial_state(const struct csds_reader *reader,
/* derivative */ 0, &mask, &prev_offset);
/* Log the particle */
index_writer_log(&current_state[part_type], id, offset);
struct index_data item = {id, offset};
void *p = (void *)csds_hashmap_set(current_state[part_type], &item);
if (p != NULL) {
error("Already found a particle with the same ID");
}
/* Increment the offset */
const int record_size = header_get_record_size_from_mask(h, mask);
......@@ -475,6 +389,9 @@ struct update_particle_data {
/* Time when starting the update. */
int init_time;
/* The hashmap to udpate */
struct csds_hashmap *current_state;
};
/**
......@@ -492,18 +409,24 @@ void csds_reader_update_particles_to_next_index_mapper(void *map_data,
void *extra_data) {
/* Get a few pointers */
struct index_data *current_state = (struct index_data *)map_data;
struct update_particle_data *data = (struct update_particle_data *)extra_data;
const struct csds_reader *reader = data->reader;
const struct csds_logfile *log = &reader->log;
const struct header *h = &log->header;
struct csds_hashmap *current_state = data->current_state;
/* Loop over the particles */
for (int i = 0; i < num_elements; i++) {
size_t current_offset = current_state[i].offset;
for (int local = 0; local < num_elements; local++) {
size_t i = (size_t)map_data + local;
struct index_data *index_data =
csds_hashmap_get_from_position(current_state, i);
/* Did we get an item? */
if (index_data == NULL) {
continue;
}
/* Skip the flagged particles. */
if (current_offset == PARTICLE_REMOVED) continue;
size_t current_offset = index_data->offset;
/* Get the full mask */
size_t full_mask = 0;
......@@ -548,12 +471,13 @@ void csds_reader_update_particles_to_next_index_mapper(void *map_data,
}
/* Update the offset */
current_state[i].offset = last_full_offset;
index_data->offset = last_full_offset;
}
if (reader->verbose) {
/* Update the counter */
atomic_add_f(&data->percentage, num_elements / (float)data->number_particles);
atomic_add_f(&data->percentage,
num_elements / (float)data->number_particles);
/* Update the message */
if (lock_trylock(&data->print_lock)) {
......@@ -564,9 +488,9 @@ void csds_reader_update_particles_to_next_index_mapper(void *map_data,
/* Compute the remaining time */
const int current_time =
clocks_diff_ticks(getticks(), clocks_start_ticks) / 1000.0;
clocks_diff_ticks(getticks(), clocks_start_ticks) / 1000.0;
const int remaining_time =
(current_time - data->init_time) * (100. - percent) / percent;
(current_time - data->init_time) * (100. - percent) / percent;
/* Print the message */
tools_print_progress(percent, remaining_time, "Updating offsets");
......@@ -592,7 +516,7 @@ void csds_reader_update_particles_to_next_index_mapper(void *map_data,
*/
size_t csds_reader_update_state_to_next_index(
const struct csds_reader *reader, size_t init_offset,
struct time_record time_record, struct index_writer *current_state,
struct time_record time_record, struct csds_hashmap **current_state,
struct index_writer *parts_created, struct index_writer *parts_removed) {
const struct csds_logfile *log = &reader->log;
const struct header *h = &log->header;
......@@ -677,10 +601,16 @@ size_t csds_reader_update_state_to_next_index(
if (flag == csds_flag_change_type || flag == csds_flag_mpi_exit ||
flag == csds_flag_delete) {
index_writer_log(&parts_removed[part_type], id, old_offset);
index_writer_remove_part(&current_state[part_type], id);
if (csds_hashmap_delete(current_state[part_type], id) == NULL) {
error("Failed to remove a particle");
};
} else if (flag == csds_flag_create || flag == csds_flag_mpi_enter) {
index_writer_log(&parts_created[part_type], id, old_offset);
index_writer_log(&current_state[part_type], id, old_offset);
struct index_data item = {id, old_offset};
void *p = (void *)csds_hashmap_set(current_state[part_type], &item);
if (p != NULL) {
error("Already found a particle with the same ID");
}
}
}
......@@ -707,7 +637,8 @@ size_t csds_reader_update_state_to_next_index(
error("Failed to initialize the lock");
for (int type = 0; type < swift_type_count; type++) {
extra_data.number_particles += current_state[type].size;
extra_data.number_particles +=
csds_hashmap_get_number_buckets(current_state[type]);
}
/* Update the offsets of current_state
......@@ -715,10 +646,10 @@ size_t csds_reader_update_state_to_next_index(
* data about when particles are removed/created*/
for (int type = 0; type < swift_type_count; type++) {
/* Update the offsets */
threadpool_map(&tp, csds_reader_update_particles_to_next_index_mapper,
current_state[type].data, current_state[type].size,
sizeof(struct index_data), threadpool_auto_chunk_size,
&extra_data);
extra_data.current_state = current_state[type];
threadpool_map(&tp, csds_reader_update_particles_to_next_index_mapper, NULL,
csds_hashmap_get_number_buckets(extra_data.current_state), 1,
threadpool_auto_chunk_size, &extra_data);
}
/* Cleanup the output */
......@@ -765,7 +696,7 @@ void csds_reader_generate_index_files(const struct csds_reader *reader,
}
/* Create the different arrays that will store the information */
struct index_writer current_state[swift_type_count];
struct csds_hashmap *current_state[swift_type_count];
struct index_writer parts_created[swift_type_count];
struct index_writer parts_removed[swift_type_count];
const size_t default_size = 1024;
......@@ -791,9 +722,12 @@ void csds_reader_generate_index_files(const struct csds_reader *reader,
/* Allocate the arrays for the current state */
for (int i = 0; i < swift_type_count; i++) {
index_writer_init(&current_state[i],
reader->params.approximate_number_particles[i],
reader->params.arrays_maximal_tagged_fraction);
current_state[i] =
csds_hashmap_new(hashmap_overallocation *
reader->params.approximate_number_particles[i]);
if (current_state[i] == NULL) {
error("Failed to initialize the hashmap");
}
}
/* Get the initial state */
......@@ -822,13 +756,20 @@ void csds_reader_generate_index_files(const struct csds_reader *reader,
/* Loop over all the particle types */
for (int i = 0; i < swift_type_count; i++) {
/* Allocate the array for the current state */
index_writer_init(&current_state[i], index.nparts[i],
reader->params.arrays_maximal_tagged_fraction);
current_state[i] =
csds_hashmap_new(hashmap_overallocation * index.nparts[i]);
if (current_state[i] == NULL) {
error("Failed to initialize the hashmap");
}
/* Copy the index file into the arrays. */
struct index_data *data = csds_index_get_data(&index, i);
memcpy(current_state[i].data, data,
index.nparts[i] * sizeof(struct index_data));
for (size_t p = 0; p < index.nparts[i]; p++) {
struct index_data *data = csds_index_get_data(&index, i);
void *out = (void *)csds_hashmap_set(current_state[i], data + p);
if (out != NULL) {
error("Already found a particle with the same ID");
}
}
}
/* Set the last offset read */
......@@ -878,7 +819,7 @@ void csds_reader_generate_index_files(const struct csds_reader *reader,
/* Free the memory */
for (int type = 0; type < swift_type_count; type++) {
index_writer_free(&current_state[type]);
csds_hashmap_free(current_state[type]);
index_writer_free(&parts_created[type]);
index_writer_free(&parts_removed[type]);
}
......
......@@ -36,7 +36,7 @@ void quick_sort(struct index_data *data, size_t N) {
struct index_data temp;
/* Allocate a stack of operations */
int stack_size = log(N) + 1;
int stack_size = log(N) + 10;
struct qstack *qstack =
(struct qstack *)malloc(sizeof(struct qstack) * stack_size);
......@@ -45,6 +45,7 @@ void quick_sort(struct index_data *data, size_t N) {
qstack[0].hi = N - 1;
qpos = 0;
while (qpos >= 0) {
if (qpos >= stack_size) error("Quick sort stack too small");
lo = qstack[qpos].lo;
hi = qstack[qpos].hi;
qpos -= 1;
......@@ -97,22 +98,26 @@ void quick_sort(struct index_data *data, size_t N) {
if (j > (lo + hi) / 2) {
if (lo < j) {
qpos += 1;
if (qpos >= stack_size) error("Quick sort stack too small");
qstack[qpos].lo = lo;
qstack[qpos].hi = j;
}
if (i < hi) {
qpos += 1;
if (qpos >= stack_size) error("Quick sort stack too small");
qstack[qpos].lo = i;
qstack[qpos].hi = hi;
}
} else {
if (i < hi) {
qpos += 1;
if (qpos >= stack_size) error("Quick sort stack too small");
qstack[qpos].lo = i;
qstack[qpos].hi = hi;
}
if (lo < j) {
qpos += 1;
if (qpos >= stack_size) error("Quick sort stack too small");
qstack[qpos].lo = lo;
qstack[qpos].hi = j;
}
......
......@@ -21,9 +21,11 @@ AM_LDFLAGS = -L../../src/.libs/ ../src/.libs/libcsds.a $(HDF5_LDFLAGS) $(HDF5_LI
# List of programs and scripts to run in the test suite
TESTS = testLogfileHeader testLogfileReader testTimeArray testQuickSort testVirtualReality
TESTS += testHashmap
# List of test programs to compile
check_PROGRAMS = testLogfileHeader testLogfileReader testTimeArray testQuickSort testVirtualReality
check_PROGRAMS += testHashmap
# Rebuild tests when SWIFT is updated.
$(check_PROGRAMS): ../../src/.libs/libswiftsim.a ../src/.libs/libcsds.a
......@@ -34,6 +36,7 @@ testLogfileReader_SOURCES = testLogfileReader.c
testTimeArray_SOURCES = testTimeArray.c
testQuickSort_SOURCES = testQuickSort.c
testVirtualReality_SOURCES = testVirtualReality.c
testHashmap_SOURCES = testHashmap.c
# Files necessary for distribution
EXTRA_DIST = testLogfileHeader.yml testLogfileReader.yml
/*
* The file was obtained from https://github.com/tidwall/hashmap.c
* and was slightly adapted.
*/
// Copyright 2020 Joshua J Baker. All rights reserved.
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file
/* assert.h was missing although the tests below use assert(). */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* CSDS header */
#include "csds_hashmap.h"

/* SWIFT headers */
#include "error.h"
#define N 10000
static void shuffle(struct index_data *array, size_t numels) {
struct index_data tmp;
for (size_t i = 0; i < numels - 1; i++) {
int j = i + rand() / (RAND_MAX / (numels - i) + 1);
tmp = array[j];
array[j] = array[i];
array[i] = tmp;
}
}
/**
 * @brief Counts the occupied buckets by direct inspection of the table.
 *
 * Used to cross-check the map's internal counter.
 */
static size_t deepcount(struct csds_hashmap *map) {
  size_t occupied = 0;
  for (size_t b = 0; b < map->nbuckets; b++) {
    /* dib != 0 marks an occupied bucket. */
    occupied += bucket_at(map, b)->dib ? 1 : 0;
  }
  return occupied;
}
/**
 * @brief Functional test of the hashmap: set/get/delete and file i/o.
 */
static void all(void) {
  /* Build N items with id == offset == i. */
  struct index_data *vals;
  vals = malloc(N * sizeof(struct index_data));
  if (vals == NULL) {
    error("Failed to allocate the index array");
  }
  for (int i = 0; i < N; i++) {
    vals[i].id = i;
    vals[i].offset = i;
  }

  struct csds_hashmap *map;
  map = csds_hashmap_new(0);
  if (map == NULL) error("Failed to allocate the hashmap");

  shuffle(vals, N);

  /* Test addition of particles */
  for (size_t i = 0; i < N; i++) {
    assert(map->count == i);
    assert(map->count == csds_hashmap_count(map));
    assert(map->count == deepcount(map));
    assert(!csds_hashmap_get(map, vals[i].id));
    assert(!csds_hashmap_delete(map, vals[i].id));
    assert(!csds_hashmap_set(map, &vals[i]));
  }

  /* Test getting the particles */
  for (size_t i = 0; i < N; i++) {
    struct index_data *data = csds_hashmap_get(map, vals[i].id);
    assert(data);
    assert(data->offset == vals[i].offset);
    assert(data->id == vals[i].id);
  }

  /* Write the particles inside a file */
  const char *filename = "testHashmap.bin";
  FILE *f = fopen(filename, "wb");
  if (f == NULL) error("Failed to open file %s", filename);
  csds_hashmap_write(map, f);
  fclose(f);

  /* Read the particles from the file */
  f = fopen(filename, "rb");
  if (f == NULL) error("Failed to open file %s", filename);
  struct index_data *test =
      (struct index_data *)malloc(N * sizeof(struct index_data));
  if (test == NULL) error("Failed to allocate array");
  size_t count = fread(test, sizeof(struct index_data), N, f);
  assert(count == N);
  fclose(f);

  /* Test the i/o */
  for (size_t i = 0; i < N; i++) {
    struct index_data *data = csds_hashmap_get(map, test[i].id);
    assert(data);
    assert(data->id == test[i].id);
    assert(data->offset == test[i].offset);
  }

  /* Test deleting the particles */
  for (size_t i = 0; i < N; i++) {
    assert(csds_hashmap_delete(map, vals[i].id));
  }

  free(test);
  /* vals was previously leaked. */
  free(vals);
  csds_hashmap_free(map);
}
#define bench(name, N, code) \
{ \
{ \
if (strlen(name) > 0) { \
printf("%-14s ", name); \
} \
clock_t begin = clock(); \
for (int i = 0; i < N; i++) { \
(code); \
} \
clock_t end = clock(); \
double elapsed_secs = (double)(end - begin) / CLOCKS_PER_SEC; \
printf("%d ops in %.3f secs, %.0f ns/op, %.0f op/sec", N, elapsed_secs, \
elapsed_secs / (double)N * 1e9, (double)N / elapsed_secs); \
printf("\n"); \
} \
}
static void benchmarks(void) {
struct index_data *vals = malloc(N * sizeof(struct index_data));
for (int i = 0; i < N; i++) {
vals[i].id = i;
vals[i].offset = i;
}
struct csds_hashmap *map;
map = csds_hashmap_new(0);
if (map == NULL) {
error("Failed to initialize the hashmap");
}
shuffle(vals, N);
bench("set", N, {
struct index_data *v = csds_hashmap_set(map, &vals[i]);
assert(!v);
});
shuffle(vals, N);
bench("get", N, {
struct index_data *v = csds_hashmap_get(map, vals[i].id);
assert(v && v->id == vals[i].id);
});
shuffle(vals, N);
bench("delete", N, {
struct index_data *v = csds_hashmap_delete(map, vals[i].id);
assert(v && v->id == vals[i].id);
});
csds_hashmap_free(map);
map = csds_hashmap_new(N);
shuffle(vals, N);
bench("set (cap)", N, {
struct index_data *v = csds_hashmap_set(map, &vals[i]);
assert(!v);
});
shuffle(vals, N);
bench("get (cap)", N, {
struct index_data *v = csds_hashmap_get(map, vals[i].id);
assert(v && v->id == vals[i].id);
});
shuffle(vals, N);
bench("delete (cap)", N, {
struct index_data *v = csds_hashmap_delete(map, vals[i].id);
assert(v && v->id == vals[i].id);
});
csds_hashmap_free(map);
free(vals);
}
/**
 * @brief Runs the correctness tests, then the benchmarks.
 */
int main(void) {
  all();
  benchmarks();
  /* Explicit success status (implicit only since C99). */
  return 0;
}
......@@ -242,7 +242,8 @@ int main(int argc, char *argv[]) {
strcat(basename, "_0000");
csds_reader_init(&reader, basename, /* verbose */ 1,
/* number_threads */ 1,
/* number_index*/ 5);
/* number_index*/ 5,
/* restart */ 0);
/*
Finally check everything.
......
......@@ -67,7 +67,8 @@ int main(int argc, char *argv[]) {
strcat(basename, "_0000");
csds_reader_init(&reader, basename,
/* Verbose */ 2, /* number_threads */ 1,
/* number_index */ 5);
/* number_index */ 5,
/* restart */ 0);
/* Read the time limits */
double begin = csds_reader_get_time_begin(&reader);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment