/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2019 James Willis (james.s.willis@durham.ac.uk)
* Pedro Gonnet (pedro.gonnet@gmail.com)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
/*
* Generic hashmap manipulation functions
*
* Originally by Elliot C Back -
* http://elliottback.com/wp/hashmap-implementation-in-c/
*
* Modified by Pete Warden to fix a serious performance problem, support strings
* as keys and removed thread synchronization - http://petewarden.typepad.com
*/
#include "hashmap.h"
#include "error.h"
#include "memuse.h"
#include <float.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define INITIAL_NUM_CHUNKS (1)
#define HASHMAP_GROWTH_FACTOR (2)
#define HASHMAP_MAX_FILL_RATIO (1.0)
#define HASHMAP_MAX_CHUNK_FILL_RATIO (0.5)
/**
 * @brief Pre-allocate a number of chunks for the graveyard.
 *
 * @param m The hashmap.
 * @param num_chunks The number of chunks to allocate and link into the
 *        graveyard.
 */
void hashmap_allocate_chunks(hashmap_t *m, int num_chunks) {
/* Allocate a fresh set of chunks. */
hashmap_chunk_t *alloc;
if ((alloc = (hashmap_chunk_t *)swift_calloc(
"hashmap", num_chunks, sizeof(hashmap_chunk_t))) == NULL) {
error("Unable to allocate chunks.");
}
/* Hook up the alloc, so that we can clean it up later. */
if (m->allocs_count == m->allocs_size) {
m->allocs_size *= 2;
void **new_allocs =
(void **)swift_malloc("hashmap", sizeof(void *) * m->allocs_size);
if (new_allocs == NULL) error("Unable to allocate new chunks.");
memcpy(new_allocs, m->allocs, sizeof(void *) * m->allocs_count);
swift_free("hashmap", m->allocs);
m->allocs = new_allocs;
}
m->allocs[m->allocs_count++] = alloc;
/* Link the chunks together. */
for (int k = 0; k < num_chunks - 1; k++) {
alloc[k].next = &alloc[k + 1];
}
/* Last chunk points to current graveyard. */
alloc[num_chunks - 1].next = m->graveyard;
/* Graveyard points to first new chunk. */
m->graveyard = alloc;
}
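/**
 * @brief Initializes an empty hashmap.
 *
 * Typical usage (an illustrative sketch only):
 * @code
 *   hashmap_t m;
 *   hashmap_init(&m);
 *   hashmap_value_t *v = hashmap_get(&m, (hashmap_key_t)42);  // created if missing
 *   hashmap_free(&m);
 * @endcode
 *
 * @param m The hashmap to initialize.
 */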
void hashmap_init(hashmap_t *m) {
/* Allocate the first (empty) list of chunks. */
m->nr_chunks = INITIAL_NUM_CHUNKS;
if ((m->chunks = (hashmap_chunk_t **)swift_calloc(
"hashmap", m->nr_chunks, sizeof(hashmap_chunk_t *))) == NULL) {
error("Unable to allocate hashmap chunks.");
}
/* The graveyard is currently empty. */
m->graveyard = NULL;
m->allocs = NULL;
/* Init the array of allocations. */
m->allocs_size = HASHMAP_ALLOCS_INITIAL_SIZE;
if ((m->allocs = (void **)swift_malloc(
"hashmap", sizeof(void *) * m->allocs_size)) == NULL) {
error("Unable to allocate allocs pointer array.");
}
m->allocs_count = 0;
/* Set initial sizes. */
m->table_size = m->nr_chunks * HASHMAP_ELEMENTS_PER_CHUNK;
m->size = 0;
  /* Report the initial allocation if debugging output is enabled. */
if (HASHMAP_DEBUG_OUTPUT) {
    message(
        "Created hash table of %zu bytes (%zu bytes per element). Allocated "
        "%zu empty chunks.",
        m->table_size * sizeof(hashmap_element_t), sizeof(hashmap_element_t),
        m->nr_chunks);
}
}
/**
* @brief Put a used chunk back into the recycling bin.
*/
void hashmap_release_chunk(hashmap_t *m, hashmap_chunk_t *chunk) {
/* Clear all the chunk's data. */
memset(chunk, 0, sizeof(hashmap_chunk_t));
/* Hook it up with the other stiffs in the graveyard. */
chunk->next = m->graveyard;
m->graveyard = chunk;
}
/**
* @brief Return a new chunk, either recycled or freshly allocated.
*/
hashmap_chunk_t *hashmap_get_chunk(hashmap_t *m) {
int num_chunks = m->nr_chunks * HASHMAP_ALLOC_SIZE_FRACTION;
if (!num_chunks) num_chunks = 1;
if (m->graveyard == NULL) {
hashmap_allocate_chunks(m, num_chunks);
}
hashmap_chunk_t *res = (hashmap_chunk_t *)m->graveyard;
m->graveyard = (hashmap_chunk_t *)res->next;
res->next = NULL;
return res;
}
/**
 * @brief Looks for the given key and returns a pointer to the corresponding
 * element.
 *
 * The returned element is either the one that already existed in the hashmap,
 * or a newly-reserved element with its key set and its value zeroed (except
 * for the double values, which are initialized to -FLT_MAX).
 *
 * If the hashmap is full, NULL is returned.
 *
 * We use `rand_r` as a hashing function. The key is first hashed to obtain an
 * initial global position. If there is a collision, the hashing function is
 * re-applied to obtain a new offset *within the same chunk*. This is repeated
 * for at most HASHMAP_MAX_CHAIN_LENGTH steps, at which point the search (or
 * insertion) fails.
 *
 * @param m The hashmap.
 * @param key The key to look up.
 * @param create_new If non-zero, reserve a new element when the key is not
 *        found.
 * @param chain_length If not NULL, set to the length of the probe chain.
 * @param created_new_element If not NULL, set to 1 if a new element was
 *        reserved (left unchanged otherwise).
 */
hashmap_element_t *hashmap_find(hashmap_t *m, hashmap_key_t key, int create_new,
int *chain_length, int *created_new_element) {
/* If full, return immediately */
if (create_new && m->size > m->table_size * HASHMAP_MAX_FILL_RATIO) {
if (HASHMAP_DEBUG_OUTPUT) {
message("hashmap is too full (%zu of %zu elements used), re-hashing.",
m->size, m->table_size);
}
return NULL;
}
/* We will use rand_r as our hash function. */
unsigned int curr = (unsigned int)key;
  /* Get offsets to the entry, its chunk, its mask, etc. */
const size_t offset = rand_r(&curr) % m->table_size;
const size_t chunk_offset = offset / HASHMAP_ELEMENTS_PER_CHUNK;
size_t offset_in_chunk = offset - chunk_offset * HASHMAP_ELEMENTS_PER_CHUNK;
/* Allocate the chunk if needed. */
if (m->chunks[chunk_offset] == NULL) {
/* Quit here if we don't want to create a new entry. */
if (!create_new) return NULL;
/* Get a new chunk for this offset. */
m->chunks[chunk_offset] = hashmap_get_chunk(m);
}
hashmap_chunk_t *chunk = m->chunks[chunk_offset];
  /* Count the number of used elements in this chunk and bail out if it is too
     full. */
int chunk_count = 0;
for (int k = 0; k < HASHMAP_MASKS_PER_CHUNK; k++) {
chunk_count += __builtin_popcountll(chunk->masks[k]);
}
if (create_new &&
chunk_count > HASHMAP_ELEMENTS_PER_CHUNK * HASHMAP_MAX_CHUNK_FILL_RATIO) {
if (HASHMAP_DEBUG_OUTPUT) {
      message(
          "chunk %zu is too full (%i of %i elements used, global fill ratio "
          "is %.2f%%), re-hashing.",
          chunk_offset, chunk_count, HASHMAP_ELEMENTS_PER_CHUNK,
          (100.0 * m->size) / m->table_size);
}
return NULL;
}
  /* Probe for the key, re-hashing within the chunk on collisions. */
for (int i = 0; i < HASHMAP_MAX_CHAIN_LENGTH; i++) {
/* Record the chain_length, if not NULL. */
if (chain_length) *chain_length = i;
/* Compute the offsets within the masks of this chunk. */
const int mask_offset = offset_in_chunk / HASHMAP_BITS_PER_MASK;
const int offset_in_mask =
offset_in_chunk - mask_offset * HASHMAP_BITS_PER_MASK;
/* Is the offset empty? */
hashmap_mask_t search_mask = ((hashmap_mask_t)1) << offset_in_mask;
if (!(chunk->masks[mask_offset] & search_mask)) {
/* Quit here if we don't want to create a new element. */
if (!create_new) return NULL;
/* Mark this element as taken and increase the size counter. */
chunk->masks[mask_offset] |= search_mask;
m->size += 1;
if (created_new_element) *created_new_element = 1;
      /* Set the key and initialize the double values to -FLT_MAX. */
chunk->data[offset_in_chunk].key = key;
chunk->data[offset_in_chunk].value.value_array2_dbl[0] = -FLT_MAX;
chunk->data[offset_in_chunk].value.value_array2_dbl[1] = -FLT_MAX;
chunk->data[offset_in_chunk].value.value_array2_dbl[2] = -FLT_MAX;
/* Return a pointer to the new element. */
return &chunk->data[offset_in_chunk];
}
/* Does the offset by chance contain the key we are looking for? */
else if (chunk->data[offset_in_chunk].key == key) {
return &chunk->data[offset_in_chunk];
}
    /* None of the above, so this is a collision. Re-hash, but stay within the
       same chunk. */
else {
offset_in_chunk = rand_r(&curr) % HASHMAP_ELEMENTS_PER_CHUNK;
}
}
  /* We ran out of attempts, so return nothing. */
if (HASHMAP_DEBUG_OUTPUT) {
message("maximum chain length exceeded, re-hashing.");
}
return NULL;
}
/**
 * @brief Grows the hashmap and re-hashes all the elements.
 *
 * @param m The hashmap.
 * @param new_size The new table size, or 0 to grow the current size by
 *        HASHMAP_GROWTH_FACTOR.
 */
void hashmap_grow(hashmap_t *m, size_t new_size) {
/* Hold on to the old data. */
const size_t old_table_size = m->table_size;
hashmap_chunk_t **old_chunks = m->chunks;
/* Re-allocate the chunk array. */
if (new_size == 0) new_size = m->table_size * HASHMAP_GROWTH_FACTOR;
m->nr_chunks =
(new_size + HASHMAP_ELEMENTS_PER_CHUNK - 1) / HASHMAP_ELEMENTS_PER_CHUNK;
m->table_size = m->nr_chunks * HASHMAP_ELEMENTS_PER_CHUNK;
if (HASHMAP_DEBUG_OUTPUT) {
message("Increasing hash table size from %zu (%zu kb) to %zu (%zu kb).",
old_table_size, old_table_size * sizeof(hashmap_element_t) / 1024,
m->table_size, m->table_size * sizeof(hashmap_element_t) / 1024);
}
if ((m->chunks = (hashmap_chunk_t **)swift_calloc(
"hashmap", m->nr_chunks, sizeof(hashmap_chunk_t *))) == NULL) {
error("Unable to allocate hashmap chunks.");
}
/* Reset size. */
m->size = 0;
/* Buffer of stray elements, in case we get overflows while re-hashing. */
hashmap_element_t *strays = NULL;
size_t strays_count = 0;
size_t strays_size = 0;
/* Iterate over the chunks and add their entries to the new table. */
for (size_t cid = 0; cid < old_table_size / HASHMAP_ELEMENTS_PER_CHUNK;
cid++) {
/* Skip empty chunks. */
hashmap_chunk_t *chunk = old_chunks[cid];
if (!chunk) continue;
/* Loop over the masks in this chunk. */
for (int mid = 0; mid < HASHMAP_MASKS_PER_CHUNK; mid++) {
/* Skip empty masks. */
if (chunk->masks[mid] == 0) continue;
/* Loop over the mask entries. */
for (int eid = 0; eid < HASHMAP_BITS_PER_MASK; eid++) {
hashmap_mask_t element_mask = ((hashmap_mask_t)1) << eid;
if (chunk->masks[mid] & element_mask) {
hashmap_element_t *element =
&chunk->data[mid * HASHMAP_BITS_PER_MASK + eid];
/* Copy the element over to the new hashmap. */
hashmap_element_t *new_element =
hashmap_find(m, element->key, /*create_new=*/1,
/*chain_length=*/NULL, /*created_new_element=*/NULL);
if (new_element) {
new_element->value = element->value;
}
/* If copying failed, then we have an overflow in the new hashmap.
If this happens, store the offending element in the strays buffer
for now. */
else {
/* (Re)allocate strays buffer? */
if (strays_count == strays_size) {
hashmap_element_t *temp_buff;
strays_size = strays_size ? strays_size * 2 : 10;
if ((temp_buff = (hashmap_element_t *)swift_malloc(
"hashmap_strays",
sizeof(hashmap_element_t) * strays_size)) == NULL)
error("Failed to (re)allocate strays buffer.");
memcpy(temp_buff, strays,
sizeof(hashmap_element_t) * strays_count);
swift_free("hashmap_strays", strays);
strays = temp_buff;
}
strays[strays_count++] = *element;
}
}
}
}
/* We're through with this chunk, recycle it. */
hashmap_release_chunk(m, chunk);
}
/* Free the old list of chunks. */
swift_free("hashmap", old_chunks);
  /* If we have any strays, add them back to the hashmap. This may trigger
     further growth, which hashmap_put() will handle. */
if (strays_count) {
for (size_t k = 0; k < strays_count; k++) {
hashmap_put(m, strays[k].key, strays[k].value);
}
swift_free("hashmap_strays", strays);
}
}
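/**
 * @brief Stores the given value for the given key, growing the hashmap as
 * needed until the insertion succeeds.
 *
 * @param m The hashmap.
 * @param key The key to store the value under.
 * @param value The value to store.
 */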
void hashmap_put(hashmap_t *m, hashmap_key_t key, hashmap_value_t value) {
/* Try to find an element for the given key. */
hashmap_element_t *element =
hashmap_find(m, key, /*create_new=*/1, /*chain_length=*/NULL,
/*created_new_element=*/NULL);
/* Loop around, trying to find our place in the world. */
while (!element) {
hashmap_grow(m, 0);
element = hashmap_find(m, key, /*create_new=*/1, /*chain_length=*/NULL,
/*created_new_element=*/NULL);
}
/* Set the value. */
element->value = value;
}
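/**
 * @brief Returns a pointer to the value for the given key, creating the
 * element (and growing the hashmap if necessary) if it does not exist yet.
 *
 * @param m The hashmap.
 * @param key The key to look up.
 */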
hashmap_value_t *hashmap_get(hashmap_t *m, hashmap_key_t key) {
/* Look for the given key. */
hashmap_element_t *element =
hashmap_find(m, key, /*create_new=*/1, /*chain_length=*/NULL,
/*created_new_element=*/NULL);
while (!element) {
hashmap_grow(m, 0);
element = hashmap_find(m, key, /*create_new=*/1, /*chain_length=*/NULL,
/*created_new_element=*/NULL);
}
return &element->value;
}
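/**
 * @brief Same as hashmap_get(), but additionally reports whether a new
 * element had to be created for the given key.
 *
 * @param m The hashmap.
 * @param key The key to look up.
 * @param created_new_element Set to 1 if a new element was created (left
 *        unchanged otherwise).
 */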
hashmap_value_t *hashmap_get_new(hashmap_t *m, hashmap_key_t key,
int *created_new_element) {
/* Look for the given key. */
hashmap_element_t *element = hashmap_find(
m, key, /*create_new=*/1, /*chain_length=*/NULL, created_new_element);
while (!element) {
hashmap_grow(m, 0);
element = hashmap_find(m, key, /*create_new=*/1, /*chain_length=*/NULL,
created_new_element);
}
return &element->value;
}
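/**
 * @brief Returns a pointer to the value for the given key, or NULL if the key
 * is not in the hashmap. Never creates new elements.
 *
 * @param m The hashmap.
 * @param key The key to look up.
 */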
hashmap_value_t *hashmap_lookup(hashmap_t *m, hashmap_key_t key) {
hashmap_element_t *element =
hashmap_find(m, key, /*create_new=*/0, /*chain_length=*/NULL,
/*created_new_element=*/NULL);
return element ? &element->value : NULL;
}
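/**
 * @brief Calls the given mapper function on every element in the hashmap.
 *
 * @param m The hashmap.
 * @param f The function to call on each (key, value) pair.
 * @param data Opaque pointer passed through to f.
 */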
void hashmap_iterate(hashmap_t *m, hashmap_mapper_t f, void *data) {
/* Loop over the chunks. */
for (size_t cid = 0; cid < m->nr_chunks; cid++) {
hashmap_chunk_t *chunk = m->chunks[cid];
if (!chunk) continue;
/* Loop over the masks. */
for (int mid = 0; mid < HASHMAP_MASKS_PER_CHUNK; mid++) {
hashmap_mask_t mask = chunk->masks[mid];
if (!mask) continue;
/* Loop over each element in the mask. */
for (int eid = 0;
eid < HASHMAP_BITS_PER_MASK &&
mid * HASHMAP_BITS_PER_MASK + eid < HASHMAP_ELEMENTS_PER_CHUNK;
eid++) {
/* If the element exists, call the function on it. */
if (mask & (((hashmap_mask_t)1) << eid)) {
hashmap_element_t *element =
&chunk->data[mid * HASHMAP_BITS_PER_MASK + eid];
f(element->key, &element->value, data);
}
}
}
}
}
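/**
 * @brief Frees all memory associated with the hashmap.
 *
 * @param m The hashmap.
 */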
void hashmap_free(hashmap_t *m) {
/* Free the list of active chunks. Note that the actual chunks will be freed
as part of the allocs below. */
swift_free("hashmap", m->chunks);
/* Re-set some pointers and values, just in case. */
m->chunks = NULL;
m->graveyard = NULL;
m->size = 0;
m->table_size = 0;
m->nr_chunks = 0;
/* Free the chunk allocs. */
for (size_t k = 0; k < m->allocs_count; k++) {
swift_free("hashmap", m->allocs[k]);
}
swift_free("hashmap", m->allocs);
}
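/**
 * @brief Returns the number of elements in the hashmap, or 0 if the hashmap
 * is NULL.
 *
 * @param m The hashmap.
 */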
size_t hashmap_size(hashmap_t *m) {
if (m != NULL)
return m->size;
else
return 0;
}
#if HASHMAP_DEBUG_OUTPUT
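/**
 * @brief Mapper function that records the probe chain length of each key in
 * the hashmap's chain_length_counts histogram.
 *
 * @param key The key to probe for.
 * @param value The value stored for that key (unused).
 * @param data The hashmap itself, passed as an opaque pointer.
 */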
void hashmap_count_chain_lengths(hashmap_key_t key, hashmap_value_t *value,
void *data) {
hashmap_t *m = (hashmap_t *)data;
int count = 0;
  hashmap_find(m, key, /*create_new=*/0, &count,
/*created_new_element=*/NULL);
m->chain_length_counts[count] += 1;
}
#endif
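/**
 * @brief Prints statistics on the hashmap's size, fill ratios, chunk usage,
 * and (if HASHMAP_DEBUG_OUTPUT is enabled) probe chain lengths.
 *
 * @param m The hashmap.
 */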
void hashmap_print_stats(hashmap_t *m) {
/* Basic stats. */
message("size: %zu, table_size: %zu, nr_chunks: %zu.", m->size, m->table_size,
m->nr_chunks);
/* Count the number of populated chunks, graveyard chunks, and allocs. */
int chunk_counter = 0;
for (size_t k = 0; k < m->nr_chunks; k++) {
if (m->chunks[k]) chunk_counter += 1;
}
int graveyard_counter = 0;
for (hashmap_chunk_t *finger = m->graveyard; finger != NULL;
finger = (hashmap_chunk_t *)finger->next) {
graveyard_counter += 1;
}
message(
"populated chunks: %i (%zu kb), graveyard chunks: %i (%zu kb), allocs: "
"%zu",
chunk_counter, sizeof(hashmap_chunk_t) * chunk_counter / 1024,
graveyard_counter, sizeof(hashmap_chunk_t) * graveyard_counter / 1024,
m->allocs_count);
/* Print fill ratios. */
message("element-wise fill ratio: %.2f%%, chunk-wise fill ratio: %.2f%%",
(100.0 * m->size) / m->table_size,
(100.0 * chunk_counter) / m->nr_chunks);
#if HASHMAP_DEBUG_OUTPUT
/* Compute the chain lengths. */
for (int k = 0; k < HASHMAP_MAX_CHAIN_LENGTH; k++) {
m->chain_length_counts[k] = 0;
}
hashmap_iterate(m, hashmap_count_chain_lengths, m);
message("chain lengths:");
for (int k = 0; k < HASHMAP_MAX_CHAIN_LENGTH; k++) {
message(" chain_length_counts[%i]: %zu (%.2f%%)", k,
m->chain_length_counts[k],
(100.0 * m->chain_length_counts[k]) / m->size);
}
#endif
/* Compute chunk fill ratios. */
size_t chunk_fill_ratio_counts[11];
for (int k = 0; k < 11; k++) {
chunk_fill_ratio_counts[k] = 0;
}
for (size_t k = 0; k < m->nr_chunks; k++) {
hashmap_chunk_t *chunk = m->chunks[k];
if (!chunk) {
chunk_fill_ratio_counts[0] += 1;
} else {
int count = 0;
for (int j = 0; j < HASHMAP_MASKS_PER_CHUNK; j++) {
count += __builtin_popcountll(chunk->masks[j]);
}
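      /* Bin the chunk's fill ratio (relative to HASHMAP_MAX_CHUNK_FILL_RATIO)
         into 10%-wide buckets. */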
chunk_fill_ratio_counts[(int)(10 * count / HASHMAP_MAX_CHUNK_FILL_RATIO /
HASHMAP_ELEMENTS_PER_CHUNK)] += 1;
}
}
message("chunk fill ratios:");
for (int k = 0; k < 11; k++) {
message(" %3i%% - %3i%%: %zu (%.2f%%)",
(int)(k * 10 * HASHMAP_MAX_CHUNK_FILL_RATIO),
(int)((k + 1) * 10 * HASHMAP_MAX_CHUNK_FILL_RATIO),
chunk_fill_ratio_counts[k],
(100.0 * chunk_fill_ratio_counts[k]) / m->nr_chunks);
}
/* Print struct sizes. */
message("sizeof(hashmap_element_t): %zu", sizeof(hashmap_element_t));
message("sizeof(hashmap_chunk_t): %zu", sizeof(hashmap_chunk_t));
message("HASHMAP_TARGET_CHUNK_BYTES: %i", HASHMAP_TARGET_CHUNK_BYTES);
message("HASHMAP_BITS_PER_ELEMENT: %i", HASHMAP_BITS_PER_ELEMENT);
message("HASHMAP_ELEMENTS_PER_CHUNK: %i", HASHMAP_ELEMENTS_PER_CHUNK);
message("HASHMAP_MASKS_PER_CHUNK: %i", HASHMAP_MASKS_PER_CHUNK);
}