diff --git a/.gitignore b/.gitignore index 93fe6cebf3419e49523b014edb9f3b44ac346ca3..9d56de007c89b8f5459305a9e26e0b919ab6e1b5 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ examples/swift_mpi examples/fof examples/fof_mpi examples/*/*/*.xmf +examples/*/*/*.dat examples/*/*/*.png examples/*/*/*.mp4 examples/*/*/*.txt diff --git a/examples/main_fof.c b/examples/main_fof.c index e1e2d73e1452b001f99f913752124d57c0275a8c..6cee578385997e3acf732d6ad373eff4c060a275 100644 --- a/examples/main_fof.c +++ b/examples/main_fof.c @@ -1018,38 +1018,24 @@ int main(int argc, char *argv[]) { #endif /* Perform first FOF search after the first snapshot dump. */ - fof_search_tree(&s); + //fof_search_tree(&s); #ifdef WITH_MPI MPI_Barrier(MPI_COMM_WORLD); #endif } + /* Dump the task data using the given frequency. */ + if (dump_tasks) { #ifdef SWIFT_DEBUG_TASKS - char dumpfile[32]; - snprintf(dumpfile, sizeof(dumpfile), "thread_info-step%d.dat", 1); - FILE *file_thread; - file_thread = fopen(dumpfile, "w"); - /* Add some information to help with the plots */ - fprintf(file_thread, " %d %d %d %d %lld %lld %lld %lld %lld %d %lld\n", -2, - -1, -1, 1, e.tic_step, e.toc_step, e.updates, e.g_updates, - e.s_updates, 0, cpufreq); - for (int l = 0; l < e.sched.nr_tasks; l++) { - if (!e.sched.tasks[l].implicit && e.sched.tasks[l].toc != 0) { - fprintf( - file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n", - e.sched.tasks[l].rid, e.sched.tasks[l].type, e.sched.tasks[l].subtype, - (e.sched.tasks[l].cj == NULL), e.sched.tasks[l].tic, - e.sched.tasks[l].toc, - (e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->hydro.count, - (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->hydro.count, - (e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->grav.count, - (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->grav.count, - e.sched.tasks[l].sid); - } + task_dump_all(&e, 0); +#endif + + /* Generate the task statistics. */ + char dumpfile[40]; + snprintf(dumpfile, 40, "thread_stats-step%d.dat", 0); + task_dump_stats(dumpfile, &e, /* header = */ 0, /* allranks = */ 1); } - fclose(file_thread); -#endif // SWIFT_DEBUG_TASKS #ifdef SWIFT_DEBUG_THREADPOOL /* Dump the task data using the given frequency. */ diff --git a/src/Makefile.am b/src/Makefile.am index 3b9c3b3f64ed665affe03c0757a7b8fd6aeebd9b..d2cb5180baae63413bcd16c3d6a645a875599501 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -51,7 +51,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ mesh_gravity.h cbrt.h exp10.h velociraptor_interface.h swift_velociraptor_part.h outputlist.h \ logger_io.h tracers_io.h tracers.h tracers_struct.h star_formation_io.h fof.h \ star_formation_struct.h star_formation.h velociraptor_struct.h velociraptor_io.h \ - random.h + random.h c_hashmap/hashmap.h # source files for EAGLE cooling EAGLE_COOLING_SOURCES = @@ -70,7 +70,8 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c engine_maketasks.c part_type.c xmf.c gravity_properties.c gravity.c \ collectgroup.c hydro_space.c equation_of_state.c \ chemistry.c cosmology.c restart.c mesh_gravity.c velociraptor_interface.c \ - outputlist.c velociraptor_dummy.c logger_io.c fof.c $(EAGLE_COOLING_SOURCES) + outputlist.c velociraptor_dummy.c logger_io.c fof.c $(EAGLE_COOLING_SOURCES) \ + c_hashmap/hashmap.c # Include files for distribution, not installation. 
nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ diff --git a/src/c_hashmap/hashmap.c b/src/c_hashmap/hashmap.c index c436dc7fea9ce28f0f7ec1edd90495f9b43b71cd..5d63af9ac69b4598fc869fac3eccd02767d7fe59 100644 --- a/src/c_hashmap/hashmap.c +++ b/src/c_hashmap/hashmap.c @@ -1,398 +1,470 @@ /* * Generic map implementation. */ -#include "hashmap.h" - -#include <stdlib.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> -#define INITIAL_SIZE (256) -#define MAX_CHAIN_LENGTH (8) - -/* We need to keep keys and values */ -typedef struct _hashmap_element{ - char* key; - int in_use; - any_t data; -} hashmap_element; +#include "../error.h" +#include "hashmap.h" -/* A hashmap has some maximum size and current size, - * as well as the data to hold. */ -typedef struct _hashmap_map{ - int table_size; - int size; - hashmap_element *data; -} hashmap_map; +#define INITIAL_NUM_CHUNKS (1) +#define HASHMAP_GROWTH_FACTOR (2) +#define HASHMAP_MAX_FILL_RATIO (1.0) +#define HASHMAP_MAX_CHUNK_FILL_RATIO (0.5) -/* - * Return an empty hashmap, or NULL on failure. +/** + * @brief Pre-allocate a number of chunks for the graveyard. */ -map_t hashmap_new(void) { - hashmap_map* m = (hashmap_map*) malloc(sizeof(hashmap_map)); - if(!m) goto err; - - m->data = (hashmap_element*) calloc(INITIAL_SIZE, sizeof(hashmap_element)); - if(!m->data) goto err; - - m->table_size = INITIAL_SIZE; - m->size = 0; - - return m; - err: - if (m) - hashmap_free(m); - return NULL; +void hashmap_allocate_chunks(hashmap_t *m, int num_chunks) { + /* Allocate a fresh set of chunks. */ + hashmap_chunk_t *alloc; + if ((alloc = (hashmap_chunk_t *)calloc(num_chunks, + sizeof(hashmap_chunk_t))) == NULL) { + error("Unable to allocate chunks."); + } + + /* Hook up the alloc, so that we can clean it up later. */ + if (m->allocs_count == m->allocs_size) { + m->allocs_size *= 2; + void **new_allocs = (void **)malloc(sizeof(void *) * m->allocs_size); + memcpy(new_allocs, m->allocs, sizeof(void *) * m->allocs_count); + free(m->allocs); + m->allocs = new_allocs; + } + m->allocs[m->allocs_count++] = alloc; + + /* Link the chunks together. */ + for (int k = 0; k < num_chunks - 1; k++) { + alloc[k].next = &alloc[k + 1]; + } + + /* Last chunk points to current graveyard. */ + alloc[num_chunks - 1].next = m->graveyard; + + /* Graveyard points to first new chunk. */ + m->graveyard = alloc; } -/* The implementation here was originally done by Gary S. Brown. I have - borrowed the tables directly, and made some minor changes to the - crc32-function (including changing the interface). //ylo */ - - /* ============================================================= */ - /* COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or */ - /* code or tables extracted from it, as desired without restriction. */ - /* */ - /* First, the polynomial itself and its table of feedback terms. The */ - /* polynomial is */ - /* X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0 */ - /* */ - /* Note that we take it "backwards" and put the highest-order term in */ - /* the lowest-order bit. The X^32 term is "implied"; the LSB is the */ - /* X^31 term, etc. The X^0 term (usually shown as "+1") results in */ - /* the MSB being 1. */ - /* */ - /* Note that the usual hardware shift register implementation, which */ - /* is what we're using (we're merely optimizing it by doing eight-bit */ - /* chunks at a time) shifts bits into the lowest-order term. 
In our */ - /* implementation, that means shifting towards the right. Why do we */ - /* do it this way? Because the calculated CRC must be transmitted in */ - /* order from highest-order term to lowest-order term. UARTs transmit */ - /* characters in order from LSB to MSB. By storing the CRC this way, */ - /* we hand it to the UART in the order low-byte to high-byte; the UART */ - /* sends each low-bit to hight-bit; and the result is transmission bit */ - /* by bit from highest- to lowest-order term without requiring any bit */ - /* shuffling on our part. Reception works similarly. */ - /* */ - /* The feedback terms table consists of 256, 32-bit entries. Notes: */ - /* */ - /* The table can be generated at runtime if desired; code to do so */ - /* is shown later. It might not be obvious, but the feedback */ - /* terms simply represent the results of eight shift/xor opera- */ - /* tions for all combinations of data and CRC register values. */ - /* */ - /* The values must be right-shifted by eight bits by the "updcrc" */ - /* logic; the shift must be unsigned (bring in zeroes). On some */ - /* hardware you could probably optimize the shift in assembler by */ - /* using byte-swap instructions. */ - /* polynomial $edb88320 */ - /* */ - /* -------------------------------------------------------------------- */ - -static unsigned long crc32_tab[] = { - 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, - 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, - 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, - 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, - 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, - 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, - 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, - 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, - 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, - 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, - 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, - 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, - 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, - 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, - 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, - 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, - 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, - 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, - 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, - 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, - 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, - 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, - 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, - 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, - 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, - 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, - 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, - 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, - 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, - 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, - 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, - 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, - 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 
0x4fdff252L, 0xd1bb67f1L, - 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, - 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, - 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, - 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, - 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, - 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, - 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, - 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, - 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, - 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, - 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, - 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, - 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, - 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, - 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, - 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, - 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, - 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, - 0x2d02ef8dL - }; - -/* Return a 32-bit CRC of the contents of the buffer. */ - -unsigned long crc32(const unsigned char *s, unsigned int len) -{ - unsigned int i; - unsigned long crc32val; - - crc32val = 0; - for (i = 0; i < len; i ++) - { - crc32val = - crc32_tab[(crc32val ^ s[i]) & 0xff] ^ - (crc32val >> 8); - } - return crc32val; +void hashmap_init(hashmap_t *m) { + /* Allocate the first (empty) list of chunks. */ + m->nr_chunks = INITIAL_NUM_CHUNKS; + if ((m->chunks = (hashmap_chunk_t **)calloc( + m->nr_chunks, sizeof(hashmap_chunk_t *))) == NULL) { + error("Unable to allocate hashmap chunks."); + } + + /* The graveyard is currently empty. */ + m->graveyard = NULL; + m->allocs = NULL; + + /* Init the array of allocations. */ + m->allocs_size = HASHMAP_ALLOCS_INITIAL_SIZE; + if ((m->allocs = (void **)malloc(sizeof(void *) * m->allocs_size)) == NULL) { + error("Unable to allocate allocs pointer array."); + } + m->allocs_count = 0; + + /* Set initial sizes. */ + m->table_size = m->nr_chunks * HASHMAP_ELEMENTS_PER_CHUNK; + m->size = 0; + + /* Inform the men. */ + if (HASHMAP_DEBUG_OUTPUT) { + message( + "Created hash table of size: %zu each element is %zu bytes. Allocated " + "%zu empty chunks.", + m->table_size * sizeof(hashmap_element_t), sizeof(hashmap_element_t), + m->nr_chunks); + } } -/* - * Hashing function for a string +/** + * @brief Put a used chunk back into the recycling bin. */ -unsigned int hashmap_hash_int(hashmap_map * m, char* keystring){ - - unsigned long key = crc32((unsigned char*)(keystring), strlen(keystring)); - - /* Robert Jenkins' 32 bit Mix Function */ - key += (key << 12); - key ^= (key >> 22); - key += (key << 4); - key ^= (key >> 9); - key += (key << 10); - key ^= (key >> 2); - key += (key << 7); - key ^= (key >> 12); +void hashmap_release_chunk(hashmap_t *m, hashmap_chunk_t *chunk) { + /* Clear all the chunk's data. */ + memset(chunk, 0, sizeof(hashmap_chunk_t)); - /* Knuth's Multiplicative Method */ - key = (key >> 3) * 2654435761; - - return key % m->table_size; + /* Hook it up with the other stiffs in the graveyard. */ + chunk->next = m->graveyard; + m->graveyard = chunk; } -/* - * Return the integer of the location in data - * to store the point to the item, or MAP_FULL. +/** + * @brief Return a new chunk, either recycled or freshly allocated. 
*/ -int hashmap_hash(map_t in, char* key){ - int curr; - int i; +hashmap_chunk_t *hashmap_get_chunk(hashmap_t *m) { + int num_chunks = m->nr_chunks * HASHMAP_ALLOC_SIZE_FRACTION; + if (!num_chunks) num_chunks = 1; + if (m->graveyard == NULL) { + hashmap_allocate_chunks(m, num_chunks); + } + + hashmap_chunk_t *res = m->graveyard; + m->graveyard = res->next; + res->next = NULL; + + return res; +} - /* Cast the hashmap */ - hashmap_map* m = (hashmap_map *) in; +/** + * @brief Looks for the given key and retuns a pointer to the corresponding + * element. + * + * The returned element is either the one that already existed in the hashmap, + * or a newly-reseverd element initialized to zero. + * + * If the hashmap is full, NULL is returned. + * + * We use `rand_r` as a hashing function. The key is first hashed to obtain an + * initial global position. If there is a collision, the hashing function is + * re-applied to the key to obtain a new offset *within the same bucket*. This + * is repeated for at most HASHMAP_MAX_CHAIN_LENGTH steps, at which point + * insertion fails. + */ +hashmap_element_t *hashmap_find(hashmap_t *m, hashmap_key_t key, int create_new, + int *chain_length, int *created_new_element) { + /* If full, return immediately */ + if (create_new && m->size > m->table_size * HASHMAP_MAX_FILL_RATIO) { + if (HASHMAP_DEBUG_OUTPUT) { + message("hashmap is too full (%zu of %zu elements used), re-hashing.", + m->size, m->table_size); + } + return NULL; + } + + /* We will use rand_r as our hash function. */ + unsigned int curr = (unsigned int)key; + + /* Get offsets to the entry, its chunk, it's mask, etc. */ + const size_t offset = rand_r(&curr) % m->table_size; + const size_t chunk_offset = offset / HASHMAP_ELEMENTS_PER_CHUNK; + size_t offset_in_chunk = offset - chunk_offset * HASHMAP_ELEMENTS_PER_CHUNK; + + /* Allocate the chunk if needed. */ + if (m->chunks[chunk_offset] == NULL) { + /* Quit here if we don't want to create a new entry. */ + if (!create_new) return NULL; + + /* Get a new chunk for this offset. */ + m->chunks[chunk_offset] = hashmap_get_chunk(m); + } + hashmap_chunk_t *chunk = m->chunks[chunk_offset]; + + /* Count the number of free elements in this chunk and bail if it is too low. + */ + int chunk_count = 0; + for (int k = 0; k < HASHMAP_MASKS_PER_CHUNK; k++) { + chunk_count += __builtin_popcountll(chunk->masks[k]); + } + if (create_new && + chunk_count > HASHMAP_ELEMENTS_PER_CHUNK * HASHMAP_MAX_CHUNK_FILL_RATIO) { + if (HASHMAP_DEBUG_OUTPUT) { + message( + "chunk %zu is too full (%i of %i elements used, fill ratio is " + "%.2f%%), re-hashing.", + chunk_offset, chunk_count, HASHMAP_ELEMENTS_PER_CHUNK, + (100.0 * m->size) / m->table_size); + } + return NULL; + } + + /* Linear probing (well, not really, but whatever). */ + for (int i = 0; i < HASHMAP_MAX_CHAIN_LENGTH; i++) { + /* Record the chain_length, if not NULL. */ + if (chain_length) *chain_length = i; + + /* Compute the offsets within the masks of this chunk. */ + const int mask_offset = offset_in_chunk / HASHMAP_BITS_PER_MASK; + const int offset_in_mask = + offset_in_chunk - mask_offset * HASHMAP_BITS_PER_MASK; + + /* Is the offset empty? */ + hashmap_mask_t search_mask = ((hashmap_mask_t)1) << offset_in_mask; + if (!(chunk->masks[mask_offset] & search_mask)) { + /* Quit here if we don't want to create a new element. */ + if (!create_new) return NULL; + + /* Mark this element as taken and increase the size counter. 
*/ + chunk->masks[mask_offset] |= search_mask; + m->size += 1; + if(created_new_element) *created_new_element = 1; + + /* Set the key. */ + chunk->data[offset_in_chunk].key = key; + + /* Return a pointer to the new element. */ + return &chunk->data[offset_in_chunk]; + } - /* If full, return immediately */ - if(m->size >= (m->table_size/2)) return MAP_FULL; + /* Does the offset by chance contain the key we are looking for? */ + else if (chunk->data[offset_in_chunk].key == key) { + return &chunk->data[offset_in_chunk]; + } - /* Find the best index */ - curr = hashmap_hash_int(m, key); + /* None of the above, so this is a collision. Re-hash, but within the same + chunk. I guess this is Hopscotch Hashing? */ + else { + offset_in_chunk = rand_r(&curr) % HASHMAP_ELEMENTS_PER_CHUNK; + } + } - /* Linear probing */ - for(i = 0; i< MAX_CHAIN_LENGTH; i++){ - if(m->data[curr].in_use == 0) - return curr; + /* We lucked out, so return nothing. */ + if (HASHMAP_DEBUG_OUTPUT) { + message("maximum chain length exceeded, re-hashing."); + } + return NULL; +} - if(m->data[curr].in_use == 1 && (strcmp(m->data[curr].key,key)==0)) - return curr; +/** + * @brief Grows the hashmap and rehashes all the elements + */ +void hashmap_grow(hashmap_t *m) { + /* Hold on to the old data. */ + const size_t old_table_size = m->table_size; + hashmap_chunk_t **old_chunks = m->chunks; + + /* Re-allocate the chunk array. */ + m->table_size *= HASHMAP_GROWTH_FACTOR; + m->nr_chunks = (m->table_size + HASHMAP_ELEMENTS_PER_CHUNK - 1) / + HASHMAP_ELEMENTS_PER_CHUNK; + m->table_size = m->nr_chunks * HASHMAP_ELEMENTS_PER_CHUNK; + + if (HASHMAP_DEBUG_OUTPUT) { + message("Increasing hash table size from %zu (%zu kb) to %zu (%zu kb).", + old_table_size, old_table_size * sizeof(hashmap_element_t) / 1024, + m->table_size, m->table_size * sizeof(hashmap_element_t) / 1024); + } + + if ((m->chunks = (hashmap_chunk_t **)calloc( + m->nr_chunks, sizeof(hashmap_chunk_t *))) == NULL) { + error("Unable to allocate hashmap chunks."); + } + + /* Reset size. */ + m->size = 0; + + /* Iterate over the chunks and add their entries to the new table. */ + for (size_t cid = 0; cid < old_table_size / HASHMAP_ELEMENTS_PER_CHUNK; + cid++) { + /* Skip empty chunks. */ + hashmap_chunk_t *chunk = old_chunks[cid]; + if (!chunk) continue; + + /* Loop over the masks in this chunk. */ + for (int mid = 0; mid < HASHMAP_MASKS_PER_CHUNK; mid++) { + /* Skip empty masks. */ + if (chunk->masks[mid] == 0) continue; + + /* Loop over the mask entries. */ + for (int eid = 0; eid < HASHMAP_BITS_PER_MASK; eid++) { + hashmap_mask_t element_mask = ((hashmap_mask_t)1) << eid; + if (chunk->masks[mid] & element_mask) { + hashmap_element_t *element = + &chunk->data[mid * HASHMAP_BITS_PER_MASK + eid]; + + /* Copy the element over to the new hashmap. */ + hashmap_element_t *new_element = hashmap_find( + m, element->key, /*create_new=*/1, /*chain_length=*/NULL, /*created_new_element=*/NULL); + if (!new_element) { + /* TODO(pedro): Deal with this type of failure more elegantly. */ + error("Failed to re-hash element."); + } + new_element->value = element->value; + } + } + } - curr = (curr + 1) % m->table_size; - } + /* We're through with this chunk, recycle it. */ + hashmap_release_chunk(m, chunk); + } - return MAP_FULL; + /* Free the old list of chunks. 
*/ + free(old_chunks); } -/* - * Doubles the size of the hashmap, and rehashes all the elements - */ -int hashmap_rehash(map_t in){ - int i; - int old_size; - hashmap_element* curr; - - /* Setup the new elements */ - hashmap_map *m = (hashmap_map *) in; - printf("Rehashing hashamp. Increasing size from: %ld to %ld.\n",m->table_size * sizeof(hashmap_element), 2 * m->table_size * sizeof(hashmap_element)); - hashmap_element* temp = (hashmap_element *) - calloc(2 * m->table_size, sizeof(hashmap_element)); - if(!temp) return MAP_OMEM; - - /* Update the array */ - curr = m->data; - m->data = temp; - - /* Update the size */ - old_size = m->table_size; - m->table_size = 2 * m->table_size; - m->size = 0; - - /* Rehash the elements */ - for(i = 0; i < old_size; i++){ - int status; - - if (curr[i].in_use == 0) - continue; - - status = hashmap_put(m, curr[i].key, curr[i].data); - if (status != MAP_OK) - return status; - } - - free(curr); - - return MAP_OK; +void hashmap_put(hashmap_t *m, hashmap_key_t key, hashmap_value_t value) { + + /* Try to find an element for the given key. */ + hashmap_element_t *element = + hashmap_find(m, key, /*create_new=*/1, /*chain_length=*/NULL, /*created_new_element=*/NULL); + + /* Loop around, trying to find our place in the world. */ + while (!element) { + hashmap_grow(m); + element = hashmap_find(m, key, /*create_new=*/1, /*chain_length=*/NULL, /*created_new_element=*/NULL); + } + + /* Set the value. */ + element->value = value; } -/* - * Add a pointer to the hashmap with some key - */ -int hashmap_put(map_t in, char* key, any_t value){ - int index; - hashmap_map* m; - - /* Cast the hashmap */ - m = (hashmap_map *) in; - - /* Find a place to put our value */ - index = hashmap_hash(in, key); - while(index == MAP_FULL){ - if (hashmap_rehash(in) == MAP_OMEM) { - return MAP_OMEM; - } - index = hashmap_hash(in, key); - } - - /* Set the data */ - m->data[index].data = value; - m->data[index].key = key; - m->data[index].in_use = 1; - m->size++; - - return MAP_OK; +hashmap_value_t *hashmap_get(hashmap_t *m, hashmap_key_t key) { + + /* Look for the given key. */ + hashmap_element_t *element = + hashmap_find(m, key, /*create_new=*/1, /*chain_length=*/NULL, /*created_new_element=*/NULL); + while (!element) { + hashmap_grow(m); + element = hashmap_find(m, key, /*create_new=*/1, /*chain_length=*/NULL, /*created_new_element=*/NULL); + } + return &element->value; } -/* - * Get your pointer out of the hashmap with a key - */ -int hashmap_get(map_t in, char* key, any_t *arg){ - int curr; - int i; - hashmap_map* m; - - /* Cast the hashmap */ - m = (hashmap_map *) in; - - /* Find data location */ - curr = hashmap_hash_int(m, key); - - /* Linear probing, if necessary */ - for(i = 0; i<MAX_CHAIN_LENGTH; i++){ - - int in_use = m->data[curr].in_use; - if (in_use == 1){ - if (strcmp(m->data[curr].key,key)==0){ - *arg = (m->data[curr].data); - return MAP_OK; - } - } - - curr = (curr + 1) % m->table_size; - } - - *arg = NULL; +hashmap_value_t *hashmap_get_new(hashmap_t *m, hashmap_key_t key, int *created_new_element) { + /* Look for the given key. 
*/ + hashmap_element_t *element = + hashmap_find(m, key, /*create_new=*/1, /*chain_length=*/NULL, created_new_element); + while (!element) { + hashmap_grow(m); + element = hashmap_find(m, key, /*create_new=*/1, /*chain_length=*/NULL, created_new_element); + } + return &element->value; +} - /* Not found */ - return MAP_MISSING; +hashmap_value_t *hashmap_lookup(hashmap_t *m, hashmap_key_t key) { + hashmap_element_t *element = + hashmap_find(m, key, /*create_new=*/0, /*chain_length=*/NULL, /*created_new_element=*/NULL); + return element ? &element->value : NULL; } -/* - * Iterate the function parameter over each element in the hashmap. The - * additional any_t argument is passed to the function as its first - * argument and the hashmap element is the second. - */ -int hashmap_iterate(map_t in, PFany f, any_t item) { - int i; - - /* Cast the hashmap */ - hashmap_map* m = (hashmap_map*) in; - - /* On empty hashmap, return immediately */ - if (hashmap_length(m) <= 0) - return MAP_MISSING; - - /* Linear probing */ - for(i = 0; i< m->table_size; i++) - if(m->data[i].in_use != 0) { - any_t data = (any_t) (m->data[i].data); - int status = f(item, data); - if (status != MAP_OK) { - return status; - } - } - - return MAP_OK; +void hashmap_iterate(hashmap_t *m, hashmap_mapper_t f, void *data) { + /* Loop over the chunks. */ + for (size_t cid = 0; cid < m->nr_chunks; cid++) { + hashmap_chunk_t *chunk = m->chunks[cid]; + if (!chunk) continue; + + /* Loop over the masks. */ + for (int mid = 0; mid < HASHMAP_MASKS_PER_CHUNK; mid++) { + hashmap_mask_t mask = chunk->masks[mid]; + if (!mask) continue; + + /* Loop over each element in the mask. */ + for (int eid = 0; + eid < HASHMAP_BITS_PER_MASK && + mid * HASHMAP_BITS_PER_MASK + eid < HASHMAP_ELEMENTS_PER_CHUNK; + eid++) { + /* If the element exists, call the function on it. */ + if (mask & (((hashmap_mask_t)1) << eid)) { + hashmap_element_t *element = + &chunk->data[mid * HASHMAP_BITS_PER_MASK + eid]; + f(element->key, &element->value, data); + } + } + } + } } -/* - * Remove an element with that key from the map - */ -int hashmap_remove(map_t in, char* key){ - int i; - int curr; - hashmap_map* m; - - /* Cast the hashmap */ - m = (hashmap_map *) in; - - /* Find key */ - curr = hashmap_hash_int(m, key); - - /* Linear probing, if necessary */ - for(i = 0; i<MAX_CHAIN_LENGTH; i++){ - - int in_use = m->data[curr].in_use; - if (in_use == 1){ - if (strcmp(m->data[curr].key,key)==0){ - /* Blank out the fields */ - m->data[curr].in_use = 0; - m->data[curr].data = NULL; - m->data[curr].key = NULL; - - /* Reduce the size */ - m->size--; - return MAP_OK; - } - } - curr = (curr + 1) % m->table_size; - } - - /* Data not found */ - return MAP_MISSING; +void hashmap_free(hashmap_t *m) { + /* Free the list of active chunks. Note that the actual chunks will be freed + as part of the allocs below. */ + free(m->chunks); + + /* Re-set some pointers and values, just in case. */ + m->chunks = NULL; + m->graveyard = NULL; + m->size = 0; + m->table_size = 0; + m->nr_chunks = 0; + + /* Free the chunk allocs. 
*/ + for (size_t k = 0; k < m->allocs_count; k++) { + free(m->allocs[k]); + } + free(m->allocs); } -/* Deallocate the hashmap */ -void hashmap_free(map_t in){ - hashmap_map* m = (hashmap_map*) in; - free(m->data); - free(m); +size_t hashmap_size(hashmap_t *m) { + if (m != NULL) + return m->size; + else + return 0; } -/* Return the length of the hashmap */ -int hashmap_length(map_t in){ - hashmap_map* m = (hashmap_map *) in; - if(m != NULL) return m->size; - else return 0; +#if HASHMAP_DEBUG_OUTPUT +void hashmap_count_chain_lengths(hashmap_key_t key, hashmap_value_t *value, + void *data) { + hashmap_t *m = (hashmap_t *)data; + int count = 0; + hashmap_find(m, key, /*create_entry=*/0, &count, /*created_new_element=*/NULL); + m->chain_length_counts[count] += 1; +} +#endif + +void hashmap_print_stats(hashmap_t *m) { + /* Basic stats. */ + message("size: %zu, table_size: %zu, nr_chunks: %zu.", m->size, m->table_size, + m->nr_chunks); + + /* Count the number of populated chunks, graveyard chunks, and allocs. */ + int chunk_counter = 0; + for (size_t k = 0; k < m->nr_chunks; k++) { + if (m->chunks[k]) chunk_counter += 1; + } + int graveyard_counter = 0; + for (hashmap_chunk_t *finger = m->graveyard; finger != NULL; + finger = finger->next) { + graveyard_counter += 1; + } + message( + "populated chunks: %i (%zu kb), graveyard chunks: %i (%zu kb), allocs: " + "%zu", + chunk_counter, sizeof(hashmap_chunk_t) * chunk_counter / 1024, + graveyard_counter, sizeof(hashmap_chunk_t) * graveyard_counter / 1024, + m->allocs_count); + + /* Print fill ratios. */ + message("element-wise fill ratio: %.2f%%, chunk-wise fill ratio: %.2f%%", + (100.0 * m->size) / m->table_size, + (100.0 * chunk_counter) / m->nr_chunks); + +#if HASHMAP_DEBUG_OUTPUT + /* Compute the chain lengths. */ + for (int k = 0; k < HASHMAP_MAX_CHAIN_LENGTH; k++) { + m->chain_length_counts[k] = 0; + } + hashmap_iterate(m, hashmap_count_chain_lengths, m); + message("chain lengths:"); + for (int k = 0; k < HASHMAP_MAX_CHAIN_LENGTH; k++) { + message(" chain_length_counts[%i]: %zu (%.2f%%)", k, + m->chain_length_counts[k], + (100.0 * m->chain_length_counts[k]) / m->size); + } +#endif + + /* Compute chunk fill ratios. */ + size_t chunk_fill_ratio_counts[11]; + for (int k = 0; k < 11; k++) { + chunk_fill_ratio_counts[k] = 0; + } + for (size_t k = 0; k < m->nr_chunks; k++) { + hashmap_chunk_t *chunk = m->chunks[k]; + if (!chunk) { + chunk_fill_ratio_counts[0] += 1; + } else { + int count = 0; + for (int j = 0; j < HASHMAP_MASKS_PER_CHUNK; j++) { + count += __builtin_popcountll(chunk->masks[j]); + } + chunk_fill_ratio_counts[(int)(10 * count / HASHMAP_MAX_CHUNK_FILL_RATIO / + HASHMAP_ELEMENTS_PER_CHUNK)] += 1; + } + } + message("chunk fill ratios:"); + for (int k = 0; k < 11; k++) { + message(" %3i%% - %3i%%: %zu (%.2f%%)", + (int)(k * 10 * HASHMAP_MAX_CHUNK_FILL_RATIO), + (int)((k + 1) * 10 * HASHMAP_MAX_CHUNK_FILL_RATIO), + chunk_fill_ratio_counts[k], + (100.0 * chunk_fill_ratio_counts[k]) / m->nr_chunks); + } + + /* Print struct sizes. 
*/ + message("sizeof(hashmap_element_t): %zu", sizeof(hashmap_element_t)); + message("sizeof(hashmap_chunk_t): %zu", sizeof(hashmap_chunk_t)); + message("HASHMAP_TARGET_CHUNK_BYTES: %i", HASHMAP_TARGET_CHUNK_BYTES); + message("HASHMAP_BITS_PER_ELEMENT: %i", HASHMAP_BITS_PER_ELEMENT); + message("HASHMAP_ELEMENTS_PER_CHUNK: %i", HASHMAP_ELEMENTS_PER_CHUNK); + message("HASHMAP_MASKS_PER_CHUNK: %i", HASHMAP_MASKS_PER_CHUNK); } diff --git a/src/c_hashmap/hashmap.h b/src/c_hashmap/hashmap.h index 72b9f605ef6aeca3d3a69a04007239f2064cb9d0..7df5bb3421451d31175a1d635b7e9f497d157adc 100644 --- a/src/c_hashmap/hashmap.h +++ b/src/c_hashmap/hashmap.h @@ -1,81 +1,157 @@ /* * Generic hashmap manipulation functions * - * Originally by Elliot C Back - http://elliottback.com/wp/hashmap-implementation-in-c/ + * Originally by Elliot C Back - + * http://elliottback.com/wp/hashmap-implementation-in-c/ * - * Modified by Pete Warden to fix a serious performance problem, support strings as keys - * and removed thread synchronization - http://petewarden.typepad.com + * Modified by Pete Warden to fix a serious performance problem, support strings + * as keys and removed thread synchronization - http://petewarden.typepad.com */ #ifndef __HASHMAP_H__ #define __HASHMAP_H__ -#define MAP_MISSING -3 /* No such element */ -#define MAP_FULL -2 /* Hashmap is full */ -#define MAP_OMEM -1 /* Out of Memory */ -#define MAP_OK 0 /* OK */ +/* Some standard headers. */ +#include <stdbool.h> +#include <stddef.h> -/* - * any_t is a pointer. This allows you to put arbitrary structures in - * the hashmap. - */ -typedef void *any_t; +// Type used for chunk bitmasks. +typedef size_t hashmap_mask_t; -/* - * PFany is a pointer to a function that can take two any_t arguments - * and return an integer. Returns status code.. - */ -typedef int (*PFany)(any_t, any_t); +#define HASHMAP_BITS_PER_MASK ((int)sizeof(hashmap_mask_t) * 8) -/* - * map_t is a pointer to an internally maintained data structure. - * Clients of this package do not need to know how hashmaps are - * represented. They see and manipulate only map_t's. +// Type used for the hashmap keys (must have a valid '==' operation). +#ifndef hashmap_key_t +#define hashmap_key_t size_t +#endif + +// Type used for the hashmap values (must have a valid '==' operation). +#ifndef hashmap_value_t +#define hashmap_value_t size_t +#endif + +/* We need to keep keys and values */ +typedef struct _hashmap_element { + hashmap_key_t key; + hashmap_value_t value; +} hashmap_element_t; + +/* Make sure a chunk fits in a given size. */ +#define HASHMAP_TARGET_CHUNK_BYTES (4 * 1024) +#define HASHMAP_BITS_PER_ELEMENT ((int)sizeof(hashmap_element_t) * 8 + 1) +#define HASHMAP_ELEMENTS_PER_CHUNK \ + ((HASHMAP_TARGET_CHUNK_BYTES * 8) / HASHMAP_BITS_PER_ELEMENT) +#define HASHMAP_MASKS_PER_CHUNK \ + ((HASHMAP_ELEMENTS_PER_CHUNK + HASHMAP_BITS_PER_MASK - 1) / \ + HASHMAP_BITS_PER_MASK) + +#define HASHMAP_ALLOCS_INITIAL_SIZE (8) +#define HASHMAP_ALLOC_SIZE_FRACTION (0.1) + +#define HASHMAP_MAX_CHAIN_LENGTH (HASHMAP_ELEMENTS_PER_CHUNK / 8) +#ifndef HASHMAP_DEBUG_OUTPUT +#define HASHMAP_DEBUG_OUTPUT (0) +#endif // HASHMAP_DEBUG_OUTPUT + +/* A chunk of hashmap_element, with the corresponding bitmask. */ +typedef struct _hashmap_chunk { + union { + hashmap_mask_t masks[HASHMAP_MASKS_PER_CHUNK]; + void *next; + }; + hashmap_element_t data[HASHMAP_ELEMENTS_PER_CHUNK]; +} hashmap_chunk_t; + +/* A hashmap has some maximum size and current size, + * as well as the data to hold. 
*/ +typedef struct _hashmap { + size_t table_size; + size_t size; + size_t nr_chunks; + hashmap_chunk_t * + *chunks; // Pointer to chunks in use, but not densely populated. + hashmap_chunk_t + *graveyard; // Pointer to allocated, but currently unused chunks. + + void **allocs; // Pointers to allocated blocks of chunks. + size_t allocs_size; // Size of the allocs array. + size_t allocs_count; // Number of elements in the allocs array. + +#if HASHMAP_DEBUG_OUTPUT + /* Chain lengths, used for debugging only. */ + size_t chain_length_counts[HASHMAP_MAX_CHAIN_LENGTH]; +#endif +} hashmap_t; + +/** + * Pointer to a function that can take a key, a pointer to a value, and a + * void pointer extra data payload. */ -typedef any_t map_t; +typedef void (*hashmap_mapper_t)(hashmap_key_t, hashmap_value_t *, void *); -/* - * Return an empty hashmap. Returns NULL if empty. -*/ -extern map_t hashmap_new(void); +/** + * @brief Initialize a hashmap. + */ +void hashmap_init(hashmap_t *m); -/* - * Iteratively call f with argument (item, data) for - * each element data in the hashmap. The function must - * return a map status code. If it returns anything other - * than MAP_OK the traversal is terminated. f must - * not reenter any hashmap functions, or deadlock may arise. +/** + * @brief Add a key/value pair to the hashmap, overwriting whatever was + * previously there. */ -extern int hashmap_iterate(map_t in, PFany f, any_t item); +extern void hashmap_put(hashmap_t *m, hashmap_key_t key, hashmap_value_t value); -/* - * Add an element to the hashmap. Return MAP_OK or MAP_OMEM. +/** + * @brief Get the value for a given key. If no value exists a new one will be + * created. + * + * Note that the returned pointer is volatile and will be invalidated if the + * hashmap is re-hashed! */ -extern int hashmap_put(map_t in, char* key, any_t value); +extern hashmap_value_t *hashmap_get(hashmap_t *m, hashmap_key_t key); -/* - * Get an element from the hashmap. Return MAP_OK or MAP_MISSING. +/** + * @brief Get the value for a given key. If no value exists a new one will be + * created. Return a flag indicating whether a new element has been added. + * + * Note that the returned pointer is volatile and will be invalidated if the + * hashmap is re-hashed! */ -extern int hashmap_get(map_t in, char* key, any_t *arg); +extern hashmap_value_t *hashmap_get_new(hashmap_t *m, hashmap_key_t key, int *created_new_element); -/* - * Remove an element from the hashmap. Return MAP_OK or MAP_MISSING. +/** + * @brief Look for the given key and return a pointer to its value or NULL if + * it is not in the hashmap. + * + * Note that the returned pointer is volatile and will be invalidated if the + * hashmap is re-hashed! */ -extern int hashmap_remove(map_t in, char* key); +extern hashmap_value_t *hashmap_lookup(hashmap_t *m, hashmap_key_t key); -/* - * Get any element. Return MAP_OK or MAP_MISSING. - * remove - should the element be removed from the hashmap +/** + * @brief Iterate the function parameter over each element in the hashmap. + * + * The function `f` takes three arguments, the first and second are the element + * key and a pointer to the correspondig value, respectively, while the third + * is the `void *data` argument. */ -extern int hashmap_get_one(map_t in, any_t *arg, int remove); +extern void hashmap_iterate(hashmap_t *m, hashmap_mapper_t f, void *data); -/* - * Free the hashmap +/** + * @brief De-allocate memory associated with this hashmap, clears all the + * entries. 
+ * + * After a call to `hashmap_free`, the hashmap cna be re-initialized with + * `hashmap_init`. */ -extern void hashmap_free(map_t in); +extern void hashmap_free(hashmap_t *m); -/* +/** * Get the current size of a hashmap */ -extern int hashmap_length(map_t in); +extern size_t hashmap_size(hashmap_t *m); + +/** + * @brief Print all sorts of stats on the given hashmap. + */ +void hashmap_print_stats(hashmap_t *m); #endif /*__HASHMAP_H__*/ diff --git a/src/c_hashmap/main.c b/src/c_hashmap/main.c index d8022948b5989e2035d10b3711337cbfb2046f29..348f483a0959548ad1f6e8cf4793ddd7b74bf804 100644 --- a/src/c_hashmap/main.c +++ b/src/c_hashmap/main.c @@ -10,7 +10,7 @@ #define KEY_MAX_LENGTH (256) #define KEY_PREFIX ("somekey") -#define KEY_COUNT (1024*1024) +#define KEY_COUNT (26 * 1024 * 1024) typedef struct data_struct_s { diff --git a/src/engine.c b/src/engine.c index 1fb269bd3ade8c86ba4d73b8e9df459d0c20d9e5..22648ea4a6bf0c8801c82805d94c599993811074 100644 --- a/src/engine.c +++ b/src/engine.c @@ -2203,8 +2203,10 @@ void engine_rebuild(struct engine *e, int repartitioned, } #endif + fof_search_tree(e->s); + /* Re-build the tasks. */ - engine_maketasks(e); + //engine_maketasks(e); /* Make the list of top-level cells that have tasks */ space_list_useful_top_level_cells(e->s); @@ -2851,20 +2853,20 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, /* No time integration. We just want the density and ghosts */ engine_skip_force_and_kick(e); - struct scheduler *sched = &e->sched; - struct task *tasks = sched->tasks; + //struct scheduler *sched = &e->sched; + //struct task *tasks = sched->tasks; /* Activate the send and receive tasks for the gparts. */ - for (int i = 0; i < sched->nr_tasks; i++) { + //for (int i = 0; i < sched->nr_tasks; i++) { - struct task *t = &tasks[i]; + // struct task *t = &tasks[i]; - t->skip = 1; + // t->skip = 1; - if (t->type == task_type_fof_self || t->type == task_type_fof_pair) { - t->skip = 0; - } - } + // if (t->type == task_type_fof_self || t->type == task_type_fof_pair) { + // t->skip = 0; + // } + //} /* Print the number of active tasks ? */ if (e->verbose) engine_print_task_counts(e); @@ -2890,7 +2892,7 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, /* Now, launch the calculation */ TIMER_TIC; - engine_launch(e); + //engine_launch(e); TIMER_TOC(timer_runners); /* Apply some conversions (e.g. internal energy -> entropy) */ @@ -2954,7 +2956,7 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, /* Run the 0th time-step */ TIMER_TIC2; - engine_launch(e); + //engine_launch(e); TIMER_TOC2(timer_runners); #ifdef SWIFT_GRAVITY_FORCE_CHECKS @@ -3223,7 +3225,7 @@ void engine_step(struct engine *e) { /* Start all the tasks. */ TIMER_TIC; - engine_launch(e); + //engine_launch(e); TIMER_TOC(timer_runners); #ifdef SWIFT_GRAVITY_FORCE_CHECKS @@ -3361,7 +3363,7 @@ void engine_check_for_dumps(struct engine *e) { /* Perform a FOF search. */ if (e->run_fof) { - fof_search_tree(e->s); + //fof_search_tree(e->s); e->run_fof = 0; } diff --git a/src/engine.h b/src/engine.h index c3f8694ab42a1428c3765b52ad155a6efe97b1ce..36d6f91b9c050cedfa33474022fc8dce7b48c566 100644 --- a/src/engine.h +++ b/src/engine.h @@ -466,6 +466,7 @@ void engine_pin(void); void engine_unpin(void); void engine_clean(struct engine *e); int engine_estimate_nr_tasks(const struct engine *e); +void engine_print_task_counts(const struct engine *e); /* Function prototypes, engine_maketasks.c. 
*/ void engine_maketasks(struct engine *e); diff --git a/src/fof.c b/src/fof.c index eef1acc2e2925b356fd6dc601809eb9382846723..b67b47b1c7bd45878e385d3b0e5a0469fea98c55 100644 --- a/src/fof.c +++ b/src/fof.c @@ -38,7 +38,7 @@ #include "engine.h" #include "proxy.h" #include "threadpool.h" -#include "c_hashmap/hashmap.c" +#include "c_hashmap/hashmap.h" #ifdef WITH_MPI MPI_Datatype fof_mpi_type; @@ -46,14 +46,8 @@ MPI_Datatype group_length_mpi_type; #endif size_t node_offset; -#define UNION_BY_SIZE_OVER_MPI 1 -#define KEY_MAX_LENGTH (256) - -/* Hash map element. */ -typedef struct data_struct_s { - char key_string[KEY_MAX_LENGTH]; - size_t root; -} data_struct_t; +#define UNION_BY_SIZE_OVER_MPI (1) +#define FOF_COMPRESS_PATHS_MIN_LENGTH (2) /* Initialises parameters for the FOF search. */ void fof_init(struct space *s) { @@ -136,9 +130,9 @@ void fof_init(struct space *s) { for (size_t i = 0; i < nr_local_gparts; i++) { s->fof_data.group_index[i] = i; s->gparts[i].group_id = default_id; + s->fof_data.group_size[i] = 1; } - bzero(s->fof_data.group_size, nr_local_gparts * sizeof(size_t)); bzero(s->fof_data.group_mass, nr_local_gparts * sizeof(double)); bzero(s->fof_data.group_CoM, nr_local_gparts * sizeof(struct fof_CoM)); @@ -217,21 +211,18 @@ __attribute__((always_inline)) INLINE static size_t fof_find( const size_t i, size_t *group_index) { size_t root = i; + size_t tree_depth = 0; while (root != group_index[root]) { #ifdef PATH_HALVING atomic_cas(&group_index[root], group_index[root], group_index[group_index[root]]); #endif root = group_index[root]; + tree_depth++; } - /* Perform path compression. */ - // int index = i; - // while(index != root) { - // int next = group_index[index]; - // group_index[index] = root; - // index = next; - //} + /* Only perform path compression on trees with a depth of FOF_COMPRESS_PATHS_MIN_LENGTH or higher. */ + if(tree_depth >= FOF_COMPRESS_PATHS_MIN_LENGTH) atomic_cas(&group_index[i], group_index[i], root); return root; } @@ -384,46 +375,34 @@ __attribute__((always_inline)) INLINE static int is_local( } #ifdef WITH_MPI -/* Add a group to the hash map. */ +/* Add a group to the hash table. */ __attribute__((always_inline)) INLINE static void hashmap_add_group( - const size_t group_id, const size_t group_offset, const map_t *mymap) { + const size_t group_id, const size_t group_offset, hashmap_t *map) { - data_struct_t* value; - char key_string[KEY_MAX_LENGTH]; + int created_new_element = 0; + hashmap_value_t *value = hashmap_get_new(map, group_id, &created_new_element); - snprintf(key_string, KEY_MAX_LENGTH, "%zu", group_id); + if(value != NULL) { - /* Try and retrieve the group from the hash map. */ - int error = hashmap_get(*mymap, key_string, (void**)(&value)); - - /* Add the group to the hash map if it is not already an element. */ - if(error == MAP_MISSING) { - value = malloc(sizeof(data_struct_t)); - snprintf(value->key_string, KEY_MAX_LENGTH, "%zu", group_id); - value->root = group_offset; - error = hashmap_put(*mymap, value->key_string, value); - if(error != MAP_OK) error("Issue with hash table. Error code: %d", error); + /* If the element is a new entry set its value. */ + if(created_new_element) { + *value = group_offset; + } } - else if (error != MAP_OK) error("Issue with hash table. Error code: %d", error); -} + else error("Couldn't find key (%zu) or create new one.", group_id); -/* Find a group in the hash map. 
*/ -__attribute__((always_inline)) INLINE static int hashmap_find_group_offset( - const size_t group_id, const map_t *mymap) { + } - data_struct_t* value; - char key_string[KEY_MAX_LENGTH]; +/* Find a group in the hash table. */ +__attribute__((always_inline)) INLINE static size_t hashmap_find_group_offset( + const size_t group_id, hashmap_t *map) { - snprintf(key_string, KEY_MAX_LENGTH, "%zu", group_id); + hashmap_value_t *value = hashmap_get(map, group_id); - /* Try and retrieve the group from the hash map. */ - int error = hashmap_get(*mymap, key_string, (void**)(&value)); - if(error == MAP_MISSING) { - error("Group %zu should already be in the hash table.", group_id); - } - else if (error != MAP_OK) error("Issue with hash table. Error code: %d", error); + if(value == NULL) + error("Couldn't find key (%zu) or create new one.", group_id); - return value->root; + return (size_t)(*value); } #endif @@ -823,54 +802,75 @@ void fof_search_pair_cells_foreign(struct space *s, struct cell *ci, * @param num_elements Chunk size. * @param extra_data Pointer to a #space. */ -void fof_search_tree_mapper(void *map_data, int num_elements, +void fof_calc_group_props_mapper(void *map_data, int num_elements, void *extra_data) { /* Retrieve mapped data. */ struct space *s = (struct space *)extra_data; - int *local_cells = (int *)map_data; + struct gpart *gparts = (struct gpart *)map_data; + size_t *group_index = s->fof_data.group_index; + size_t *group_size = s->fof_data.group_size; - const size_t nr_local_cells = s->nr_local_cells; - const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; - const double search_r2 = s->l_x2; + /* Offset into gparts array. */ + ptrdiff_t gparts_offset = (ptrdiff_t)(gparts - s->gparts); + + size_t *const group_index_offset = group_index + gparts_offset; - /* Make a list of cell offsets into the local top-level cell array. */ - int *const offset = - s->cell_index + (ptrdiff_t)(local_cells - s->local_cells_top); + /* Create hash table. */ + hashmap_t map; + hashmap_init(&map); - /* Loop over cells and find which cells are in range of each other to perform + /* Loop over particles and find which cells are in range of each other to perform * the FOF search. */ for (int ind = 0; ind < num_elements; ind++) { - /* Get the cell. */ - struct cell *restrict ci = &s->cells_top[local_cells[ind]]; + hashmap_key_t root = (hashmap_key_t)fof_find(group_index_offset[ind], group_index); + const size_t gpart_index = gparts_offset + ind; + + /* Only add particles which aren't the root of a group. Stops groups of size 1 being added to the hash table. */ + if(root != gpart_index) { + hashmap_value_t *size = hashmap_get(&map, root); - /* Only perform FOF search on local cells. */ - if (ci->nodeID == engine_rank) { + if(size != NULL) (*size)++; + else error("Couldn't find key (%zu) or create new one.", root); + } - /* Skip empty cells. */ - if (ci->grav.count == 0) continue; + } + + if (map.size > 0) { + /* Iterate over the chunks and find elements in use. */ + for (size_t cid = 0; cid < map.table_size / HASHMAP_ELEMENTS_PER_CHUNK; cid++) { - /* Perform FOF search on local particles within the cell. */ - rec_fof_search_self(ci, s, dim, search_r2); + hashmap_chunk_t *chunk = map.chunks[cid]; - /* Loop over all top-level cells skipping over the cells already searched. - */ - for (size_t cjd = offset[ind] + 1; cjd < nr_local_cells; cjd++) { + /* Skip empty chunks. 
*/ + if (chunk == NULL) continue; - struct cell *restrict cj = &s->cells_top[s->local_cells_top[cjd]]; + /* Loop over the masks in this chunk. */ + for (int mid = 0; mid < HASHMAP_MASKS_PER_CHUNK; mid++) { - /* Only perform FOF search on local cells. */ - if (cj->nodeID == engine_rank) { + hashmap_mask_t mask = chunk->masks[mid]; - /* Skip empty cells. */ - if (cj->grav.count == 0) continue; + /* Skip empty masks. */ + if (mask == 0) continue; - rec_fof_search_pair(ci, cj, s, dim, search_r2); + /* Loop over the mask entries. */ + for (int eid = 0; eid < HASHMAP_BITS_PER_MASK; eid++) { + hashmap_mask_t element_mask = ((hashmap_mask_t)1) << eid; + + if (mask & element_mask) { + hashmap_element_t *element = + &chunk->data[mid * HASHMAP_BITS_PER_MASK + eid]; + + atomic_add(&group_size[element->key], element->value); + + } } } } } + hashmap_free(&map); + } #ifdef WITH_MPI @@ -1289,7 +1289,9 @@ size_t fof_search_foreign_cells(struct space *s, size_t **local_roots) { bzero(global_group_CoM, global_group_list_size * sizeof(struct fof_CoM)); /* Create hash table. */ - map_t mymap = hashmap_new(); + hashmap_t map; + hashmap_init(&map); + /* Store each group ID and its properties. */ for (int i = 0; i < global_group_link_count; i++) { @@ -1302,17 +1304,16 @@ size_t fof_search_foreign_cells(struct space *s, size_t **local_roots) { global_group_CoM[group_count].y += global_group_links[i].group_i_CoM.y; global_group_CoM[group_count].z += global_group_links[i].group_i_CoM.z; global_group_id[group_count] = group_i; - hashmap_add_group(group_i, group_count++, &mymap); - + hashmap_add_group(group_i, group_count++, &map); + global_group_size[group_count] += global_group_links[i].group_j_size; global_group_mass[group_count] += global_group_links[i].group_j_mass; global_group_CoM[group_count].x += global_group_links[i].group_j_CoM.x; global_group_CoM[group_count].y += global_group_links[i].group_j_CoM.y; global_group_CoM[group_count].z += global_group_links[i].group_j_CoM.z; global_group_id[group_count] = group_j; - hashmap_add_group(group_j, group_count++, &mymap); - - } + hashmap_add_group(group_j, group_count++, &map); + } message("Global list compression took: %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -1341,8 +1342,8 @@ size_t fof_search_foreign_cells(struct space *s, size_t **local_roots) { for (int i = 0; i < global_group_link_count; i++) { /* Use the hash table to find the group offsets in the index array. */ - int find_i = hashmap_find_group_offset(global_group_links[i].group_i, &mymap); - int find_j = hashmap_find_group_offset(global_group_links[i].group_j, &mymap); + size_t find_i = hashmap_find_group_offset(global_group_links[i].group_i, &map); + size_t find_j = hashmap_find_group_offset(global_group_links[i].group_j, &map); /* Use the offset to find the group's root. */ size_t root_i = fof_find(find_i, global_group_index); @@ -1375,6 +1376,8 @@ size_t fof_search_foreign_cells(struct space *s, size_t **local_roots) { #endif } + hashmap_free(&map); + message("global_group_index construction took: %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -1490,6 +1493,8 @@ void fof_search_tree(struct space *s) { message("Searching %zu gravity particles for links with l_x2: %lf", nr_gparts, s->l_x2); + + message("Size of hash table element: %ld", sizeof(hashmap_element_t)); node_offset = 0; @@ -1516,86 +1521,45 @@ void fof_search_tree(struct space *s) { ticks tic = getticks(); - /* Perform local FOF using the threadpool. 
*/ - // threadpool_map(&s->e->threadpool, fof_search_tree_mapper, - // s->local_cells_top, - // s->nr_local_cells, sizeof(int), 1, s); - - message("Local FOF took: %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); - - struct fof_CoM *group_bc = NULL; - int *com_set = NULL; - const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; - - /* Allocate and initialise a group boundary condition array. */ - if (posix_memalign((void **)&group_bc, 32, - nr_gparts * sizeof(struct fof_CoM)) != 0) - error("Failed to allocate list of group CoM for FOF search."); - - if (posix_memalign((void **)&com_set, 32, nr_gparts * sizeof(int)) != 0) - error("Failed to allocate list of whether the CoM has been initialised."); - - bzero(group_bc, nr_gparts * sizeof(struct fof_CoM)); - bzero(com_set, nr_gparts * sizeof(int)); - - /* Calculate the total number of particles in each group, group mass, group ID - * and group CoM. */ - for (size_t i = 0; i < nr_gparts; i++) { - size_t root = fof_find(i, group_index); - - group_size[root]++; - group_mass[root] += gparts[i].mass; + engine_maketasks(s->e); + //engine_marktasks(s->e); + + struct scheduler *sched = &s->e->sched; + struct task *tasks = sched->tasks; - double x = gparts[i].x[0]; - double y = gparts[i].x[1]; - double z = gparts[i].x[2]; + /* Activate the self and pair FOF tasks. */ + for (int i = 0; i < sched->nr_tasks; i++) { - /* Use the CoM location set by the first particle added to the group. */ - if (com_set[root]) { + struct task *t = &tasks[i]; - /* Periodically wrap particle positions if they are located on the other - * side of the domain than the CoM location. */ - if (group_bc[root].x > 0.5 * dim[0] && x < 0.5 * dim[0]) - x += dim[0]; - else if (group_bc[root].x == 0.0 && x > 0.5 * dim[0]) - x -= dim[0]; + if (t->type == task_type_fof_self || t->type == task_type_fof_pair) { + scheduler_activate(sched, t); + } - if (group_bc[root].y > 0.5 * dim[1] && y < 0.5 * dim[1]) - y += dim[1]; - else if (group_bc[root].y == 0.0 && y > 0.5 * dim[1]) - y -= dim[1]; + } - if (group_bc[root].z > 0.5 * dim[2] && z < 0.5 * dim[2]) - z += dim[2]; - else if (group_bc[root].z == 0.0 && z > 0.5 * dim[2]) - z -= dim[2]; + engine_print_task_counts(s->e); - } else { + message("Making FOF tasks took: %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); - com_set[root] = 1; + tic = getticks(); - /* Use the first particle to set the CoM location in the domain. 
*/ - if (x > 0.5 * dim[0]) - group_bc[root].x = dim[0]; - else - group_bc[root].x = 0.0; + engine_launch(s->e); - if (y > 0.5 * dim[1]) - group_bc[root].y = dim[1]; - else - group_bc[root].y = 0.0; + message("Local FOF took: %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); - if (z > 0.5 * dim[2]) - group_bc[root].z = dim[2]; - else - group_bc[root].z = 0.0; - } + ticks tic_calc_group_size = getticks(); - group_CoM[root].x += gparts[i].mass * x; - group_CoM[root].y += gparts[i].mass * y; - group_CoM[root].z += gparts[i].mass * z; - } + threadpool_map(&s->e->threadpool, fof_calc_group_props_mapper, + gparts, nr_gparts, sizeof(struct gpart), nr_gparts / s->e->nr_threads, s); + + message("FOF calc group size took (scaling): %.3f %s.", + clocks_from_ticks(getticks() - tic_calc_group_size), clocks_getunit()); + + message("FOF search took (scaling): %.3f %s.", + clocks_from_ticks(getticks() - tic_total), clocks_getunit()); free(group_bc); free(com_set); diff --git a/src/space.c b/src/space.c index 09e064b9866be99eb66d3e8a8aaf09ae3dfd8667..19148a73976efcc0f10d6bb8e2800a8fe92f2ef9 100644 --- a/src/space.c +++ b/src/space.c @@ -490,14 +490,6 @@ void space_regrid(struct space *s, int verbose) { error("Failed to allocate indices of local top-level cells with tasks."); bzero(s->local_cells_with_tasks_top, s->nr_cells * sizeof(int)); - /* Allocate and initialise array of cell indices. */ - if (posix_memalign((void **)&s->cell_index, 32, - s->nr_cells * sizeof(int)) != 0) - error("Failed to allocate list of cells for FOF search."); - - /* Set cell index into list of top-level cells. */ - for (int i = 0; i < s->nr_cells; i++) s->cell_index[i] = i; - /* Allocate the indices of cells with particles */ if (posix_memalign((void **)&s->cells_with_particles_top, SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0) diff --git a/src/space.h b/src/space.h index f4d2196de74b6bf90712aa1dfbbaca9d1adea731..3802ab9e8bfa3edd3368f1aa1aad25b6b10196b6 100644 --- a/src/space.h +++ b/src/space.h @@ -241,9 +241,6 @@ struct space { /*! The FOF group data. */ struct fof fof_data; - /*! List of cell indices. */ - int *cell_index; - /*! The group information returned by VELOCIraptor for each #gpart. */ struct velociraptor_gpart_data *gpart_group_data; diff --git a/tests/Makefile.am b/tests/Makefile.am index f18b6c44c63f6e394bc2b47616f86da5dbcd54e2..db4e9dbe3de6956e36aebeb2fd738e4492b493ac 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -41,7 +41,7 @@ check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ testVoronoi1D testVoronoi2D testVoronoi3D testPeriodicBC \ testGravityDerivatives testPotentialSelf testPotentialPair testEOS testUtilities \ testSelectOutput testCbrt testCosmology testOutputList test27cellsStars \ - test27cellsStars_subset testCooling + test27cellsStars_subset testCooling testHashmap # Rebuild tests when SWIFT is updated. 
$(check_PROGRAMS): ../src/.libs/libswiftsim.a @@ -132,6 +132,8 @@ testUtilities_SOURCES = testUtilities.c testCooling_SOURCES = testCooling.c +testHashmap_SOURCES = testHashmap.c + # Files necessary for distribution EXTRA_DIST = testReading.sh makeInput.py testActivePair.sh \ test27cells.sh test27cellsPerturbed.sh testParser.sh testPeriodicBC.sh \ diff --git a/tests/testHashmap.c b/tests/testHashmap.c new file mode 100644 index 0000000000000000000000000000000000000000..4a2663e4c9d2b870bbbd8b20cc203cc35afb8843 --- /dev/null +++ b/tests/testHashmap.c @@ -0,0 +1,62 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2019 James Willis (james.s.willis@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Local headers. */ +#include "swift.h" + +#include "../src/c_hashmap/hashmap.h" + +#define NUM_KEYS (26 * 1000 * 1000) + +int main(int argc, char *argv[]) { + + hashmap_t m; + + message("Initialising hash table..."); + hashmap_init(&m); + + message("Populating hash table..."); + for(hashmap_value_t key=0; key<NUM_KEYS; key++) { + hashmap_put(&m, key, key); + } + + message("Dumping hashmap stats."); + hashmap_print_stats(&m); + + message("Retrieving elements from the hash table..."); + for(hashmap_value_t key=0; key<NUM_KEYS; key++) { + hashmap_value_t value = *hashmap_lookup(&m, key); + + if(value != key) error("Incorrect value (%zu) found for key: %zu", value, key); + //else message("Retrieved element, Key: %zu Value: %zu", key, value); + } + + message("Checking for invalid key..."); + if(hashmap_lookup(&m, NUM_KEYS + 1) != NULL) error("Key: %d shouldn't exist or be created.", NUM_KEYS + 1); + + message("Checking hash table size..."); + if(m.size != NUM_KEYS) error("The no. of elements stored in the hash table are not equal to the no. of keys. No. of elements: %zu, no. 
of keys: %d", m.size, NUM_KEYS); + + message("Freeing hash table..."); + hashmap_free(&m); + +} diff --git a/tools/task_plots/plot_tasks.py b/tools/task_plots/plot_tasks.py index 51ad46d0c054aada6c050732470d5bc8f1fcd453..d50a19e6a3110f28121a5e7b2129e0f7fd69cc94 100755 --- a/tools/task_plots/plot_tasks.py +++ b/tools/task_plots/plot_tasks.py @@ -256,8 +256,6 @@ colours = [ "sienna", "aquamarine", "bisque", - "blue", - "green", "lightgreen", "brown", "purple", @@ -283,6 +281,8 @@ colours = [ "firebrick", "magenta", "hotpink", + "blue", + "green", "pink", "orange", "lightgreen", @@ -442,11 +442,13 @@ for rank in ranks: toc = int(data[line, toccol]) / CPU_CLOCK tasks[thread][-1]["tic"] = tic tasks[thread][-1]["toc"] = toc - if ( - "self" in tasktype - or "pair" in tasktype - or "recv" in tasktype - or "send" in tasktype + if ("fof" in tasktype): + tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype] + elif( + "self" in tasktype + or "pair" in tasktype + or "recv" in tasktype + or "send" in tasktype ): fulltype = tasktype + "/" + subtype if fulltype in SUBCOLOURS: