diff --git a/doc/RTD/source/AnalysisTools/index.rst b/doc/RTD/source/AnalysisTools/index.rst index 7ba9f6256d841c7bb3bec42abab6a5c22ba4083e..f7f2f979666270ce371b532b6eab7bad3a23c9bd 100644 --- a/doc/RTD/source/AnalysisTools/index.rst +++ b/doc/RTD/source/AnalysisTools/index.rst @@ -58,24 +58,13 @@ can be made by calling the ``memuse_log_allocation()`` function directly. The output files are called ``memuse_report-step<n>.dat`` or ``memuse_report-rank<m>-step<n>.dat`` if running using MPI. These have a line -for each allocation or free that records the time, memory address, step, -whether an allocation or free, the label and when an allocation, the amount of -memory. The comments in this file also record the actual memory use of the -process (including threads) as reported by the operating system at the end of -the step. - -To post process these files into a memory used timeline and get a report of -the peak memory use, as well as the memory still in use at the end of the step -there is an basic analysis script ``analyse_memuse_logs.py`` and two wrappers -that process a directory of logs, these are ``./process_memuse_logs.sh`` and -``./process_memuse_logs_MPI.sh`` for non-MPI and MPI runs respectively. - -Note that the process scripts process each step individually and also process -all the logs as a single sequence. When interpreting these some care should be -taken as they are not all the memory allocated, just important allocations in -SWIFT and when looking at a single step the context of any previous steps is -not used, so you only see allocations made in that step and the effect of any -matching frees (so allocations made in previous steps that are freed in this -step will not be understood and will be ignored, you need the global analysis -to understand that). +for each allocation or free that records the time, step, whether an allocation +or free, the label, the amount of memory allocated or freed and the total of +all (labelled) memory in use at that time. + +Comments at the end of this file also record the actual memory use of the +process (including threads), as reported by the operating system at the end of +the step, and the total memory still in use per label. Note this includes +memory still active from previous steps and the total memory is also continued +from the previous dump. diff --git a/src/cache.h b/src/cache.h index e5a62f33b3eb492f9da6e0e98ed767d6b8de32dd..e2e5185d5ac8e12761037eee1a79a26f827ad843 100644 --- a/src/cache.h +++ b/src/cache.h @@ -346,7 +346,7 @@ __attribute__((always_inline)) INLINE int cache_read_particles_subset_self( */ __attribute__((always_inline)) INLINE void cache_read_particles_subset_pair( const struct cell *restrict const ci, struct cache *restrict const ci_cache, - const struct entry *restrict sort_i, int *first_pi, int *last_pi, + const struct sort_entry *restrict sort_i, int *first_pi, int *last_pi, const double *loc, const int flipped) { #if defined(GADGET2_SPH) @@ -608,9 +608,10 @@ __attribute__((always_inline)) INLINE int cache_read_force_particles( __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( const struct cell *restrict const ci, const struct cell *restrict const cj, struct cache *restrict const ci_cache, - struct cache *restrict const cj_cache, const struct entry *restrict sort_i, - const struct entry *restrict sort_j, const double *restrict const shift, - int *first_pi, int *last_pj) { + struct cache *restrict const cj_cache, + const struct sort_entry *restrict sort_i, + const struct sort_entry *restrict sort_j, + const double *restrict const shift, int *first_pi, int *last_pj) { /* Make the number of particles to be read a multiple of the vector size. * This eliminates serial remainder loops where possible when populating the @@ -860,8 +861,9 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted_force( const struct cell *const ci, const struct cell *const cj, struct cache *const ci_cache, struct cache *const cj_cache, - const struct entry *restrict sort_i, const struct entry *restrict sort_j, - const double *const shift, int *first_pi, int *last_pj) { + const struct sort_entry *restrict sort_i, + const struct sort_entry *restrict sort_j, const double *const shift, + int *first_pi, int *last_pj) { /* Make the number of particles to be read a multiple of the vector size. * This eliminates serial remainder loops where possible when populating the diff --git a/src/cell.h b/src/cell.h index 4f8bed5fa5e337842bf37a0b382e3915a806a474..7500522f1737cacd43aa2cf4d0cb52eb53bec68b 100644 --- a/src/cell.h +++ b/src/cell.h @@ -332,7 +332,7 @@ struct cell { struct xpart *xparts; /*! Pointer for the sorted indices. */ - struct entry *sort[13]; + struct sort_entry *sort[13]; /*! Super cell, i.e. the highest-level parent cell that has a hydro * pair/self tasks */ @@ -600,7 +600,7 @@ struct cell { float dx_max_sort_old; /*! Pointer for the sorted indices. */ - struct entry *sort[13]; + struct sort_entry *sort[13]; /*! Bit mask of sort directions that will be needed in the next timestep. */ uint16_t requires_sorts; @@ -1261,8 +1261,8 @@ __attribute__((always_inline)) INLINE static void cell_malloc_hydro_sorts( * on the same dimensions), so we need separate allocations per dimension. */ for (int j = 0; j < 13; j++) { if ((flags & (1 << j)) && c->hydro.sort[j] == NULL) { - if ((c->hydro.sort[j] = (struct entry *)swift_malloc( - "hydro.sort", sizeof(struct entry) * (count + 1))) == NULL) + if ((c->hydro.sort[j] = (struct sort_entry *)swift_malloc( + "hydro.sort", sizeof(struct sort_entry) * (count + 1))) == NULL) error("Failed to allocate sort memory."); } } @@ -1299,8 +1299,8 @@ __attribute__((always_inline)) INLINE static void cell_malloc_stars_sorts( * on the same dimensions), so we need separate allocations per dimension. */ for (int j = 0; j < 13; j++) { if ((flags & (1 << j)) && c->stars.sort[j] == NULL) { - if ((c->stars.sort[j] = (struct entry *)swift_malloc( - "stars.sort", sizeof(struct entry) * (count + 1))) == NULL) + if ((c->stars.sort[j] = (struct sort_entry *)swift_malloc( + "stars.sort", sizeof(struct sort_entry) * (count + 1))) == NULL) error("Failed to allocate sort memory."); } } diff --git a/src/memuse.c b/src/memuse.c index 9b29b377554e724337f19caa35baa5f0dd2f8f47..7a58f71b189f885d76a986398c490cfcf81aebb5 100644 --- a/src/memuse.c +++ b/src/memuse.c @@ -42,17 +42,22 @@ #ifdef SWIFT_MEMUSE_REPORTS +/* The initial size and increment of the log entries buffer. */ +#define MEMUSE_INITLOG 1000000 + +/* A megabyte for conversions. */ +#define MEGABYTE 1048576.0 + +/* Maximum length of label in log entry. */ +#define MEMUSE_MAXLABLEN 32 + /* Also recorded in logger. */ extern int engine_rank; extern int engine_current_step; /* Entry for logger of memory allocations and deallocations in a step. */ -#define MEMUSE_MAXLAB 64 struct memuse_log_entry { - /* Rank in action. */ - int rank; - /* Step of action. */ int step; @@ -62,23 +67,291 @@ struct memuse_log_entry { /* Memory allocated in bytes. */ size_t size; - /* Address of memory. */ - void *ptr; + /* Address of memory. Use union as easy way to convert into an array of + * bytes. */ + union { + void *ptr; + uint8_t vptr[sizeof(uintptr_t)]; + }; /* Relative time of this action. */ ticks dtic; /* Label associated with the memory. */ - char label[MEMUSE_MAXLAB + 1]; + char label[MEMUSE_MAXLABLEN + 1]; + + /* Whether log is still active, i.e. not matched with a free or allocation. */ + int active; }; -/* The log of allocations and frees. */ +/* The log of allocations and frees. All volatile as accessed from threads + * that use the value to synchronise. */ static struct memuse_log_entry *volatile memuse_log = NULL; static volatile size_t memuse_log_size = 0; static volatile size_t memuse_log_count = 0; +static volatile size_t memuse_old_count = 0; static volatile size_t memuse_log_done = 0; -#define MEMUSE_INITLOG 1000000 +/* Current sum of memory in use. Only used in dumping. */ +static size_t memuse_current = 0; + +/* Label usage gathering struct. Only used in dumping. */ +struct memuse_labelled_item { + size_t sum; + size_t count; +}; + +/* A radix node, this has a single byte key and a pointer to some related + * resource. It also holds a sorted list of children, if any. */ +struct memuse_rnode { + + /* Byte key of this node. */ + uint8_t keypart; + + /* Value of this node, if set. */ + void *ptr; + + /* Sorted pointers to children of this node. */ + struct memuse_rnode **children; + unsigned int count; +}; + +/* Persistent radix trie root node. Holds active logs between dumps. */ +static struct memuse_rnode *memuse_rnode_root; +static int memuse_rnode_root_init = 1; + +#ifdef MEMUSE_RNODE_DUMP +/** + * @brief Dump a representation of the radix tree rooted at a node to stdout. + * + * @param depth the depth of the node in the tree, root is 0. + * @param node the node at which to start dumping. + * @param full if not zero then nodes that are not storing a value + * are also reported. + */ +static void memuse_rnode_dump(int depth, struct memuse_rnode *node, int full) { + + /* Value of the full key, to this depth. Assumes full key is a pointer, + * so uncomment when using strings. */ + static union { + //uint8_t key[MEMUSE_MAXLABLEN]; + //char ptr[MEMUSE_MAXLABLEN]; + uint8_t key[sizeof(uintptr_t)]; + void *ptr; + } keyparts = {0}; + + /* Record keypart at this depth. Root has no keypart. */ + if (depth != 0) keyparts.key[depth - 1] = node->keypart; + + //if (node->ptr != NULL || full) { + // keyparts.key[depth] = '\0'; + // + // /* Gather children's keys if full. */ + // char fullkey[MEMUSE_MAXLABLEN]; + // if (full) { + // for (size_t k = 0; k < node->count; k++) { + // fullkey[k] = node->children[k]->keypart; + // } + // fullkey[node->count] = '\0'; + // printf("dump @ depth: %d keypart: %d key: %s value: %p fullkey: %s\n", + // depth, node->keypart, keyparts.ptr, node->ptr, fullkey); + // } else { + // printf("dump @ depth: %d keypart: %d key: %s value: %p\n", depth, + // node->keypart, keyparts.ptr, node->ptr); + // } + //} + + if (node->ptr != NULL || full) { + printf("dump @ depth: %d keypart: %d key: %p value: %p\n", depth, + node->keypart, keyparts.ptr, node->ptr); + } + + /* Recurse to all children. */ + for (size_t k = 0; k < node->count; k++) { + memuse_rnode_dump(depth + 1, node->children[k], full); + } +} +#endif + +/** + * @brief Return the position of a keypart for a list of children. + * If not found returns where it would be inserted. + * + * @param keypart the keypart to locate. + * @param children the list of sorted children. + * @param count the number of children + * + * @return the index of key or where it should be inserted. + */ +static unsigned int memuse_rnode_bsearch(uint8_t keypart, + struct memuse_rnode **children, + unsigned int count) { + + /* Search for lower bound. */ + unsigned int lower = 0; + unsigned int upper = count; + while (lower < upper) { + unsigned int middle = (upper + lower) / 2; + if (keypart > children[middle]->keypart) + lower = middle + 1; + else + upper = middle; + } + return lower; +} + +/** + * @brief Insert a child, if needed, into a list of children. Assumes + * we have sufficient room. + * + * @param child the child to insert, if needed. + * @param children the list of sorted children. + * @param count the number of children + */ +static void memuse_rnode_binsert_child(struct memuse_rnode *child, + struct memuse_rnode **children, + unsigned int *count) { + unsigned int pos = 0; + if (*count > 0) { + + /* Find the child or insertion point. */ + pos = memuse_rnode_bsearch(child->keypart, children, *count); + + /* If not found move all children to make a space, unless we're inserting + * after the end. */ + if (pos < *count && children[pos]->keypart != child->keypart) { + memmove(&children[pos + 1], &children[pos], + (*count - pos) * sizeof(struct memuse_rnode *)); + } + } + + /* Insert new child */ + children[pos] = child; + *count += 1; +} + +/** + * @brief Add a child rnode to an rnode. Making sure we have room and keeping + * the sort order. + * + * @param node the parent node. + * @param child the node to add to the parent, + */ +static void memuse_rnode_add_child(struct memuse_rnode *node, + struct memuse_rnode *child) { + + /* Extend the children list to include a new entry .*/ + void *mem = realloc(node->children, + (node->count + 1) * sizeof(struct memuse_rnode *)); + if (mem == NULL) error("Failed to reallocate rnodes\n"); + node->children = mem; + + /* Insert the new child. */ + memuse_rnode_binsert_child(child, node->children, &node->count); +} + +/** + * @brief Find a child of a node with the given key part. + * + * @param node the node to search. + * @param keypart the key part of the child. + * @return NULL if not found. + */ +static struct memuse_rnode *memuse_rnode_lookup(const struct memuse_rnode *node, + uint8_t keypart) { + + /* Locate the key, or where it would be inserted. */ + if (node->count > 0) { + unsigned int index = memuse_rnode_bsearch(keypart, node->children, + node->count); + if (index < node->count && keypart == node->children[index]->keypart) { + return node->children[index]; + } + } + return NULL; +} + +/** + * @brief insert a child into a node's children list and add a pointer, iff + * this is the destination node for the given key. + * + * @param node the parent node. + * @param depth the depth of the parent node. + * @param key the full key of the eventual leaf node. + * @param keylen the numbers of bytes in the full key. + * @param value pointer that will be stored as the value of the leaf node. + */ +static void memuse_rnode_insert_child(struct memuse_rnode *node, uint8_t depth, + uint8_t *key, uint8_t keylen, + void *value) { + + /* Check if keypart this already exists at this level and add new child if + * not. */ + uint8_t keypart = key[depth]; + struct memuse_rnode *child = memuse_rnode_lookup(node, keypart); + if (child == NULL) { + child = calloc(1, sizeof(struct memuse_rnode)); + child->keypart = keypart; + memuse_rnode_add_child(node, child); + } + + /* Are we at the lowest level yet? */ + depth++; + if (depth == keylen) { + /* Our destination node. */ + +#if SWIFT_DEBUG_CHECKS + if (child->ptr != NULL) + message("Overwriting rnode value: %p with %p", child->ptr, value); +#endif + child->ptr = value; + return; + } + + /* Down we go to the next level. */ + memuse_rnode_insert_child(child, depth, key, keylen, value); + return; +} + +/** + * @brief Find a child node for the given full key. + * + * @param node the current parent node. + * @param depth the depth of the parent node, 0 for first call. + * @param key the full key of the expected child node. + * @param keylen the number of bytes in the key. + */ +static struct memuse_rnode *memuse_rnode_find_child(struct memuse_rnode *node, + uint8_t depth, uint8_t *key, + uint8_t keylen) { + uint8_t keypart = key[depth]; + struct memuse_rnode *child = NULL; + if (node->count > 0) child = memuse_rnode_lookup(node, keypart); + if (child != NULL && (depth + 1) < keylen) { + return memuse_rnode_find_child(child, depth + 1, key, keylen); + } + return child; +} + +/** + * @brief Free all resources associated with a node. + * + * @param node the rnode. + */ +static void memuse_rnode_cleanup(struct memuse_rnode *node) { + + if (!node) return; + + for (size_t k = 0; k < node->count; k++) { + memuse_rnode_cleanup(node->children[k]); + free(node->children[k]); + } + if (node->count > 0) free(node->children); +} + +/** + * @brief reallocate the entries log if space is needed. + */ static void memuse_log_reallocate(size_t ind) { if (ind == 0) { @@ -94,7 +367,8 @@ static void memuse_log_reallocate(size_t ind) { } else { struct memuse_log_entry *new_log; if ((new_log = (struct memuse_log_entry *)malloc( - sizeof(struct memuse_log_entry) * memuse_log_size * 2)) == NULL) + sizeof(struct memuse_log_entry) * + (memuse_log_size + MEMUSE_INITLOG))) == NULL) error("Failed to re-allocate memuse log."); /* Wait for all writes to the old buffer to complete. */ @@ -107,8 +381,8 @@ static void memuse_log_reallocate(size_t ind) { free(memuse_log); memuse_log = new_log; - /* Last action. */ - memuse_log_size *= 2; + /* Last action, releases waiting threads. */ + atomic_add(&memuse_log_size, MEMUSE_INITLOG); } } @@ -123,6 +397,7 @@ static void memuse_log_reallocate(size_t ind) { */ void memuse_log_allocation(const char *label, void *ptr, int allocated, size_t size) { + size_t ind = atomic_inc(&memuse_log_count); /* If we are at the current size we need more space. */ @@ -133,14 +408,14 @@ void memuse_log_allocation(const char *label, void *ptr, int allocated, ; /* Record the log. */ - memuse_log[ind].rank = engine_rank; memuse_log[ind].step = engine_current_step; memuse_log[ind].allocated = allocated; memuse_log[ind].size = size; memuse_log[ind].ptr = ptr; - strncpy(memuse_log[ind].label, label, MEMUSE_MAXLAB); - memuse_log[ind].label[MEMUSE_MAXLAB] = '\0'; + strncpy(memuse_log[ind].label, label, MEMUSE_MAXLABLEN); + memuse_log[ind].label[MEMUSE_MAXLABLEN] = '\0'; memuse_log[ind].dtic = getticks() - clocks_start_ticks; + memuse_log[ind].active = 1; atomic_inc(&memuse_log_done); } @@ -152,30 +427,210 @@ void memuse_log_allocation(const char *label, void *ptr, int allocated, void memuse_log_dump(const char *filename) { /* Skip if nothing allocated this step. */ - if (memuse_log_count == 0) return; + if (memuse_log_count == memuse_old_count) return; + + //ticks tic = getticks(); + + /* Create the radix tree. If not already done. */ + if (memuse_rnode_root_init) { + memuse_rnode_root = + (struct memuse_rnode *)calloc(1, sizeof(struct memuse_rnode)); + memuse_rnode_root_init = 0; + } + + /* Stop any new logs from being processed while we are dumping. + * Remember to not abort with error() in this section, that is recursive + * with the exit handler. */ + size_t log_count = memuse_log_count; + size_t old_count = memuse_old_count; /* Open the output file. */ FILE *fd; - if ((fd = fopen(filename, "w")) == NULL) - error("Failed to create memuse log file '%s'.", filename); + if ((fd = fopen(filename, "w")) == NULL) { + message("Failed to create memuse log file '%s', logs not dumped.", + filename); + return; + } /* Write a header. */ - fprintf(fd, "# Current use: %s\n", memuse_process(1)); - fprintf(fd, "# cpufreq: %lld\n", clocks_get_cpufreq()); - fprintf(fd, "# dtic adr rank step allocated label size\n"); + fprintf(fd, "# dtic step label size sum\n"); + + size_t memuse_maxmem = memuse_current; + for (size_t k = old_count; k < log_count; k++) { + + /* Check if this address has already been recorded. */ + struct memuse_rnode *child = memuse_rnode_find_child( + memuse_rnode_root, 0, memuse_log[k].vptr, sizeof(uintptr_t)); + + if (child != NULL && child->ptr != NULL) { + + /* Found the allocation, this should be the free. */ + if (memuse_log[k].allocated) { + + /* Allocated twice, this is an error, but we cannot abort as that will + * attempt another memory dump, so just complain. */ +#if SWIFT_DEBUG_CHECKS + message("Allocated the same address twice (%s: %zd)", + memuse_log[k].label, memuse_log[k].size); +#endif + continue; + } + + /* Free, update the size to remove the allocation. */ + struct memuse_log_entry *oldlog = (struct memuse_log_entry *)child->ptr; + memuse_log[k].size = -oldlog->size; + + /* And deactivate this key. */ + child->ptr = NULL; + + /* And mark this as matched. */ + memuse_log[k].active = 0; + oldlog->active = 0; + + } else if (child == NULL && memuse_log[k].allocated) { + + /* Not found, so new allocation which we store the log against the + * address. */ + memuse_rnode_insert_child(memuse_rnode_root, 0, memuse_log[k].vptr, + sizeof(uintptr_t), &memuse_log[k]); - for (size_t k = 0; k < memuse_log_count; k++) { - fprintf(fd, "%lld %p %d %d %d %s %zd\n", memuse_log[k].dtic, - memuse_log[k].ptr, memuse_log[k].rank, memuse_log[k].step, - memuse_log[k].allocated, memuse_log[k].label, memuse_log[k].size); + } else if (child == NULL && !memuse_log[k].allocated) { + + /* Unmatched free, OK if NULL. */ +#if SWIFT_DEBUG_CHECKS + if (memuse_log[k].ptr != NULL) { + message("Unmatched non-NULL free: %s", memuse_log[k].label); + } +#endif + continue; + } else if (memuse_log[k].allocated) { + + /* Must be previously released allocation with same address, so we + * store. */ + memuse_rnode_insert_child(memuse_rnode_root, 0, memuse_log[k].vptr, + sizeof(uintptr_t), &memuse_log[k]); + + } else { + /* Should not happen ... */ + message("weird memory log record for label '%s' skipped", + memuse_log[k].label); + continue; + } + + /* Keep maximum and rolling sum. */ + memuse_current += memuse_log[k].size; + if (memuse_current > memuse_maxmem) memuse_maxmem = memuse_current; + + /* And output. */ + fprintf(fd, "%lld %d %s %zd %zd\n", memuse_log[k].dtic, + memuse_log[k].step, memuse_log[k].label, + memuse_log[k].size, memuse_current); } - /* Clear the log. */ - memuse_log_count = 0; +#ifdef MEMUSE_RNODE_DUMP + /* Debug dump of tree. */ + //memuse_rnode_dump(0, memuse_rnode_root, 0); +#endif + + /* Now we find all the still active nodes and gather their sizes against the + * labels. */ + struct memuse_rnode *activernodes = + (struct memuse_rnode *)calloc(1, sizeof(struct memuse_rnode)); + size_t newcount = 0; + struct memuse_rnode *labellednodes = + (struct memuse_rnode *)calloc(1, sizeof(struct memuse_rnode)); + size_t *lindices = (size_t *)calloc(log_count, sizeof(size_t)); + size_t lcount = 0; + for (size_t k = 0; k < log_count; k++) { + + /* Only allocations are stored also is it active? */ + if (memuse_log[k].allocated && memuse_log[k].active) { + + /* Look for this label in our tree. */ + struct memuse_rnode *labelchild = memuse_rnode_find_child( + labellednodes, 0, (uint8_t *)memuse_log[k].label, + strlen(memuse_log[k].label)); + struct memuse_labelled_item *item = NULL; + if (labelchild == NULL || labelchild->ptr == NULL) { + + /* New, so create an instance to keep the count. */ + item = (struct memuse_labelled_item *)calloc( + 1, sizeof(struct memuse_labelled_item)); + item->sum = 0; + item->count = 0; + memuse_rnode_insert_child(labellednodes, 0, + (uint8_t *)memuse_log[k].label, + strlen(memuse_log[k].label), item); + + /* Keep for indexing next time. */ + lindices[lcount] = newcount; + lcount++; + } else { + item = (struct memuse_labelled_item *)labelchild->ptr; + } + + /* And increment sum. */ + item->sum += memuse_log[k].size; + item->count++; + + /* Keep this in new log entry tree. Move to head. */ + memcpy(&memuse_log[newcount], &memuse_log[k], + sizeof(struct memuse_log_entry)); + memuse_rnode_insert_child(activernodes, 0, memuse_log[newcount].vptr, + sizeof(uintptr_t), &memuse_log[newcount]); + newcount++; + } + } + + /* And move all active logs to a clean new tree for next time. */ + memuse_log_count = newcount; + memuse_old_count = newcount; + memuse_rnode_cleanup(memuse_rnode_root); + free(memuse_rnode_root); + memuse_rnode_root = activernodes; + + /* Now dump the labelled counts. */ + fprintf(fd, "# Memory use by label:\n"); + fprintf(fd, "## %30s %16s %16s\n", "label", "MB", "numactive"); + fprintf(fd, "##\n"); + + size_t total_mem = 0; + for (size_t k = 0; k < lcount; k++) { + size_t ind = lindices[k]; + + /* Find this entry. */ + struct memuse_rnode *labelchild = memuse_rnode_find_child( + labellednodes, 0, (uint8_t *)memuse_log[ind].label, + strlen(memuse_log[ind].label)); + struct memuse_labelled_item *item = + (struct memuse_labelled_item *)labelchild->ptr; + fprintf(fd, "## %30s %16.3f %16zd\n", memuse_log[ind].label, + item->sum / MEGABYTE, item->count); + total_mem += item->sum; + + /* Don't need this again. */ + free(item); + } + fprintf(fd, "##\n"); + fprintf(fd, "# Total memory still in use : %.3f (MB)\n", + total_mem / MEGABYTE); + fprintf(fd, "# Peak memory usage : %.3f (MB)\n", + memuse_maxmem / MEGABYTE); + fprintf(fd, "#\n"); + fprintf(fd, "# Memory use by process (all/system): %s\n", memuse_process(1)); + fprintf(fd, "# cpufreq: %lld\n", clocks_get_cpufreq()); + + /* Clean up tree. */ + memuse_rnode_cleanup(labellednodes); + free(labellednodes); + free(lindices); /* Close the file. */ fflush(fd); fclose(fd); + + //message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + // clocks_getunit()); } /** diff --git a/src/runner.c b/src/runner.c index c1ac634eb752798005d551994a9306e3075052bc..49302fe08c572ce4f372d882c6a0c2c944c14412 100644 --- a/src/runner.c +++ b/src/runner.c @@ -1194,13 +1194,13 @@ void runner_do_star_formation(struct runner *r, struct cell *c, int timer) { * @param sort The entries * @param N The number of entries. */ -void runner_do_sort_ascending(struct entry *sort, int N) { +void runner_do_sort_ascending(struct sort_entry *sort, int N) { struct { short int lo, hi; } qstack[10]; int qpos, i, j, lo, hi, imin; - struct entry temp; + struct sort_entry temp; float pivot; /* Sort parts in cell_i in decreasing order with quicksort */ @@ -1305,7 +1305,7 @@ RUNNER_CHECK_SORTS(stars) void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags, int cleanup, int clock) { - struct entry *fingers[8]; + struct sort_entry *fingers[8]; const int count = c->hydro.count; const struct part *parts = c->hydro.parts; struct xpart *xparts = c->hydro.xparts; @@ -1417,7 +1417,7 @@ void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags, } /* For each entry in the new sort list. */ - struct entry *finger = c->hydro.sort[j]; + struct sort_entry *finger = c->hydro.sort[j]; for (int ind = 0; ind < count; ind++) { /* Copy the minimum into the new sort array. */ @@ -1496,7 +1496,7 @@ void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags, /* Verify the sorting. */ for (int j = 0; j < 13; j++) { if (!(flags & (1 << j))) continue; - struct entry *finger = c->hydro.sort[j]; + struct sort_entry *finger = c->hydro.sort[j]; for (int k = 1; k < count; k++) { if (finger[k].d < finger[k - 1].d) error("Sorting failed, ascending array."); @@ -1538,7 +1538,7 @@ void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags, void runner_do_stars_sort(struct runner *r, struct cell *c, int flags, int cleanup, int clock) { - struct entry *fingers[8]; + struct sort_entry *fingers[8]; const int count = c->stars.count; struct spart *sparts = c->stars.parts; float buff[8]; @@ -1649,7 +1649,7 @@ void runner_do_stars_sort(struct runner *r, struct cell *c, int flags, } /* For each entry in the new sort list. */ - struct entry *finger = c->stars.sort[j]; + struct sort_entry *finger = c->stars.sort[j]; for (int ind = 0; ind < count; ind++) { /* Copy the minimum into the new sort array. */ @@ -1722,7 +1722,7 @@ void runner_do_stars_sort(struct runner *r, struct cell *c, int flags, /* Verify the sorting. */ for (int j = 0; j < 13; j++) { if (!(flags & (1 << j))) continue; - struct entry *finger = c->stars.sort[j]; + struct sort_entry *finger = c->stars.sort[j]; for (int k = 1; k < count; k++) { if (finger[k].d < finger[k - 1].d) error("Sorting failed, ascending array."); diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 635d41a95d320dd99eb806c9aec61127e8c7e42d..a69489ed29c4cbf51fa25243944206efe4b00188 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -681,7 +681,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, const float H = cosmo->H; /* Pick-out the sorted lists. */ - const struct entry *restrict sort_j = cj->hydro.sort[sid]; + const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; const float dxj = cj->hydro.dx_max_sort; /* Parts are on the left? */ @@ -984,8 +984,8 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; /* Pick-out the sorted lists. */ - const struct entry *restrict sort_i = ci->hydro.sort[sid]; - const struct entry *restrict sort_j = cj->hydro.sort[sid]; + const struct sort_entry *restrict sort_i = ci->hydro.sort[sid]; + const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; #ifdef SWIFT_DEBUG_CHECKS /* Some constants used to checks that the parts are in the right frame */ @@ -1231,8 +1231,8 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { #ifdef SWIFT_DEBUG_CHECKS /* Pick-out the sorted lists. */ - const struct entry *restrict sort_i = ci->hydro.sort[sid]; - const struct entry *restrict sort_j = cj->hydro.sort[sid]; + const struct sort_entry *restrict sort_i = ci->hydro.sort[sid]; + const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; /* Check that the dx_max_sort values in the cell are indeed an upper bound on particle movement. */ @@ -1309,8 +1309,8 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; /* Pick-out the sorted lists. */ - struct entry *restrict sort_i = ci->hydro.sort[sid]; - struct entry *restrict sort_j = cj->hydro.sort[sid]; + struct sort_entry *restrict sort_i = ci->hydro.sort[sid]; + struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; #ifdef SWIFT_DEBUG_CHECKS /* Some constants used to checks that the parts are in the right frame */ @@ -1350,7 +1350,8 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const double shift_j[3] = {cj->loc[0], cj->loc[1], cj->loc[2]}; int count_active_i = 0, count_active_j = 0; - struct entry *restrict sort_active_i = NULL, *restrict sort_active_j = NULL; + struct sort_entry *restrict sort_active_i = NULL, + *restrict sort_active_j = NULL; if (cell_is_all_active_hydro(ci, e)) { /* If everybody is active don't bother copying */ @@ -1358,7 +1359,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, count_active_i = count_i; } else if (cell_is_active_hydro(ci, e)) { if (posix_memalign((void **)&sort_active_i, SWIFT_CACHE_ALIGNMENT, - sizeof(struct entry) * count_i) != 0) + sizeof(struct sort_entry) * count_i) != 0) error("Failed to allocate active sortlists."); /* Collect the active particles in ci */ @@ -1376,7 +1377,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, count_active_j = count_j; } else if (cell_is_active_hydro(cj, e)) { if (posix_memalign((void **)&sort_active_j, SWIFT_CACHE_ALIGNMENT, - sizeof(struct entry) * count_j) != 0) + sizeof(struct sort_entry) * count_j) != 0) error("Failed to allocate active sortlists."); /* Collect the active particles in cj */ @@ -1778,8 +1779,8 @@ void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { #ifdef SWIFT_DEBUG_CHECKS /* Pick-out the sorted lists. */ - const struct entry *restrict sort_i = ci->hydro.sort[sid]; - const struct entry *restrict sort_j = cj->hydro.sort[sid]; + const struct sort_entry *restrict sort_i = ci->hydro.sort[sid]; + const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; /* Check that the dx_max_sort values in the cell are indeed an upper bound on particle movement. */ diff --git a/src/runner_doiact_stars.h b/src/runner_doiact_stars.h index 4fca3be9463e5bc65d1b46967daa87fddad42959..7e9780def83bbdbab83a431a757a52f3ba51d2e4 100644 --- a/src/runner_doiact_stars.h +++ b/src/runner_doiact_stars.h @@ -324,8 +324,8 @@ void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, if (do_ci_stars) { /* Pick-out the sorted lists. */ - const struct entry *restrict sort_j = cj->hydro.sort[sid]; - const struct entry *restrict sort_i = ci->stars.sort[sid]; + const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; + const struct sort_entry *restrict sort_i = ci->stars.sort[sid]; #ifdef SWIFT_DEBUG_CHECKS /* Some constants used to checks that the parts are in the right frame */ @@ -452,8 +452,8 @@ void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, if (do_cj_stars) { /* Pick-out the sorted lists. */ - const struct entry *restrict sort_i = ci->hydro.sort[sid]; - const struct entry *restrict sort_j = cj->stars.sort[sid]; + const struct sort_entry *restrict sort_i = ci->hydro.sort[sid]; + const struct sort_entry *restrict sort_j = cj->stars.sort[sid]; #ifdef SWIFT_DEBUG_CHECKS /* Some constants used to checks that the parts are in the right frame */ @@ -640,7 +640,7 @@ void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, if (count_j == 0) return; /* Pick-out the sorted lists. */ - const struct entry *restrict sort_j = cj->hydro.sort[sid]; + const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; const float dxj = cj->hydro.dx_max_sort; /* Sparts are on the left? */ @@ -1129,7 +1129,7 @@ void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c) { #define RUNNER_CHECK_SORT(TYPE, PART, cj, ci, sid) \ ({ \ - const struct entry *restrict sort_j = cj->TYPE.sort[sid]; \ + const struct sort_entry *restrict sort_j = cj->TYPE.sort[sid]; \ \ for (int pjd = 0; pjd < cj->TYPE.count; pjd++) { \ const struct PART *p = &cj->TYPE.parts[sort_j[pjd].i]; \ diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index 182e81e99c442cf5e27405ea71321e22e7f374e3..68f34b0d3b8fc9c79097522f8a1618f86957612e 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -240,8 +240,8 @@ __attribute__((always_inline)) INLINE static void storeInteractions( * * @param ci #cell pointer to ci * @param cj #cell pointer to cj - * @param sort_i #entry array for particle distance in ci - * @param sort_j #entry array for particle distance in cj + * @param sort_i #sort_entry array for particle distance in ci + * @param sort_j #sort_entry array for particle distance in cj * @param dx_max maximum particle movement allowed in cell * @param rshift cutoff shift * @param hi_max Maximal smoothing length in cell ci @@ -260,10 +260,11 @@ __attribute__((always_inline)) INLINE static void storeInteractions( */ __attribute__((always_inline)) INLINE static void populate_max_index_density( const struct cell *ci, const struct cell *cj, - const struct entry *restrict sort_i, const struct entry *restrict sort_j, - const float dx_max, const float rshift, const double hi_max, - const double hj_max, const double di_max, const double dj_min, - int *max_index_i, int *max_index_j, int *init_pi, int *init_pj, + const struct sort_entry *restrict sort_i, + const struct sort_entry *restrict sort_j, const float dx_max, + const float rshift, const double hi_max, const double hj_max, + const double di_max, const double dj_min, int *max_index_i, + int *max_index_j, int *init_pi, int *init_pj, const timebin_t max_active_bin, const int active_ci, const int active_cj) { const struct part *restrict parts_i = ci->hydro.parts; @@ -398,8 +399,8 @@ __attribute__((always_inline)) INLINE static void populate_max_index_density( * * @param ci #cell pointer to ci * @param cj #cell pointer to cj - * @param sort_i #entry array for particle distance in ci - * @param sort_j #entry array for particle distance in cj + * @param sort_i #sort_entry array for particle distance in ci + * @param sort_j #sort_entry array for particle distance in cj * @param dx_max maximum particle movement allowed in cell * @param rshift cutoff shift * @param hi_max_raw Maximal smoothing length in cell ci @@ -419,12 +420,12 @@ __attribute__((always_inline)) INLINE static void populate_max_index_density( */ __attribute__((always_inline)) INLINE static void populate_max_index_force( const struct cell *ci, const struct cell *cj, - const struct entry *restrict sort_i, const struct entry *restrict sort_j, - const float dx_max, const float rshift, const double hi_max_raw, - const double hj_max_raw, const double h_max, const double di_max, - const double dj_min, int *max_index_i, int *max_index_j, int *init_pi, - int *init_pj, const timebin_t max_active_bin, const int active_ci, - const int active_cj) { + const struct sort_entry *restrict sort_i, + const struct sort_entry *restrict sort_j, const float dx_max, + const float rshift, const double hi_max_raw, const double hj_max_raw, + const double h_max, const double di_max, const double dj_min, + int *max_index_i, int *max_index_j, int *init_pi, int *init_pj, + const timebin_t max_active_bin, const int active_ci, const int active_cj) { const struct part *restrict parts_i = ci->hydro.parts; const struct part *restrict parts_j = cj->hydro.parts; @@ -570,7 +571,7 @@ __attribute__((always_inline)) INLINE static void populate_max_index_force( * @param runner_shift_x The runner_shift in the x direction. * @param runner_shift_y The runner_shift in the y direction. * @param runner_shift_z The runner_shift in the z direction. - * @param sort_j #entry array for particle distance in cj + * @param sort_j #sort_entry array for particle distance in cj * @param max_index_i array to hold the maximum distances of pi particles into * #cell cj * @param flipped Flag to check whether the cells have been flipped or not. @@ -582,7 +583,8 @@ __attribute__((always_inline)) INLINE static int populate_max_index_subset( int *restrict ind, const double *total_ci_shift, const float dxj, const double di_shift_correction, const double runner_shift_x, const double runner_shift_y, const double runner_shift_z, - const struct entry *restrict sort_j, int *max_index_i, const int flipped) { + const struct sort_entry *restrict sort_j, int *max_index_i, + const int flipped) { /* The cell is on the right so read the particles * into the cache from the start of the cell. */ @@ -1320,8 +1322,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; /* Pick-out the sorted lists. */ - const struct entry *restrict sort_i = ci->hydro.sort[sid]; - const struct entry *restrict sort_j = cj->hydro.sort[sid]; + const struct sort_entry *restrict sort_i = ci->hydro.sort[sid]; + const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; /* Get some other useful values. */ const int count_i = ci->hydro.count; @@ -1726,7 +1728,7 @@ void runner_dopair_subset_density_vec(struct runner *r, const int count_j = cj->hydro.count; /* Pick-out the sorted lists. */ - const struct entry *restrict sort_j = cj->hydro.sort[sid]; + const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; const float dxj = cj->hydro.dx_max_sort; /* Get both particle caches from the runner and re-allocate @@ -2075,8 +2077,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; /* Pick-out the sorted lists. */ - const struct entry *restrict sort_i = ci->hydro.sort[sid]; - const struct entry *restrict sort_j = cj->hydro.sort[sid]; + const struct sort_entry *restrict sort_i = ci->hydro.sort[sid]; + const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; /* Get some other useful values. */ const int count_i = ci->hydro.count; diff --git a/src/sort_part.h b/src/sort_part.h index 74116d7a8cada31c0663d5c5b70cfa978b11af8b..4da81215fcadccf33c10922b8f291b9452231fa0 100644 --- a/src/sort_part.h +++ b/src/sort_part.h @@ -23,7 +23,7 @@ /** * @brief Entry in a list of sorted indices. */ -struct entry { +struct sort_entry { /*! Distance on the axis */ float d; diff --git a/tools/analyse_memuse_logs.py b/tools/analyse_memuse_logs.py deleted file mode 100755 index 4026c73d1e13a4484975fe3902e508493dc838cb..0000000000000000000000000000000000000000 --- a/tools/analyse_memuse_logs.py +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env python -""" -Usage: - process_memuse.py [options] memuse_report1.dat [memuse_report2.dat] ... - -Parse the output of a run of SWIFT to convert the memuse output dumps into a -timeseries of memory use. Also outputs use in memory per labelled type. - -This file is part of SWIFT. -Copyright (c) 2019 Peter W. Draper (p.w.draper@durham.ac.uk) - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published -by the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -""" - -from collections import OrderedDict -import argparse -import sys - -# Command-line arguments. -parser = argparse.ArgumentParser(description="Analyse memory usage reports") - -parser.add_argument("memuse_report", nargs='+', - help="Memory usage reports (order by step if using more than one)") -parser.add_argument( - "-b", - "--blacklist", - dest="blacklist", - help="substring of allocations to ignore (maybe be repeated)", - default=None, - action='append' -) -args = parser.parse_args() - -memuse = OrderedDict() -labels = {} -totalmem = 0 -process_use = "" -peak = 0.0 - -for filename in args.memuse_report: - sys.stderr.write("## Processing: " + filename + "\n") - with open(filename) as infile: - print '# {:<18s} {:>30s} {:>9s} {:>9s} {:s}'.format("tic", "label", "allocated", "step", "MB") - for line in infile: - if line[0] == "#": - if "# Current use:" in line: - process_use = line[14:-1] - else: - tic, adr, rank, step, allocated, label, size = line.split() - - # Skip blacklisted allocations, these can swamp the signal... - if args.blacklist != None: - skip = False - for item in args.blacklist: - if item in label: - skip = True - break - if skip: - continue - - rank = int(rank) - step = int(step) - allocated = int(allocated) - size = int(size) - - doprint = True - if allocated == 1: - # Allocation. - totalmem = totalmem + size - if not adr in memuse: - memuse[adr] = [size] - labels[adr] = label - else: - memuse[adr].append(size) - else: - # Free, locate allocation. - if adr in memuse: - allocs = memuse[adr] - totalmem = totalmem - allocs[0] - if len(allocs) > 1: - memuse[adr] = allocs[1:] - else: - del memuse[adr] - else: - # Unmatched free, complain and skip. - #print "### unmatched free: ", label, adr - doprint = False - if doprint: - if totalmem > peak: - peak = totalmem - print '{:<20s} {:>30s} {:9d} {:9d} {:.3f}'.format(tic, label, allocated, step, totalmem/(1048576.0)) - sys.stderr.write("## Finished ingestion of: " + filename + "\n") - -totals = {} -numactive = {} -for adr in labels: - # If any remaining allocations. - if adr in memuse: - if labels[adr] in totals: - totals[labels[adr]] = totals[labels[adr]] + memuse[adr][0] - numactive[labels[adr]] = numactive[labels[adr]] + 1 - else: - totals[labels[adr]] = memuse[adr][0] - numactive[labels[adr]] = 1 - -print "# Memory use by label:" -print "## ", '{:<30s} {:>16s} {:>16s}'.format("label", "MB", "numactive") -print "## " -total = 0.0 -for label in sorted(totals): - mem = totals[label]/(1048576.0) - total = total + mem - print "## ", '{:<30s} {:16.3f} {:16d}'.format(label, mem, numactive[label]) -print "## " -print "# Total memory still in use : ", '{:.3f}'.format(total), " (MB)" -print "# Peak memory usage : ", '{:.3f}'.format(peak/1048576.0), " (MB)" -if process_use != "": - print "#" - print "# Memory use by process (all/system):", process_use -sys.exit(0) diff --git a/tools/process_memuse_logs.sh b/tools/process_memuse_logs.sh deleted file mode 100755 index c86efe9ceca388afcb8d7236c0a2a4e403a66083..0000000000000000000000000000000000000000 --- a/tools/process_memuse_logs.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash -# -# Usage: -# process_memuse_logs nprocess -# -# Description: -# Process all the memuse report files in the current directory. -# Creating an analysis for step step and one for all the steps. -# -# The input files are created by a run configured for memuse reporting -# (--enable-memuse-reports) should be named "memuse_report-step<n>.dat" -# in the current directory. -# -# All located files will be processed using "nprocess" concurrent -# processes. The output for each step will be named memuse_report_step<n>.log -# and the overall analysis will be called memuse_report_all.log. -# -# This file is part of SWIFT: -# -# Copyright (C) 2019 Peter W. Draper (p.w.draper@durham.ac.uk) -# All Rights Reserved. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -# Handle command-line -if test "$1" == ""; then - echo "Usage: $0 nprocess" - exit 1 -fi -NPROCS=$1 - -# Locate script. -SCRIPTHOME=$(dirname "$0") - -# Find all report files. Use version sort to get into correct order. -files=$(ls -v memuse_report-step*.dat) -if test $? != 0; then - echo "Failed to find any memuse report files" - exit 1 -fi - -# Construct list of input and output names. -list="" -for f in $files; do - output=$(echo $f| sed 's,.dat,.log,') - list="$list $f $output" -done - -# And process them. -echo "Processing memuse report files..." -echo $list | xargs -P $NPROCS -n 2 /bin/bash -c "${SCRIPTHOME}/analyse_memuse_logs.py \$0 > \$1" - -# Now process the overall file, if more than one file given. -n=$(echo $list| wc -w) -if test $n -gt 2; then - echo "Processing _all_ memuse report files..." - ${SCRIPTHOME}/analyse_memuse_logs.py $files > memuse_report-all.log -fi - -echo "Finished" - -exit diff --git a/tools/process_memuse_logs_MPI.sh b/tools/process_memuse_logs_MPI.sh deleted file mode 100755 index 77a949d18432690fcb93f883eca5edff2ea19d92..0000000000000000000000000000000000000000 --- a/tools/process_memuse_logs_MPI.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash -# -# Usage: -# process_memuse_logs_MPI rank nprocess -# -# Description: -# Process all the memuse report files in the current directory that -# are output from the given rank. -# Creating an analysis for each step and one for all the steps. -# -# The input files are created by a run configured for memuse reporting -# (--enable-memuse-reports) should be named "memuse_report-rank<n>-step<m>.dat" -# in the current directory. -# -# All located files will be processed using "nprocess" concurrent -# processes. The output for each step will be named memuse_report-rank<n>-step<m>.log -# and the overall analysis will be called memuse_report-all-rank<n>.log. -# -# This file is part of SWIFT: -# -# Copyright (C) 2019 Peter W. Draper (p.w.draper@durham.ac.uk) -# All Rights Reserved. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -# Handle command-line -if test "$2" == ""; then - echo "Usage: $0 rank nprocess" - exit 1 -fi -RANK=$1 -NPROCS=$2 - -# Locate script. -SCRIPTHOME=$(dirname "$0") - -# Find all report files. Use version sort to get into correct order. -files=$(ls -v memuse_report-rank${RANK}-step*.dat) -if test $? != 0; then - echo "Failed to find any memuse report files" - exit 1 -fi - -# Construct list of input and output names. -list="" -for f in $files; do - output=$(echo $f| sed 's,.dat,.log,') - list="$list $f $output" -done - -# And process them. -echo "Processing memuse report files..." -echo $list | xargs -P $NPROCS -n 2 /bin/bash -c "${SCRIPTHOME}/analyse_memuse_logs.py \$0 > \$1" - -# Now process the overall file, if more than one file given. -n=$(echo $list| wc -w) -if test $n -gt 2; then - echo "Processing _all_ memuse report files..." - ${SCRIPTHOME}/analyse_memuse_logs.py $files > memuse_report-all-rank${RANK}.log -fi - -echo "Finished" - -exit