diff --git a/configure.ac b/configure.ac index 77f8a65c3913b5c1ae06f4b8ed255b67544fca0c..50619f589b9377001538f3c62870c72249b3f102 100644 --- a/configure.ac +++ b/configure.ac @@ -365,6 +365,18 @@ fi # Check whether we have any of the ARM v8.1 tick timers AX_ASM_ARM_PMCCNTR AX_ASM_ARM_CNTVCT +# See if we want memuse reporting. +AC_ARG_ENABLE([memuse-reports], + [AS_HELP_STRING([--enable-memuse-reports], + [Output reports about significant memory allocations@<:@yes/no@:>@] + )], + [enable_memuse_reports="$enableval"], + [enable_memuse_reports="no"] +) +if test "$enable_memuse_reports" = "yes"; then + AC_DEFINE([SWIFT_MEMUSE_REPORTS],1,[Enable memory usage reports]) +fi + # Define HAVE_POSIX_MEMALIGN if it works. AX_FUNC_POSIX_MEMALIGN diff --git a/doc/RTD/source/AnalysisTools/index.rst b/doc/RTD/source/AnalysisTools/index.rst index f68fa77536e77df48cd1acc44e98a3408e48b037..9fa94e0baff5732092a704e20ecc12de57d8301f 100644 --- a/doc/RTD/source/AnalysisTools/index.rst +++ b/doc/RTD/source/AnalysisTools/index.rst @@ -1,5 +1,6 @@ .. AnalysisTools Loic Hausammann 20th March 2019 + Peter W. Draper 28th March 2019 .. _analysistools: @@ -33,3 +34,48 @@ or install ``npm`` and then run the following commands http-server . Now you can open the web page ``http://localhost:8080/cell_hierarchy.html``. + +Memory usage reports +-------------------- + +When SWIFT is configured using the ``--enable-memuse-reports`` flag it will +log any calls to allocate or free memory that make use of the +``swift_memalign()``, ``swift_malloc()``, ``swift_calloc()`` and +``swift_free()`` functions and will generate a report at the end of each +step. It will also attempt to dump the current memory use when SWIFT is +aborted by calling the ``error()`` function. Failed memory allocations will be +reported in these logs. + +These functions should be used by developers when allocating significant +amounts of memory -- so don't use these for high frequency small allocations. 
+Each call to the ``swift_`` functions differs from the standard calls by the
+inclusion of a "label"; this should match between allocations and frees and
+ideally should be a short label that describes the use of the memory, e.g.
+"parts", "gparts", "hydro.sort" etc.
+
+Calls to external libraries that make allocations you'd also like to log
+can be made by calling the ``memuse_log_allocation()`` function directly.
+
+The output files are called ``memuse_report-step<n>.dat`` or
+``memuse_report-rank<m>-step<n>.dat`` if running using MPI. These have a line
+for each allocation or free that records the time, memory address, step,
+whether an allocation or free, the label and when an allocation, the amount of
+memory. The comments in this file also record the actual memory use of the
+process (including threads) as reported by the operating system at the end of
+the step.
+
+To post process these files into a memory used timeline and get a report of
+the peak memory use, as well as the memory still in use at the end of the step
+there is a basic analysis script ``analyse_memuse_logs.py`` and two wrappers
+that process a directory of logs, these are ``./process_memuse_logs.sh`` and
+``./process_memuse_logs_MPI.sh`` for non-MPI and MPI runs respectively.
+
+Note that the process scripts process each step individually and also process
+all the logs as a single sequence. When interpreting these some care should be
+taken as they are not all the memory allocated, just important allocations in
+SWIFT and when looking at a single step the context of any previous steps is
+not used, so you only see allocations made in that step and the effect of any
+matching frees (so allocations made in previous steps that are freed in this
+step will not be understood and will be ignored, you need the global analysis
+to understand that).
+ diff --git a/examples/main.c b/examples/main.c index 6fc5b433719822558d531f4ed2691e7127139a79..3b2b976d94374ce44f74cd92dea8f7d594610e95 100644 --- a/examples/main.c +++ b/examples/main.c @@ -120,8 +120,9 @@ int main(int argc, char *argv[]) { if ((res = MPI_Comm_rank(MPI_COMM_WORLD, &myrank)) != MPI_SUCCESS) error("Call to MPI_Comm_rank failed with error %i.", res); - /* Make sure messages are stamped with the correct rank. */ + /* Make sure messages are stamped with the correct rank and step. */ engine_rank = myrank; + engine_current_step = 0; if ((res = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN)) != MPI_SUCCESS) @@ -1035,6 +1036,19 @@ int main(int argc, char *argv[]) { /* unused parameters */ parser_write_params_to_file(params, "unused_parameters.yml", 0); + /* Dump memory use report if collected for the 0 step. */ +#ifdef SWIFT_MEMUSE_REPORTS + { + char dumpfile[40]; +#ifdef WITH_MPI + snprintf(dumpfile, 40, "memuse_report-rank%d-step%d.dat", engine_rank, 0); +#else + snprintf(dumpfile, 40, "memuse_report-step%d.dat", 0); +#endif // WITH_MPI + memuse_log_dump(dumpfile); + } +#endif + /* Main simulation loop */ /* ==================== */ int force_stop = 0, resubmit = 0; @@ -1082,6 +1096,20 @@ int main(int argc, char *argv[]) { task_dump_stats(dumpfile, &e, /* header = */ 0, /* allranks = */ 1); } + /* Dump memory use report if collected. */ +#ifdef SWIFT_MEMUSE_REPORTS + { + char dumpfile[40]; +#ifdef WITH_MPI + snprintf(dumpfile, 40, "memuse_report-rank%d-step%d.dat", engine_rank, + j + 1); +#else + snprintf(dumpfile, 40, "memuse_report-step%d.dat", j + 1); +#endif // WITH_MPI + memuse_log_dump(dumpfile); + } +#endif + #ifdef SWIFT_DEBUG_THREADPOOL /* Dump the task data using the given frequency. 
*/ if (dump_threadpool && (dump_threadpool == 1 || j % dump_threadpool == 1)) { diff --git a/src/Makefile.am b/src/Makefile.am index fd18996f61242db0ec37f4df1da9e617b622a729..d7e4249a7ff67132505e3a7df8a134d4cd8b266c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -51,7 +51,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ mesh_gravity.h cbrt.h exp10.h velociraptor_interface.h swift_velociraptor_part.h outputlist.h \ logger_io.h tracers_io.h tracers.h tracers_struct.h star_formation_io.h \ star_formation_struct.h star_formation.h star_formation_iact.h \ - velociraptor_struct.h velociraptor_io.h random.h + velociraptor_struct.h velociraptor_io.h random.h memuse.h # source files for EAGLE cooling EAGLE_COOLING_SOURCES = @@ -70,7 +70,8 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c engine_maketasks.c part_type.c xmf.c gravity_properties.c gravity.c \ collectgroup.c hydro_space.c equation_of_state.c \ chemistry.c cosmology.c restart.c mesh_gravity.c velociraptor_interface.c \ - outputlist.c velociraptor_dummy.c logger_io.c $(EAGLE_COOLING_SOURCES) + outputlist.c velociraptor_dummy.c logger_io.c memuse.c \ + $(EAGLE_COOLING_SOURCES) # Include files for distribution, not installation. 
nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ diff --git a/src/cell.c b/src/cell.c index 577d3e32a4210093a48ec733260994f5e6846b58..675c30066da886645b25fd527991a58f8e754e85 100644 --- a/src/cell.c +++ b/src/cell.c @@ -1815,18 +1815,10 @@ void cell_check_multipole(struct cell *c) { void cell_clean(struct cell *c) { /* Hydro */ - for (int i = 0; i < 13; i++) - if (c->hydro.sort[i] != NULL) { - free(c->hydro.sort[i]); - c->hydro.sort[i] = NULL; - } + cell_free_hydro_sorts(c); /* Stars */ - for (int i = 0; i < 13; i++) - if (c->stars.sort[i] != NULL) { - free(c->stars.sort[i]); - c->stars.sort[i] = NULL; - } + cell_free_stars_sorts(c); /* Recurse */ for (int k = 0; k < 8; k++) @@ -4324,10 +4316,7 @@ void cell_clear_stars_sort_flags(struct cell *c, const int is_super) { #ifdef SWIFT_DEBUG_CHECKS if (c != c->hydro.super) error("Cell is not a super-cell!!!"); #endif - - for (int i = 0; i < 13; i++) { - free(c->stars.sort[i]); - } + cell_free_stars_sorts(c); } /* Indicate that the cell is not sorted and cancel the pointer sorting arrays. diff --git a/src/cell.h b/src/cell.h index a378fa971f2ca6da18bba40ffc5e160bef941aa8..4d751bc43b5d41037ad873a2df3acc3dc5150796 100644 --- a/src/cell.h +++ b/src/cell.h @@ -35,6 +35,7 @@ #include "lock.h" #include "multipole.h" #include "part.h" +#include "sort_part.h" #include "space.h" #include "task.h" #include "timeline.h" @@ -252,6 +253,7 @@ struct cell { /*! Pointer for the sorted indices. */ struct entry *sort[13]; + struct entry *sortptr; /*! Super cell, i.e. the highest-level parent cell that has a hydro * pair/self tasks */ @@ -541,6 +543,7 @@ struct cell { /*! Pointer for the sorted indices. */ struct entry *sort[13]; + struct entry *sortptr; /*! 
Bit-mask indicating the sorted directions */ unsigned int sorted; @@ -1062,4 +1065,102 @@ __attribute__((always_inline)) INLINE static void cell_ensure_tagged( #endif // WITH_MPI } +/** + * @brief Allocate hydro sort memory for cell. + * + * @param c The #cell that will require sorting. + * @param flags Cell flags. + */ +__attribute__((always_inline)) INLINE static void cell_malloc_hydro_sorts( + struct cell *c, int flags) { + + /* Count the memory needed for all active dimensions. */ + int count = 0; + for (int j = 0; j < 13; j++) { + if ((flags & (1 << j)) && c->hydro.sort[j] == NULL) + count += (c->hydro.count + 1); + } + + /* Allocate as a single chunk. */ + struct entry *memptr = NULL; + if ((memptr = (struct entry *)swift_malloc( + "hydro.sort", sizeof(struct entry) * count)) == NULL) + error("Failed to allocate sort memory."); + + c->hydro.sortptr = memptr; + + /* And attach spans as needed. */ + for (int j = 0; j < 13; j++) { + if ((flags & (1 << j)) && c->hydro.sort[j] == NULL) { + c->hydro.sort[j] = memptr; + memptr += (c->hydro.count + 1); + } + } +} + +/** + * @brief Free hydro sort memory for cell. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static void cell_free_hydro_sorts( + struct cell *c) { + + /* Note only one allocation for the dimensions. */ + if (c->hydro.sortptr != NULL) { + swift_free("hydro.sort", c->hydro.sortptr); + c->hydro.sortptr = NULL; + for (int i = 0; i < 13; i++) c->hydro.sort[i] = NULL; + } +} + +/** + * @brief Allocate stars sort memory for cell. + * + * @param c The #cell that will require sorting. + * @param flags Cell flags. + */ +__attribute__((always_inline)) INLINE static void cell_malloc_stars_sorts( + struct cell *c, int flags) { + + /* Count the memory needed for all active dimensions. */ + int count = 0; + for (int j = 0; j < 13; j++) { + if ((flags & (1 << j)) && c->stars.sort[j] == NULL) + count += (c->stars.count + 1); + } + + /* Allocate as a single chunk. 
*/ + struct entry *memptr = NULL; + if ((memptr = (struct entry *)swift_malloc( + "stars.sort", sizeof(struct entry) * count)) == NULL) + error("Failed to allocate sort memory."); + + c->stars.sortptr = memptr; + + /* And attach spans as needed. */ + for (int j = 0; j < 13; j++) { + if ((flags & (1 << j)) && c->stars.sort[j] == NULL) { + c->stars.sort[j] = memptr; + memptr += (c->stars.count + 1); + } + } +} + +/** + * @brief Free stars sort memory for cell. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static void cell_free_stars_sorts( + struct cell *c) { + + /* Note only one allocation for the dimensions. */ + if (c->stars.sortptr != NULL) { + swift_free("stars.sort", c->stars.sortptr); + c->stars.sortptr = NULL; + for (int i = 0; i < 13; i++) c->stars.sort[i] = NULL; + } +} + #endif /* SWIFT_CELL_H */ diff --git a/src/clocks.c b/src/clocks.c index 49297f5db1cc10a3d9f4537c5900610dded7ffba..16af01938d8f4e6cb21490af3288fd64e1a93876 100644 --- a/src/clocks.c +++ b/src/clocks.c @@ -42,8 +42,9 @@ /* The CPU frequency used to convert ticks to seconds. */ static unsigned long long clocks_cpufreq = 0; -/* Ticks when the CPU frequency was initialised. Used in elapsed. */ -static ticks clocks_start = 0; +/* Ticks when the CPU frequency was initialised, this marks the start of + * time. */ +ticks clocks_start_ticks = 0; /* The units of any returned times. 
*/ static const char *clocks_units[] = {"ms", "~ms"}; @@ -106,7 +107,7 @@ void clocks_set_cpufreq(unsigned long long freq) { } else { clocks_estimate_cpufreq(); } - clocks_start = getticks(); + clocks_start_ticks = getticks(); } /** @@ -258,7 +259,7 @@ const char *clocks_get_timesincestart(void) { static char buffer[40]; sprintf(buffer, "[%07.1f]", - clocks_diff_ticks(getticks(), clocks_start) / 1000.0); + clocks_diff_ticks(getticks(), clocks_start_ticks) / 1000.0); return buffer; } @@ -271,7 +272,7 @@ const char *clocks_get_timesincestart(void) { * @result the time since the start of the execution */ double clocks_get_hours_since_start(void) { - return clocks_diff_ticks(getticks(), clocks_start) / (3600. * 1000.0); + return clocks_diff_ticks(getticks(), clocks_start_ticks) / (3600. * 1000.0); } /** diff --git a/src/clocks.h b/src/clocks.h index ce08167bd504d47a76542870791057881c6d2f17..d306268674fc85c722e71a6bf8c0095341ba4e1a 100644 --- a/src/clocks.h +++ b/src/clocks.h @@ -37,6 +37,9 @@ struct clocks_time { #endif }; +/* Ticks used as the start of time. */ +extern ticks clocks_start_ticks; + void clocks_gettime(struct clocks_time *time); double clocks_diff(struct clocks_time *start, struct clocks_time *end); const char *clocks_getunit(void); diff --git a/src/debug.c b/src/debug.c index d2aff378a174ade46b62a3931f78394a0f41ca41..6257f7cf4e62c3db9027c820bb658eb678c0ecf1 100644 --- a/src/debug.c +++ b/src/debug.c @@ -680,69 +680,3 @@ void dumpCellRanks(const char *prefix, struct cell *cells_top, int nr_cells) { } #endif /* HAVE_MPI */ - -/** - * @brief parse the process /proc/self/statm file to get the process - * memory use (in KB). Top field in (). 
- * - * @param size total virtual memory (VIRT) - * @param resident resident non-swapped memory (RES) - * @param share shared (mmap'd) memory (SHR) - * @param trs text (exe) resident set (CODE) - * @param lrs library resident set - * @param drs data+stack resident set (DATA) - * @param dt dirty pages (nDRT) - */ -void getProcMemUse(long *size, long *resident, long *share, long *trs, - long *lrs, long *drs, long *dt) { - - /* Open the file. */ - FILE *file = fopen("/proc/self/statm", "r"); - if (file != NULL) { - int nscan = fscanf(file, "%ld %ld %ld %ld %ld %ld %ld", size, resident, - share, trs, lrs, drs, dt); - - if (nscan == 7) { - /* Convert pages into bytes. Usually 4096, but could be 512 on some - * systems so take care in conversion to KB. */ - long sz = sysconf(_SC_PAGESIZE); - *size *= sz; - *resident *= sz; - *share *= sz; - *trs *= sz; - *lrs *= sz; - *drs *= sz; - *dt *= sz; - - *size /= 1024; - *resident /= 1024; - *share /= 1024; - *trs /= 1024; - *lrs /= 1024; - *drs /= 1024; - *dt /= 1024; - } else { - error("Failed to read sufficient fields from /proc/self/statm"); - } - fclose(file); - } else { - error("Failed to open /proc/self/statm"); - } -} - -/** - * @brief Print the current memory use of the process. A la "top". 
- */ -void printProcMemUse(void) { - long size; - long resident; - long share; - long trs; - long lrs; - long drs; - long dt; - getProcMemUse(&size, &resident, &share, &trs, &lrs, &drs, &dt); - printf("## VIRT = %ld , RES = %ld , SHR = %ld , CODE = %ld, DATA = %ld\n", - size, resident, share, trs, drs); - fflush(stdout); -} diff --git a/src/debug.h b/src/debug.h index ec3807c3ba911c6a553aa42d3f8a017662217001..3cafd17b835a1a816e049f3a714bedcaf34d183a 100644 --- a/src/debug.h +++ b/src/debug.h @@ -49,7 +49,4 @@ void dumpMETISGraph(const char *prefix, idx_t nvtxs, idx_t ncon, idx_t *xadj, void dumpCellRanks(const char *prefix, struct cell *cells_top, int nr_cells); #endif -void getProcMemUse(long *size, long *resident, long *share, long *trs, - long *lrs, long *drs, long *dt); -void printProcMemUse(void); #endif /* SWIFT_DEBUG_H */ diff --git a/src/engine.c b/src/engine.c index 40e0a69f6b839b914310a51671d3bb18cc57214f..8f8a5c5492e1db1d47d9b9b335499eb4a5a4c3cb 100644 --- a/src/engine.c +++ b/src/engine.c @@ -72,6 +72,7 @@ #include "logger_io.h" #include "map.h" #include "memswap.h" +#include "memuse.h" #include "minmax.h" #include "outputlist.h" #include "parallel_io.h" @@ -121,6 +122,9 @@ const char *engine_policy_names[] = {"none", /** The rank of the engine as a global variable (for messages). */ int engine_rank; +/** The current step of the engine as a global variable (for messages). */ +int engine_current_step; + /** * @brief Data collected from the cells at the end of a time-step */ @@ -163,6 +167,7 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) { /** * Do the exchange of one type of particles with all the other nodes. * + * @param label a label for the memory allocations of this particle type. * @param counts 2D array with the counts of particles to exchange with * each other node. 
* @param parts the particle data to exchange @@ -177,15 +182,15 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) { * @result new particle data constructed from all the exchanges with the * given alignment. */ -static void *engine_do_redistribute(int *counts, char *parts, +static void *engine_do_redistribute(const char *label, int *counts, char *parts, size_t new_nr_parts, size_t sizeofparts, size_t alignsize, MPI_Datatype mpi_type, int nr_nodes, int nodeID) { /* Allocate a new particle array with some extra margin */ char *parts_new = NULL; - if (posix_memalign( - (void **)&parts_new, alignsize, + if (swift_memalign( + label, (void **)&parts_new, alignsize, sizeofparts * new_nr_parts * engine_redistribute_alloc_margin) != 0) error("Failed to allocate new particle data."); @@ -627,7 +632,7 @@ void engine_redistribute(struct engine *e) { error("Failed to allocate counts temporary buffer."); int *dest; - if ((dest = (int *)malloc(sizeof(int) * nr_parts)) == NULL) + if ((dest = (int *)swift_malloc("dest", sizeof(int) * nr_parts)) == NULL) error("Failed to allocate dest temporary buffer."); /* Simple index of node IDs, used for mappers over nodes. 
*/ @@ -690,7 +695,7 @@ void engine_redistribute(struct engine *e) { threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_part, nodes, nr_nodes, sizeof(int), 0, &savelink_data); } - free(dest); + swift_free("dest", dest); /* Get destination of each s-particle */ int *s_counts; @@ -698,7 +703,7 @@ void engine_redistribute(struct engine *e) { error("Failed to allocate s_counts temporary buffer."); int *s_dest; - if ((s_dest = (int *)malloc(sizeof(int) * nr_sparts)) == NULL) + if ((s_dest = (int *)swift_malloc("s_dest", sizeof(int) * nr_sparts)) == NULL) error("Failed to allocate s_dest temporary buffer."); redist_data.counts = s_counts; @@ -748,7 +753,7 @@ void engine_redistribute(struct engine *e) { threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_spart, nodes, nr_nodes, sizeof(int), 0, &savelink_data); } - free(s_dest); + swift_free("s_dest", s_dest); /* Get destination of each g-particle */ int *g_counts; @@ -756,7 +761,7 @@ void engine_redistribute(struct engine *e) { error("Failed to allocate g_gcount temporary buffer."); int *g_dest; - if ((g_dest = (int *)malloc(sizeof(int) * nr_gparts)) == NULL) + if ((g_dest = (int *)swift_malloc("g_dest", sizeof(int) * nr_gparts)) == NULL) error("Failed to allocate g_dest temporary buffer."); redist_data.counts = g_counts; @@ -796,7 +801,7 @@ void engine_redistribute(struct engine *e) { } #endif - free(g_dest); + swift_free("g_dest", g_dest); /* Get all the counts from all the nodes. */ if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM, @@ -861,34 +866,34 @@ void engine_redistribute(struct engine *e) { /* SPH particles. 
*/ void *new_parts = engine_do_redistribute( - counts, (char *)s->parts, nr_parts_new, sizeof(struct part), part_align, - part_mpi_type, nr_nodes, nodeID); - free(s->parts); + "parts", counts, (char *)s->parts, nr_parts_new, sizeof(struct part), + part_align, part_mpi_type, nr_nodes, nodeID); + swift_free("parts", s->parts); s->parts = (struct part *)new_parts; s->nr_parts = nr_parts_new; s->size_parts = engine_redistribute_alloc_margin * nr_parts_new; /* Extra SPH particle properties. */ - new_parts = engine_do_redistribute(counts, (char *)s->xparts, nr_parts_new, - sizeof(struct xpart), xpart_align, - xpart_mpi_type, nr_nodes, nodeID); - free(s->xparts); + new_parts = engine_do_redistribute( + "xparts", counts, (char *)s->xparts, nr_parts_new, sizeof(struct xpart), + xpart_align, xpart_mpi_type, nr_nodes, nodeID); + swift_free("xparts", s->xparts); s->xparts = (struct xpart *)new_parts; /* Gravity particles. */ - new_parts = engine_do_redistribute(g_counts, (char *)s->gparts, nr_gparts_new, - sizeof(struct gpart), gpart_align, - gpart_mpi_type, nr_nodes, nodeID); - free(s->gparts); + new_parts = engine_do_redistribute( + "gparts", g_counts, (char *)s->gparts, nr_gparts_new, + sizeof(struct gpart), gpart_align, gpart_mpi_type, nr_nodes, nodeID); + swift_free("gparts", s->gparts); s->gparts = (struct gpart *)new_parts; s->nr_gparts = nr_gparts_new; s->size_gparts = engine_redistribute_alloc_margin * nr_gparts_new; /* Star particles. 
*/ - new_parts = engine_do_redistribute(s_counts, (char *)s->sparts, nr_sparts_new, - sizeof(struct spart), spart_align, - spart_mpi_type, nr_nodes, nodeID); - free(s->sparts); + new_parts = engine_do_redistribute( + "sparts", s_counts, (char *)s->sparts, nr_sparts_new, + sizeof(struct spart), spart_align, spart_mpi_type, nr_nodes, nodeID); + swift_free("sparts", s->sparts); s->sparts = (struct spart *)new_parts; s->nr_sparts = nr_sparts_new; s->size_sparts = engine_redistribute_alloc_margin * nr_sparts_new; @@ -1360,15 +1365,15 @@ void engine_exchange_strays(struct engine *e, const size_t offset_parts, s->size_parts = (offset_parts + count_parts_in) * engine_parts_size_grow; struct part *parts_new = NULL; struct xpart *xparts_new = NULL; - if (posix_memalign((void **)&parts_new, part_align, + if (swift_memalign("parts", (void **)&parts_new, part_align, sizeof(struct part) * s->size_parts) != 0 || - posix_memalign((void **)&xparts_new, xpart_align, + swift_memalign("xparts", (void **)&xparts_new, xpart_align, sizeof(struct xpart) * s->size_parts) != 0) error("Failed to allocate new part data."); memcpy(parts_new, s->parts, sizeof(struct part) * offset_parts); memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset_parts); - free(s->parts); - free(s->xparts); + swift_free("parts", s->parts); + swift_free("xparts", s->xparts); s->parts = parts_new; s->xparts = xparts_new; @@ -1379,15 +1384,16 @@ void engine_exchange_strays(struct engine *e, const size_t offset_parts, } } } + if (offset_sparts + count_sparts_in > s->size_sparts) { message("re-allocating sparts array."); s->size_sparts = (offset_sparts + count_sparts_in) * engine_parts_size_grow; struct spart *sparts_new = NULL; - if (posix_memalign((void **)&sparts_new, spart_align, + if (swift_memalign("sparts", (void **)&sparts_new, spart_align, sizeof(struct spart) * s->size_sparts) != 0) error("Failed to allocate new spart data."); memcpy(sparts_new, s->sparts, sizeof(struct spart) * offset_sparts); - 
free(s->sparts); + swift_free("sparts", s->sparts); s->sparts = sparts_new; /* Reset the links */ @@ -1397,15 +1403,16 @@ void engine_exchange_strays(struct engine *e, const size_t offset_parts, } } } + if (offset_gparts + count_gparts_in > s->size_gparts) { message("re-allocating gparts array."); s->size_gparts = (offset_gparts + count_gparts_in) * engine_parts_size_grow; struct gpart *gparts_new = NULL; - if (posix_memalign((void **)&gparts_new, gpart_align, + if (swift_memalign("gparts", (void **)&gparts_new, gpart_align, sizeof(struct gpart) * s->size_gparts) != 0) error("Failed to allocate new gpart data."); memcpy(gparts_new, s->gparts, sizeof(struct gpart) * offset_gparts); - free(s->gparts); + swift_free("gparts", s->gparts); s->gparts = gparts_new; /* Reset the links */ @@ -1658,12 +1665,14 @@ void engine_exchange_proxy_multipoles(struct engine *e) { /* Allocate the buffers for the packed data */ struct gravity_tensors *buffer_send = NULL; - if (posix_memalign((void **)&buffer_send, SWIFT_CACHE_ALIGNMENT, + if (swift_memalign("send_gravity_tensors", (void **)&buffer_send, + SWIFT_CACHE_ALIGNMENT, count_send_cells * sizeof(struct gravity_tensors)) != 0) error("Unable to allocate memory for multipole transactions"); struct gravity_tensors *buffer_recv = NULL; - if (posix_memalign((void **)&buffer_recv, SWIFT_CACHE_ALIGNMENT, + if (swift_memalign("recv_gravity_tensors", (void **)&buffer_recv, + SWIFT_CACHE_ALIGNMENT, count_recv_cells * sizeof(struct gravity_tensors)) != 0) error("Unable to allocate memory for multipole transactions"); @@ -1824,25 +1833,32 @@ void engine_allocate_foreign_particles(struct engine *e) { /* Allocate space for the foreign particles we will receive */ if (count_parts_in > s->size_parts_foreign) { - if (s->parts_foreign != NULL) free(s->parts_foreign); + if (s->parts_foreign != NULL) + swift_free("sparts_foreign", s->parts_foreign); s->size_parts_foreign = engine_foreign_alloc_margin * count_parts_in; - if (posix_memalign((void 
**)&s->parts_foreign, part_align, + if (swift_memalign("parts_foreign", (void **)&s->parts_foreign, part_align, sizeof(struct part) * s->size_parts_foreign) != 0) error("Failed to allocate foreign part data."); } + /* Allocate space for the foreign particles we will receive */ if (count_gparts_in > s->size_gparts_foreign) { - if (s->gparts_foreign != NULL) free(s->gparts_foreign); + if (s->gparts_foreign != NULL) + swift_free("gparts_foreign", s->gparts_foreign); s->size_gparts_foreign = engine_foreign_alloc_margin * count_gparts_in; - if (posix_memalign((void **)&s->gparts_foreign, gpart_align, + if (swift_memalign("gparts_foreign", (void **)&s->gparts_foreign, + gpart_align, sizeof(struct gpart) * s->size_gparts_foreign) != 0) error("Failed to allocate foreign gpart data."); } + /* Allocate space for the foreign particles we will receive */ if (count_sparts_in > s->size_sparts_foreign) { - if (s->sparts_foreign != NULL) free(s->sparts_foreign); + if (s->sparts_foreign != NULL) + swift_free("sparts_foreign", s->sparts_foreign); s->size_sparts_foreign = engine_foreign_alloc_margin * count_sparts_in; - if (posix_memalign((void **)&s->sparts_foreign, spart_align, + if (swift_memalign("sparts_foreign", (void **)&s->sparts_foreign, + spart_align, sizeof(struct spart) * s->size_sparts_foreign) != 0) error("Failed to allocate foreign spart data."); } @@ -3113,6 +3129,7 @@ void engine_step(struct engine *e) { e->max_active_bin = get_max_active_bin(e->ti_end_min); e->min_active_bin = get_min_active_bin(e->ti_current, e->ti_old); e->step += 1; + engine_current_step = e->step; e->step_props = engine_step_prop_none; /* When restarting, move everyone to the current time. */ @@ -3358,7 +3375,7 @@ void engine_check_for_dumps(struct engine *e) { /* Free the memory allocated for VELOCIraptor i/o. 
*/ if (with_stf && e->snapshot_invoke_stf) { #ifdef HAVE_VELOCIRAPTOR - free(e->s->gpart_group_data); + swift_free("gpart_group_data", e->s->gpart_group_data); e->s->gpart_group_data = NULL; #endif } @@ -3887,17 +3904,18 @@ void engine_split(struct engine *e, struct partition *initial_partition) { s->size_parts = s->nr_parts * engine_redistribute_alloc_margin; struct part *parts_new = NULL; struct xpart *xparts_new = NULL; - if (posix_memalign((void **)&parts_new, part_align, + if (swift_memalign("parts", (void **)&parts_new, part_align, sizeof(struct part) * s->size_parts) != 0 || - posix_memalign((void **)&xparts_new, xpart_align, + swift_memalign("xparts", (void **)&xparts_new, xpart_align, sizeof(struct xpart) * s->size_parts) != 0) error("Failed to allocate new part data."); + if (s->nr_parts > 0) { memcpy(parts_new, s->parts, sizeof(struct part) * s->nr_parts); memcpy(xparts_new, s->xparts, sizeof(struct xpart) * s->nr_parts); } - free(s->parts); - free(s->xparts); + swift_free("parts", s->parts); + swift_free("xparts", s->xparts); s->parts = parts_new; s->xparts = xparts_new; @@ -3911,12 +3929,13 @@ void engine_split(struct engine *e, struct partition *initial_partition) { (size_t)(s->nr_sparts * engine_redistribute_alloc_margin)); s->size_sparts = s->nr_sparts * engine_redistribute_alloc_margin; struct spart *sparts_new = NULL; - if (posix_memalign((void **)&sparts_new, spart_align, + if (swift_memalign("sparts", (void **)&sparts_new, spart_align, sizeof(struct spart) * s->size_sparts) != 0) error("Failed to allocate new spart data."); + if (s->nr_sparts > 0) memcpy(sparts_new, s->sparts, sizeof(struct spart) * s->nr_sparts); - free(s->sparts); + swift_free("sparts", s->sparts); s->sparts = sparts_new; /* Re-link the gparts to their sparts. 
*/ @@ -3929,12 +3948,13 @@ void engine_split(struct engine *e, struct partition *initial_partition) { (size_t)(s->nr_gparts * engine_redistribute_alloc_margin)); s->size_gparts = s->nr_gparts * engine_redistribute_alloc_margin; struct gpart *gparts_new = NULL; - if (posix_memalign((void **)&gparts_new, gpart_align, + if (swift_memalign("gparts", (void **)&gparts_new, gpart_align, sizeof(struct gpart) * s->size_gparts) != 0) error("Failed to allocate new gpart data."); + if (s->nr_gparts > 0) memcpy(gparts_new, s->gparts, sizeof(struct gpart) * s->nr_gparts); - free(s->gparts); + swift_free("gparts", s->gparts); s->gparts = gparts_new; /* Re-link the parts. */ @@ -3992,7 +4012,8 @@ void engine_collect_stars_counter(struct engine *e) { } /* Get all sparticles */ - struct spart *sparts = (struct spart *)malloc(total * sizeof(struct spart)); + struct spart *sparts = + (struct spart *)swift_malloc("sparts", total * sizeof(struct spart)); err = MPI_Allgatherv(e->s->sparts_foreign, e->s->nr_sparts_foreign, spart_mpi_type, sparts, n_sparts_int, displs, spart_mpi_type, MPI_COMM_WORLD); @@ -4027,7 +4048,7 @@ void engine_collect_stars_counter(struct engine *e) { free(n_sparts); free(n_sparts_int); - free(sparts); + swift_free("sparts", sparts); #endif } @@ -4864,9 +4885,10 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, parser_get_opt_param_int(params, "Scheduler:mpi_message_limit", 4) * 1024; /* Allocate and init the threads. 
*/ - if (posix_memalign((void **)&e->runners, SWIFT_CACHE_ALIGNMENT, + if (swift_memalign("runners", (void **)&e->runners, SWIFT_CACHE_ALIGNMENT, e->nr_threads * sizeof(struct runner)) != 0) error("Failed to allocate threads array."); + for (int k = 0; k < e->nr_threads; k++) { e->runners[k].id = k; e->runners[k].e = e; @@ -5371,14 +5393,14 @@ void engine_clean(struct engine *e) { gravity_cache_clean(&e->runners[i].ci_gravity_cache); gravity_cache_clean(&e->runners[i].cj_gravity_cache); } - free(e->runners); + swift_free("runners", e->runners); free(e->snapshot_units); output_list_clean(&e->output_list_snapshots); output_list_clean(&e->output_list_stats); output_list_clean(&e->output_list_stf); - free(e->links); + swift_free("links", e->links); #if defined(WITH_LOGGER) logger_clean(e->logger); free(e->logger); diff --git a/src/engine.h b/src/engine.h index 0e0e9895a8b0d1928e48c52ad760d2303447c24d..16edf7dd2437478d6876272debd197aaeaeba378 100644 --- a/src/engine.h +++ b/src/engine.h @@ -114,6 +114,11 @@ enum engine_step_properties { */ extern int engine_rank; +/** + * @brief The current step as a global variable (for messages). + */ +extern int engine_current_step; + /* Data structure for the engine. */ struct engine { diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c index d1858f87ff0bfdfee878f2e53b81e100812fd0a5..35759581b625a27bb95677e9c6e00e415b86e710 100644 --- a/src/engine_maketasks.c +++ b/src/engine_maketasks.c @@ -2289,7 +2289,7 @@ void engine_maketasks(struct engine *e) { #endif /* Free the old list of cell-task links. */ - if (e->links != NULL) free(e->links); + if (e->links != NULL) swift_free("links", e->links); e->size_links = e->sched.nr_tasks * e->links_per_tasks; /* Make sure that we have space for more links than last time. 
*/ @@ -2297,8 +2297,8 @@ void engine_maketasks(struct engine *e) { e->size_links = e->nr_links * engine_rebuild_link_alloc_margin; /* Allocate the new link list */ - if ((e->links = (struct link *)malloc(sizeof(struct link) * e->size_links)) == - NULL) + if ((e->links = (struct link *)swift_malloc( + "links", sizeof(struct link) * e->size_links)) == NULL) error("Failed to allocate cell-task links."); e->nr_links = 0; diff --git a/src/error.h b/src/error.h index d384ec56ba0dc3562160d94911e3e3d3bb786211..de4e9fa44c73d91524dfd307a3ad19b6cad3421f 100644 --- a/src/error.h +++ b/src/error.h @@ -35,13 +35,22 @@ /* Local headers. */ #include "clocks.h" +#include "memuse.h" +/* Use exit when not developing, avoids core dumps. */ #ifdef SWIFT_DEVELOP_MODE #define swift_abort(errcode) abort() #else #define swift_abort(errcode) exit(errcode) #endif +/* If reporting memory usage, try to dump that when exiting in error. */ +#ifdef SWIFT_MEMUSE_REPORTS +#define memdump(rank) memuse_log_dump_error(rank); +#else +#define memdump(rank) +#endif + /** * @brief Error macro. Prints the message given in argument and aborts. * @@ -54,19 +63,23 @@ extern int engine_rank; fprintf(stderr, "[%04i] %s %s:%s():%i: " s "\n", engine_rank, \ clocks_get_timesincestart(), __FILE__, __FUNCTION__, __LINE__, \ ##__VA_ARGS__); \ + memdump(engine_rank); \ MPI_Abort(MPI_COMM_WORLD, -1); \ }) #else +extern int engine_rank; #define error(s, ...) \ ({ \ fflush(stdout); \ fprintf(stderr, "%s %s:%s():%i: " s "\n", clocks_get_timesincestart(), \ __FILE__, __FUNCTION__, __LINE__, ##__VA_ARGS__); \ + memdump(engine_rank); \ swift_abort(1); \ }) #endif #ifdef WITH_MPI +extern int engine_rank; /** * @brief MPI error macro. Prints the message given in argument, * followed by the MPI error string and aborts. 
@@ -82,6 +95,7 @@ extern int engine_rank; char buf[len]; \ MPI_Error_string(res, buf, &len); \ fprintf(stderr, "%s\n\n", buf); \ + memdump(engine_rank); \ MPI_Abort(MPI_COMM_WORLD, -1); \ }) diff --git a/src/memuse.c b/src/memuse.c new file mode 100644 index 0000000000000000000000000000000000000000..10fd349c0cc4de26b94878b45fd78da19e140f12 --- /dev/null +++ b/src/memuse.c @@ -0,0 +1,280 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2018 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/** + * @file memuse.c + * @brief file of routines to report about memory use in SWIFT. + * Note reports are in KB. + */ + +/* Config parameters. */ +#include "../config.h" + +/* Standard includes. */ +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <unistd.h> + +/* Local defines. */ +#include "memuse.h" + +/* Local includes. */ +#include "atomic.h" +#include "clocks.h" +#include "engine.h" + +#ifdef SWIFT_MEMUSE_REPORTS + +/* Also recorded in logger. */ +extern int engine_rank; +extern int engine_current_step; + +/* Entry for logger of memory allocations and deallocations in a step. 
*/ +#define MEMUSE_MAXLAB 64 +struct memuse_log_entry { + + /* Rank in action. */ + int rank; + + /* Step of action. */ + int step; + + /* Whether allocated or deallocated. */ + int allocated; + + /* Memory allocated in bytes. */ + size_t size; + + /* Address of memory. */ + void *ptr; + + /* Relative time of this action. */ + ticks dtic; + + /* Label associated with the memory. */ + char label[MEMUSE_MAXLAB + 1]; +}; + +/* The log of allocations and frees. */ +static struct memuse_log_entry *memuse_log = NULL; +static volatile size_t memuse_log_size = 0; +static volatile size_t memuse_log_count = 0; +static volatile size_t memuse_log_done = 0; + +#define MEMUSE_INITLOG 1000000 +static void memuse_log_reallocate(size_t ind) { + + if (ind == 0) { + + /* Need to perform initialization. Be generous. */ + if ((memuse_log = (struct memuse_log_entry *)malloc( + sizeof(struct memuse_log_entry) * MEMUSE_INITLOG)) == NULL) + error("Failed to allocate memuse log."); + + /* Last action. */ + memuse_log_size = MEMUSE_INITLOG; + + } else { + struct memuse_log_entry *new_log; + if ((new_log = (struct memuse_log_entry *)malloc( + sizeof(struct memuse_log_entry) * memuse_log_size * 2)) == NULL) + error("Failed to re-allocate memuse log."); + + /* Wait for all writes to the old buffer to complete. */ + while (memuse_log_done < memuse_log_size) + ; + + /* Copy to new buffer. */ + memcpy(new_log, memuse_log, + sizeof(struct memuse_log_entry) * memuse_log_count); + free(memuse_log); + memuse_log = new_log; + + /* Last action. */ + memuse_log_size *= 2; + } +} + +/** + * @brief Log an allocation or deallocation of memory. + * + * @param label the label associated with the memory. + * @param ptr the memory pointer. + * @param allocated whether this is an allocation or deallocation. + * @param size the size in byte of memory allocated, set to 0 when + * deallocating. 
+ */ +void memuse_log_allocation(const char *label, void *ptr, int allocated, + size_t size) { + size_t ind = atomic_inc(&memuse_log_count); + + /* If we are at the current size we need more space. */ + if (ind == memuse_log_size) memuse_log_reallocate(ind); + + /* Other threads wait for space. */ + while (ind > memuse_log_size) + ; + + /* Record the log. */ + memuse_log[ind].rank = engine_rank; + memuse_log[ind].step = engine_current_step; + memuse_log[ind].allocated = allocated; + memuse_log[ind].size = size; + memuse_log[ind].ptr = ptr; + strncpy(memuse_log[ind].label, label, MEMUSE_MAXLAB); + memuse_log[ind].label[MEMUSE_MAXLAB] = '\0'; + memuse_log[ind].dtic = getticks() - clocks_start_ticks; + atomic_inc(&memuse_log_done); +} + +/** + * @brief dump the log to a file and reset, if anything to dump. + * + * @param filename name of file for log dump. + */ +void memuse_log_dump(const char *filename) { + + /* Skip if nothing allocated this step. */ + if (memuse_log_count == 0) return; + + /* Open the output file. */ + FILE *fd; + if ((fd = fopen(filename, "w")) == NULL) + error("Failed to create memuse log file '%s'.", filename); + + /* Write a header. */ + fprintf(fd, "# Current use: %s\n", memuse_process(1)); + fprintf(fd, "# cpufreq: %lld\n", clocks_get_cpufreq()); + fprintf(fd, "# dtic adr rank step allocated label size\n"); + + for (size_t k = 0; k < memuse_log_count; k++) { + fprintf(fd, "%lld %p %d %d %d %s %zd\n", memuse_log[k].dtic, + memuse_log[k].ptr, memuse_log[k].rank, memuse_log[k].step, + memuse_log[k].allocated, memuse_log[k].label, memuse_log[k].size); + } + + /* Clear the log. */ + memuse_log_count = 0; + + /* Close the file. */ + fflush(fd); + fclose(fd); +} + +/** + * @brief dump the log for using the given rank to generate a standard + * name for the output. Used when exiting in error. + * + * @param rank the rank exiting in error. 
+ */ +void memuse_log_dump_error(int rank) { + char filename[60]; + sprintf(filename, "memuse-error-report-rank%d.txt", rank); + memuse_log_dump(filename); +} + +#endif /* SWIFT_MEMUSE_REPORTS */ + +/** + * @brief parse the process /proc/self/statm file to get the process + * memory use (in KB). Top field in (). + * + * @param size total virtual memory (VIRT/VmSize) + * @param resident resident non-swapped memory (RES/VmRSS) + * @param shared shared (mmap'd) memory (SHR, RssFile+RssShmem) + * @param text text (exe) resident set (CODE, note also includes data + * segment, so is considered broken for Linux) + * @param data data+stack resident set (DATA, note also includes library, + * so is considered broken for Linux) + * @param library library resident set (0 for Linux) + * @param dirty dirty pages (nDRT = 0 for Linux) + */ +void memuse_use(long *size, long *resident, long *shared, long *text, + long *data, long *library, long *dirty) { + + /* Open the file. */ + FILE *file = fopen("/proc/self/statm", "r"); + if (file != NULL) { + int nscan = fscanf(file, "%ld %ld %ld %ld %ld %ld %ld", size, resident, + shared, text, library, data, dirty); + + if (nscan == 7) { + /* Convert pages into bytes. Usually 4096, but could be 512 on some + * systems so take care in conversion to KB. */ + long sz = sysconf(_SC_PAGESIZE); + *size *= sz; + *resident *= sz; + *shared *= sz; + *text *= sz; + *library *= sz; + *data *= sz; + *dirty *= sz; + + *size /= 1024; + *resident /= 1024; + *shared /= 1024; + *text /= 1024; + *library /= 1024; + *data /= 1024; + *dirty /= 1024; + } else { + error("Failed to read sufficient fields from /proc/self/statm"); + } + fclose(file); + } else { + error("Failed to open /proc/self/statm"); + } +} + +/** + * @brief Return a string with the current memory use of the process described. + * + * Not thread safe. + * + * @param inmb if true then report in MB, not KB. 
+ * + * @result the memory use of the process, note make a copy if not used + * immediately. + */ +const char *memuse_process(int inmb) { + static char buffer[256]; + long size; + long resident; + long shared; + long text; + long library; + long data; + long dirty; + memuse_use(&size, &resident, &shared, &text, &data, &library, &dirty); + + if (inmb) { + snprintf(buffer, 256, + "VIRT = %.3f SHR = %.3f CODE = %.3f DATA = %.3f " + "RES = %.3f (MB)", + size / 1024.0, shared / 1024.0, text / 1024.0, data / 1024.0, + resident / 1024.0); + } else { + snprintf(buffer, 256, + "VIRT = %ld SHR = %ld CODE = %ld DATA = %ld " + "RES = %ld (KB)", + size, shared, text, data, resident); + } + return buffer; +} diff --git a/src/memuse.h b/src/memuse.h new file mode 100644 index 0000000000000000000000000000000000000000..751b7195aa8ad705d03b2490876993331e495c15 --- /dev/null +++ b/src/memuse.h @@ -0,0 +1,140 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2018 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_MEMUSE_H +#define SWIFT_MEMUSE_H + +/* Config parameters. */ +#include "../config.h" + +/* Includes. */ +#include <stdlib.h> + +/* API. 
*/ +void memuse_use(long *size, long *resident, long *shared, long *text, + long *data, long *library, long *dirty); +const char *memuse_process(int inmb); + +#ifdef SWIFT_MEMUSE_REPORTS +void memuse_log_dump(const char *filename); +void memuse_log_dump_error(int rank); +void memuse_log_allocation(const char *label, void *ptr, int allocated, + size_t size); +#else + +/* No-op when not reporting. */ +#define memuse_log_allocation(label, ptr, allocated, size) +#endif + +/** + * @brief allocate aligned memory. The use and results are the same as the + * posix_memalign function. This function should be used for any + * significant allocations and consistently labelled. + * + * @param label a symbolic label for the memory, i.e. "parts". + * @param memptr pointer to the allocated memory. + * @param alignment alignment boundary. + * @param size the quantity of bytes to allocate. + * @result zero on success, otherwise an error code. + */ +__attribute__((always_inline)) inline int swift_memalign(const char *label, + void **memptr, + size_t alignment, + size_t size) { + int result = posix_memalign(memptr, alignment, size); +#ifdef SWIFT_MEMUSE_REPORTS + if (result == 0) { + memuse_log_allocation(label, *memptr, 1, size); + } else { + /* Failed allocations are interesting as well. */ + memuse_log_allocation(label, NULL, -1, size); + } +#endif + return result; +} + +/** + * @brief allocate memory. The use and results are the same as the + * malloc function. This function should be used for any + * _significant_ allocations and consistently labelled. + * Do not use this function for small or high frequency + * allocations in production code. + * + * @param label a symbolic label for the memory, i.e. "parts". + * @param size the quantity of bytes to allocate. + * @result pointer to the allocated memory or NULL on failure. 
+ */ +__attribute__((always_inline)) inline void *swift_malloc(const char *label, + size_t size) { + void *memptr = malloc(size); +#ifdef SWIFT_MEMUSE_REPORTS + if (memptr != NULL) { + memuse_log_allocation(label, memptr, 1, size); + } else { + /* Failed allocations are interesting as well. */ + memuse_log_allocation(label, NULL, -1, size); + } +#endif + return memptr; +} + +/** + * @brief allocate zeroed memory. The use and results are the same as the + * calloc function. This function should be used for any + * _significant_ allocations and consistently labelled. + * Do not use this function for small or high frequency + * allocations in production code. + * + * @param label a symbolic label for the memory, i.e. "parts". + * @param nmemb number of elements to allocate. + * @param size the size of each element in bytes. + * @result pointer to the allocated memory or NULL on failure. + */ +__attribute__((always_inline)) inline void *swift_calloc(const char *label, + size_t nmemb, + size_t size) { + void *memptr = calloc(nmemb, size); +#ifdef SWIFT_MEMUSE_REPORTS + if (memptr != NULL) { + memuse_log_allocation(label, memptr, 1, size * nmemb); + } else { + /* Failed allocations are interesting as well. */ + memuse_log_allocation(label, NULL, -1, size * nmemb); + } +#endif + return memptr; +} + +/** + * @brief free aligned memory. The use and results are the same as the + * free function. The label should match a prior call to swift_memalign + * or swift_malloc. + * + * @param label a symbolic label for the memory, i.e. "parts". + * @param ptr pointer to the allocated memory. 
+ */ +__attribute__((always_inline)) inline void swift_free(const char *label, + void *ptr) { + free(ptr); +#ifdef SWIFT_MEMUSE_REPORTS + memuse_log_allocation(label, ptr, 0, 0); +#endif + return; +} + +#endif /* SWIFT_MEMUSE_H */ diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c index e7005b083c94e20f5218923e443f71464ab383e1..bf0519842eb620e0ff4d8cd0cda05a9395c9722f 100644 --- a/src/mesh_gravity.c +++ b/src/mesh_gravity.c @@ -389,6 +389,8 @@ void pm_mesh_compute_potential(struct pm_mesh* mesh, const struct space* s, (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * N * N * (N_half + 1)); if (frho == NULL) error("Error allocating memory for transform of density mesh"); + memuse_log_allocation("fftw_frho", frho, 1, + sizeof(fftw_complex) * N * N * (N_half + 1)); /* Prepare the FFT library */ fftw_plan forward_plan = fftw_plan_dft_r2c_3d( @@ -537,6 +539,7 @@ void pm_mesh_compute_potential(struct pm_mesh* mesh, const struct space* s, /* Clean-up the mess */ fftw_destroy_plan(forward_plan); fftw_destroy_plan(inverse_plan); + memuse_log_allocation("fftw_frho", frho, 0, 0); fftw_free(frho); #else @@ -636,6 +639,9 @@ void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props, mesh->potential = (double*)fftw_malloc(sizeof(double) * N * N * N); if (mesh->potential == NULL) error("Error allocating memory for the long-range gravity mesh."); + memuse_log_allocation("fftw_mesh.potential", mesh->potential, 1, + sizeof(double) * N * N * N); + #else error("No FFTW library found. 
Cannot compute periodic long-range forces."); #endif @@ -674,7 +680,10 @@ void pm_mesh_clean(struct pm_mesh* mesh) { fftw_cleanup_threads(); #endif - if (mesh->potential) free(mesh->potential); + if (mesh->potential) { + memuse_log_allocation("fftw_mesh.potential", mesh->potential, 0, 0); + free(mesh->potential); + } mesh->potential = 0; } @@ -718,6 +727,8 @@ void pm_mesh_struct_restore(struct pm_mesh* mesh, FILE* stream) { mesh->potential = (double*)fftw_malloc(sizeof(double) * N * N * N); if (mesh->potential == NULL) error("Error allocating memory for the long-range gravity mesh."); + memuse_log_allocation("fftw_mesh.potential", mesh->potential, 1, + sizeof(double) * N * N * N); #else error("No FFTW library found. Cannot compute periodic long-range forces."); #endif diff --git a/src/parallel_io.c b/src/parallel_io.c index b598d944dfe7c90510b036e7f9d85619ea103c59..0160a53c110c8913e42e0d7cd7c8720a9ed3d331 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -50,6 +50,7 @@ #include "hydro_properties.h" #include "io_properties.h" #include "kernel_hydro.h" +#include "memuse.h" #include "part.h" #include "part_type.h" #include "star_formation_io.h" @@ -472,7 +473,7 @@ void writeArray_chunk(struct engine* e, hid_t h_data, /* Allocate temporary buffer */ void* temp = NULL; - if (posix_memalign((void**)&temp, IO_BUFFER_ALIGNMENT, + if (swift_memalign("writebuff", (void**)&temp, IO_BUFFER_ALIGNMENT, num_elements * typeSize) != 0) error("Unable to allocate temporary i/o buffer"); @@ -556,7 +557,7 @@ void writeArray_chunk(struct engine* e, hid_t h_data, #endif /* Free and close everything */ - free(temp); + swift_free("writebuff", temp); H5Sclose(h_memspace); H5Sclose(h_filespace); } @@ -815,7 +816,7 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, /* Allocate memory to store SPH particles */ if (with_hydro) { *Ngas = N[0]; - if (posix_memalign((void**)parts, part_align, + if (swift_memalign("parts", (void**)parts, part_align, (*Ngas) * 
sizeof(struct part)) != 0) error("Error while allocating memory for particles"); bzero(*parts, *Ngas * sizeof(struct part)); @@ -824,7 +825,7 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, /* Allocate memory to store stars particles */ if (with_stars) { *Nstars = N[swift_type_stars]; - if (posix_memalign((void**)sparts, spart_align, + if (swift_memalign("sparts", (void**)sparts, spart_align, *Nstars * sizeof(struct spart)) != 0) error("Error while allocating memory for stars particles"); bzero(*sparts, *Nstars * sizeof(struct spart)); @@ -836,7 +837,7 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, *Ngparts = (with_hydro ? N[swift_type_gas] : 0) + N[swift_type_dark_matter] + (with_stars ? N[swift_type_stars] : 0); - if (posix_memalign((void**)gparts, gpart_align, + if (swift_memalign("gparts", (void**)gparts, gpart_align, *Ngparts * sizeof(struct gpart)) != 0) error("Error while allocating memory for gravity particles"); bzero(*gparts, *Ngparts * sizeof(struct gpart)); @@ -1487,10 +1488,12 @@ void write_output_parallel(struct engine* e, const char* baseName, Nparticles = Ngas_written; /* Allocate temporary arrays */ - if (posix_memalign((void**)&parts_written, part_align, + if (swift_memalign("parts_written", (void**)&parts_written, + part_align, Ngas_written * sizeof(struct part)) != 0) error("Error while allocating temporart memory for parts"); - if (posix_memalign((void**)&xparts_written, xpart_align, + if (swift_memalign("xparts_written", (void**)&xparts_written, + xpart_align, Ngas_written * sizeof(struct xpart)) != 0) error("Error while allocating temporart memory for xparts"); @@ -1535,13 +1538,15 @@ void write_output_parallel(struct engine* e, const char* baseName, Nparticles = Ndm_written; /* Allocate temporary array */ - if (posix_memalign((void**)&gparts_written, gpart_align, + if (swift_memalign("gparts_written", (void**)&gparts_written, + gpart_align, Ndm_written * sizeof(struct 
gpart)) != 0) error("Error while allocating temporart memory for gparts"); if (with_stf) { - if (posix_memalign( - (void**)&gpart_group_data_written, gpart_align, + if (swift_memalign( + "gpart_group_written", (void**)&gpart_group_data_written, + gpart_align, Ndm_written * sizeof(struct velociraptor_gpart_data)) != 0) error( "Error while allocating temporart memory for gparts STF " @@ -1582,7 +1587,8 @@ void write_output_parallel(struct engine* e, const char* baseName, Nparticles = Nstars_written; /* Allocate temporary arrays */ - if (posix_memalign((void**)&sparts_written, spart_align, + if (swift_memalign("sparts_written", (void**)&sparts_written, + spart_align, Nstars_written * sizeof(struct spart)) != 0) error("Error while allocating temporart memory for sparts"); @@ -1622,11 +1628,12 @@ void write_output_parallel(struct engine* e, const char* baseName, } /* Free temporary array */ - if (parts_written) free(parts_written); - if (xparts_written) free(xparts_written); - if (gparts_written) free(gparts_written); - if (gpart_group_data_written) free(gpart_group_data_written); - if (sparts_written) free(sparts_written); + if (parts_written) swift_free("parts_written", parts_written); + if (xparts_written) swift_free("xparts_written", xparts_written); + if (gparts_written) swift_free("gparts_written", gparts_written); + if (gpart_group_data_written) + swift_free("gpart_group_written", gpart_group_data_written); + if (sparts_written) swift_free("sparts_written", sparts_written); #ifdef IO_SPEED_MEASUREMENT MPI_Barrier(MPI_COMM_WORLD); diff --git a/src/proxy.c b/src/proxy.c index 4a67b4b3584c43b2df63f17303eba9ec5e742cb0..d9f27ad63cc4c888c14048de83a1576000aacac6 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -41,6 +41,7 @@ #include "cell.h" #include "engine.h" #include "error.h" +#include "memuse.h" #include "space.h" #ifdef WITH_MPI @@ -89,9 +90,9 @@ void proxy_tags_exchange(struct proxy *proxies, int num_proxies, /* Allocate the tags. 
*/ int *tags_in = NULL; int *tags_out = NULL; - if (posix_memalign((void **)&tags_in, SWIFT_CACHE_ALIGNMENT, + if (swift_memalign("tags_in", (void **)&tags_in, SWIFT_CACHE_ALIGNMENT, sizeof(int) * count_in) != 0 || - posix_memalign((void **)&tags_out, SWIFT_CACHE_ALIGNMENT, + swift_memalign("tags_out", (void **)&tags_out, SWIFT_CACHE_ALIGNMENT, sizeof(int) * count_out) != 0) error("Failed to allocate tags buffers."); @@ -167,8 +168,8 @@ void proxy_tags_exchange(struct proxy *proxies, int num_proxies, error("MPI_Waitall on sends failed."); /* Clean up. */ - free(tags_in); - free(tags_out); + swift_free("tags_in", tags_in); + swift_free("tags_out", tags_out); free(reqs_in); free(cids_in); @@ -204,10 +205,12 @@ void proxy_cells_exchange_first(struct proxy *p) { // p->size_pcells_out , p->mynodeID , p->nodeID ); fflush(stdout); /* Allocate and fill the pcell buffer. */ - if (p->pcells_out != NULL) free(p->pcells_out); - if (posix_memalign((void **)&p->pcells_out, SWIFT_STRUCT_ALIGNMENT, + if (p->pcells_out != NULL) swift_free("pcells_out", p->pcells_out); + if (swift_memalign("pcells_out", (void **)&p->pcells_out, + SWIFT_STRUCT_ALIGNMENT, sizeof(struct pcell) * p->size_pcells_out) != 0) error("Failed to allocate pcell_out buffer."); + for (int ind = 0, k = 0; k < p->nr_cells_out; k++) { memcpy(&p->pcells_out[ind], p->cells_out[k]->mpi.pcell, sizeof(struct pcell) * p->cells_out[k]->mpi.pcell_size); @@ -250,8 +253,9 @@ void proxy_cells_exchange_second(struct proxy *p) { #ifdef WITH_MPI /* Re-allocate the pcell_in buffer. 
*/ - if (p->pcells_in != NULL) free(p->pcells_in); - if (posix_memalign((void **)&p->pcells_in, SWIFT_STRUCT_ALIGNMENT, + if (p->pcells_in != NULL) swift_free("pcells_in", p->pcells_in); + if (swift_memalign("pcells_in", (void **)&p->pcells_in, + SWIFT_STRUCT_ALIGNMENT, sizeof(struct pcell) * p->size_pcells_in) != 0) error("Failed to allocate pcell_in buffer."); @@ -397,7 +401,7 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies, /* Allocate the pcells. */ struct pcell *pcells = NULL; - if (posix_memalign((void **)&pcells, SWIFT_CACHE_ALIGNMENT, + if (swift_memalign("pcells", (void **)&pcells, SWIFT_CACHE_ALIGNMENT, sizeof(struct pcell) * count_out) != 0) error("Failed to allocate pcell buffer."); @@ -467,7 +471,7 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies, /* Clean up. */ free(reqs); - free(pcells); + swift_free("pcells", pcells); #else error("SWIFT was not compiled with MPI support."); @@ -646,30 +650,30 @@ void proxy_parts_exchange_second(struct proxy *p) { do { p->size_parts_in *= proxy_buffgrow; } while (p->nr_parts_in > p->size_parts_in); - free(p->parts_in); - free(p->xparts_in); - if ((p->parts_in = (struct part *)malloc(sizeof(struct part) * - p->size_parts_in)) == NULL || - (p->xparts_in = (struct xpart *)malloc(sizeof(struct xpart) * - p->size_parts_in)) == NULL) + swift_free("parts_in", p->parts_in); + swift_free("xparts_in", p->xparts_in); + if ((p->parts_in = (struct part *)swift_malloc( + "parts_in", sizeof(struct part) * p->size_parts_in)) == NULL || + (p->xparts_in = (struct xpart *)swift_malloc( + "xparts_in", sizeof(struct xpart) * p->size_parts_in)) == NULL) error("Failed to re-allocate parts_in buffers."); } if (p->nr_gparts_in > p->size_gparts_in) { do { p->size_gparts_in *= proxy_buffgrow; } while (p->nr_gparts_in > p->size_gparts_in); - free(p->gparts_in); - if ((p->gparts_in = (struct gpart *)malloc(sizeof(struct gpart) * - p->size_gparts_in)) == NULL) + swift_free("gparts_in", p->gparts_in); + if 
((p->gparts_in = (struct gpart *)swift_malloc( + "gparts_in", sizeof(struct gpart) * p->size_gparts_in)) == NULL) error("Failed to re-allocate gparts_in buffers."); } if (p->nr_sparts_in > p->size_sparts_in) { do { p->size_sparts_in *= proxy_buffgrow; } while (p->nr_sparts_in > p->size_sparts_in); - free(p->sparts_in); - if ((p->sparts_in = (struct spart *)malloc(sizeof(struct spart) * - p->size_sparts_in)) == NULL) + swift_free("sparts_in", p->sparts_in); + if ((p->sparts_in = (struct spart *)swift_malloc( + "sparts_in", sizeof(struct spart) * p->size_sparts_in)) == NULL) error("Failed to re-allocate sparts_in buffers."); } @@ -725,15 +729,15 @@ void proxy_parts_load(struct proxy *p, const struct part *parts, } while (p->nr_parts_out + N > p->size_parts_out); struct part *tp = NULL; struct xpart *txp = NULL; - if ((tp = (struct part *)malloc(sizeof(struct part) * p->size_parts_out)) == - NULL || - (txp = (struct xpart *)malloc(sizeof(struct xpart) * - p->size_parts_out)) == NULL) + if ((tp = (struct part *)swift_malloc( + "parts_out", sizeof(struct part) * p->size_parts_out)) == NULL || + (txp = (struct xpart *)swift_malloc( + "xparts_out", sizeof(struct xpart) * p->size_parts_out)) == NULL) error("Failed to re-allocate parts_out buffers."); memcpy(tp, p->parts_out, sizeof(struct part) * p->nr_parts_out); memcpy(txp, p->xparts_out, sizeof(struct xpart) * p->nr_parts_out); - free(p->parts_out); - free(p->xparts_out); + swift_free("parts_out", p->parts_out); + swift_free("xparts_out", p->xparts_out); p->parts_out = tp; p->xparts_out = txp; } @@ -761,11 +765,11 @@ void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N) { p->size_gparts_out *= proxy_buffgrow; } while (p->nr_gparts_out + N > p->size_gparts_out); struct gpart *tp; - if ((tp = (struct gpart *)malloc(sizeof(struct gpart) * - p->size_gparts_out)) == NULL) + if ((tp = (struct gpart *)swift_malloc( + "gparts_out", sizeof(struct gpart) * p->size_gparts_out)) == NULL) error("Failed to 
re-allocate gparts_out buffers."); memcpy(tp, p->gparts_out, sizeof(struct gpart) * p->nr_gparts_out); - free(p->gparts_out); + swift_free("gparts_out", p->gparts_out); p->gparts_out = tp; } @@ -791,11 +795,11 @@ void proxy_sparts_load(struct proxy *p, const struct spart *sparts, int N) { p->size_sparts_out *= proxy_buffgrow; } while (p->nr_sparts_out + N > p->size_sparts_out); struct spart *tp; - if ((tp = (struct spart *)malloc(sizeof(struct spart) * - p->size_sparts_out)) == NULL) + if ((tp = (struct spart *)swift_malloc( + "sparts_out", sizeof(struct spart) * p->size_sparts_out)) == NULL) error("Failed to re-allocate sparts_out buffers."); memcpy(tp, p->sparts_out, sizeof(struct spart) * p->nr_sparts_out); - free(p->sparts_out); + swift_free("sparts_out", p->sparts_out); p->sparts_out = tp; } @@ -844,19 +848,19 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) { /* Allocate the part send and receive buffers, if needed. */ if (p->parts_in == NULL) { p->size_parts_in = proxy_buffinit; - if ((p->parts_in = (struct part *)malloc(sizeof(struct part) * - p->size_parts_in)) == NULL || - (p->xparts_in = (struct xpart *)malloc(sizeof(struct xpart) * - p->size_parts_in)) == NULL) + if ((p->parts_in = (struct part *)swift_malloc( + "parts_in", sizeof(struct part) * p->size_parts_in)) == NULL || + (p->xparts_in = (struct xpart *)swift_malloc( + "xparts_in", sizeof(struct xpart) * p->size_parts_in)) == NULL) error("Failed to allocate parts_in buffers."); } p->nr_parts_in = 0; if (p->parts_out == NULL) { p->size_parts_out = proxy_buffinit; - if ((p->parts_out = (struct part *)malloc(sizeof(struct part) * - p->size_parts_out)) == NULL || - (p->xparts_out = (struct xpart *)malloc(sizeof(struct xpart) * - p->size_parts_out)) == NULL) + if ((p->parts_out = (struct part *)swift_malloc( + "parts_out", sizeof(struct part) * p->size_parts_out)) == NULL || + (p->xparts_out = (struct xpart *)swift_malloc( + "xparts_out", sizeof(struct xpart) * p->size_parts_out)) == NULL) 
error("Failed to allocate parts_out buffers."); } p->nr_parts_out = 0; @@ -864,15 +868,15 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) { /* Allocate the gpart send and receive buffers, if needed. */ if (p->gparts_in == NULL) { p->size_gparts_in = proxy_buffinit; - if ((p->gparts_in = (struct gpart *)malloc(sizeof(struct gpart) * - p->size_gparts_in)) == NULL) + if ((p->gparts_in = (struct gpart *)swift_malloc( + "gparts_in", sizeof(struct gpart) * p->size_gparts_in)) == NULL) error("Failed to allocate gparts_in buffers."); } p->nr_gparts_in = 0; if (p->gparts_out == NULL) { p->size_gparts_out = proxy_buffinit; - if ((p->gparts_out = (struct gpart *)malloc(sizeof(struct gpart) * - p->size_gparts_out)) == NULL) + if ((p->gparts_out = (struct gpart *)swift_malloc( + "gparts_out", sizeof(struct gpart) * p->size_gparts_out)) == NULL) error("Failed to allocate gparts_out buffers."); } p->nr_gparts_out = 0; @@ -880,15 +884,15 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) { /* Allocate the spart send and receive buffers, if needed. 
*/ if (p->sparts_in == NULL) { p->size_sparts_in = proxy_buffinit; - if ((p->sparts_in = (struct spart *)malloc(sizeof(struct spart) * - p->size_sparts_in)) == NULL) + if ((p->sparts_in = (struct spart *)swift_malloc( + "sparts_in", sizeof(struct spart) * p->size_sparts_in)) == NULL) error("Failed to allocate sparts_in buffers."); } p->nr_sparts_in = 0; if (p->sparts_out == NULL) { p->size_sparts_out = proxy_buffinit; - if ((p->sparts_out = (struct spart *)malloc(sizeof(struct spart) * - p->size_sparts_out)) == NULL) + if ((p->sparts_out = (struct spart *)swift_malloc( + "sparts_out", sizeof(struct spart) * p->size_sparts_out)) == NULL) error("Failed to allocate sparts_out buffers."); } p->nr_sparts_out = 0; diff --git a/src/runner.c b/src/runner.c index 4f1f97693baf29bf1e6f3846b41920a75112a621..65a33863aa2a9eff297cd637c855b653b2dddcf4 100644 --- a/src/runner.c +++ b/src/runner.c @@ -55,6 +55,7 @@ #include "hydro_properties.h" #include "kick.h" #include "logger.h" +#include "memuse.h" #include "minmax.h" #include "runner_doiact_vec.h" #include "scheduler.h" @@ -818,14 +819,8 @@ void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags, if (c->hydro.sorted == 0) c->hydro.ti_sort = r->e->ti_current; #endif - /* start by allocating the entry arrays in the requested dimensions. */ - for (int j = 0; j < 13; j++) { - if ((flags & (1 << j)) && c->hydro.sort[j] == NULL) { - if ((c->hydro.sort[j] = (struct entry *)malloc(sizeof(struct entry) * - (count + 1))) == NULL) - error("Failed to allocate sort memory."); - } - } + /* Allocate memory for sorting. */ + cell_malloc_hydro_sorts(c, flags); /* Does this cell have any progeny? */ if (c->split) { @@ -1044,13 +1039,7 @@ void runner_do_stars_sort(struct runner *r, struct cell *c, int flags, #endif /* start by allocating the entry arrays in the requested dimensions. 
*/ - for (int j = 0; j < 13; j++) { - if ((flags & (1 << j)) && c->stars.sort[j] == NULL) { - if ((c->stars.sort[j] = (struct entry *)malloc(sizeof(struct entry) * - (count + 1))) == NULL) - error("Failed to allocate sort memory."); - } - } + cell_malloc_stars_sorts(c, flags); /* Does this cell have any progeny? */ if (c->split) { diff --git a/src/scheduler.c b/src/scheduler.c index 729b566e3be7d20bc5a73fc1692b0911885b0efe..496d15ab302e6f0ea1280594ec16c433b117eb32 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -46,6 +46,7 @@ #include "error.h" #include "intrinsics.h" #include "kernel_hydro.h" +#include "memuse.h" #include "queue.h" #include "sort_part.h" #include "space.h" @@ -67,9 +68,10 @@ static void scheduler_extend_unlocks(struct scheduler *s) { /* Allocate the new buffer. */ const int size_unlocks_new = s->size_unlocks * 2; - struct task **unlocks_new = - (struct task **)malloc(sizeof(struct task *) * size_unlocks_new); - int *unlock_ind_new = (int *)malloc(sizeof(int) * size_unlocks_new); + struct task **unlocks_new = (struct task **)swift_malloc( + "unlocks", sizeof(struct task *) * size_unlocks_new); + int *unlock_ind_new = + (int *)swift_malloc("unlock_ind", sizeof(int) * size_unlocks_new); if (unlocks_new == NULL || unlock_ind_new == NULL) error("Failed to re-allocate unlocks."); @@ -80,8 +82,8 @@ static void scheduler_extend_unlocks(struct scheduler *s) { /* Copy the buffers. */ memcpy(unlocks_new, s->unlocks, sizeof(struct task *) * s->size_unlocks); memcpy(unlock_ind_new, s->unlock_ind, sizeof(int) * s->size_unlocks); - free(s->unlocks); - free(s->unlock_ind); + swift_free("unlocks", s->unlocks); + swift_free("unlock_ind", s->unlock_ind); s->unlocks = unlocks_new; s->unlock_ind = unlock_ind_new; @@ -1321,7 +1323,8 @@ void scheduler_set_unlocks(struct scheduler *s) { /* Store the counts for each task. 
*/ short int *counts; - if ((counts = (short int *)malloc(sizeof(short int) * s->nr_tasks)) == NULL) + if ((counts = (short int *)swift_malloc( + "counts", sizeof(short int) * s->nr_tasks)) == NULL) error("Failed to allocate temporary counts array."); bzero(counts, sizeof(short int) * s->nr_tasks); for (int k = 0; k < s->nr_unlocks; k++) { @@ -1339,7 +1342,8 @@ void scheduler_set_unlocks(struct scheduler *s) { /* Compute the offset for each unlock block. */ int *offsets; - if ((offsets = (int *)malloc(sizeof(int) * (s->nr_tasks + 1))) == NULL) + if ((offsets = (int *)swift_malloc("offsets", + sizeof(int) * (s->nr_tasks + 1))) == NULL) error("Failed to allocate temporary offsets array."); offsets[0] = 0; for (int k = 0; k < s->nr_tasks; k++) { @@ -1353,8 +1357,8 @@ void scheduler_set_unlocks(struct scheduler *s) { /* Create and fill a temporary array with the sorted unlocks. */ struct task **unlocks; - if ((unlocks = (struct task **)malloc(sizeof(struct task *) * - s->size_unlocks)) == NULL) + if ((unlocks = (struct task **)swift_malloc( + "unlocks", sizeof(struct task *) * s->size_unlocks)) == NULL) error("Failed to allocate temporary unlocks array."); for (int k = 0; k < s->nr_unlocks; k++) { const int ind = s->unlock_ind[k]; @@ -1363,7 +1367,7 @@ void scheduler_set_unlocks(struct scheduler *s) { } /* Swap the unlocks. */ - free(s->unlocks); + swift_free("unlocks", s->unlocks); s->unlocks = unlocks; /* Re-set the offsets. */ @@ -1395,8 +1399,8 @@ void scheduler_set_unlocks(struct scheduler *s) { #endif /* Clean up. */ - free(counts); - free(offsets); + swift_free("counts", counts); + swift_free("offsets", offsets); } /** @@ -1478,14 +1482,16 @@ void scheduler_reset(struct scheduler *s, int size) { scheduler_free_tasks(s); /* Allocate the new lists. 
*/ - if (posix_memalign((void **)&s->tasks, task_align, + if (swift_memalign("tasks", (void **)&s->tasks, task_align, size * sizeof(struct task)) != 0) error("Failed to allocate task array."); - if ((s->tasks_ind = (int *)malloc(sizeof(int) * size)) == NULL) + if ((s->tasks_ind = (int *)swift_malloc("tasks_ind", sizeof(int) * size)) == + NULL) error("Failed to allocate task lists."); - if ((s->tid_active = (int *)malloc(sizeof(int) * size)) == NULL) + if ((s->tid_active = + (int *)swift_malloc("tid_active", sizeof(int) * size)) == NULL) error("Failed to allocate aactive task lists."); } @@ -2147,7 +2153,7 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks, lock_init(&s->lock); /* Allocate the queues. */ - if (posix_memalign((void **)&s->queues, queue_struct_align, + if (swift_memalign("queues", (void **)&s->queues, queue_struct_align, sizeof(struct queue) * nr_queues) != 0) error("Failed to allocate queues."); @@ -2160,10 +2166,11 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks, error("Failed to initialize sleep barrier."); /* Init the unlocks. 
*/ - if ((s->unlocks = (struct task **)malloc( - sizeof(struct task *) * scheduler_init_nr_unlocks)) == NULL || - (s->unlock_ind = - (int *)malloc(sizeof(int) * scheduler_init_nr_unlocks)) == NULL) + if ((s->unlocks = (struct task **)swift_malloc( + "unlocks", sizeof(struct task *) * scheduler_init_nr_unlocks)) == + NULL || + (s->unlock_ind = (int *)swift_malloc( + "unlock_ind", sizeof(int) * scheduler_init_nr_unlocks)) == NULL) error("Failed to allocate unlocks."); s->nr_unlocks = 0; s->size_unlocks = scheduler_init_nr_unlocks; @@ -2214,10 +2221,10 @@ void scheduler_print_tasks(const struct scheduler *s, const char *fileName) { void scheduler_clean(struct scheduler *s) { scheduler_free_tasks(s); - free(s->unlocks); - free(s->unlock_ind); + swift_free("unlocks", s->unlocks); + swift_free("unlock_ind", s->unlock_ind); for (int i = 0; i < s->nr_queues; ++i) queue_clean(&s->queues[i]); - free(s->queues); + swift_free("queues", s->queues); } /** @@ -2226,15 +2233,15 @@ void scheduler_clean(struct scheduler *s) { void scheduler_free_tasks(struct scheduler *s) { if (s->tasks != NULL) { - free(s->tasks); + swift_free("tasks", s->tasks); s->tasks = NULL; } if (s->tasks_ind != NULL) { - free(s->tasks_ind); + swift_free("tasks_ind", s->tasks_ind); s->tasks_ind = NULL; } if (s->tid_active != NULL) { - free(s->tid_active); + swift_free("tid_active", s->tid_active); s->tid_active = NULL; } s->size = 0; diff --git a/src/serial_io.c b/src/serial_io.c index b55f4780814fe45544a5680e95e67f55194fc5e8..55a468411b7b9f84d4e05b8f388a5de7873f7f59 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -50,6 +50,7 @@ #include "hydro_properties.h" #include "io_properties.h" #include "kernel_hydro.h" +#include "memuse.h" #include "part.h" #include "part_type.h" #include "star_formation_io.h" @@ -364,7 +365,7 @@ void writeArray(const struct engine* e, hid_t grp, char* fileName, /* Allocate temporary buffer */ void* temp = NULL; - if (posix_memalign((void**)&temp, IO_BUFFER_ALIGNMENT, + if 
(swift_memalign("writebuff", (void**)&temp, IO_BUFFER_ALIGNMENT, num_elements * typeSize) != 0) error("Unable to allocate temporary i/o buffer"); @@ -415,7 +416,7 @@ void writeArray(const struct engine* e, hid_t grp, char* fileName, if (h_err < 0) error("Error while writing data array '%s'.", props.name); /* Free and close everything */ - free(temp); + swift_free("writebuff", temp); H5Dclose(h_data); H5Sclose(h_memspace); H5Sclose(h_filespace); @@ -614,7 +615,7 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, /* Allocate memory to store SPH particles */ if (with_hydro) { *Ngas = N[0]; - if (posix_memalign((void**)parts, part_align, + if (swift_memalign("parts", (void**)parts, part_align, *Ngas * sizeof(struct part)) != 0) error("Error while allocating memory for SPH particles"); bzero(*parts, *Ngas * sizeof(struct part)); @@ -623,7 +624,7 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, /* Allocate memory to store stars particles */ if (with_stars) { *Nstars = N[swift_type_stars]; - if (posix_memalign((void**)sparts, spart_align, + if (swift_memalign("sparts", (void**)sparts, spart_align, *Nstars * sizeof(struct spart)) != 0) error("Error while allocating memory for stars particles"); bzero(*sparts, *Nstars * sizeof(struct spart)); @@ -635,7 +636,7 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, *Ngparts = (with_hydro ? N[swift_type_gas] : 0) + N[swift_type_dark_matter] + (with_stars ? 
N[swift_type_stars] : 0); - if (posix_memalign((void**)gparts, gpart_align, + if (swift_memalign("gparts", (void**)gparts, gpart_align, *Ngparts * sizeof(struct gpart)) != 0) error("Error while allocating memory for gravity particles"); bzero(*gparts, *Ngparts * sizeof(struct gpart)); @@ -1132,10 +1133,12 @@ void write_output_serial(struct engine* e, const char* baseName, Nparticles = Ngas_written; /* Allocate temporary arrays */ - if (posix_memalign((void**)&parts_written, part_align, + if (swift_memalign("parts_written", (void**)&parts_written, + part_align, Ngas_written * sizeof(struct part)) != 0) error("Error while allocating temporart memory for parts"); - if (posix_memalign((void**)&xparts_written, xpart_align, + if (swift_memalign("xparts_written", (void**)&xparts_written, + xpart_align, Ngas_written * sizeof(struct xpart)) != 0) error("Error while allocating temporart memory for xparts"); @@ -1181,12 +1184,14 @@ void write_output_serial(struct engine* e, const char* baseName, Nparticles = Ndm_written; /* Allocate temporary array */ - if (posix_memalign((void**)&gparts_written, gpart_align, + if (swift_memalign("gparts_written", (void**)&gparts_written, + gpart_align, Ndm_written * sizeof(struct gpart)) != 0) error("Error while allocating temporart memory for gparts"); if (with_stf) { - if (posix_memalign( + if (swift_memalign( + "gpart_group_written", (void**)&gpart_group_data_written, gpart_align, Ndm_written * sizeof(struct velociraptor_gpart_data)) != 0) @@ -1229,7 +1234,8 @@ void write_output_serial(struct engine* e, const char* baseName, Nparticles = Nstars_written; /* Allocate temporary arrays */ - if (posix_memalign((void**)&sparts_written, spart_align, + if (swift_memalign("sparts_written", (void**)&sparts_written, + spart_align, Nstars_written * sizeof(struct spart)) != 0) error("Error while allocating temporart memory for sparts"); @@ -1270,11 +1276,12 @@ void write_output_serial(struct engine* e, const char* baseName, } /* Free temporary array 
*/ - if (parts_written) free(parts_written); - if (xparts_written) free(xparts_written); - if (gparts_written) free(gparts_written); - if (gpart_group_data_written) free(gpart_group_data_written); - if (sparts_written) free(sparts_written); + if (parts_written) swift_free("parts_written", parts_written); + if (xparts_written) swift_free("xparts_written", xparts_written); + if (gparts_written) swift_free("gparts_written", gparts_written); + if (gpart_group_data_written) + swift_free("gpart_group_written", gpart_group_data_written); + if (sparts_written) swift_free("sparts_written", sparts_written); /* Close particle group */ H5Gclose(h_grp); diff --git a/src/single_io.c b/src/single_io.c index 4b23310ee02f3b485eff1a0358de850f497a3478..917dd880495300cea8f5d16df85513c6386b53f2 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -49,6 +49,7 @@ #include "hydro_properties.h" #include "io_properties.h" #include "kernel_hydro.h" +#include "memuse.h" #include "part.h" #include "part_type.h" #include "star_formation_io.h" @@ -240,7 +241,7 @@ void writeArray(const struct engine* e, hid_t grp, char* fileName, /* Allocate temporary buffer */ void* temp = NULL; - if (posix_memalign((void**)&temp, IO_BUFFER_ALIGNMENT, + if (swift_memalign("writebuff", (void**)&temp, IO_BUFFER_ALIGNMENT, num_elements * typeSize) != 0) error("Unable to allocate temporary i/o buffer"); @@ -332,7 +333,7 @@ void writeArray(const struct engine* e, hid_t grp, char* fileName, io_write_attribute_s(h_data, "Conversion factor", buffer); /* Free and close everything */ - free(temp); + swift_free("writebuff", temp); H5Pclose(h_prop); H5Dclose(h_data); H5Sclose(h_space); @@ -502,7 +503,7 @@ void read_ic_single(const char* fileName, /* Allocate memory to store SPH particles */ if (with_hydro) { *Ngas = N[swift_type_gas]; - if (posix_memalign((void**)parts, part_align, + if (swift_memalign("parts", (void**)parts, part_align, *Ngas * sizeof(struct part)) != 0) error("Error while allocating memory for SPH 
particles"); bzero(*parts, *Ngas * sizeof(struct part)); @@ -511,7 +512,7 @@ void read_ic_single(const char* fileName, /* Allocate memory to store star particles */ if (with_stars) { *Nstars = N[swift_type_stars]; - if (posix_memalign((void**)sparts, spart_align, + if (swift_memalign("sparts", (void**)sparts, spart_align, *Nstars * sizeof(struct spart)) != 0) error("Error while allocating memory for stars particles"); bzero(*sparts, *Nstars * sizeof(struct spart)); @@ -523,7 +524,7 @@ void read_ic_single(const char* fileName, *Ngparts = (with_hydro ? N[swift_type_gas] : 0) + N[swift_type_dark_matter] + (with_stars ? N[swift_type_stars] : 0); - if (posix_memalign((void**)gparts, gpart_align, + if (swift_memalign("gparts", (void**)gparts, gpart_align, *Ngparts * sizeof(struct gpart)) != 0) error("Error while allocating memory for gravity particles"); bzero(*gparts, *Ngparts * sizeof(struct gpart)); @@ -935,10 +936,12 @@ void write_output_single(struct engine* e, const char* baseName, N = Ngas_written; /* Allocate temporary arrays */ - if (posix_memalign((void**)&parts_written, part_align, + if (swift_memalign("parts_written", (void**)&parts_written, + part_align, Ngas_written * sizeof(struct part)) != 0) error("Error while allocating temporart memory for parts"); - if (posix_memalign((void**)&xparts_written, xpart_align, + if (swift_memalign("xparts_written", (void**)&xparts_written, + xpart_align, Ngas_written * sizeof(struct xpart)) != 0) error("Error while allocating temporart memory for xparts"); @@ -983,13 +986,15 @@ void write_output_single(struct engine* e, const char* baseName, N = Ndm_written; /* Allocate temporary array */ - if (posix_memalign((void**)&gparts_written, gpart_align, + if (swift_memalign("gparts_written", (void**)&gparts_written, + gpart_align, Ndm_written * sizeof(struct gpart)) != 0) error("Error while allocating temporart memory for gparts"); if (with_stf) { - if (posix_memalign( - (void**)&gpart_group_data_written, gpart_align, + if 
(swift_memalign( + "gpart_group_written", (void**)&gpart_group_data_written, + gpart_align, Ndm_written * sizeof(struct velociraptor_gpart_data)) != 0) error( "Error while allocating temporart memory for gparts STF " @@ -1028,7 +1033,8 @@ void write_output_single(struct engine* e, const char* baseName, N = Nstars_written; /* Allocate temporary arrays */ - if (posix_memalign((void**)&sparts_written, spart_align, + if (swift_memalign("sparts_written", (void**)&sparts_written, + spart_align, Nstars_written * sizeof(struct spart)) != 0) error("Error while allocating temporart memory for sparts"); @@ -1068,11 +1074,12 @@ void write_output_single(struct engine* e, const char* baseName, } /* Free temporary arrays */ - if (parts_written) free(parts_written); - if (xparts_written) free(xparts_written); - if (gparts_written) free(gparts_written); - if (gpart_group_data_written) free(gpart_group_data_written); - if (sparts_written) free(sparts_written); + if (parts_written) swift_free("parts_written", parts_written); + if (xparts_written) swift_free("xparts_written", xparts_written); + if (gparts_written) swift_free("gparts_written", gparts_written); + if (gpart_group_data_written) + swift_free("gpart_group_written", gpart_group_data_written); + if (sparts_written) swift_free("sparts_written", sparts_written); /* Close particle group */ H5Gclose(h_grp); diff --git a/src/space.c b/src/space.c index 0afb3a1fddbf7fa94d1ff1c1522a6fc1387e7b9b..f92830061085dcd439e6165977f693f8f00171a4 100644 --- a/src/space.c +++ b/src/space.c @@ -52,6 +52,7 @@ #include "kernel_hydro.h" #include "lock.h" #include "memswap.h" +#include "memuse.h" #include "minmax.h" #include "multipole.h" #include "restart.h" @@ -260,16 +261,9 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, #endif if (s->with_self_gravity) bzero(c->grav.multipole, sizeof(struct gravity_tensors)); - for (int i = 0; i < 13; i++) { - if (c->hydro.sort[i] != NULL) { - free(c->hydro.sort[i]); - c->hydro.sort[i] = 
NULL; - } - if (c->stars.sort[i] != NULL) { - free(c->stars.sort[i]); - c->stars.sort[i] = NULL; - } - } + + cell_free_hydro_sorts(c); + cell_free_stars_sorts(c); #if WITH_MPI c->mpi.tag = -1; @@ -319,17 +313,17 @@ void space_free_foreign_parts(struct space *s) { #ifdef WITH_MPI if (s->parts_foreign != NULL) { - free(s->parts_foreign); + swift_free("parts_foreign", s->parts_foreign); s->size_parts_foreign = 0; s->parts_foreign = NULL; } if (s->gparts_foreign != NULL) { - free(s->gparts_foreign); + swift_free("gparts_foreign", s->gparts_foreign); s->size_gparts_foreign = 0; s->gparts_foreign = NULL; } if (s->sparts_foreign != NULL) { - free(s->sparts_foreign); + swift_free("sparts_foreign", s->sparts_foreign); s->size_sparts_foreign = 0; s->sparts_foreign = NULL; } @@ -443,7 +437,8 @@ void space_regrid(struct space *s, int verbose) { oldwidth[1] = s->width[1]; oldwidth[2] = s->width[2]; - if ((oldnodeIDs = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL) + if ((oldnodeIDs = + (int *)swift_malloc("nodeIDs", sizeof(int) * s->nr_cells)) == NULL) error("Failed to allocate temporary nodeIDs."); int cid = 0; @@ -476,12 +471,13 @@ void space_regrid(struct space *s, int verbose) { /* Free the old cells, if they were allocated. 
*/ if (s->cells_top != NULL) { space_free_cells(s); - free(s->local_cells_with_tasks_top); - free(s->local_cells_top); - free(s->cells_with_particles_top); - free(s->local_cells_with_particles_top); - free(s->cells_top); - free(s->multipoles_top); + swift_free("local_cells_with_tasks_top", s->local_cells_with_tasks_top); + swift_free("local_cells_top", s->local_cells_top); + swift_free("cells_with_particles_top", s->cells_with_particles_top); + swift_free("local_cells_with_particles_top", + s->local_cells_with_particles_top); + swift_free("cells_top", s->cells_top); + swift_free("multipoles_top", s->multipoles_top); } /* Also free the task arrays, these will be regenerated and we can use the @@ -498,39 +494,44 @@ void space_regrid(struct space *s, int verbose) { /* Allocate the highest level of cells. */ s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2]; - if (posix_memalign((void **)&s->cells_top, cell_align, + + if (swift_memalign("cells_top", (void **)&s->cells_top, cell_align, s->nr_cells * sizeof(struct cell)) != 0) error("Failed to allocate top-level cells."); bzero(s->cells_top, s->nr_cells * sizeof(struct cell)); /* Allocate the multipoles for the top-level cells. 
*/ if (s->with_self_gravity) { - if (posix_memalign((void **)&s->multipoles_top, multipole_align, + if (swift_memalign("multipoles_top", (void **)&s->multipoles_top, + multipole_align, s->nr_cells * sizeof(struct gravity_tensors)) != 0) error("Failed to allocate top-level multipoles."); bzero(s->multipoles_top, s->nr_cells * sizeof(struct gravity_tensors)); } /* Allocate the indices of local cells */ - if (posix_memalign((void **)&s->local_cells_top, SWIFT_STRUCT_ALIGNMENT, - s->nr_cells * sizeof(int)) != 0) + if (swift_memalign("local_cells_top", (void **)&s->local_cells_top, + SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0) error("Failed to allocate indices of local top-level cells."); bzero(s->local_cells_top, s->nr_cells * sizeof(int)); /* Allocate the indices of local cells with tasks */ - if (posix_memalign((void **)&s->local_cells_with_tasks_top, + if (swift_memalign("local_cells_with_tasks_top", + (void **)&s->local_cells_with_tasks_top, SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0) error("Failed to allocate indices of local top-level cells with tasks."); bzero(s->local_cells_with_tasks_top, s->nr_cells * sizeof(int)); /* Allocate the indices of cells with particles */ - if (posix_memalign((void **)&s->cells_with_particles_top, + if (swift_memalign("cells_with_particles_top", + (void **)&s->cells_with_particles_top, SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0) error("Failed to allocate indices of top-level cells with particles."); bzero(s->cells_with_particles_top, s->nr_cells * sizeof(int)); /* Allocate the indices of local cells with particles */ - if (posix_memalign((void **)&s->local_cells_with_particles_top, + if (swift_memalign("local_cells_with_particles_top", + (void **)&s->local_cells_with_particles_top, SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0) error( "Failed to allocate indices of local top-level cells with " @@ -632,7 +633,7 @@ void space_regrid(struct space *s, int verbose) { 
engine_makeproxies(s->e); /* Finished with these. */ - free(oldnodeIDs); + swift_free("nodeIDs", oldnodeIDs); } else if (no_regrid && s->e != NULL) { /* If we have created the top-levels cells and not done an initial @@ -752,12 +753,12 @@ void space_allocate_extras(struct space *s, int verbose) { /* Create more space for parts */ struct gpart *gparts_new = NULL; - if (posix_memalign((void **)&gparts_new, gpart_align, + if (swift_memalign("gparts", (void **)&gparts_new, gpart_align, sizeof(struct gpart) * size_gparts) != 0) error("Failed to allocate new gpart data"); const ptrdiff_t delta = gparts_new - s->gparts; memcpy(gparts_new, s->gparts, sizeof(struct gpart) * s->size_gparts); - free(s->gparts); + swift_free("gparts", s->gparts); s->gparts = gparts_new; /* Update the counter */ @@ -843,20 +844,20 @@ void space_allocate_extras(struct space *s, int verbose) { /* Create more space for parts */ struct part *parts_new = NULL; - if (posix_memalign((void **)&parts_new, part_align, + if (swift_memalign("parts", (void **)&parts_new, part_align, sizeof(struct part) * size_parts) != 0) error("Failed to allocate new part data"); memcpy(parts_new, s->parts, sizeof(struct part) * s->size_parts); - free(s->parts); + swift_free("parts", s->parts); s->parts = parts_new; /* Same for xparts */ struct xpart *xparts_new = NULL; - if (posix_memalign((void **)&xparts_new, xpart_align, + if (swift_memalign("xparts", (void **)&xparts_new, xpart_align, sizeof(struct xpart) * size_parts) != 0) error("Failed to allocate new xpart data"); memcpy(xparts_new, s->xparts, sizeof(struct xpart) * s->size_parts); - free(s->xparts); + swift_free("xparts", s->xparts); s->xparts = xparts_new; /* Update the counter */ @@ -932,11 +933,11 @@ void space_allocate_extras(struct space *s, int verbose) { /* Create more space for parts */ struct spart *sparts_new = NULL; - if (posix_memalign((void **)&sparts_new, spart_align, + if (swift_memalign("sparts", (void **)&sparts_new, spart_align, sizeof(struct 
spart) * size_sparts) != 0) error("Failed to allocate new spart data"); memcpy(sparts_new, s->sparts, sizeof(struct spart) * s->size_sparts); - free(s->sparts); + swift_free("sparts", s->sparts); s->sparts = sparts_new; /* Update the counter */ @@ -1056,16 +1057,19 @@ void space_rebuild(struct space *s, int repartitioned, int verbose) { /* Allocate arrays to store the indices of the cells where particles belong. We allocate extra space to allow for particles we may receive from other nodes */ - int *h_index = (int *)malloc(sizeof(int) * h_index_size); - int *g_index = (int *)malloc(sizeof(int) * g_index_size); - int *s_index = (int *)malloc(sizeof(int) * s_index_size); + int *h_index = (int *)swift_malloc("h_index", sizeof(int) * h_index_size); + int *g_index = (int *)swift_malloc("g_index", sizeof(int) * g_index_size); + int *s_index = (int *)swift_malloc("s_index", sizeof(int) * s_index_size); if (h_index == NULL || g_index == NULL || s_index == NULL) error("Failed to allocate temporary particle indices."); /* Allocate counters of particles that will land in each cell */ - int *cell_part_counts = (int *)malloc(sizeof(int) * s->nr_cells); - int *cell_gpart_counts = (int *)malloc(sizeof(int) * s->nr_cells); - int *cell_spart_counts = (int *)malloc(sizeof(int) * s->nr_cells); + int *cell_part_counts = + (int *)swift_malloc("cell_part_counts", sizeof(int) * s->nr_cells); + int *cell_gpart_counts = + (int *)swift_malloc("cell_gpart_counts", sizeof(int) * s->nr_cells); + int *cell_spart_counts = + (int *)swift_malloc("cell_spart_counts", sizeof(int) * s->nr_cells); if (cell_part_counts == NULL || cell_gpart_counts == NULL || cell_spart_counts == NULL) error("Failed to allocate cell particle count buffer."); @@ -1311,20 +1315,22 @@ void space_rebuild(struct space *s, int repartitioned, int verbose) { /* Re-allocate the index array for the parts if needed.. 
*/ if (s->nr_parts + 1 > h_index_size) { int *ind_new; - if ((ind_new = (int *)malloc(sizeof(int) * (s->nr_parts + 1))) == NULL) + if ((ind_new = (int *)swift_malloc( + "h_index", sizeof(int) * (s->nr_parts + 1))) == NULL) error("Failed to allocate temporary particle indices."); memcpy(ind_new, h_index, sizeof(int) * nr_parts); - free(h_index); + swift_free("h_index", h_index); h_index = ind_new; } /* Re-allocate the index array for the sparts if needed.. */ if (s->nr_sparts + 1 > s_index_size) { int *sind_new; - if ((sind_new = (int *)malloc(sizeof(int) * (s->nr_sparts + 1))) == NULL) + if ((sind_new = (int *)swift_malloc( + "s_index", sizeof(int) * (s->nr_sparts + 1))) == NULL) error("Failed to allocate temporary s-particle indices."); memcpy(sind_new, s_index, sizeof(int) * nr_sparts); - free(s_index); + swift_free("s_index", s_index); s_index = sind_new; } @@ -1453,20 +1459,21 @@ void space_rebuild(struct space *s, int repartitioned, int verbose) { } /* We no longer need the indices as of here. */ - free(h_index); - free(cell_part_counts); - free(s_index); - free(cell_spart_counts); + swift_free("h_index", h_index); + swift_free("cell_part_counts", cell_part_counts); + swift_free("s_index", s_index); + swift_free("cell_spart_counts", cell_spart_counts); #ifdef WITH_MPI /* Re-allocate the index array for the gparts if needed.. */ if (s->nr_gparts + 1 > g_index_size) { int *gind_new; - if ((gind_new = (int *)malloc(sizeof(int) * (s->nr_gparts + 1))) == NULL) + if ((gind_new = (int *)swift_malloc( + "g_index", sizeof(int) * (s->nr_gparts + 1))) == NULL) error("Failed to allocate temporary g-particle indices."); memcpy(gind_new, g_index, sizeof(int) * nr_gparts); - free(g_index); + swift_free("g_index", g_index); g_index = gind_new; } @@ -1540,8 +1547,8 @@ void space_rebuild(struct space *s, int repartitioned, int verbose) { } /* We no longer need the indices as of here. 
*/ - free(g_index); - free(cell_gpart_counts); + swift_free("g_index", g_index); + swift_free("cell_gpart_counts", cell_gpart_counts); #ifdef SWIFT_DEBUG_CHECKS /* Verify that the links are correct */ @@ -2272,7 +2279,7 @@ void space_parts_sort(struct part *parts, struct xpart *xparts, ptrdiff_t parts_offset) { /* Create the offsets array. */ size_t *offsets = NULL; - if (posix_memalign((void **)&offsets, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("parts_offsets", (void **)&offsets, SWIFT_STRUCT_ALIGNMENT, sizeof(size_t) * (num_bins + 1)) != 0) error("Failed to allocate temporary cell offsets array."); @@ -2317,7 +2324,7 @@ void space_parts_sort(struct part *parts, struct xpart *xparts, error("Bad offsets after shuffle."); #endif /* SWIFT_DEBUG_CHECKS */ - free(offsets); + swift_free("parts_offsets", offsets); } /** @@ -2335,7 +2342,8 @@ void space_sparts_sort(struct spart *sparts, int *restrict ind, ptrdiff_t sparts_offset) { /* Create the offsets array. */ size_t *offsets = NULL; - if (posix_memalign((void **)&offsets, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("sparts_offsets", (void **)&offsets, + SWIFT_STRUCT_ALIGNMENT, sizeof(size_t) * (num_bins + 1)) != 0) error("Failed to allocate temporary cell offsets array."); @@ -2377,7 +2385,7 @@ void space_sparts_sort(struct spart *sparts, int *restrict ind, error("Bad offsets after shuffle."); #endif /* SWIFT_DEBUG_CHECKS */ - free(offsets); + swift_free("sparts_offsets", offsets); } /** @@ -2395,7 +2403,8 @@ void space_gparts_sort(struct gpart *gparts, struct part *parts, int *restrict counts, int num_bins) { /* Create the offsets array. 
*/ size_t *offsets = NULL; - if (posix_memalign((void **)&offsets, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("gparts_offsets", (void **)&offsets, + SWIFT_STRUCT_ALIGNMENT, sizeof(size_t) * (num_bins + 1)) != 0) error("Failed to allocate temporary cell offsets array."); @@ -2443,7 +2452,7 @@ void space_gparts_sort(struct gpart *gparts, struct part *parts, error("Bad offsets after shuffle."); #endif /* SWIFT_DEBUG_CHECKS */ - free(offsets); + swift_free("gparts_offsets", offsets); } /** @@ -2451,16 +2460,8 @@ void space_gparts_sort(struct gpart *gparts, struct part *parts, */ void space_map_clearsort(struct cell *c, void *data) { - for (int i = 0; i < 13; i++) { - if (c->hydro.sort[i] != NULL) { - free(c->hydro.sort[i]); - c->hydro.sort[i] = NULL; - } - if (c->stars.sort[i] != NULL) { - free(c->stars.sort[i]); - c->stars.sort[i] = NULL; - } - } + cell_free_hydro_sorts(c); + cell_free_stars_sorts(c); } /** @@ -2644,7 +2645,7 @@ void space_split_recursive(struct space *s, struct cell *c, const int allocate_buffer = (buff == NULL && gbuff == NULL && sbuff == NULL); if (allocate_buffer) { if (count > 0) { - if (posix_memalign((void **)&buff, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("tempbuff", (void **)&buff, SWIFT_STRUCT_ALIGNMENT, sizeof(struct cell_buff) * count) != 0) error("Failed to allocate temporary indices."); for (int k = 0; k < count; k++) { @@ -2660,7 +2661,7 @@ void space_split_recursive(struct space *s, struct cell *c, } } if (gcount > 0) { - if (posix_memalign((void **)&gbuff, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("tempgbuff", (void **)&gbuff, SWIFT_STRUCT_ALIGNMENT, sizeof(struct cell_buff) * gcount) != 0) error("Failed to allocate temporary indices."); for (int k = 0; k < gcount; k++) { @@ -2676,7 +2677,7 @@ void space_split_recursive(struct space *s, struct cell *c, } } if (scount > 0) { - if (posix_memalign((void **)&sbuff, SWIFT_STRUCT_ALIGNMENT, + if (swift_memalign("tempsbuff", (void **)&sbuff, SWIFT_STRUCT_ALIGNMENT, sizeof(struct 
cell_buff) * scount) != 0) error("Failed to allocate temporary indices."); for (int k = 0; k < scount; k++) { @@ -3054,9 +3055,9 @@ void space_split_recursive(struct space *s, struct cell *c, /* Clean up. */ if (allocate_buffer) { - if (buff != NULL) free(buff); - if (gbuff != NULL) free(gbuff); - if (sbuff != NULL) free(sbuff); + if (buff != NULL) swift_free("tempbuff", buff); + if (gbuff != NULL) swift_free("tempgbuff", gbuff); + if (sbuff != NULL) swift_free("tempsbuff", sbuff); } } @@ -3198,7 +3199,7 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) { /* Is the cell buffer empty? */ if (s->cells_sub == NULL) { - if (posix_memalign((void **)&s->cells_sub, cell_align, + if (swift_memalign("cells_sub", (void **)&s->cells_sub, cell_align, space_cellallocchunk * sizeof(struct cell)) != 0) error("Failed to allocate more cells."); @@ -3213,8 +3214,8 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) { /* Is the multipole buffer empty? */ if (s->with_self_gravity && s->multipoles_sub == NULL) { - if (posix_memalign( - (void **)&s->multipoles_sub, multipole_align, + if (swift_memalign( + "multipoles_sub", (void **)&s->multipoles_sub, multipole_align, space_cellallocchunk * sizeof(struct gravity_tensors)) != 0) error("Failed to allocate more multipoles."); @@ -3241,10 +3242,9 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) { /* Init some things in the cell we just got. 
*/ for (int j = 0; j < nr_cells; j++) { - for (int k = 0; k < 13; k++) { - if (cells[j]->hydro.sort[k] != NULL) free(cells[j]->hydro.sort[k]); - if (cells[j]->stars.sort[k] != NULL) free(cells[j]->stars.sort[k]); - } + cell_free_hydro_sorts(cells[j]); + cell_free_stars_sorts(cells[j]); + struct gravity_tensors *temp = cells[j]->grav.multipole; bzero(cells[j], sizeof(struct cell)); cells[j]->grav.multipole = temp; @@ -3266,16 +3266,8 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) { void space_free_buff_sort_indices(struct space *s) { for (struct cell *finger = s->cells_sub; finger != NULL; finger = finger->next) { - for (int k = 0; k < 13; k++) { - if (finger->hydro.sort[k] != NULL) { - free(finger->hydro.sort[k]); - finger->hydro.sort[k] = NULL; - } - if (finger->stars.sort[k] != NULL) { - free(finger->stars.sort[k]); - finger->stars.sort[k] = NULL; - } - } + cell_free_hydro_sorts(finger); + cell_free_stars_sorts(finger); } } @@ -3984,7 +3976,7 @@ void space_init(struct space *s, struct swift_params *params, /* Allocate the extra parts array for the gas particles. 
*/ if (Npart > 0) { - if (posix_memalign((void **)&s->xparts, xpart_align, + if (swift_memalign("xparts", (void **)&s->xparts, xpart_align, Npart * sizeof(struct xpart)) != 0) error("Failed to allocate xparts."); bzero(s->xparts, Npart * sizeof(struct xpart)); @@ -4039,15 +4031,15 @@ void space_replicate(struct space *s, int replicate, int verbose) { struct gpart *gparts = NULL; struct spart *sparts = NULL; - if (posix_memalign((void **)&parts, part_align, + if (swift_memalign("parts", (void **)&parts, part_align, s->nr_parts * sizeof(struct part)) != 0) error("Failed to allocate new part array."); - if (posix_memalign((void **)&gparts, gpart_align, + if (swift_memalign("gparts", (void **)&gparts, gpart_align, s->nr_gparts * sizeof(struct gpart)) != 0) error("Failed to allocate new gpart array."); - if (posix_memalign((void **)&sparts, spart_align, + if (swift_memalign("sparts", (void **)&sparts, spart_align, s->nr_sparts * sizeof(struct spart)) != 0) error("Failed to allocate new spart array."); @@ -4109,9 +4101,9 @@ void space_replicate(struct space *s, int replicate, int verbose) { } /* Replace the content of the space */ - free(s->parts); - free(s->gparts); - free(s->sparts); + swift_free("parts", s->parts); + swift_free("gparts", s->gparts); + swift_free("sparts", s->sparts); s->parts = parts; s->gparts = gparts; s->sparts = sparts; @@ -4169,11 +4161,11 @@ void space_generate_gas(struct space *s, const struct cosmology *cosmo, struct part *parts = NULL; struct gpart *gparts = NULL; - if (posix_memalign((void **)&parts, part_align, + if (swift_memalign("parts", (void **)&parts, part_align, s->nr_parts * sizeof(struct part)) != 0) error("Failed to allocate new part array."); - if (posix_memalign((void **)&gparts, gpart_align, + if (swift_memalign("gparts", (void **)&gparts, gpart_align, s->nr_gparts * sizeof(struct gpart)) != 0) error("Failed to allocate new gpart array."); @@ -4259,7 +4251,7 @@ void space_generate_gas(struct space *s, const struct cosmology 
*cosmo, } /* Replace the content of the space */ - free(s->gparts); + swift_free("gparts", s->gparts); s->parts = parts; s->gparts = gparts; } @@ -4445,16 +4437,17 @@ void space_reset_task_counters(struct space *s) { void space_clean(struct space *s) { for (int i = 0; i < s->nr_cells; ++i) cell_clean(&s->cells_top[i]); - free(s->cells_top); - free(s->multipoles_top); - free(s->local_cells_top); - free(s->local_cells_with_tasks_top); - free(s->cells_with_particles_top); - free(s->local_cells_with_particles_top); - free(s->parts); - free(s->xparts); - free(s->gparts); - free(s->sparts); + swift_free("cells_top", s->cells_top); + swift_free("multipoles_top", s->multipoles_top); + swift_free("local_cells_top", s->local_cells_top); + swift_free("local_cells_with_tasks_top", s->local_cells_with_tasks_top); + swift_free("cells_with_particles_top", s->cells_with_particles_top); + swift_free("local_cells_with_particles_top", + s->local_cells_with_particles_top); + swift_free("parts", s->parts); + swift_free("xparts", s->xparts); + swift_free("gparts", s->gparts); + swift_free("sparts", s->sparts); } /** @@ -4522,10 +4515,11 @@ void space_struct_restore(struct space *s, FILE *stream) { if (s->nr_parts > 0) { /* Need the memory for these. 
*/ - if (posix_memalign((void **)&s->parts, part_align, + if (swift_memalign("parts", (void **)&s->parts, part_align, s->size_parts * sizeof(struct part)) != 0) error("Failed to allocate restore part array."); - if (posix_memalign((void **)&s->xparts, xpart_align, + + if (swift_memalign("xparts", (void **)&s->xparts, xpart_align, s->size_parts * sizeof(struct xpart)) != 0) error("Failed to allocate restore xpart array."); @@ -4536,7 +4530,7 @@ void space_struct_restore(struct space *s, FILE *stream) { } s->gparts = NULL; if (s->nr_gparts > 0) { - if (posix_memalign((void **)&s->gparts, gpart_align, + if (swift_memalign("gparts", (void **)&s->gparts, gpart_align, s->size_gparts * sizeof(struct gpart)) != 0) error("Failed to allocate restore gpart array."); @@ -4546,7 +4540,7 @@ void space_struct_restore(struct space *s, FILE *stream) { s->sparts = NULL; if (s->nr_sparts > 0) { - if (posix_memalign((void **)&s->sparts, spart_align, + if (swift_memalign("sparts", (void **)&s->sparts, spart_align, s->size_sparts * sizeof(struct spart)) != 0) error("Failed to allocate restore spart array."); diff --git a/src/swift.h b/src/swift.h index e166dde5dd3baed07fb5c081c64ce941d6c6ce6d..0b5eda65e73cd6600eb69c25bc6a3f0a2e6e675f 100644 --- a/src/swift.h +++ b/src/swift.h @@ -49,6 +49,7 @@ #include "logger.h" #include "logger_io.h" #include "map.h" +#include "memuse.h" #include "mesh_gravity.h" #include "multipole.h" #include "outputlist.h" diff --git a/tools/analyse_memuse_logs.py b/tools/analyse_memuse_logs.py new file mode 100755 index 0000000000000000000000000000000000000000..4026c73d1e13a4484975fe3902e508493dc838cb --- /dev/null +++ b/tools/analyse_memuse_logs.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python +""" +Usage: + analyse_memuse_logs.py [options] memuse_report1.dat [memuse_report2.dat] ... + +Parse the output of a run of SWIFT to convert the memuse output dumps into a +timeseries of memory use. Also outputs use in memory per labelled type. + +This file is part of SWIFT. 
+Copyright (c) 2019 Peter W. Draper (p.w.draper@durham.ac.uk) + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +""" + +from collections import OrderedDict +import argparse +import sys + +# Command-line arguments. +parser = argparse.ArgumentParser(description="Analyse memory usage reports") + +parser.add_argument("memuse_report", nargs='+', + help="Memory usage reports (order by step if using more than one)") +parser.add_argument( + "-b", + "--blacklist", + dest="blacklist", + help="substring of allocations to ignore (may be repeated)", + default=None, + action='append' +) +args = parser.parse_args() + +memuse = OrderedDict() +labels = {} +totalmem = 0 +process_use = "" +peak = 0.0 + +for filename in args.memuse_report: + sys.stderr.write("## Processing: " + filename + "\n") + with open(filename) as infile: + print '# {:<18s} {:>30s} {:>9s} {:>9s} {:s}'.format("tic", "label", "allocated", "step", "MB") + for line in infile: + if line[0] == "#": + if "# Current use:" in line: + process_use = line[14:-1] + else: + tic, adr, rank, step, allocated, label, size = line.split() + + # Skip blacklisted allocations, these can swamp the signal... 
+ if args.blacklist != None: + skip = False + for item in args.blacklist: + if item in label: + skip = True + break + if skip: + continue + + rank = int(rank) + step = int(step) + allocated = int(allocated) + size = int(size) + + doprint = True + if allocated == 1: + # Allocation. + totalmem = totalmem + size + if not adr in memuse: + memuse[adr] = [size] + labels[adr] = label + else: + memuse[adr].append(size) + else: + # Free, locate allocation. + if adr in memuse: + allocs = memuse[adr] + totalmem = totalmem - allocs[0] + if len(allocs) > 1: + memuse[adr] = allocs[1:] + else: + del memuse[adr] + else: + # Unmatched free, complain and skip. + #print "### unmatched free: ", label, adr + doprint = False + if doprint: + if totalmem > peak: + peak = totalmem + print '{:<20s} {:>30s} {:9d} {:9d} {:.3f}'.format(tic, label, allocated, step, totalmem/(1048576.0)) + sys.stderr.write("## Finished ingestion of: " + filename + "\n") + +totals = {} +numactive = {} +for adr in labels: + # If any remaining allocations. 
+ if adr in memuse: + if labels[adr] in totals: + totals[labels[adr]] = totals[labels[adr]] + memuse[adr][0] + numactive[labels[adr]] = numactive[labels[adr]] + 1 + else: + totals[labels[adr]] = memuse[adr][0] + numactive[labels[adr]] = 1 + +print "# Memory use by label:" +print "## ", '{:<30s} {:>16s} {:>16s}'.format("label", "MB", "numactive") +print "## " +total = 0.0 +for label in sorted(totals): + mem = totals[label]/(1048576.0) + total = total + mem + print "## ", '{:<30s} {:16.3f} {:16d}'.format(label, mem, numactive[label]) +print "## " +print "# Total memory still in use : ", '{:.3f}'.format(total), " (MB)" +print "# Peak memory usage : ", '{:.3f}'.format(peak/1048576.0), " (MB)" +if process_use != "": + print "#" + print "# Memory use by process (all/system):", process_use +sys.exit(0) diff --git a/tools/process_memuse_logs.sh b/tools/process_memuse_logs.sh new file mode 100755 index 0000000000000000000000000000000000000000..c86efe9ceca388afcb8d7236c0a2a4e403a66083 --- /dev/null +++ b/tools/process_memuse_logs.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# +# Usage: +# process_memuse_logs nprocess +# +# Description: +# Process all the memuse report files in the current directory. +# Creating an analysis for each step and one for all the steps. +# +# The input files are created by a run configured for memuse reporting +# (--enable-memuse-reports) should be named "memuse_report-step<n>.dat" +# in the current directory. +# +# All located files will be processed using "nprocess" concurrent +# processes. The output for each step will be named memuse_report-step<n>.log +# and the overall analysis will be called memuse_report-all.log. +# +# This file is part of SWIFT: +# +# Copyright (C) 2019 Peter W. Draper (p.w.draper@durham.ac.uk) +# All Rights Reserved. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Handle command-line +if test "$1" == ""; then + echo "Usage: $0 nprocess" + exit 1 +fi +NPROCS=$1 + +# Locate script. +SCRIPTHOME=$(dirname "$0") + +# Find all report files. Use version sort to get into correct order. +files=$(ls -v memuse_report-step*.dat) +if test $? != 0; then + echo "Failed to find any memuse report files" + exit 1 +fi + +# Construct list of input and output names. +list="" +for f in $files; do + output=$(echo $f| sed 's,.dat,.log,') + list="$list $f $output" +done + +# And process them. +echo "Processing memuse report files..." +echo $list | xargs -P $NPROCS -n 2 /bin/bash -c "${SCRIPTHOME}/analyse_memuse_logs.py \$0 > \$1" + +# Now process the overall file, if more than one file given. +n=$(echo $list| wc -w) +if test $n -gt 2; then + echo "Processing _all_ memuse report files..." 
+ ${SCRIPTHOME}/analyse_memuse_logs.py $files > memuse_report-all.log +fi + +echo "Finished" + +exit diff --git a/tools/process_memuse_logs_MPI.sh b/tools/process_memuse_logs_MPI.sh new file mode 100755 index 0000000000000000000000000000000000000000..77a949d18432690fcb93f883eca5edff2ea19d92 --- /dev/null +++ b/tools/process_memuse_logs_MPI.sh @@ -0,0 +1,75 @@ +#!/bin/bash +# +# Usage: +# process_memuse_logs_MPI rank nprocess +# +# Description: +# Process all the memuse report files in the current directory that +# are output from the given rank. +# Creating an analysis for each step and one for all the steps. +# +# The input files are created by a run configured for memuse reporting +# (--enable-memuse-reports) should be named "memuse_report-rank<n>-step<m>.dat" +# in the current directory. +# +# All located files will be processed using "nprocess" concurrent +# processes. The output for each step will be named memuse_report-rank<n>-step<m>.log +# and the overall analysis will be called memuse_report-all-rank<n>.log. +# +# This file is part of SWIFT: +# +# Copyright (C) 2019 Peter W. Draper (p.w.draper@durham.ac.uk) +# All Rights Reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Handle command-line +if test "$2" == ""; then + echo "Usage: $0 rank nprocess" + exit 1 +fi +RANK=$1 +NPROCS=$2 + +# Locate script. 
+SCRIPTHOME=$(dirname "$0") + +# Find all report files. Use version sort to get into correct order. +files=$(ls -v memuse_report-rank${RANK}-step*.dat) +if test $? != 0; then + echo "Failed to find any memuse report files" + exit 1 +fi + +# Construct list of input and output names. +list="" +for f in $files; do + output=$(echo $f| sed 's,.dat,.log,') + list="$list $f $output" +done + +# And process them. +echo "Processing memuse report files..." +echo $list | xargs -P $NPROCS -n 2 /bin/bash -c "${SCRIPTHOME}/analyse_memuse_logs.py \$0 > \$1" + +# Now process the overall file, if more than one file given. +n=$(echo $list| wc -w) +if test $n -gt 2; then + echo "Processing _all_ memuse report files..." + ${SCRIPTHOME}/analyse_memuse_logs.py $files > memuse_report-all-rank${RANK}.log +fi + +echo "Finished" + +exit