diff --git a/configure.ac b/configure.ac index 9fa9a1de591d63794dde5db6a8dd733cfcaada09..a02dcc57c720f1a9a792b160485caee728a91b98 100644 --- a/configure.ac +++ b/configure.ac @@ -351,7 +351,7 @@ AC_ARG_WITH([tcmalloc], [with_tcmalloc="no"] ) if test "x$with_tcmalloc" != "xno"; then - if test "x$with_tcmalloc" != "xyes" && test "x$with_tcmalloc" != "x"; then + if test "x$with_tcmalloc" != "xyes" -a "x$with_tcmalloc" != "x"; then tclibs="-L$with_tcmalloc -ltcmalloc" else tclibs="-ltcmalloc" @@ -361,7 +361,7 @@ if test "x$with_tcmalloc" != "xno"; then # Could just have the minimal version. if test "$have_tcmalloc" = "no"; then - if test "x$with_tcmalloc" != "xyes" && test "x$with_tcmalloc" != "x"; then + if test "x$with_tcmalloc" != "xyes" -a "x$with_tcmalloc" != "x"; then tclibs="-L$with_tcmalloc -ltcmalloc_minimal" else tclibs="-ltcmalloc_minimal" @@ -394,7 +394,7 @@ AC_ARG_WITH([profiler], [with_profiler="yes"] ) if test "x$with_profiler" != "xno"; then - if test "x$with_profiler" != "xyes" && test "x$with_profiler" != "x"; then + if test "x$with_profiler" != "xyes" -a "x$with_profiler" != "x"; then proflibs="-L$with_profiler -lprofiler" else proflibs="-lprofiler" @@ -411,6 +411,38 @@ fi AC_SUBST([PROFILER_LIBS]) AM_CONDITIONAL([HAVEPROFILER],[test -n "$PROFILER_LIBS"]) +# Check for jemalloc, another fast malloc that is good with contention. 
+have_jemalloc="no"
+AC_ARG_WITH([jemalloc],
+   [AS_HELP_STRING([--with-jemalloc],
+      [use jemalloc library or specify the directory with lib @<:@yes/no@:>@]
+   )],
+   [with_jemalloc="$withval"],
+   [with_jemalloc="no"]
+)
+if test "x$with_jemalloc" != "xno"; then
+   if test "x$with_jemalloc" != "xyes" && test "x$with_jemalloc" != "x"; then # no test -a, it is not portable
+      jelibs="-L$with_jemalloc -ljemalloc"
+   else
+      jelibs="-ljemalloc"
+   fi
+   AC_CHECK_LIB([jemalloc],[malloc_usable_size],[have_jemalloc="yes"],[have_jemalloc="no"],
+                $jelibs)
+
+   if test "$have_jemalloc" = "yes"; then
+      JEMALLOC_LIBS="$jelibs"
+   else
+      JEMALLOC_LIBS=""
+   fi
+fi
+AC_SUBST([JEMALLOC_LIBS])
+AM_CONDITIONAL([HAVEJEMALLOC],[test -n "$JEMALLOC_LIBS"])
+
+# Don't allow both tcmalloc and jemalloc, only one allocator can be linked in.
+if test "x$have_tcmalloc" != "xno" && test "x$have_jemalloc" != "xno"; then
+   AC_MSG_ERROR([Cannot use tcmalloc at same time as jemalloc])
+fi
+
 # Check for HDF5. This is required.
 AX_LIB_HDF5
 
@@ -781,6 +813,7 @@ AC_MSG_RESULT([
    FFTW3 enabled : $have_fftw3
    libNUMA enabled : $have_numa
    Using tcmalloc : $have_tcmalloc
+   Using jemalloc : $have_jemalloc
    CPU profiler : $have_profiler
 
    Hydro scheme : $with_hydro
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 4da84788a485dacd2103fe85ad3e729ade6b582a..28a4629bdb401c0736379a2fe14a3a5f19caf650 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -24,7 +24,7 @@ AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS)
 AM_LDFLAGS = $(HDF5_LDFLAGS)
 
 # Extra libraries.
-EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS)
+EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS)
 
 # MPI libraries.
 MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS)
diff --git a/examples/main.c b/examples/main.c
index dcc113ab6af6a06e7c20ac1aac7c2d3b715f7ef3..11163b42523fa5b1de1438ad8e67dde0fe9c88ef 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -45,6 +45,9 @@
 #define ENGINE_POLICY engine_policy_none
 #endif
 
+/* Global profiler. 
*/ +struct profiler prof; + /** * @brief Help messages for the command line parameters. */ diff --git a/src/Makefile.am b/src/Makefile.am index 49223f5b9cd81f40fca159f32de181d412170748..88474d72c08a1e8fc6dc8ed273dbf168d3b134ba 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -25,7 +25,7 @@ AM_LDFLAGS = $(HDF5_LDFLAGS) $(FFTW_LIBS) -version-info 0:0:0 GIT_CMD = @GIT_CMD@ # Additional dependencies for shared libraries. -EXTRA_LIBS = $(HDF5_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) +EXTRA_LIBS = $(HDF5_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) # MPI libraries. MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS) @@ -44,7 +44,8 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h \ physical_constants.h physical_constants_cgs.h potential.h version.h \ hydro_properties.h riemann.h threadpool.h cooling.h cooling_struct.h sourceterms.h \ - sourceterms_struct.h statistics.h memswap.h cache.h runner_doiact_vec.h + sourceterms_struct.h statistics.h memswap.h cache.h runner_doiact_vec.h profiler.h \ + dump.h # Common source files AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ @@ -53,7 +54,7 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \ physical_constants.c potential.c hydro_properties.c \ runner_doiact_fft.c threadpool.c cooling.c sourceterms.c \ - statistics.c runner_doiact_vec.c + statistics.c runner_doiact_vec.c profiler.c dump.c # Include files for distribution, not installation. 
nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ diff --git a/src/dump.c b/src/dump.c new file mode 100644 index 0000000000000000000000000000000000000000..2c0cf221ebd897bab0d047c196ce8a2aeddc6eae --- /dev/null +++ b/src/dump.c @@ -0,0 +1,153 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +/* This object's header. */ +#include "dump.h" + +/* Local headers. */ +#include "atomic.h" +#include "error.h" + +/** + * @brief Obtain a chunk of memory from a dump. + * + * @param d The #dump. + * @param count The number of bytes requested. + * @param offset The offset of the returned memory address within the dump file. + * @return A pointer to the memory-mapped chunk of data. 
+ */
+
+void *dump_get(struct dump *d, size_t count, size_t *offset) {
+  /* Reserve count bytes by bumping the shared counter atomically.
+   * NOTE(review): assumes atomic_add returns the value held before the
+   * addition, and that callers have made room with dump_ensure() since no
+   * bounds check is done here -- confirm against atomic.h and the callers. */
+  size_t local_offset = atomic_add(&d->count, count);
+  *offset = local_offset + d->file_offset;
+  return (char *)d->data + local_offset;
+}
+
+/**
+ * @brief Ensure that at least size bytes are available in the #dump.
+ *
+ * When the mapped window is too small it is released, the window is moved
+ * forward in the file past the pages that are completely filled, and a new,
+ * larger window is pre-allocated and mapped there.
+ *
+ * @param d The #dump.
+ * @param size The number of bytes that should be available.
+ */
+
+void dump_ensure(struct dump *d, size_t size) {
+
+  /* If we have enough space already, just bail. */
+  if (d->size - d->count > size) return;
+
+  /* Unmap the current data. The complete window of d->size bytes is released,
+   * unmapping only the filled pages would leak the rest of the mapping. */
+  if (munmap(d->data, d->size) != 0) {
+    error("Failed to unmap %zu bytes of dump data (%s).", d->size,
+          strerror(errno));
+  }
+
+  /* Update the size and count. Only whole pages are skipped as the file
+   * offset given to mmap has to be page-aligned, the bytes of the last,
+   * partially filled page stay at the start of the new window. */
+  const size_t trunc_count = d->count & d->page_mask;
+  d->file_offset += trunc_count;
+  d->count -= trunc_count;
+
+  /* The new window has to hold the bytes kept from the last page on top of
+   * the requested growth, and can never be empty. */
+  d->size = (d->count + size * dump_grow_ensure_factor + ~d->page_mask) &
+            d->page_mask;
+  if (d->size == 0) d->size = ~d->page_mask + 1;
+
+  /* Re-allocate the file size. */
+  if (posix_fallocate(d->fd, d->file_offset, d->size) != 0) {
+    error("Failed to pre-allocate the dump file.");
+  }
+
+  /* Re-map starting at the end of the file. */
+  if ((d->data = mmap(NULL, d->size, PROT_WRITE, MAP_SHARED, d->fd,
+                      d->file_offset)) == MAP_FAILED) {
+    error("Failed to allocate map of size %zu bytes (%s).", d->size,
+          strerror(errno));
+  }
+}
+
+/**
+ * @brief Flush the #dump to disk.
+ *
+ * @param d The #dump, the first d->count bytes of its window are synced.
+ */
+
+void dump_sync(struct dump *d) {
+  if (msync(d->data, d->count, MS_SYNC) != 0)
+    error("Failed to sync memory-mapped data.");
+}
+
+/**
+ * @brief Finalize the #dump.
+ *
+ * @param d The #dump, its window is unmapped and its file is cut down to the
+ *        bytes that were actually dumped and then closed.
+ */
+
+void dump_close(struct dump *d) {
+  /* Unmap the data in memory. The length is that of the whole window, a
+   * length of d->count is rejected by munmap when nothing was dumped. */
+  if (munmap(d->data, d->size) != 0) {
+    error("Failed to unmap dump data (%s).", strerror(errno));
+  }
+
+  /* Truncate the file to the correct length. */
+  if (ftruncate(d->fd, d->file_offset + d->count) != 0) {
+    error("Failed to truncate dump file (%s).", strerror(errno));
+  }
+
+  /* Close the memory-mapped file. */
+  if (close(d->fd) != 0) error("Failed to close memory-mapped file.");
+}
+
+/**
+ * @brief Initialize a file dump.
+ *
+ * @param d The #dump to initialize.
+ * @param filename The fully qualified name of the file in which to dump,
+ *        note that it will be overwritten.
+ * @param size The initial buffer size for this #dump, it is rounded up to a
+ *        whole number of pages and to at least one page.
+ */
+
+void dump_init(struct dump *d, const char *filename, size_t size) {
+
+  /* Create the output file. */
+  if ((d->fd = open(filename, O_CREAT | O_RDWR, 0660)) == -1) {
+    error("Failed to create dump file '%s' (%s).", filename, strerror(errno));
+  }
+
+  /* Adjust the size to be at least the page size. A failed sysconf() would
+   * otherwise yield a meaningless mask, and a size of zero an empty map. */
+  const long page_size = sysconf(_SC_PAGE_SIZE);
+  if (page_size <= 0) error("Failed to query the system page size.");
+  const size_t page_mask = ~((size_t)page_size - 1);
+  size = (size + ~page_mask) & page_mask;
+  if (size == 0) size = (size_t)page_size;
+
+  /* Pre-allocate the file size. */
+  if (posix_fallocate(d->fd, 0, size) != 0) {
+    error("Failed to pre-allocate the dump file.");
+  }
+
+  /* Map memory to the created file. */
+  if ((d->data = mmap(NULL, size, PROT_WRITE, MAP_SHARED, d->fd, 0)) ==
+      MAP_FAILED) {
+    error("Failed to allocate map of size %zu bytes (%s).", size,
+          strerror(errno));
+  }
+
+  /* Init some counters. */
+  d->size = size;
+  d->count = 0;
+  d->file_offset = 0;
+  d->page_mask = page_mask;
+}
diff --git a/src/dump.h b/src/dump.h
new file mode 100644
index 0000000000000000000000000000000000000000..a7e934218c271d2f82b99d39f278e5af3047be6e
--- /dev/null
+++ b/src/dump.h
@@ -0,0 +1,57 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_DUMP_H
+#define SWIFT_DUMP_H
+
+/* Includes. */
+/* NOTE(review): nothing declared below uses a lock, and size_t is not
+ * declared by this header itself -- confirm what lock.h is needed for. */
+#include "lock.h"
+
+/* Some constants. */
+/* Factor by which dump_ensure() multiplies the requested number of bytes
+ * when it has to map a new window. */
+#define dump_grow_ensure_factor 10
+
+/** The dump struct. */
+struct dump {
+
+  /* The memory-mapped data of this dump. */
+  void *data;
+
+  /* The size of the memory-mapped data, in bytes. */
+  size_t size;
+
+  /* The number of bytes that have been dumped. */
+  size_t count;
+
+  /* The offset of the data within the current file. */
+  size_t file_offset;
+
+  /* The file with which this memory is associated. */
+  int fd;
+
+  /* Mask containing the significant bits for page addresses. */
+  size_t page_mask;
+};
+
+/* Function prototypes. */
+void dump_init(struct dump *d, const char *filename, size_t size);
+void dump_ensure(struct dump *d, size_t size);
+void dump_sync(struct dump *d);
+void dump_close(struct dump *d);
+void *dump_get(struct dump *d, size_t count, size_t *offset);
+
+#endif /* SWIFT_DUMP_H */
diff --git a/src/engine.c b/src/engine.c
index 27aa684b80a62fd743c940dd6edaff9a4a0609c8..e4dc4bb4f661144b3faef856a79eab451eed21f3 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -59,6 +59,7 @@
 #include "parallel_io.h"
 #include "part.h"
 #include "partition.h"
+#include "profiler.h"
 #include "proxy.h"
 #include "runner.h"
 #include "serial_io.h"
@@ -320,6 +321,23 @@ void engine_redistribute(struct engine *e) {
                     MPI_COMM_WORLD) != MPI_SUCCESS)
     error("Failed to allreduce particle transfer counts.");
 
+  /* Report how many particles will be moved. 
*/
+  if (e->verbose && e->nodeID == 0) {
+    size_t total = 0;
+    size_t unmoved = 0;
+    for (int p = 0, r = 0; p < nr_nodes; p++) {
+      for (int s = 0; s < nr_nodes; s++) {
+        total += counts[r];
+        if (p == s) unmoved += counts[r];
+        r++;
+      }
+    }
+    /* size_t needs %zu, and an empty particle set must not divide by zero. */
+    const size_t moved = total - unmoved;
+    const double pct = total ? 100.0 * (double)moved / (double)total : 0.0;
+    message("%zu of %zu (%.2f%%) of particles moved", moved, total, pct);
+  }
+
   /* Get all the g_counts from all the nodes. */
   if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
                     MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
@@ -3408,7 +3426,7 @@ void engine_print_policy(struct engine *e) {
 #else
   printf("%s engine_policy: engine policies are [ ",
          clocks_get_timesincestart());
-  for (int k = 1; k < 32; k++)
+  for (int k = 1; k < 31; k++)
     if (e->policy & (1 << k)) printf(" %s ", engine_policy_names[k + 1]);
   printf(" ]\n");
   fflush(stdout);
diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h
index 3b3454f1bb348b178ac57899da4f7611802a69cd..beb6f98b8c0d781aa709fb6ee3ca564a52704db2 100644
--- a/src/hydro/Minimal/hydro.h
+++ b/src/hydro/Minimal/hydro.h
@@ -66,7 +66,9 @@ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy(
 __attribute__((always_inline)) INLINE static float hydro_get_pressure(
     const struct part *restrict p, float dt) {
 
-  return p->force.pressure;
+  const float u = p->u + p->u_dt * dt;
+
+  return gas_pressure_from_internal_energy(p->rho, u);
 }
 
 /**
diff --git a/src/partition.c b/src/partition.c
index 89ba3f2835cb78e07ec2bc5cb04c3f8f71751563..85745d880e3ab0f6beaf918a5c226730b6b82a7c 100644
--- a/src/partition.c
+++ b/src/partition.c
@@ -278,6 +278,18 @@ static void split_metis(struct space *s, int nregions, int *celllist) {
 #endif
 
 #if defined(WITH_MPI) && defined(HAVE_METIS)
+
+/* qsort support. 
*/
+struct indexval {
+  int index;
+  int count;
+};
+static int indexvalcmp(const void *p1, const void *p2) {
+  const struct indexval *iv1 = (const struct indexval *)p1;
+  const struct indexval *iv2 = (const struct indexval *)p2;
+  return (iv2->count > iv1->count) - (iv2->count < iv1->count); /* desc. */
+}
+
 /**
  * @brief Partition the given space into a number of connected regions.
  *
@@ -382,14 +394,78 @@ static void pick_metis(struct space *s, int nregions, int *vertexw, int *edgew,
     if (regionid[k] < 0 || regionid[k] >= nregions)
       error("Got bad nodeID %" PRIDX " for cell %i.", regionid[k], k);
 
+  /* We want a solution in which the current regions of the space are
+   * preserved when possible, to avoid unnecessary particle movement.
+   * So create a 2d-array of cell counts that are common to all pairs
+   * of old and new ranks. Each element of the array has a cell count and
+   * a unique index so we can sort into decreasing counts. */
+  const int indmax = nregions * nregions;
+  struct indexval *ivs = calloc(indmax, sizeof(struct indexval));
+  if (ivs == NULL) error("Failed to allocate the region overlap counts.");
+  for (int k = 0; k < ncells; k++) {
+    /* The old rank selects the row, so it has to be a valid region index. */
+    const int oldrank = s->cells_top[k].nodeID;
+    if (oldrank < 0 || oldrank >= nregions)
+      error("Cell %i has nodeID %i outside of the %i regions.", k, oldrank,
+            nregions);
+    const int index = regionid[k] + nregions * oldrank;
+    ivs[index].count++;
+    ivs[index].index = index;
+  }
+  qsort(ivs, indmax, sizeof(struct indexval), indexvalcmp);
+
+  /* Go through the ivs using the largest counts first, these are the
+   * regions with the most cells in common, old partition to new. */
+  int *oldmap = malloc(sizeof(int) * nregions);
+  int *newmap = malloc(sizeof(int) * nregions);
+  if (oldmap == NULL || newmap == NULL)
+    error("Failed to allocate the region maps.");
+  for (int k = 0; k < nregions; k++) {
+    oldmap[k] = -1;
+    newmap[k] = -1;
+  }
+  for (int k = 0; k < indmax; k++) {
+
+    /* Stop when all regions with common cells have been considered. */
+    if (ivs[k].count == 0) break;
+
+    /* Store old and new IDs, if not already used. */
+    int oldregion = ivs[k].index / nregions;
+    int newregion = ivs[k].index - oldregion * nregions;
+    if (newmap[newregion] == -1 && oldmap[oldregion] == -1) {
+      newmap[newregion] = oldregion;
+      oldmap[oldregion] = newregion;
+    }
+  }
+
+  /* Handle any regions that did not get selected by picking an unused rank
+   * from oldmap and assigning to newmap. */
+  int spare = 0;
+  for (int k = 0; k < nregions; k++) {
+    if (newmap[k] == -1) {
+      for (int j = spare; j < nregions; j++) {
+        if (oldmap[j] == -1) {
+          /* Pair both ways and resume the next search past this rank. */
+          newmap[k] = j;
+          oldmap[j] = k;
+          spare = j + 1;
+          break;
+        }
+      }
+    }
+  }
+
   /* Set the cell list to the region index. */
   for (int k = 0; k < ncells; k++) {
-    celllist[k] = regionid[k];
+    celllist[k] = newmap[regionid[k]];
   }
 
   /* Clean up. */
   if (weights_v != NULL) free(weights_v);
   if (weights_e != NULL) free(weights_e);
+  free(ivs);
+  free(oldmap);
+  free(newmap);
   free(xadj);
   free(adjncy);
   free(regionid);
diff --git a/src/profiler.c b/src/profiler.c
new file mode 100644
index 0000000000000000000000000000000000000000..ad8338eebfd130d4088f9fd9d4fcc9856c8cc731
--- /dev/null
+++ b/src/profiler.c
@@ -0,0 +1,234 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 James S. Willis (james.s.willis@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+/* This object's header. */
+#include "profiler.h"
+
+/* Local includes */
+#include "clocks.h"
+#include "error.h"
+#include "hydro.h"
+#include "version.h"
+
+/**
+ * @brief Resets all timers.
+ *
+ * @param profiler #profiler object that holds file pointers and
+ * function timers.
+ */
+void profiler_reset_timers(struct profiler *profiler) {
+
+  profiler->collect_timesteps_time = 0;
+  profiler->drift_time = 0;
+  profiler->rebuild_time = 0;
+  profiler->reweight_time = 0;
+  profiler->clear_waits_time = 0;
+  profiler->re_wait_time = 0;
+  profiler->enqueue_time = 0;
+  profiler->stats_time = 0;
+  profiler->launch_time = 0;
+  profiler->space_rebuild_time = 0;
+  profiler->engine_maketasks_time = 0;
+  profiler->engine_marktasks_time = 0;
+  profiler->space_regrid_time = 0;
+  profiler->space_parts_sort_time = 0;
+  profiler->space_split_time = 0;
+  profiler->space_parts_get_cell_id_time = 0;
+  profiler->space_count_parts_time = 0;
+}
+
+/**
+ * @brief Opens an output file and populates the header.
+ *
+ * Aborts through error() if the file name does not fit or the file cannot
+ * be opened.
+ *
+ * @param e #engine object to get various properties.
+ * @param fileName name of file to be written to.
+ * @param functionName name of function that is being timed.
+ * @param file (return) pointer used to open output file.
+ */
+void profiler_write_timing_info_header(const struct engine *e, char *fileName,
+                                       char *functionName, FILE **file) {
+
+  /* Create the file name in the format: "fileName_(no. of threads)", where
+   * the number is that of all the threads over all the ranks. */
+  char fullFileName[200] = "";
+  const int len = snprintf(fullFileName, sizeof(fullFileName), "%s_%d.txt",
+                           fileName, e->nr_nodes * e->nr_threads);
+  if (len < 0 || (size_t)len >= sizeof(fullFileName))
+    error("Profiler output file name '%s' is too long.", fileName);
+
+  /* Open the file and write the header. */
+  if ((*file = fopen(fullFileName, "w")) == NULL)
+    error("Failed to open profiler output file '%s' (%s).", fullFileName,
+          strerror(errno));
+
+  /* NOTE(review): the "# Branch:" field is filled with functionName, which
+   * looks unintended -- confirm what should be reported there. */
+  fprintf(*file,
+          "# Host: %s\n# Branch: %s\n# Revision: %s\n# Compiler: %s, "
+          "Version: %s \n# "
+          "Number of threads: %d\n# Number of MPI ranks: %d\n# Hydrodynamic "
+          "scheme: %s\n# Hydrodynamic kernel: %s\n# No. of neighbours: %.2f "
+          "+/- %.2f\n# Eta: %f\n"
+          "# %6s %14s %14s %10s %10s %16s [%s]\n",
+          hostname(), functionName, git_revision(), compiler_name(),
+          compiler_version(), e->nr_threads, e->nr_nodes, SPH_IMPLEMENTATION,
+          kernel_name, e->hydro_properties->target_neighbours,
+          e->hydro_properties->delta_neighbours,
+          e->hydro_properties->eta_neighbours, "Step", "Time", "Time-step",
+          "Updates", "g-Updates", "Wall-clock time", clocks_getunit());
+
+  fflush(*file);
+}
+
+/**
+ * @brief Writes the headers for all output files. Should be called once at the
+ * start of the simulation, it could be called in engine_init() for example.
+ *
+ * @param e #engine object to get various properties.
+ * @param profiler #profiler object that holds file pointers and
+ * function timers. 
+ */
+void profiler_write_all_timing_info_headers(const struct engine *e,
+                                            struct profiler *profiler) {
+
+  profiler_write_timing_info_header(e, "enginecollecttimesteps",
+                                    "engine_collect_timesteps",
+                                    &profiler->file_engine_collect_timesteps);
+  profiler_write_timing_info_header(e, "enginedrift", "engine_drift",
+                                    &profiler->file_engine_drift);
+  profiler_write_timing_info_header(e, "enginerebuild", "engine_rebuild",
+                                    &profiler->file_engine_rebuild);
+  profiler_write_timing_info_header(e, "schedulerreweight",
+                                    "scheduler_reweight",
+                                    &profiler->file_scheduler_reweight);
+  profiler_write_timing_info_header(e, "schedulerclearwaits",
+                                    "scheduler_clear_waits",
+                                    &profiler->file_scheduler_clear_waits);
+  profiler_write_timing_info_header(e, "schedulerrewait", "scheduler_rewait",
+                                    &profiler->file_scheduler_re_wait);
+  profiler_write_timing_info_header(e, "schedulerenqueue", "scheduler_enqueue",
+                                    &profiler->file_scheduler_enqueue);
+  profiler_write_timing_info_header(e, "engineprintstats", "engine_print_stats",
+                                    &profiler->file_engine_stats);
+  profiler_write_timing_info_header(e, "enginelaunch", "engine_launch",
+                                    &profiler->file_engine_launch);
+  profiler_write_timing_info_header(e, "spacerebuild", "space_rebuild",
+                                    &profiler->file_space_rebuild);
+  profiler_write_timing_info_header(e, "enginemaketasks", "engine_maketasks",
+                                    &profiler->file_engine_maketasks);
+  profiler_write_timing_info_header(e, "enginemarktasks", "engine_marktasks",
+                                    &profiler->file_engine_marktasks);
+  profiler_write_timing_info_header(e, "spaceregrid", "space_regrid",
+                                    &profiler->file_space_regrid);
+  profiler_write_timing_info_header(e, "spacepartssort", "space_parts_sort",
+                                    &profiler->file_space_parts_sort);
+  profiler_write_timing_info_header(e, "spacesplit", "space_split",
+                                    &profiler->file_space_split);
+  profiler_write_timing_info_header(e, "spacegetcellid", "space_get_cell_id",
+                                    &profiler->file_space_parts_get_cell_id);
+  profiler_write_timing_info_header(e, "spacecountparts", "space_count_parts",
+                                    &profiler->file_space_count_parts);
+}
+
+/**
+ * @brief Writes timing info to the output file.
+ *
+ * @param e #engine object to get various properties.
+ * @param time Time in ticks to be written to the output file.
+ * @param file pointer used to open output file.
+ */
+void profiler_write_timing_info(const struct engine *e, ticks time,
+                                FILE *file) {
+
+  fprintf(file, " %6d %14e %14e %10zu %10zu %21.3f\n", e->step, e->time,
+          e->timeStep, e->updates, e->g_updates, clocks_from_ticks(time));
+  fflush(file);
+}
+
+/**
+ * @brief Writes timing info to all output files. Should be called at the end of
+ * every time step, in engine_step() for example.
+ *
+ * One row is appended to every file opened by
+ * profiler_write_all_timing_info_headers(), after which the timers are reset.
+ *
+ * @param e #engine object to get various properties.
+ * @param profiler #profiler object that holds file pointers and
+ * function timers.
+ */
+void profiler_write_all_timing_info(const struct engine *e,
+                                    struct profiler *profiler) {
+
+  /* The collect-timesteps file gets its header like all the others, so it
+   * gets its rows here as well. */
+  profiler_write_timing_info(e, profiler->collect_timesteps_time,
+                             profiler->file_engine_collect_timesteps);
+  profiler_write_timing_info(e, profiler->drift_time,
+                             profiler->file_engine_drift);
+  profiler_write_timing_info(e, profiler->rebuild_time,
+                             profiler->file_engine_rebuild);
+  profiler_write_timing_info(e, profiler->reweight_time,
+                             profiler->file_scheduler_reweight);
+  profiler_write_timing_info(e, profiler->clear_waits_time,
+                             profiler->file_scheduler_clear_waits);
+  profiler_write_timing_info(e, profiler->re_wait_time,
+                             profiler->file_scheduler_re_wait);
+  profiler_write_timing_info(e, profiler->enqueue_time,
+                             profiler->file_scheduler_enqueue);
+  profiler_write_timing_info(e, profiler->stats_time,
+                             profiler->file_engine_stats);
+  profiler_write_timing_info(e, profiler->launch_time,
+                             profiler->file_engine_launch);
+  profiler_write_timing_info(e, profiler->space_rebuild_time,
+                             profiler->file_space_rebuild);
+  profiler_write_timing_info(e, profiler->engine_maketasks_time,
+                             profiler->file_engine_maketasks);
+  profiler_write_timing_info(e, profiler->engine_marktasks_time,
+                             profiler->file_engine_marktasks);
+  profiler_write_timing_info(e, profiler->space_regrid_time,
+                             profiler->file_space_regrid);
+  profiler_write_timing_info(e, profiler->space_parts_sort_time,
+                             profiler->file_space_parts_sort);
+  profiler_write_timing_info(e, profiler->space_split_time,
+                             profiler->file_space_split);
+  profiler_write_timing_info(e, profiler->space_parts_get_cell_id_time,
+                             profiler->file_space_parts_get_cell_id);
+  profiler_write_timing_info(e, profiler->space_count_parts_time,
+                             profiler->file_space_count_parts);
+
+  /* Reset timers. */
+  profiler_reset_timers(profiler);
+}
+
+/**
+ * @brief Closes all output files, should be called at the end of the
+ * simulation.
+ *
+ * @param profiler #profiler object that holds file pointers and
+ * function timers.
+ */
+void profiler_close_files(struct profiler *profiler) {
+
+  /* Every stream opened for the headers is closed, including the
+   * collect-timesteps one, so that none is leaked. */
+  fclose(profiler->file_engine_collect_timesteps);
+  fclose(profiler->file_engine_drift);
+  fclose(profiler->file_engine_rebuild);
+  fclose(profiler->file_scheduler_reweight);
+  fclose(profiler->file_scheduler_clear_waits);
+  fclose(profiler->file_scheduler_re_wait);
+  fclose(profiler->file_scheduler_enqueue);
+  fclose(profiler->file_engine_stats);
+  fclose(profiler->file_engine_launch);
+  fclose(profiler->file_space_rebuild);
+  fclose(profiler->file_engine_maketasks);
+  fclose(profiler->file_engine_marktasks);
+  fclose(profiler->file_space_regrid);
+  fclose(profiler->file_space_parts_sort);
+  fclose(profiler->file_space_split);
+  fclose(profiler->file_space_parts_get_cell_id);
+  fclose(profiler->file_space_count_parts);
+}
diff --git a/src/profiler.h b/src/profiler.h
new file mode 100644
index 0000000000000000000000000000000000000000..b00bc986ece8b78282b11ce317a6746ecba5a50f
--- /dev/null
+++ b/src/profiler.h
@@ -0,0 +1,78 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 James S. 
Willis (james.s.willis@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_PROFILER_H
+#define SWIFT_PROFILER_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local includes */
+/* NOTE(review): FILE and ticks are used below but not declared by this
+ * header itself, presumably engine.h brings them in -- confirm. */
+#include "engine.h"
+
+/* Profiler that holds file pointers and time taken in functions. */
+struct profiler {
+
+  /* File pointers for timing info. There is one output file per timed
+   * function, each has a matching timer further down. */
+  FILE *file_engine_collect_timesteps;
+  FILE *file_engine_drift;
+  FILE *file_engine_rebuild;
+  FILE *file_scheduler_reweight;
+  FILE *file_scheduler_clear_waits;
+  FILE *file_scheduler_re_wait;
+  FILE *file_scheduler_enqueue;
+  FILE *file_engine_stats;
+  FILE *file_engine_launch;
+  FILE *file_space_rebuild;
+  FILE *file_engine_maketasks;
+  FILE *file_engine_marktasks;
+  FILE *file_space_regrid;
+  FILE *file_space_parts_sort;
+  FILE *file_space_split;
+  FILE *file_space_parts_get_cell_id;
+  FILE *file_space_count_parts;
+
+  /* Time taken in functions. */
+  ticks collect_timesteps_time;
+  ticks drift_time;
+  ticks rebuild_time;
+  ticks reweight_time;
+  ticks clear_waits_time;
+  ticks re_wait_time;
+  ticks enqueue_time;
+  ticks stats_time;
+  ticks launch_time;
+  ticks space_rebuild_time;
+  ticks engine_maketasks_time;
+  ticks engine_marktasks_time;
+  ticks space_regrid_time;
+  ticks space_parts_sort_time;
+  ticks space_split_time;
+  ticks space_parts_get_cell_id_time;
+  ticks space_count_parts_time;
+};
+
+/* Function prototypes. */
+void profiler_reset_timers(struct profiler *profiler);
+void profiler_write_all_timing_info_headers(const struct engine *e,
+                                            struct profiler *profiler);
+void profiler_write_all_timing_info(const struct engine *e,
+                                    struct profiler *profiler);
+void profiler_close_files(struct profiler *profiler);
+
+#endif /* SWIFT_PROFILER_H */
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index 308764e755806a124f1cc234dfae253c57e0eda6..6bc8f2da808cc2d953482b90e9441b833384bc75 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -1989,6 +1989,10 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) {
   /* Should we even bother? */
   if (!cell_is_active(ci, r->e)) return;
 
+#ifdef SWIFT_DEBUG_CHECKS
+  /* NOTE(review): the result of cell_is_drifted() is discarded, presumably the
+   * call raises an error itself when the cell is not drifted -- confirm. */
+  cell_is_drifted(ci, r->e);
+#endif
+
   /* Recurse? */
   if (ci->split) {
 
diff --git a/src/space.c b/src/space.c
index 935677a9ebed97acfde8341ec1545ef4f33a56c0..44bafdd2bb2c7a930c1a6e6691b3ea0beca66683 100644
--- a/src/space.c
+++ b/src/space.c
@@ -173,18 +173,68 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
  *
  * @param s The #space.
  * @param c The #cell to recycle.
+ * @param rec_begin Pointer to the start of the list of cells to recycle.
+ * @param rec_end Pointer to the end of the list of cells to recycle. 
*/
-void space_rebuild_recycle(struct space *s, struct cell *c) {
-
+void space_rebuild_recycle_rec(struct space *s, struct cell *c,
+                               struct cell **rec_begin, struct cell **rec_end) {
   if (c->split)
     for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL) {
-        space_rebuild_recycle(s, c->progeny[k]);
-        space_recycle(s, c->progeny[k]);
+        space_rebuild_recycle_rec(s, c->progeny[k], rec_begin, rec_end);
+        c->progeny[k]->next = *rec_begin;
+        *rec_begin = c->progeny[k];
+        if (*rec_end == NULL) *rec_end = *rec_begin;
         c->progeny[k] = NULL;
       }
 }
 
+/* threadpool_map() worker: recycles the progeny of each of the given cells
+ * and resets the per-cell task and sort data (extra_data is the #space). */
+void space_rebuild_recycle_mapper(void *map_data, int num_elements,
+                                  void *extra_data) {
+
+  struct space *s = (struct space *)extra_data;
+  struct cell *cells = (struct cell *)map_data;
+
+  for (int k = 0; k < num_elements; k++) {
+    struct cell *c = &cells[k];
+    struct cell *rec_begin = NULL, *rec_end = NULL;
+    space_rebuild_recycle_rec(s, c, &rec_begin, &rec_end);
+    if (rec_begin != NULL) space_recycle_list(s, rec_begin, rec_end);
+    c->sorts = NULL;
+    c->nr_tasks = 0;
+    c->density = NULL;
+    c->gradient = NULL;
+    c->force = NULL;
+    c->grav = NULL;
+    c->dx_max = 0.0f;
+    c->sorted = 0;
+    c->count = 0;
+    c->gcount = 0;
+    c->init = NULL;
+    c->extra_ghost = NULL;
+    c->ghost = NULL;
+    c->kick = NULL;
+    c->cooling = NULL;
+    c->sourceterms = NULL;
+    c->super = c;
+    free(c->sort);
+    c->sort = NULL;
+#ifdef WITH_MPI
+    c->recv_xv = NULL;
+    c->recv_rho = NULL;
+    c->recv_gradient = NULL;
+    c->recv_ti = NULL;
+
+    c->send_xv = NULL;
+    c->send_rho = NULL;
+    c->send_gradient = NULL;
+    c->send_ti = NULL;
+#endif
+  }
+}
+
 /**
  * @brief Re-build the top-level cell grid.
  *
@@ -297,10 +347,8 @@ void space_regrid(struct space *s, int verbose) {
 
   /* Free the old cells, if they were allocated. */
   if (s->cells_top != NULL) {
-    for (int k = 0; k < s->nr_cells; k++) {
-      space_rebuild_recycle(s, &s->cells_top[k]);
-      if (s->cells_top[k].sort != NULL) free(s->cells_top[k].sort);
-    }
+    threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
+                   s->cells_top, s->nr_cells, sizeof(struct cell), 100, s);
     free(s->cells_top);
     s->maxdepth = 0;
   }
@@ -388,42 +436,12 @@ void space_regrid(struct space *s, int verbose) {
     // message( "rebuilding upper-level cells took %.3f %s." ,
     // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());
 
-  } /* re-build upper-level cells? */
-
+  }      /* re-build upper-level cells? */
   else { /* Otherwise, just clean up the cells. */
 
     /* Free the old cells, if they were allocated. */
-    for (int k = 0; k < s->nr_cells; k++) {
-      space_rebuild_recycle(s, &s->cells_top[k]);
-      s->cells_top[k].sorts = NULL;
-      s->cells_top[k].nr_tasks = 0;
-      s->cells_top[k].density = NULL;
-      s->cells_top[k].gradient = NULL;
-      s->cells_top[k].force = NULL;
-      s->cells_top[k].grav = NULL;
-      s->cells_top[k].dx_max = 0.0f;
-      s->cells_top[k].sorted = 0;
-      s->cells_top[k].count = 0;
-      s->cells_top[k].gcount = 0;
-      s->cells_top[k].init = NULL;
-      s->cells_top[k].extra_ghost = NULL;
-      s->cells_top[k].ghost = NULL;
-      s->cells_top[k].kick = NULL;
-      s->cells_top[k].cooling = NULL;
-      s->cells_top[k].sourceterms = NULL;
-      s->cells_top[k].super = &s->cells_top[k];
-#if WITH_MPI
-      s->cells_top[k].recv_xv = NULL;
-      s->cells_top[k].recv_rho = NULL;
-      s->cells_top[k].recv_gradient = NULL;
-      s->cells_top[k].recv_ti = NULL;
-
-      s->cells_top[k].send_xv = NULL;
-      s->cells_top[k].send_rho = NULL;
-      s->cells_top[k].send_gradient = NULL;
-      s->cells_top[k].send_ti = NULL;
-#endif
-    }
+    threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
+                   s->cells_top, s->nr_cells, sizeof(struct cell), 100, s);
     s->maxdepth = 0;
   }
 
@@ -472,7 +490,6 @@ void space_rebuild(struct space *s, int verbose) {
   space_gparts_get_cell_index(s, gind, cells_top, verbose);
 
 #ifdef WITH_MPI
-
   /* Move 
non-local parts to the end of the list. */ const int local_nodeID = s->e->nodeID; for (size_t k = 0; k < nr_parts;) { @@ -1606,24 +1623,22 @@ void space_split_mapper(void *map_data, int num_cells, void *extra_data) { } /** - * @brief Return a used cell to the buffer od unused sub-cells. + * @brief Return a used cell to the buffer of unused sub-cells. * * @param s The #space. * @param c The #cell. */ void space_recycle(struct space *s, struct cell *c) { - /* Lock the space. */ - lock_lock(&s->lock); - /* Clear the cell. */ - if (lock_destroy(&c->lock) != 0) error("Failed to destroy spinlock."); + if (lock_destroy(&c->lock) != 0 || lock_destroy(&c->glock) != 0) + error("Failed to destroy spinlock."); /* Clear this cell's sort arrays. */ if (c->sort != NULL) free(c->sort); - /* Clear the cell data. */ - bzero(c, sizeof(struct cell)); + /* Lock the space. */ + lock_lock(&s->lock); /* Hook this cell into the buffer. */ c->next = s->cells_sub; @@ -1633,6 +1648,47 @@ void space_recycle(struct space *s, struct cell *c) { /* Unlock the space. */ lock_unlock_blind(&s->lock); } + +/** + * @brief Return a list of used cells to the buffer of unused sub-cells. + * + * @param s The #space. + * @param list_begin Pointer to the first #cell in the linked list of + * cells joined by their @c next pointers. + * @param list_end Pointer to the last #cell in the linked list of + * cells joined by their @c next pointers. It is assumed that this + * cell's @c next pointer is @c NULL. + */ +void space_recycle_list(struct space *s, struct cell *list_begin, + struct cell *list_end) { + + int count = 0; + + /* Clean up the list of cells. */ + for (struct cell *c = list_begin; c != NULL; c = c->next) { + /* Clear the cell. */ + if (lock_destroy(&c->lock) != 0 || lock_destroy(&c->glock) != 0) + error("Failed to destroy spinlock."); + + /* Clear this cell's sort arrays. */ + if (c->sort != NULL) free(c->sort); + + /* Count this cell. */ + count += 1; + } + + /* Lock the space. 
*/ + lock_lock(&s->lock); + + /* Hook this cell into the buffer. */ + list_end->next = s->cells_sub; + s->cells_sub = list_begin; + s->tot_cells -= count; + + /* Unlock the space. */ + lock_unlock_blind(&s->lock); +} + /** * @brief Get a new empty (sub-)#cell. * @@ -1652,9 +1708,6 @@ struct cell *space_getcell(struct space *s) { space_cellallocchunk * sizeof(struct cell)) != 0) error("Failed to allocate more cells."); - /* Zero everything for good measure */ - bzero(s->cells_sub, space_cellallocchunk * sizeof(struct cell)); - /* Constructed a linked list */ for (int k = 0; k < space_cellallocchunk - 1; k++) s->cells_sub[k].next = &s->cells_sub[k + 1]; diff --git a/src/space.h b/src/space.h index 53cf2d0c8fa548ae19aa7452abb38c3e3e028165..4aea2a07560865c8d8a474f069b370748e12e65e 100644 --- a/src/space.h +++ b/src/space.h @@ -171,6 +171,8 @@ void space_gparts_sort_mapper(void *map_data, int num_elements, void *extra_data); void space_rebuild(struct space *s, int verbose); void space_recycle(struct space *s, struct cell *c); +void space_recycle_list(struct space *s, struct cell *list_begin, + struct cell *list_end); void space_split(struct space *s, struct cell *cells, int nr_cells, int verbose); void space_split_mapper(void *map_data, int num_elements, void *extra_data); diff --git a/src/swift.h b/src/swift.h index 1e3e0f2cf88d7307d19f36d42c59f692f282b98c..6c2bcf1811c336b7d5dd2b838b4a4f518ba34ebb 100644 --- a/src/swift.h +++ b/src/swift.h @@ -45,6 +45,7 @@ #include "partition.h" #include "physical_constants.h" #include "potential.h" +#include "profiler.h" #include "queue.h" #include "runner.h" #include "scheduler.h" diff --git a/src/threadpool.c b/src/threadpool.c index 6bc887d96cb72804f0fbc8e2801a6522bf27f947..c11fd8121bb02f36fce1796d79a7eb55a38102c4 100644 --- a/src/threadpool.c +++ b/src/threadpool.c @@ -91,6 +91,10 @@ void threadpool_init(struct threadpool *tp, int num_threads) { tp->num_threads = num_threads; tp->num_threads_waiting = 0; + /* If there is only a 
single thread, do nothing more as of here as + we will just do work in the (blocked) calling thread. */ + if (num_threads == 1) return; + /* Init the threadpool mutexes. */ if (pthread_mutex_init(&tp->thread_mutex, NULL) != 0) error("Failed to initialize mutexex."); @@ -144,6 +148,12 @@ void threadpool_map(struct threadpool *tp, threadpool_map_function map_function, void *map_data, size_t N, int stride, int chunk, void *extra_data) { + /* If we just have a single thread, call the map function directly. */ + if (tp->num_threads == 1) { + map_function(map_data, N, extra_data); + return; + } + /* Set the map data and signal the threads. */ pthread_mutex_lock(&tp->thread_mutex); tp->map_data_stride = stride; diff --git a/tests/Makefile.am b/tests/Makefile.am index 136b7ad231947574a5459298e7fb85902028a3f4..1250835853f7521c069f3978f920cabd8f03540b 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -25,7 +25,7 @@ TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetr testPair.sh testPairPerturbed.sh test27cells.sh test27cellsPerturbed.sh \ testParser.sh testSPHStep test125cells.sh testKernelGrav testFFT \ testAdiabaticIndex testRiemannExact testRiemannTRRS testRiemannHLLC \ - testMatrixInversion testThreadpool + testMatrixInversion testThreadpool testDump # List of test programs to compile check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ @@ -33,7 +33,7 @@ check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ testKernel testKernelGrav testFFT testInteractions testMaths \ testSymmetry testThreadpool \ testAdiabaticIndex testRiemannExact testRiemannTRRS \ - testRiemannHLLC testMatrixInversion + testRiemannHLLC testMatrixInversion testDump # Sources for the individual programs testGreetings_SOURCES = testGreetings.c @@ -78,6 +78,8 @@ testMatrixInversion_SOURCES = testMatrixInversion.c testThreadpool_SOURCES = testThreadpool.c +testDump_SOURCES = testDump.c + # Files necessary for distribution 
EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \ test27cells.sh test27cellsPerturbed.sh testParser.sh \ diff --git a/tests/testDump.c b/tests/testDump.c new file mode 100644 index 0000000000000000000000000000000000000000..5f46d30a4eeb3d936563a3983b89c3d46ecd4a06 --- /dev/null +++ b/tests/testDump.c @@ -0,0 +1,84 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +/* This object's header. */ +#include "../src/dump.h" + +/* Local headers. 
*/ +#include "../src/threadpool.h" + +void dump_mapper(void *map_data, int num_elements, void *extra_data) { + struct dump *d = (struct dump *)extra_data; + size_t offset; + char *out_string = dump_get(d, 7, &offset); + char out_buff[8]; + snprintf(out_buff, 8, "%06zi\n", offset / 7); + memcpy(out_string, out_buff, 7); +} + +int main(int argc, char *argv[]) { + + /* Some constants. */ + const int num_threads = 4; + const char *filename = "/tmp/dump_test.out"; + const int num_runs = 20; + const int chunk_size = 1000; + + /* Prepare a threadpool to write to the dump. */ + struct threadpool t; + threadpool_init(&t, num_threads); + + /* Prepare a dump. */ + struct dump d; + dump_init(&d, filename, 1024); + + /* Dump numbers in chunks. */ + for (int run = 0; run < num_runs; run++) { + + /* Ensure capacity. */ + dump_ensure(&d, 7 * chunk_size); + + /* Dump a few numbers. */ + printf("dumping %i chunks...\n", chunk_size); + fflush(stdout); + threadpool_map(&t, dump_mapper, NULL, chunk_size, 0, 1, &d); + } + + /* Sync the file, not necessary before dump_close, but just to test this. */ + dump_sync(&d); + + /* Finalize the dump. */ + dump_close(&d); + + /* Return a happy number. */ + return 0; +}