Commit ca7b098d authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Merge branch 'master' into particle-caching

parents d4e7f8b5 b362c1a7
......@@ -351,7 +351,7 @@ AC_ARG_WITH([tcmalloc],
[with_tcmalloc="no"]
)
if test "x$with_tcmalloc" != "xno"; then
if test "x$with_tcmalloc" != "xyes" && test "x$with_tcmalloc" != "x"; then
if test "x$with_tcmalloc" != "xyes" -a "x$with_tcmalloc" != "x"; then
tclibs="-L$with_tcmalloc -ltcmalloc"
else
tclibs="-ltcmalloc"
......@@ -361,7 +361,7 @@ if test "x$with_tcmalloc" != "xno"; then
# Could just have the minimal version.
if test "$have_tcmalloc" = "no"; then
if test "x$with_tcmalloc" != "xyes" && test "x$with_tcmalloc" != "x"; then
if test "x$with_tcmalloc" != "xyes" -a "x$with_tcmalloc" != "x"; then
tclibs="-L$with_tcmalloc -ltcmalloc_minimal"
else
tclibs="-ltcmalloc_minimal"
......@@ -394,7 +394,7 @@ AC_ARG_WITH([profiler],
[with_profiler="yes"]
)
if test "x$with_profiler" != "xno"; then
if test "x$with_profiler" != "xyes" && test "x$with_profiler" != "x"; then
if test "x$with_profiler" != "xyes" -a "x$with_profiler" != "x"; then
proflibs="-L$with_profiler -lprofiler"
else
proflibs="-lprofiler"
......@@ -411,6 +411,38 @@ fi
AC_SUBST([PROFILER_LIBS])
AM_CONDITIONAL([HAVEPROFILER],[test -n "$PROFILER_LIBS"])
# Check for jemalloc, another fast malloc that is good with contention.
# The check is disabled unless --with-jemalloc is given. The option value is
# either yes, or a directory that is added to the library search path.
have_jemalloc="no"
AC_ARG_WITH([jemalloc],
[AS_HELP_STRING([--with-jemalloc],
[use jemalloc library or specify the directory with lib @<:@yes/no@:>@]
)],
[with_jemalloc="$withval"],
[with_jemalloc="no"]
)
if test "x$with_jemalloc" != "xno"; then
# Any value other than yes or the empty string is treated as a directory.
if test "x$with_jemalloc" != "xyes" -a "x$with_jemalloc" != "x"; then
jelibs="-L$with_jemalloc -ljemalloc"
else
jelibs="-ljemalloc"
fi
# Probe the library for malloc_usable_size, linking with the flags in jelibs.
AC_CHECK_LIB([jemalloc],[malloc_usable_size],[have_jemalloc="yes"],[have_jemalloc="no"],
$jelibs)
# Only export link flags when the probe succeeded.
if test "$have_jemalloc" = "yes"; then
JEMALLOC_LIBS="$jelibs"
else
JEMALLOC_LIBS=""
fi
fi
# JEMALLOC_LIBS is substituted into the Makefiles; HAVEJEMALLOC is true when
# it is non-empty.
AC_SUBST([JEMALLOC_LIBS])
AM_CONDITIONAL([HAVEJEMALLOC],[test -n "$JEMALLOC_LIBS"])
# Don't allow both tcmalloc and jemalloc.
# Both have_tcmalloc and have_jemalloc must differ from no for this to fire.
if test "x$have_tcmalloc" != "xno" -a "x$have_jemalloc" != "xno"; then
AC_MSG_ERROR([Cannot use tcmalloc at same time as jemalloc])
fi
# Check for HDF5. This is required.
AX_LIB_HDF5
......@@ -781,6 +813,7 @@ AC_MSG_RESULT([
FFTW3 enabled : $have_fftw3
libNUMA enabled : $have_numa
Using tcmalloc : $have_tcmalloc
Using jemalloc : $have_jemalloc
CPU profiler : $have_profiler
Hydro scheme : $with_hydro
......
......@@ -24,7 +24,7 @@ AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS)
AM_LDFLAGS = $(HDF5_LDFLAGS)
# Extra libraries.
EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS)
EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS)
# MPI libraries.
MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS)
......
......@@ -45,6 +45,9 @@
#define ENGINE_POLICY engine_policy_none
#endif
/* Global profiler. */
struct profiler prof;
/**
* @brief Help messages for the command line parameters.
*/
......
......@@ -25,7 +25,7 @@ AM_LDFLAGS = $(HDF5_LDFLAGS) $(FFTW_LIBS) -version-info 0:0:0
GIT_CMD = @GIT_CMD@
# Additional dependencies for shared libraries.
EXTRA_LIBS = $(HDF5_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS)
EXTRA_LIBS = $(HDF5_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS)
# MPI libraries.
MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS)
......@@ -44,7 +44,8 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h \
physical_constants.h physical_constants_cgs.h potential.h version.h \
hydro_properties.h riemann.h threadpool.h cooling.h cooling_struct.h sourceterms.h \
sourceterms_struct.h statistics.h memswap.h cache.h runner_doiact_vec.h
sourceterms_struct.h statistics.h memswap.h cache.h runner_doiact_vec.h profiler.h \
dump.h
# Common source files
AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
......@@ -53,7 +54,7 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \
physical_constants.c potential.c hydro_properties.c \
runner_doiact_fft.c threadpool.c cooling.c sourceterms.c \
statistics.c runner_doiact_vec.c
statistics.c runner_doiact_vec.c profiler.c dump.c
# Include files for distribution, not installation.
nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
......
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
/* Config parameters. */
#include "../config.h"
/* Some standard headers. */
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
/* This object's header. */
#include "dump.h"
/* Local headers. */
#include "atomic.h"
#include "error.h"
/**
 * @brief Reserve a chunk of memory inside a dump.
 *
 * The dump's byte counter is advanced by @c count through atomic_add() and
 * the chunk beginning at the value returned by that call is handed out.
 * No check is made here that the chunk fits in the current mapping.
 * NOTE(review): this relies on atomic_add() returning the counter value
 * from before the addition -- confirm against atomic.h.
 *
 * @param d The #dump.
 * @param count The number of bytes requested.
 * @param offset (return) The offset of the returned chunk within the dump
 *        file.
 * @return A pointer to the memory-mapped chunk of data.
 */
void *dump_get(struct dump *d, size_t count, size_t *offset) {

  /* Claim the next count bytes of the mapping. */
  const size_t chunk_start = atomic_add(&d->count, count);

  /* Translate the position in the mapping into a position in the file. */
  *offset = chunk_start + d->file_offset;

  char *const mapped_bytes = (char *)d->data;
  return &mapped_bytes[chunk_start];
}
/**
* @brief Ensure that at least size bytes are available in the #dump.
*/
void dump_ensure(struct dump *d, size_t size) {
/* If we have enough space already, just bail. */
if (d->size - d->count > size) return;
/* Unmap the current data. */
size_t trunc_count = d->count & d->page_mask;
if (munmap(d->data, trunc_count > 0 ? trunc_count : 1) != 0) {
error("Failed to unmap %zi bytes of dump data (%s).", trunc_count,
strerror(errno));
}
/* Update the size and count. */
d->file_offset += trunc_count;
d->count -= trunc_count;
d->size = (size * dump_grow_ensure_factor + ~d->page_mask) & d->page_mask;
/* Re-allocate the file size. */
if (posix_fallocate(d->fd, d->file_offset, d->size) != 0) {
error("Failed to pre-allocate the dump file.");
}
/* Re-map starting at the end of the file. */
if ((d->data = mmap(NULL, d->size, PROT_WRITE, MAP_SHARED, d->fd,
d->file_offset)) == MAP_FAILED) {
error("Failed to allocate map of size %zi bytes (%s).", d->size,
strerror(errno));
}
}
/**
 * @brief Flush the #dump to disk.
 *
 * Issues a synchronous msync() over the first @c count bytes of the current
 * mapping and reports an error if that call fails.
 *
 * @param d The #dump.
 */
void dump_sync(struct dump *d) {
  const int status = msync(d->data, d->count, MS_SYNC);
  if (status == 0) return;

  error("Failed to sync memory-mapped data.");
}
/**
 * @brief Finalize the #dump.
 *
 * Releases the memory mapping, trims the file so that it ends right after
 * the last byte handed out by dump_get(), and closes the file descriptor.
 *
 * @param d The #dump.
 */
void dump_close(struct dump *d) {

  /* Unmap the data in memory. The length must be the size of the mapping:
   * using the byte count would fail for an empty dump (a zero length is
   * rejected by munmap) and would leave the unused tail mapped otherwise. */
  if (munmap(d->data, d->size) != 0) {
    error("Failed to unmap dump data (%s).", strerror(errno));
  }

  /* Truncate the file to the correct length, dropping the pre-allocated
   * space that was never written. */
  if (ftruncate(d->fd, d->file_offset + d->count) != 0) {
    error("Failed to truncate dump file (%s).", strerror(errno));
  }

  /* Close the memory-mapped file. */
  if (close(d->fd) != 0) error("Failed to close memory-mapped file.");
}
/**
 * @brief Initialize a file dump.
 *
 * Creates (or opens) the file, pre-allocates the requested number of bytes
 * rounded up to whole pages, maps the file into memory and resets the
 * counters of the #dump.
 *
 * @param d The #dump to initialize.
 * @param filename The fully qualified name of the file in which to dump,
 *                 note that it will be overwritten.
 * @param size The initial buffer size for this #dump, in bytes. It is
 *             rounded up to a multiple of the page size; a value of zero
 *             yields a buffer of one page.
 */
void dump_init(struct dump *d, const char *filename, size_t size) {

  /* Create the output file. */
  if ((d->fd = open(filename, O_CREAT | O_RDWR, 0660)) == -1) {
    error("Failed to create dump file '%s' (%s).", filename, strerror(errno));
  }

  /* Query the page size; the mask keeps the page-address bits only. */
  const long page_size = sysconf(_SC_PAGE_SIZE);
  if (page_size <= 0) {
    error("Failed to get the system page size (%s).", strerror(errno));
  }
  const size_t page_mask = ~((size_t)page_size - 1);

  /* Adjust the size to be at least the page size. Rounding alone leaves a
   * zero request at zero, which neither posix_fallocate nor mmap accept. */
  size = (size + ~page_mask) & page_mask;
  if (size == 0) size = (size_t)page_size;

  /* Pre-allocate the file size. */
  if (posix_fallocate(d->fd, 0, size) != 0) {
    error("Failed to pre-allocate the dump file.");
  }

  /* Map memory to the created file. */
  if ((d->data = mmap(NULL, size, PROT_WRITE, MAP_SHARED, d->fd, 0)) ==
      MAP_FAILED) {
    error("Failed to allocate map of size %zu bytes (%s).", size,
          strerror(errno));
  }

  /* Init some counters. */
  d->size = size;
  d->count = 0;
  d->file_offset = 0;
  d->page_mask = page_mask;
}
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#ifndef SWIFT_DUMP_H
#define SWIFT_DUMP_H
/* Includes. */
#include "lock.h"
/* Some constants. */
#define dump_grow_ensure_factor 10
/**
 * The dump struct: bookkeeping for a file that is written through a
 * memory mapping covering a window of that file.
 */
struct dump {
/* The memory-mapped data of this dump, i.e. the address returned by mmap
 * for the current window. */
void *data;
/* The size of the memory-mapped data, in bytes. */
size_t size;
/* The number of bytes that have been dumped, counted from the start of
 * the current mapping (not from the start of the file). */
size_t count;
/* The offset of the data within the current file, i.e. the file position
 * at which the current mapping starts. */
size_t file_offset;
/* The file with which this memory is associated (file descriptor). */
int fd;
/* Mask containing the significant bits for page addresses; and-ing a byte
 * count with it rounds that count down to a whole number of pages. */
size_t page_mask;
};
/* Function prototypes. */
void dump_init(struct dump *d, const char *filename, size_t size);
void dump_ensure(struct dump *d, size_t size);
void dump_sync(struct dump *d);
void dump_close(struct dump *d);
void *dump_get(struct dump *d, size_t count, size_t *offset);
#endif /* SWIFT_DUMP_H */
......@@ -59,6 +59,7 @@
#include "parallel_io.h"
#include "part.h"
#include "partition.h"
#include "profiler.h"
#include "proxy.h"
#include "runner.h"
#include "serial_io.h"
......@@ -320,6 +321,23 @@ void engine_redistribute(struct engine *e) {
MPI_COMM_WORLD) != MPI_SUCCESS)
error("Failed to allreduce particle transfer counts.");
/* Report how many particles will be moved. */
if (e->verbose) {
if (e->nodeID == 0) {
size_t total = 0;
size_t unmoved = 0;
for (int p = 0, r = 0; p < nr_nodes; p++) {
for (int s = 0; s < nr_nodes; s++) {
total += counts[r];
if (p == s) unmoved += counts[r];
r++;
}
}
message("%ld of %ld (%.2f%%) of particles moved", total - unmoved, total,
100.0 * (double)(total - unmoved) / (double)total);
}
}
/* Get all the g_counts from all the nodes. */
if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
......@@ -3408,7 +3426,7 @@ void engine_print_policy(struct engine *e) {
#else
printf("%s engine_policy: engine policies are [ ",
clocks_get_timesincestart());
for (int k = 1; k < 32; k++)
for (int k = 1; k < 31; k++)
if (e->policy & (1 << k)) printf(" %s ", engine_policy_names[k + 1]);
printf(" ]\n");
fflush(stdout);
......
......@@ -66,7 +66,9 @@ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy(
__attribute__((always_inline)) INLINE static float hydro_get_pressure(
const struct part *restrict p, float dt) {
return p->force.pressure;
const float u = p->u + p->u_dt * dt;
return gas_pressure_from_internal_energy(p->rho, u);
}
/**
......
......@@ -278,6 +278,18 @@ static void split_metis(struct space *s, int nregions, int *celllist) {
#endif
#if defined(WITH_MPI) && defined(HAVE_METIS)
/* qsort support: a count tagged with the index it was accumulated under, so
 * that the index survives the re-ordering done by the sort. */
struct indexval {
  int index;
  int count;
};

/**
 * @brief qsort() comparison function ordering #indexval elements by
 *        decreasing count.
 *
 * @param p1 Pointer to the first #indexval.
 * @param p2 Pointer to the second #indexval.
 * @return The count of the second element minus the count of the first:
 *         negative when the first has the larger count, positive when the
 *         second has, zero when they are equal.
 */
static int indexvalcmp(const void *p1, const void *p2) {
  const int first_count = ((const struct indexval *)p1)->count;
  const int second_count = ((const struct indexval *)p2)->count;
  return second_count - first_count;
}
/**
* @brief Partition the given space into a number of connected regions.
*
......@@ -382,14 +394,70 @@ static void pick_metis(struct space *s, int nregions, int *vertexw, int *edgew,
if (regionid[k] < 0 || regionid[k] >= nregions)
error("Got bad nodeID %" PRIDX " for cell %i.", regionid[k], k);
/* We want a solution in which the current regions of the space are
* preserved when possible, to avoid unnecessary particle movement.
* So create a 2d-array of cell counts that are common to all pairs
* of old and new ranks. Each element of the array has a cell count and
* a unique index so we can sort into decreasing counts. */
int indmax = nregions * nregions;
struct indexval *ivs = malloc(sizeof(struct indexval) * indmax);
bzero(ivs, sizeof(struct indexval) * indmax);
for (int k = 0; k < ncells; k++) {
int index = regionid[k] + nregions * s->cells_top[k].nodeID;
ivs[index].count++;
ivs[index].index = index;
}
qsort(ivs, indmax, sizeof(struct indexval), indexvalcmp);
/* Go through the ivs using the largest counts first, these are the
* regions with the most cells in common, old partition to new. */
int *oldmap = malloc(sizeof(int) * nregions);
int *newmap = malloc(sizeof(int) * nregions);
for (int k = 0; k < nregions; k++) {
oldmap[k] = -1;
newmap[k] = -1;
}
for (int k = 0; k < indmax; k++) {
/* Stop when all regions with common cells have been considered. */
if (ivs[k].count == 0) break;
/* Store old and new IDs, if not already used. */
int oldregion = ivs[k].index / nregions;
int newregion = ivs[k].index - oldregion * nregions;
if (newmap[newregion] == -1 && oldmap[oldregion] == -1) {
newmap[newregion] = oldregion;
oldmap[oldregion] = newregion;
}
}
/* Handle any regions that did not get selected by picking an unused rank
* from oldmap and assigning to newmap. */
int spare = 0;
for (int k = 0; k < nregions; k++) {
if (newmap[k] == -1) {
for (int j = spare; j < nregions; j++) {
if (oldmap[j] == -1) {
newmap[k] = j;
oldmap[j] = j;
spare = j;
break;
}
}
}
}
/* Set the cell list to the region index. */
for (int k = 0; k < ncells; k++) {
celllist[k] = regionid[k];
celllist[k] = newmap[regionid[k]];
}
/* Clean up. */
if (weights_v != NULL) free(weights_v);
if (weights_e != NULL) free(weights_e);
free(ivs);
free(oldmap);
free(newmap);
free(xadj);
free(adjncy);
free(regionid);
......
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2016 James S. Willis (james.s.willis@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
/* Config parameters. */
#include "../config.h"
/* Some standard headers. */
#include <string.h>
/* This object's header. */
#include "profiler.h"
/* Local includes */
#include "clocks.h"
#include "hydro.h"
#include "version.h"
/**
 * @brief Resets all timers.
 *
 * Every accumulated time stored in the #profiler is set back to zero; the
 * output file pointers are left untouched.
 *
 * @param profiler #profiler object that holds file pointers and
 * function timers.
 */
void profiler_reset_timers(struct profiler *profiler) {

  struct profiler *const p = profiler;

  /* Top-level engine operations. */
  p->collect_timesteps_time = 0;
  p->drift_time = 0;
  p->rebuild_time = 0;

  /* Scheduler operations. */
  p->reweight_time = 0;
  p->clear_waits_time = 0;
  p->re_wait_time = 0;
  p->enqueue_time = 0;

  /* Statistics and task launching. */
  p->stats_time = 0;
  p->launch_time = 0;

  /* Rebuild and task construction. */
  p->space_rebuild_time = 0;
  p->engine_maketasks_time = 0;
  p->engine_marktasks_time = 0;

  /* Space operations. */
  p->space_regrid_time = 0;
  p->space_parts_sort_time = 0;
  p->space_split_time = 0;
  p->space_parts_get_cell_id_time = 0;
  p->space_count_parts_time = 0;
}
/**
 * @brief Opens an output file and populates the header.
 *
 * The file is named "<fileName>_<N>.txt" where N is the number of MPI ranks
 * times the number of threads of the engine. On failure (file name too long
 * or file cannot be opened) a message is printed to stderr and @c *file is
 * set to NULL, so callers must not write to it in that case.
 *
 * @param e #engine object to get various properties.
 * @param fileName name of file to be written to.
 * @param functionName name of function that is being timed.
 * @param file (return) pointer used to open output file.
 */
void profiler_write_timing_info_header(const struct engine *e, char *fileName,
                                       char *functionName, FILE **file) {

  /* Create the file name in the format: "fileName_(no. of threads)".
   * The write is bounded so that a long name cannot overrun the buffer. */
  char fullFileName[200] = "";
  const int length = snprintf(fullFileName, sizeof(fullFileName), "%s_%d.txt",
                              fileName, e->nr_nodes * e->nr_threads);
  if (length < 0 || (size_t)length >= sizeof(fullFileName)) {
    fprintf(stderr, "%s: file name for '%s' does not fit in %zu characters.\n",
            __func__, fileName, sizeof(fullFileName));
    *file = NULL;
    return;
  }

  /* Open the file, refusing to write through a NULL stream. */
  *file = fopen(fullFileName, "w");
  if (*file == NULL) {
    fprintf(stderr, "%s: could not create file '%s'.\n", __func__,
            fullFileName);
    return;
  }

  /* Write the header.
   * NOTE(review): the "Branch" field is filled with functionName rather than
   * a git branch name -- confirm whether this is intended. */
  fprintf(*file,
          "# Host: %s\n# Branch: %s\n# Revision: %s\n# Compiler: %s, "
          "Version: %s \n# "
          "Number of threads: %d\n# Number of MPI ranks: %d\n# Hydrodynamic "
          "scheme: %s\n# Hydrodynamic kernel: %s\n# No. of neighbours: %.2f "
          "+/- %.2f\n# Eta: %f\n"
          "# %6s %14s %14s %10s %10s %16s [%s]\n",
          hostname(), functionName, git_revision(), compiler_name(),
          compiler_version(), e->nr_threads, e->nr_nodes, SPH_IMPLEMENTATION,
          kernel_name, e->hydro_properties->target_neighbours,
          e->hydro_properties->delta_neighbours,
          e->hydro_properties->eta_neighbours, "Step", "Time", "Time-step",
          "Updates", "g-Updates", "Wall-clock time", clocks_getunit());

  fflush(*file);
}
/**
 * @brief Writes the headers for all output files. Should be called once at the
 * start of the simulation, it could be called in engine_init() for example.
 *
 * One file is opened per timed function by calling
 * profiler_write_timing_info_header() for each entry of the table below, in
 * the order in which the entries are listed.
 *
 * @param e #engine object to get various properties.
 * @param profiler #profiler object that holds file pointers and
 * function timers.
 */
void profiler_write_all_timing_info_headers(const struct engine *e,
                                            struct profiler *profiler) {

  /* Output file stem, name of the timed function and where to store the
   * opened stream. */
  struct {
    char *file_stem;
    char *function_name;
    FILE **stream;
  } outputs[] = {
      {"enginecollecttimesteps", "engine_collect_timesteps",
       &profiler->file_engine_collect_timesteps},
      {"enginedrift", "engine_drift", &profiler->file_engine_drift},
      {"enginerebuild", "engine_rebuild", &profiler->file_engine_rebuild},
      {"schedulerreweight", "scheduler_reweight",
       &profiler->file_scheduler_reweight},
      {"schedulerclearwaits", "scheduler_clear_waits",
       &profiler->file_scheduler_clear_waits},
      {"schedulerrewait", "scheduler_rewait",
       &profiler->file_scheduler_re_wait},
      {"schedulerenqueue", "scheduler_enqueue",
       &profiler->file_scheduler_enqueue},
      {"engineprintstats", "engine_print_stats", &profiler->file_engine_stats},
      {"enginelaunch", "engine_launch", &profiler->file_engine_launch},
      {"spacerebuild", "space_rebuild", &profiler->file_space_rebuild},
      {"enginemaketasks", "engine_maketasks",
       &profiler->file_engine_maketasks},
      {"enginemarktasks", "engine_marktasks",
       &profiler->file_engine_marktasks},
      {"spaceregrid", "space_regrid", &profiler->file_space_regrid},
      {"spacepartssort", "space_parts_sort",
       &profiler->file_space_parts_sort},
      {"spacesplit", "space_split", &profiler->file_space_split},
      {"spacegetcellid", "space_get_cell_id",
       &profiler->file_space_parts_get_cell_id},
      {"spacecountparts", "space_count_parts",
       &profiler->file_space_count_parts},
  };
  const size_t nr_outputs = sizeof(outputs) / sizeof(outputs[0]);

  for (size_t k = 0; k < nr_outputs; k++) {
    profiler_write_timing_info_header(e, outputs[k].file_stem,
                                      outputs[k].function_name,
                                      outputs[k].stream);
  }
}
/**
* @brief Writes timing info to the output file.
*
* @param e #engine object to get various properties.
* @param time Time in ticks to be written to the output file.
* @param file pointer used to open output file.
*/
void profiler_write_timing_info(const struct engine *e, ticks time,
FILE *file) {