Commit bdfd6679 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Merge branch 'master' into drift_on_demand

parents 6676d261 2ab1e4d0
......@@ -45,6 +45,9 @@
#define ENGINE_POLICY engine_policy_none
#endif
/* Global profiler. */
struct profiler prof;
/**
* @brief Help messages for the command line parameters.
*/
......
......@@ -44,8 +44,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h \
physical_constants.h physical_constants_cgs.h potential.h version.h \
hydro_properties.h riemann.h threadpool.h cooling.h cooling_struct.h sourceterms.h \
sourceterms_struct.h statistics.h memswap.h
sourceterms_struct.h statistics.h memswap.h profiler.h
# Common source files
AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
......@@ -54,7 +53,7 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \
physical_constants.c potential.c hydro_properties.c \
runner_doiact_fft.c threadpool.c cooling.c sourceterms.c \
statistics.c
statistics.c profiler.c
# Include files for distribution, not installation.
nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
......
......@@ -59,6 +59,7 @@
#include "parallel_io.h"
#include "part.h"
#include "partition.h"
#include "profiler.h"
#include "proxy.h"
#include "runner.h"
#include "serial_io.h"
......@@ -322,6 +323,23 @@ void engine_redistribute(struct engine *e) {
MPI_COMM_WORLD) != MPI_SUCCESS)
error("Failed to allreduce particle transfer counts.");
/* Report how many particles will be moved. */
if (e->verbose) {
if (e->nodeID == 0) {
size_t total = 0;
size_t unmoved = 0;
for (int p = 0, r = 0; p < nr_nodes; p++) {
for (int s = 0; s < nr_nodes; s++) {
total += counts[r];
if (p == s) unmoved += counts[r];
r++;
}
}
message("%ld of %ld (%.2f%%) of particles moved", total - unmoved, total,
100.0 * (double)(total - unmoved) / (double)total);
}
}
/* Get all the g_counts from all the nodes. */
if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
......
......@@ -66,7 +66,9 @@ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy(
__attribute__((always_inline)) INLINE static float hydro_get_pressure(
const struct part *restrict p, float dt) {
return p->force.pressure;
const float u = p->u + p->u_dt * dt;
return gas_pressure_from_internal_energy(p->rho, u);
}
/**
......
......@@ -278,6 +278,18 @@ static void split_metis(struct space *s, int nregions, int *celllist) {
#endif
#if defined(WITH_MPI) && defined(HAVE_METIS)
/* qsort support: an (index, count) pair that can be ordered by its count. */
struct indexval {
  int index;
  int count;
};

/**
 * @brief qsort() comparison function ordering #indexval elements by
 *        decreasing count.
 *
 * The result is built from two comparisons rather than from a subtraction of
 * the counts, since a subtraction overflows (undefined behaviour) when the
 * two counts have opposite signs and large magnitudes.
 *
 * @param p1 Pointer to the first #indexval.
 * @param p2 Pointer to the second #indexval.
 *
 * @return A negative value if p1 has the larger count (so it sorts first), a
 *         positive value if p2 has the larger count and zero if both counts
 *         are equal.
 */
static int indexvalcmp(const void *p1, const void *p2) {
  const struct indexval *iv1 = (const struct indexval *)p1;
  const struct indexval *iv2 = (const struct indexval *)p2;
  return (iv2->count > iv1->count) - (iv2->count < iv1->count);
}
/**
* @brief Partition the given space into a number of connected regions.
*
......@@ -382,14 +394,70 @@ static void pick_metis(struct space *s, int nregions, int *vertexw, int *edgew,
if (regionid[k] < 0 || regionid[k] >= nregions)
error("Got bad nodeID %" PRIDX " for cell %i.", regionid[k], k);
/* We want a solution in which the current regions of the space are
* preserved when possible, to avoid unnecessary particle movement.
* So create a 2d-array of cell counts that are common to all pairs
* of old and new ranks. Each element of the array has a cell count and
* a unique index so we can sort into decreasing counts. */
int indmax = nregions * nregions;
struct indexval *ivs = malloc(sizeof(struct indexval) * indmax);
bzero(ivs, sizeof(struct indexval) * indmax);
for (int k = 0; k < ncells; k++) {
int index = regionid[k] + nregions * s->cells_top[k].nodeID;
ivs[index].count++;
ivs[index].index = index;
}
qsort(ivs, indmax, sizeof(struct indexval), indexvalcmp);
/* Go through the ivs using the largest counts first, these are the
* regions with the most cells in common, old partition to new. */
int *oldmap = malloc(sizeof(int) * nregions);
int *newmap = malloc(sizeof(int) * nregions);
for (int k = 0; k < nregions; k++) {
oldmap[k] = -1;
newmap[k] = -1;
}
for (int k = 0; k < indmax; k++) {
/* Stop when all regions with common cells have been considered. */
if (ivs[k].count == 0) break;
/* Store old and new IDs, if not already used. */
int oldregion = ivs[k].index / nregions;
int newregion = ivs[k].index - oldregion * nregions;
if (newmap[newregion] == -1 && oldmap[oldregion] == -1) {
newmap[newregion] = oldregion;
oldmap[oldregion] = newregion;
}
}
/* Handle any regions that did not get selected by picking an unused rank
* from oldmap and assigning to newmap. */
int spare = 0;
for (int k = 0; k < nregions; k++) {
if (newmap[k] == -1) {
for (int j = spare; j < nregions; j++) {
if (oldmap[j] == -1) {
newmap[k] = j;
oldmap[j] = j;
spare = j;
break;
}
}
}
}
/* Set the cell list to the region index. */
for (int k = 0; k < ncells; k++) {
celllist[k] = regionid[k];
celllist[k] = newmap[regionid[k]];
}
/* Clean up. */
if (weights_v != NULL) free(weights_v);
if (weights_e != NULL) free(weights_e);
free(ivs);
free(oldmap);
free(newmap);
free(xadj);
free(adjncy);
free(regionid);
......
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2016 James S. Willis (james.s.willis@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
/* Config parameters. */
#include "../config.h"
/* Some standard headers. */
#include <string.h>
/* This object's header. */
#include "profiler.h"
/* Local includes */
#include "clocks.h"
#include "hydro.h"
#include "version.h"
/**
 * @brief Sets every function timer of the profiler back to zero.
 *
 * Only the tick counters are modified; the output file pointers held by the
 * profiler are left untouched.
 *
 * @param profiler #profiler object whose timers are zeroed.
 */
void profiler_reset_timers(struct profiler *profiler) {

  /* Engine timers. */
  profiler->collect_timesteps_time = 0;
  profiler->drift_time = 0;
  profiler->rebuild_time = 0;
  profiler->stats_time = 0;
  profiler->launch_time = 0;
  profiler->engine_maketasks_time = 0;
  profiler->engine_marktasks_time = 0;

  /* Scheduler timers. */
  profiler->reweight_time = 0;
  profiler->clear_waits_time = 0;
  profiler->re_wait_time = 0;
  profiler->enqueue_time = 0;

  /* Space timers. */
  profiler->space_rebuild_time = 0;
  profiler->space_regrid_time = 0;
  profiler->space_parts_sort_time = 0;
  profiler->space_split_time = 0;
  profiler->space_parts_get_cell_id_time = 0;
  profiler->space_count_parts_time = 0;
}
/**
 * @brief Opens an output file and populates the header.
 *
 * The file is called "<fileName>_<nr_nodes * nr_threads>.txt" and is opened
 * for writing, so any existing file of that name is overwritten. If the name
 * does not fit in the local buffer, or if the file cannot be opened, a
 * message is printed to stderr and *file is left set to NULL.
 *
 * @param e #engine object to get various properties.
 * @param fileName base name of the file to be written to.
 * @param functionName name of function that is being timed.
 * @param file (return) pointer used to open output file, NULL on failure.
 */
void profiler_write_timing_info_header(const struct engine *e, char *fileName,
                                       char *functionName, FILE **file) {

  /* Never hand back an indeterminate pointer if something goes wrong. */
  *file = NULL;

  /* Create the file name in the format:
   * "fileName_(no. of MPI ranks * no. of threads).txt" */
  char fullFileName[200] = "";
  const int len = snprintf(fullFileName, sizeof(fullFileName), "%s_%d.txt",
                           fileName, e->nr_nodes * e->nr_threads);
  if (len < 0 || (size_t)len >= sizeof(fullFileName)) {
    fprintf(stderr, "profiler: name of timing file for '%s' is too long.\n",
            functionName);
    return;
  }

  /* Open the file and write the header. */
  FILE *out = fopen(fullFileName, "w");
  if (out == NULL) {
    fprintf(stderr, "profiler: could not open timing file '%s'.\n",
            fullFileName);
    return;
  }

  /* NOTE(review): the "# Branch:" field below is filled with functionName,
   * not with a git branch name. Confirm whether that is intended. */
  fprintf(out,
          "# Host: %s\n# Branch: %s\n# Revision: %s\n# Compiler: %s, "
          "Version: %s \n# "
          "Number of threads: %d\n# Number of MPI ranks: %d\n# Hydrodynamic "
          "scheme: %s\n# Hydrodynamic kernel: %s\n# No. of neighbours: %.2f "
          "+/- %.2f\n# Eta: %f\n"
          "# %6s %14s %14s %10s %10s %16s [%s]\n",
          hostname(), functionName, git_revision(), compiler_name(),
          compiler_version(), e->nr_threads, e->nr_nodes, SPH_IMPLEMENTATION,
          kernel_name, e->hydro_properties->target_neighbours,
          e->hydro_properties->delta_neighbours,
          e->hydro_properties->eta_neighbours, "Step", "Time", "Time-step",
          "Updates", "g-Updates", "Wall-clock time", clocks_getunit());
  fflush(out);

  *file = out;
}
/**
 * @brief Creates the timing output file of every profiled function and writes
 *        its header. Should be called once at the start of the simulation, it
 *        could be called in engine_init() for example.
 *
 * @param e #engine object to get various properties.
 * @param profiler #profiler object that receives the opened file pointers.
 */
void profiler_write_all_timing_info_headers(const struct engine *e,
                                            struct profiler *profiler) {

  /* One row per output file: base file name, name of the timed function and
   * where to store the opened stream. Rows are in file-creation order. */
  struct {
    char *file_name;
    char *function_name;
    FILE **file;
  } outputs[] = {
      {"enginecollecttimesteps", "engine_collect_timesteps",
       &profiler->file_engine_collect_timesteps},
      {"enginedrift", "engine_drift", &profiler->file_engine_drift},
      {"enginerebuild", "engine_rebuild", &profiler->file_engine_rebuild},
      {"schedulerreweight", "scheduler_reweight",
       &profiler->file_scheduler_reweight},
      {"schedulerclearwaits", "scheduler_clear_waits",
       &profiler->file_scheduler_clear_waits},
      {"schedulerrewait", "scheduler_rewait",
       &profiler->file_scheduler_re_wait},
      {"schedulerenqueue", "scheduler_enqueue",
       &profiler->file_scheduler_enqueue},
      {"engineprintstats", "engine_print_stats", &profiler->file_engine_stats},
      {"enginelaunch", "engine_launch", &profiler->file_engine_launch},
      {"spacerebuild", "space_rebuild", &profiler->file_space_rebuild},
      {"enginemaketasks", "engine_maketasks",
       &profiler->file_engine_maketasks},
      {"enginemarktasks", "engine_marktasks",
       &profiler->file_engine_marktasks},
      {"spaceregrid", "space_regrid", &profiler->file_space_regrid},
      {"spacepartssort", "space_parts_sort",
       &profiler->file_space_parts_sort},
      {"spacesplit", "space_split", &profiler->file_space_split},
      {"spacegetcellid", "space_get_cell_id",
       &profiler->file_space_parts_get_cell_id},
      {"spacecountparts", "space_count_parts",
       &profiler->file_space_count_parts},
  };
  const int nr_outputs = (int)(sizeof(outputs) / sizeof(outputs[0]));

  for (int k = 0; k < nr_outputs; k++)
    profiler_write_timing_info_header(e, outputs[k].file_name,
                                      outputs[k].function_name,
                                      outputs[k].file);
}
/**
 * @brief Writes timing info to the output file.
 *
 * One line is written, holding the engine's step, time, time-step and update
 * counts followed by the given time converted with clocks_from_ticks(). The
 * stream is flushed after the write. Nothing is done if file is NULL, which
 * is the case when the output file could not be opened.
 *
 * @param e #engine object to get various properties.
 * @param time Time in ticks to be written to the output file.
 * @param file pointer used to open output file, may be NULL.
 */
void profiler_write_timing_info(const struct engine *e, ticks time,
                                FILE *file) {

  /* Passing a NULL stream to fprintf() is undefined behaviour. */
  if (file == NULL) return;

  fprintf(file, " %6d %14e %14e %10zu %10zu %21.3f\n", e->step, e->time,
          e->timeStep, e->updates, e->g_updates, clocks_from_ticks(time));
  fflush(file);
}
/**
 * @brief Writes timing info to all output files. Should be called at the end of
 * every time step, in engine_step() for example.
 *
 * Every timer of the profiler is written to its own output file, including
 * the engine_collect_timesteps timer, and all timers are then reset to zero
 * so that the next call reports the next step only.
 *
 * @param e #engine object to get various properties.
 * @param profiler #profiler object that holds file pointers and
 * function timers.
 */
void profiler_write_all_timing_info(const struct engine *e,
                                    struct profiler *profiler) {

  /* This timer has its own file and header and is reset with the others, so
   * it is reported with the others as well. */
  profiler_write_timing_info(e, profiler->collect_timesteps_time,
                             profiler->file_engine_collect_timesteps);
  profiler_write_timing_info(e, profiler->drift_time,
                             profiler->file_engine_drift);
  profiler_write_timing_info(e, profiler->rebuild_time,
                             profiler->file_engine_rebuild);
  profiler_write_timing_info(e, profiler->reweight_time,
                             profiler->file_scheduler_reweight);
  profiler_write_timing_info(e, profiler->clear_waits_time,
                             profiler->file_scheduler_clear_waits);
  profiler_write_timing_info(e, profiler->re_wait_time,
                             profiler->file_scheduler_re_wait);
  profiler_write_timing_info(e, profiler->enqueue_time,
                             profiler->file_scheduler_enqueue);
  profiler_write_timing_info(e, profiler->stats_time,
                             profiler->file_engine_stats);
  profiler_write_timing_info(e, profiler->launch_time,
                             profiler->file_engine_launch);
  profiler_write_timing_info(e, profiler->space_rebuild_time,
                             profiler->file_space_rebuild);
  profiler_write_timing_info(e, profiler->engine_maketasks_time,
                             profiler->file_engine_maketasks);
  profiler_write_timing_info(e, profiler->engine_marktasks_time,
                             profiler->file_engine_marktasks);
  profiler_write_timing_info(e, profiler->space_regrid_time,
                             profiler->file_space_regrid);
  profiler_write_timing_info(e, profiler->space_parts_sort_time,
                             profiler->file_space_parts_sort);
  profiler_write_timing_info(e, profiler->space_split_time,
                             profiler->file_space_split);
  profiler_write_timing_info(e, profiler->space_parts_get_cell_id_time,
                             profiler->file_space_parts_get_cell_id);
  profiler_write_timing_info(e, profiler->space_count_parts_time,
                             profiler->file_space_count_parts);

  /* Reset timers. */
  profiler_reset_timers(profiler);
}
/**
 * @brief Closes one profiler output file, if it is open.
 *
 * The pointer is reset to NULL afterwards so that closing twice is harmless.
 *
 * @param file Address of the file pointer to close, *file may be NULL.
 */
static void profiler_close_file(FILE **file) {

  /* fclose(NULL) is undefined behaviour, unlike free(NULL). */
  if (*file == NULL) return;

  if (fclose(*file) != 0)
    fprintf(stderr, "profiler: error while closing a timing file.\n");
  *file = NULL;
}

/**
 * @brief Closes all output files, should be called at the end of the
 * simulation.
 *
 * This includes the engine_collect_timesteps file. Files that are not open
 * (NULL pointers) are skipped.
 *
 * @param profiler #profiler object that holds file pointers and
 * function timers.
 */
void profiler_close_files(struct profiler *profiler) {

  profiler_close_file(&profiler->file_engine_collect_timesteps);
  profiler_close_file(&profiler->file_engine_drift);
  profiler_close_file(&profiler->file_engine_rebuild);
  profiler_close_file(&profiler->file_scheduler_reweight);
  profiler_close_file(&profiler->file_scheduler_clear_waits);
  profiler_close_file(&profiler->file_scheduler_re_wait);
  profiler_close_file(&profiler->file_scheduler_enqueue);
  profiler_close_file(&profiler->file_engine_stats);
  profiler_close_file(&profiler->file_engine_launch);
  profiler_close_file(&profiler->file_space_rebuild);
  profiler_close_file(&profiler->file_engine_maketasks);
  profiler_close_file(&profiler->file_engine_marktasks);
  profiler_close_file(&profiler->file_space_regrid);
  profiler_close_file(&profiler->file_space_parts_sort);
  profiler_close_file(&profiler->file_space_split);
  profiler_close_file(&profiler->file_space_parts_get_cell_id);
  profiler_close_file(&profiler->file_space_count_parts);
}
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2016 James S. Willis (james.s.willis@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#ifndef SWIFT_PROFILER_H
#define SWIFT_PROFILER_H
/* Config parameters. */
#include "../config.h"
/* Local includes */
#include "engine.h"
/* Profiler that holds file pointers and time taken in functions.
 *
 * Every timed function has one FILE pointer and one tick counter below. */
struct profiler {
/* File pointers for timing info, one output file per timed function.
 * They are assigned by profiler_write_all_timing_info_headers(). */
FILE *file_engine_collect_timesteps;
FILE *file_engine_drift;
FILE *file_engine_rebuild;
FILE *file_scheduler_reweight;
FILE *file_scheduler_clear_waits;
FILE *file_scheduler_re_wait;
FILE *file_scheduler_enqueue;
FILE *file_engine_stats;
FILE *file_engine_launch;
FILE *file_space_rebuild;
FILE *file_engine_maketasks;
FILE *file_engine_marktasks;
FILE *file_space_regrid;
FILE *file_space_parts_sort;
FILE *file_space_split;
FILE *file_space_parts_get_cell_id;
FILE *file_space_count_parts;
/* Time taken in functions, in ticks. All of these are set back to zero
 * by profiler_reset_timers(). */
ticks collect_timesteps_time;
ticks drift_time;
ticks rebuild_time;
ticks reweight_time;
ticks clear_waits_time;
ticks re_wait_time;
ticks enqueue_time;
ticks stats_time;
ticks launch_time;
ticks space_rebuild_time;
ticks engine_maketasks_time;
ticks engine_marktasks_time;
ticks space_regrid_time;
ticks space_parts_sort_time;
ticks space_split_time;
ticks space_parts_get_cell_id_time;
ticks space_count_parts_time;
};
/* Function prototypes. */
void profiler_reset_timers(struct profiler *profiler);
void profiler_write_all_timing_info_headers(const struct engine *e,
struct profiler *profiler);
void profiler_write_all_timing_info(const struct engine *e,
struct profiler *profiler);
void profiler_close_files(struct profiler *profiler);
#endif /* SWIFT_PROFILER_H */
......@@ -173,18 +173,69 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
*
* @param s The #space.
* @param c The #cell to recycle.
* @param rec_begin Pointer to the start of the list of cells to recycle.
* @param rec_end Pointer to the end of the list of cells to recycle.
*/
void space_rebuild_recycle(struct space *s, struct cell *c) {
void space_rebuild_recycle_rec(struct space *s, struct cell *c,
struct cell **rec_begin, struct cell **rec_end) {
if (c->split)
for (int k = 0; k < 8; k++)
if (c->progeny[k] != NULL) {
space_rebuild_recycle(s, c->progeny[k]);
space_recycle(s, c->progeny[k]);
space_rebuild_recycle_rec(s, c->progeny[k], rec_begin, rec_end);
c->progeny[k]->next = *rec_begin;
*rec_begin = c->progeny[k];
if (*rec_end == NULL) *rec_end = *rec_begin;
c->progeny[k] = NULL;
}
}
/**
 * @brief Recycles the progeny of a batch of cells and resets the cells
 *        themselves, written as a mapper function for threadpool_map().
 *
 * For every cell of the batch, the sub-cells are collected into a list with
 * space_rebuild_recycle_rec() and handed back in a single call to
 * space_recycle_list(). The task pointers, counts and sort state of the cell
 * are then cleared, its sort array is freed and its super pointer is made to
 * point at the cell itself.
 *
 * @param map_data Pointer to the first #cell of the batch.
 * @param num_elements Number of cells in the batch.
 * @param extra_data Pointer to the #space, passed on to the recycling calls.
 */
void space_rebuild_recycle_mapper(void *map_data, int num_elements,
                                  void *extra_data) {

  struct cell *const batch = (struct cell *)map_data;
  struct space *const sp = (struct space *)extra_data;

  for (int ind = 0; ind < num_elements; ind++) {
    struct cell *const c = batch + ind;

    /* Gather all the progeny in one list and give them back together. */
    struct cell *list_head = NULL;
    struct cell *list_tail = NULL;
    space_rebuild_recycle_rec(sp, c, &list_head, &list_tail);
    if (list_head != NULL) space_recycle_list(sp, list_head, list_tail);

    /* Counters and sort state. */
    c->nr_tasks = 0;
    c->count = 0;
    c->gcount = 0;
    c->dx_max = 0.0f;
    c->sorted = 0;

    /* Task pointers. */
    c->sorts = NULL;
    c->density = NULL;
    c->gradient = NULL;
    c->force = NULL;
    c->grav = NULL;
    c->init = NULL;
    c->extra_ghost = NULL;
    c->ghost = NULL;
    c->kick = NULL;
    c->drift = NULL;
    c->cooling = NULL;
    c->sourceterms = NULL;

    /* A freshly reset cell is its own super cell. */
    c->super = c;

    /* free(NULL) is a no-op, so no check is needed before releasing. */
    free(c->sort);
    c->sort = NULL;

#if WITH_MPI
    c->recv_xv = NULL;
    c->recv_rho = NULL;
    c->recv_gradient = NULL;
    c->recv_ti = NULL;

    c->send_xv = NULL;
    c->send_rho = NULL;
    c->send_gradient = NULL;
    c->send_ti = NULL;
#endif
  }
}
/**
* @brief Re-build the top-level cell grid.
*
......@@ -303,10 +354,8 @@ void space_regrid(struct space *s, int verbose) {
/* Free the old cells, if they were allocated. */
if (s->cells_top != NULL) {
for (int k = 0; k < s->nr_cells; k++) {
space_rebuild_recycle(s, &s->cells_top[k]);
if (s->cells_top[k].sort != NULL) free(s->cells_top[k].sort);
}
threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
s->cells_top, s->nr_cells, sizeof(struct cell), 100, s);
free(s->cells_top);
s->maxdepth = 0;
}
......@@ -394,44 +443,12 @@ void space_regrid(struct space *s, int verbose) {
// message( "rebuilding upper-level cells took %.3f %s." ,
// clocks_from_ticks(double)(getticks() - tic), clocks_getunit());
} /* re-build upper-level cells? */
} /* re-build upper-level cells? */
else { /* Otherwise, just clean up the cells. */
/* Free the old cells, if they were allocated. */
for (int k = 0; k < s->nr_cells; k++) {
space_rebuild_recycle(s, &s->cells_top[k]);
s->cells_top[k].sorts = NULL;
s->cells_top[k].nr_tasks = 0;
s->cells_top[k].density = NULL;
s->cells_top[k].gradient = NULL;
s->cells_top[k].force = NULL;
s->cells_top[k].grav = NULL;
s->cells_top[k].dx_max = 0.0f;
s->cells_top[k].sorted = 0;
s->cells_top[k].count = 0;
s->cells_top[k].gcount = 0;
s->cells_top[k].init = NULL;
s->cells_top[k].extra_ghost = NULL;
s->cells_top[k].ghost = NULL;
s->cells_top[k].kick = NULL;
s->cells_top[k].drift = NULL;
s->cells_top[k].cooling = NULL;
s->cells_top[k].sourceterms = NULL;
s->cells_top[k].super = &s->cells_top[k];
s->cells_top[k].ti_old = 0;
#if WITH_MPI
s->cells_top[k].recv_xv = NULL;
s->cells_top[k].recv_rho = NULL;
s->cells_top[k].recv_gradient = NULL;
s->cells_top[k].recv_ti = NULL;
s->cells_top[k].send_xv = NULL;
s->cells_top[k].send_rho = NULL;
s->cells_top[k].send_gradient = NULL;
s->cells_top[k].send_ti = NULL;
#endif
}
threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
s->cells_top, s->nr_cells, sizeof(struct cell), 100, s);
s->maxdepth = 0;
}
......@@ -1616,24 +1633,22 @@ void space_split_mapper(void *map_data, int num_cells, void *extra_data) {
}
/**
* @brief Return a used cell to the buffer od unused sub-cells.
* @brief Return a used cell to the buffer of unused sub-cells.
*