Commit d37e1e19 authored by Matthieu Schaller

Merge branch 'master' into gravity_fixes

parents d5525d15 a989b0c0
@@ -26,11 +26,13 @@ examples/*/*.xmf
examples/*/*.hdf5
examples/*/*.png
examples/*/*.txt
examples/*/*.dot
examples/*/used_parameters.yml
examples/*/*/*.xmf
examples/*/*/*.hdf5
examples/*/*/*.png
examples/*/*/*.txt
examples/*/*/*.dot
examples/*/*/used_parameters.yml
examples/*/gravity_checks_*.dat
......
#!/usr/bin/env python
"""
Usage:
    analysedumpcells.py nx ny nz cell<1>.dat cell<2>.dat ...

Analyses a number of output files created by calls to the dumpCells() debug
function (presumably called in engine_step()) to output a list of active
top-level cells, identifying those that are on the edges of the volumes being
processed by the various nodes. The point is that the number of such cells
should be minimised to reduce the MPI communications.

The nx, ny and nz arguments are the numbers of cells in the complete space;
we need these so that we can wrap around the edge of space correctly.
This file is part of SWIFT.
Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import pylab as pl
import numpy as np
import sys
import pandas
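# Column indices in the cell dump files. These match the header written by
# dumpCells() below: x y z xw yw zw step count gcount scount actcount depth
# tasks ti_end_min timebin issuper active rank local.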
xcol = 0
ycol = 1
zcol = 2
xwcol = 3
ywcol = 4
zwcol = 5
localcol = 18
supercol = 15
activecol = 16
# Command-line arguments.
if len(sys.argv) < 5:
    print "usage: ", sys.argv[0], " nx ny nz cell1.dat cell2.dat ..."
    sys.exit(1)
nx = int(sys.argv[1])
ny = int(sys.argv[2])
nz = int(sys.argv[3])
print "# x y z onedge"
allactives = []
onedge = 0
tcount = 0
for i in range(4, len(sys.argv)):

    # Read the file.
    data = pl.loadtxt(sys.argv[i])
    # print data

    # Select cells that are on the current rank and are super cells.
    rdata = data[data[:, localcol] == 1]
    tdata = rdata[rdata[:, supercol] == 1]

    # Separation of the cells is in the data.
    xwidth = tdata[0, xwcol]
    ywidth = tdata[0, ywcol]
    zwidth = tdata[0, zwcol]

    # Fill the nx, ny, nz space with all top-level cells and flag their
    # active state.
    space = np.zeros((nx, ny, nz))
    actives = []
    for line in tdata:
        ix = int(np.rint(line[xcol] / xwidth))
        iy = int(np.rint(line[ycol] / ywidth))
        iz = int(np.rint(line[zcol] / zwidth))
        active = int(line[activecol])
        space[ix, iy, iz] = 1 + active
        tcount = tcount + 1
        if active == 1:
            actives.append([ix, iy, iz, line])

    # Report all active cells and flag any without 26 neighbours. These are
    # on the edge of the partition volume and will have foreign neighbour
    # cells.
    for active in actives:
        count = 0
        for ii in [-1, 0, 1]:
            i = active[0] + ii
            if i < 0:
                i = i + nx
            elif i >= nx:
                i = i - nx
            for jj in [-1, 0, 1]:
                j = active[1] + jj
                if j < 0:
                    j = j + ny
                elif j >= ny:
                    j = j - ny
                for kk in [-1, 0, 1]:
                    k = active[2] + kk
                    if k < 0:
                        k = k + nz
                    elif k >= nz:
                        k = k - nz
                    if space[i, j, k] > 0:
                        count = count + 1
        if count < 27:
            onedge = onedge + 1
            print active[3][0], active[3][1], active[3][2], 1
        else:
            print active[3][0], active[3][1], active[3][2], 0

    allactives.extend(actives)
print "# top cells: ", tcount, " active: ", len(allactives), " on edge: ", onedge
sys.exit(0)
@@ -15,6 +15,7 @@ Scheduler:
cell_split_size: 400 # (Optional) Maximal number of particles per cell (this is the default value).
max_top_level_cells: 12 # (Optional) Maximal number of top-level cells in any dimension. The number of top-level cells will be the cube of this (this is the default value).
tasks_per_cell: 0 # (Optional) The average number of tasks per cell. If not large enough the simulation will fail (means guess...).
mpi_message_limit: 4096 # (Optional) Maximum size, in KB, of MPI task messages that are sent non-buffered (via MPI_Issend).
# Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.)
TimeIntegration:
@@ -79,8 +80,13 @@ DomainDecomposition:
initial_grid_x: 10 # (Optional) Grid size if the "grid" strategy is chosen.
initial_grid_y: 10 # ""
initial_grid_z: 10 # ""
repartition_type: task_weights # (Optional) The re-decomposition strategy: "none", "task_weights", "particle_weights",
# "edge_task_weights" or "hybrid_weights".
repartition_type: costs/costs # (Optional) The re-decomposition strategy, one of:
# "none/none", "costs/costs", "counts/none", "none/costs", "counts/costs",
# "costs/time" or "none/time".
# These are vertex/edge weights, with "costs" meaning task timings, "counts"
# the sum of particles and "time" the expected time of the next updates.
trigger: 0.05 # (Optional) Fractional (<1) CPU time difference between MPI ranks required to trigger a
# new decomposition, or number of steps (>1) between decompositions
minfrac: 0.9 # (Optional) Fraction of all particles that should be updated in the previous step when
@@ -120,7 +126,7 @@ SineWavePotential:
amplitude: 10. # Amplitude of the sine wave (internal units)
timestep_limit: 1. # Time-step dimensionless pre-factor.
growth_time: 0. # (Optional) Time for the potential to grow to its final size.
# Parameters related to cooling function ----------------------------------------------
# Constant du/dt cooling function
......
#!/bin/bash
# Creates a graphic from the task graph file dependency_graph.dot.
# Requires the graphviz command "dot".
if [ ! -e dependency_graph.dot ]; then
echo "Missing task-graph output 'dependency_graph.dot'! Cannot generate figure."
else
dot -Tpng dependency_graph.dot -o task_graph.png
echo "Output written to task_graph.png"
fi
exit
#!/bin/bash
#
# Usage:
# process_cells nx ny nz nprocess
#
# Description:
# Process all the cell dumps in the current directory.
# Outputs a file per step with the active cells identified and marked as to
# whether they are near an edge or not. Note this requires the number of
# cells per dimension of the space.
#
# Also outputs a graphic showing the fraction of active cells on edges
# for each step.
# Handle command-line
if test "$4" = ""; then
echo "Usage: $0 nx ny nz nprocess"
exit 1
fi
NX=$1
NY=$2
NZ=$3
NPROCS=$4
# Locate script.
SCRIPTHOME=$(dirname "$0")
# Find all files. Use version sort to get into correct order.
files=$(ls -v cells_*.dat)
if test $? != 0; then
echo "Failed to find any cell dump files"
exit 1
fi
# To construct the list of file names we need the number of ranks.
ranks=$(ls -v cells_*.dat | sed 's,cells_\(.*\)_.*.dat,\1,' | sort | uniq | wc -l)
echo "Number of ranks = $ranks"
# Now construct a list of files ordered by step, not rank.
files=$(ls cells_*.dat | sort -t "_" -k 3,3 -n | xargs -n 4)
# Need number of steps.
nfiles=$(echo $files| wc -w)
echo "Number of files = $nfiles"
steps=$(( $nfiles / $ranks + 1 ))
echo "Number of steps = $steps"
# And process them.
echo "Processing cell dump files..."
echo $files | xargs -P $NPROCS -n 4 /bin/bash -c "${SCRIPTHOME}/process_cells_helper $NX $NY $NZ \$0 \$1 \$2 \$3"
# Create summary.
grep "top cells" step*-active-cells.dat | sort -h > active_cells.log
# And plot of active cells to edge cells.
stilts plot2plane ifmt=ascii in=active_cells.log xmin=-0.1 xmax=1.1 ymin=0 ymax=$steps grid=1 \
legend=false xpix=600 ypix=500 xlabel="Edge cells/Active cells" ylabel="Step" \
layer1=mark x1="col9/1.0/col6" y1="index" size1=3 shading1=aux auxmap=rainbow \
aux=col6 auxfunc=log auxlabel="Active cells" layer2=histogram x2="col9/1.0/col6" \
color2=grey binsize2=0.01 phase2=0.5 barform2=semi_steps thick2=1 \
out=active_cells.png
exit
#!/bin/bash
# Helper for process_cells.
# Locate script.
SCRIPTHOME=$(dirname "$0")
step=$(echo $4|sed 's,cells_\(.*\)_\(.*\).dat,\2,')
echo "${SCRIPTHOME}/analyse_dump_cells.py $* > step${step}-active-cells.dat"
${SCRIPTHOME}/analyse_dump_cells.py $* > step${step}-active-cells.dat
exit
@@ -276,10 +276,13 @@ int checkCellhdxmax(const struct cell *c, int *depth) {
* only.
*/
static void dumpCells_map(struct cell *c, void *data) {
uintptr_t *ldata = (uintptr_t *)data;
size_t *ldata = (size_t *)data;
FILE *file = (FILE *)ldata[0];
struct engine *e = (struct engine *)ldata[1];
float ntasks = c->nr_tasks;
int active = (int)ldata[2];
int mpiactive = (int)ldata[3];
int pactive = (int)ldata[4];
#if SWIFT_DEBUG_CHECKS
/* The c->nr_tasks field does not include all the tasks. So let's check this
@@ -300,46 +303,94 @@ static void dumpCells_map(struct cell *c, void *data) {
}
#endif
/* Only locally active cells are dumped. */
if (c->count > 0 || c->gcount > 0 || c->scount > 0)
fprintf(file,
" %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6d %6d %6d %6d "
"%6.1f %20lld %6d %6d %6d %6d\n",
c->loc[0], c->loc[1], c->loc[2], c->width[0], c->width[1],
c->width[2], c->count, c->gcount, c->scount, c->depth, ntasks,
c->ti_end_min, get_time_bin(c->ti_end_min), (c->super == c),
cell_is_active(c, e), c->nodeID);
/* Only cells with particles are dumped. */
if (c->count > 0 || c->gcount > 0 || c->scount > 0) {
/* In MPI mode we may restrict output to cells with foreign partners.
 * These define the edges of the partitions. */
#if WITH_MPI
if (mpiactive)
mpiactive = (c->send_xv != NULL);
else
mpiactive = 1;
#else
mpiactive = 1;
#endif
/* Active cells, otherwise all. */
if (active)
active = cell_is_active(c, e);
else
active = 1;
/* So output local super cells that are active and have MPI tasks as
* requested. */
if (c->nodeID == e->nodeID && (c->super == c) && active && mpiactive) {
/* If requested we work out how many particles are active in this cell. */
int pactcount = 0;
if (pactive) {
const struct part *parts = c->parts;
for (int k = 0; k < c->count; k++)
if (part_is_active(&parts[k], e)) pactcount++;
struct gpart *gparts = c->gparts;
for (int k = 0; k < c->gcount; k++)
if (gpart_is_active(&gparts[k], e)) pactcount++;
struct spart *sparts = c->sparts;
for (int k = 0; k < c->scount; k++)
if (spart_is_active(&sparts[k], e)) pactcount++;
}
fprintf(file,
" %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6d %6d %6d %6d %6d %6d "
"%6.1f %20lld %6d %6d %6d %6d %6d\n",
c->loc[0], c->loc[1], c->loc[2], c->width[0], c->width[1],
c->width[2], e->step, c->count, c->gcount, c->scount, pactcount,
c->depth, ntasks, c->ti_end_min, get_time_bin(c->ti_end_min),
(c->super == c), cell_is_active(c, e), c->nodeID,
c->nodeID == e->nodeID);
}
}
}
/**
* @brief Dump the location, depth, task counts, timebins and active state
* for all cells to a simple text file.
* for all cells to a simple text file. A more costly count of the active
* particles in a cell can also be output.
*
* @param prefix base output filename
* @param prefix base output filename, result is written to
* %prefix%_%rank%_%step%.dat
* @param active just output active cells.
* @param mpiactive just output MPI active cells, i.e. those with foreign cells.
* @param pactive also output a count of active particles.
* @param s the space holding the cells to dump.
* @param rank node ID of MPI rank, or 0 if not relevant.
* @param step the current engine step, or some unique integer.
*/
void dumpCells(const char *prefix, struct space *s) {
void dumpCells(const char *prefix, int active, int mpiactive, int pactive,
struct space *s, int rank, int step) {
FILE *file = NULL;
/* Name of output file. */
static int nseq = 0;
char fname[200];
int uniq = atomic_inc(&nseq);
sprintf(fname, "%s_%03d.dat", prefix, uniq);
sprintf(fname, "%s_%03d_%03d.dat", prefix, rank, step);
file = fopen(fname, "w");
/* Header. */
fprintf(file,
"# %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s "
"%20s %6s %6s %6s\n",
"x", "y", "z", "xw", "yw", "zw", "count", "gcount", "scount", "depth",
"tasks", "ti_end_min", "timebin", "issuper", "active", "rank");
"# %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s "
"%20s %6s %6s %6s %6s %6s\n",
"x", "y", "z", "xw", "yw", "zw", "step", "count", "gcount", "scount",
"actcount", "depth", "tasks", "ti_end_min", "timebin", "issuper",
"active", "rank", "local");
uintptr_t data[2];
size_t data[5];
data[0] = (size_t)file;
data[1] = (size_t)s->e;
data[2] = (size_t)active;
data[3] = (size_t)mpiactive;
data[4] = (size_t)pactive;
space_map_cells_pre(s, 1, dumpCells_map, &data);
fclose(file);
}
......
@@ -36,7 +36,8 @@ void printgParticle_single(struct gpart *gp);
int checkSpacehmax(struct space *s);
int checkCellhdxmax(const struct cell *c, int *depth);
void dumpCells(const char *prefix, struct space *s);
void dumpCells(const char *prefix, int active, int mpiactive, int pactive,
struct space *s, int rank, int step);
#ifdef HAVE_METIS
#include "metis.h"
......
@@ -4008,6 +4008,8 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs,
gravity_exact_force_compute(e->s, e);
#endif
if (e->nodeID == 0) scheduler_write_dependencies(&e->sched, e->verbose);
/* Run the 0th time-step */
engine_launch(e);
@@ -4162,6 +4164,9 @@ void engine_step(struct engine *e) {
/* Print the number of active tasks ? */
if (e->verbose) engine_print_task_counts(e);
/* Dump local cells and active particle counts. */
/* dumpCells("cells", 0, 0, 1, e->s, e->nodeID, e->step); */
#ifdef SWIFT_DEBUG_CHECKS
/* Check that we have the correct total mass in the top-level multipoles */
long long num_gpart_mpole = 0;
@@ -5113,6 +5118,12 @@ void engine_init(struct engine *e, struct space *s,
scheduler_init(&e->sched, e->s, engine_estimate_nr_tasks(e), nr_queues,
(policy & scheduler_flag_steal), e->nodeID, &e->threadpool);
/* Maximum size of MPI task messages, in KB, that should not be buffered,
 * that is, sent using MPI_Issend rather than MPI_Isend. 4KB by default.
 */
e->sched.mpi_message_limit =
parser_get_opt_param_int(params, "Scheduler:mpi_message_limit", 4) * 1024;
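To illustrate the mechanism described in the comment above, here is a minimal, hypothetical sketch (not SWIFT's actual scheduler code; the wrapper name is invented) of how such a byte threshold selects between the two MPI send modes:

#include <mpi.h>
#include <stddef.h>

/* Hypothetical wrapper: messages up to the limit are sent in synchronous
 * mode (MPI_Issend), so the MPI library does not buffer them; larger
 * messages use the standard MPI_Isend, which may be buffered internally. */
static int send_task_message(void *buff, size_t size, int dest, int tag,
                             MPI_Comm comm, size_t mpi_message_limit,
                             MPI_Request *req) {
  if (size <= mpi_message_limit)
    return MPI_Issend(buff, (int)size, MPI_BYTE, dest, tag, comm, req);
  return MPI_Isend(buff, (int)size, MPI_BYTE, dest, tag, comm, req);
}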
/* Allocate and init the threads. */
if ((e->runners = (struct runner *)malloc(sizeof(struct runner) *
e->nr_threads)) == NULL)
......
@@ -185,7 +185,8 @@ struct eos_parameters {
/**
* @brief Returns the internal energy given density and entropy
*
* Since we are using an isothermal EoS, the entropy value is ignored
* Since we are using an isothermal EoS, the entropy and density values are
* ignored.
* Computes \f$u = u_{cst}\f$.
*
* @param density The density \f$\rho\f$.
@@ -196,10 +197,11 @@ gas_internal_energy_from_entropy(float density, float entropy) {
return eos.isothermal_internal_energy;
}
/**
* @brief Returns the pressure given density and entropy
*
* Since we are using an isothermal EoS, the entropy value is ignored
* Since we are using an isothermal EoS, the entropy value is ignored.
* Computes \f$P = (\gamma - 1)u_{cst}\rho\f$.
*
* @param density The density \f$\rho\f$.
@@ -214,7 +216,8 @@ __attribute__((always_inline)) INLINE static float gas_pressure_from_entropy(
/**
* @brief Returns the entropy given density and pressure.
*
* Computes \f$A = \frac{P}{\rho^\gamma}\f$.
* Since we are using an isothermal EoS, the pressure value is ignored.
* Computes \f$A = (\gamma - 1)u_{cst}\rho^{-(\gamma-1)}\f$.
*
* @param density The density \f$\rho\f$.
* @param pressure The pressure \f$P\f$ (ignored).
@@ -230,8 +233,9 @@ __attribute__((always_inline)) INLINE static float gas_entropy_from_pressure(
/**
* @brief Returns the sound speed given density and entropy
*
* Since we are using an isothermal EoS, the entropy value is ignored
* Computes \f$c = \sqrt{u_{cst} \gamma \rho^{\gamma-1}}\f$.
* Since we are using an isothermal EoS, the entropy and density values are
* ignored.
* Computes \f$c = \sqrt{u_{cst} \gamma (\gamma-1)}\f$.
*
* @param density The density \f$\rho\f$.
* @param entropy The entropy \f$S\f$.
@@ -246,7 +250,7 @@ __attribute__((always_inline)) INLINE static float gas_soundspeed_from_entropy(
/**
* @brief Returns the entropy given density and internal energy
*
* Since we are using an isothermal EoS, the energy value is ignored
* Since we are using an isothermal EoS, the energy value is ignored.
* Computes \f$S = \frac{(\gamma - 1)u_{cst}}{\rho^{\gamma-1}}\f$.
*
* @param density The density \f$\rho\f$
@@ -262,7 +266,7 @@ gas_entropy_from_internal_energy(float density, float u) {
/**
* @brief Returns the pressure given density and internal energy
*
* Since we are using an isothermal EoS, the energy value is ignored
* Since we are using an isothermal EoS, the energy value is ignored.
* Computes \f$P = (\gamma - 1)u_{cst}\rho\f$.
*
* @param density The density \f$\rho\f$
@@ -291,8 +295,9 @@ gas_internal_energy_from_pressure(float density, float pressure) {
/**
* @brief Returns the sound speed given density and internal energy
*
* Since we are using an isothermal EoS, the energy value is ignored
* Computes \f$c = \sqrt{u_{cst} \gamma \rho^{\gamma-1}}\f$.
* Since we are using an isothermal EoS, the energy and density values are
* ignored.
* Computes \f$c = \sqrt{u_{cst} \gamma (\gamma-1)}\f$.
*
* @param density The density \f$\rho\f$
* @param u The internal energy \f$u\f$
@@ -307,8 +312,9 @@ gas_soundspeed_from_internal_energy(float density, float u) {
/**
* @brief Returns the sound speed given density and pressure
*
* Since we are using an isothermal EoS, the pressure value is ignored
* Computes \f$c = \sqrt{u_{cst} \gamma \rho^{\gamma-1}}\f$.
* Since we are using an isothermal EoS, the pressure and density values are
* ignored.
* Computes \f$c = \sqrt{u_{cst} \gamma (\gamma-1)}\f$.
*
* @param density The density \f$\rho\f$
* @param P The pressure \f$P\f$
......
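For reference, the corrected sound-speed expressions above follow in one line from the isothermal relations quoted in these doc comments: \f$P = (\gamma - 1) u_{cst} \rho\f$ and \f$c^2 = \gamma P / \rho\f$ give \f$c^2 = \gamma (\gamma - 1) u_{cst}\f$, i.e. \f$c = \sqrt{u_{cst} \gamma (\gamma - 1)}\f$, independent of the density, which is why the density argument is now documented as ignored.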
@@ -146,7 +146,7 @@ void readArray_chunk(hid_t h_data, hid_t h_plist_id,
* @brief Reads a data array from a given HDF5 group.
*
* @param grp The group from which to read.
* @param prop The #io_props of the field to read.
* @param props The #io_props of the field to read.
* @param N The number of particles on that rank.
* @param N_total The total number of particles.
* @param mpi_rank The MPI rank of this node.
......
@@ -43,10 +43,13 @@ struct partition {
/* Repartition type to use. */
enum repartition_type {
REPART_NONE = 0,
REPART_METIS_BOTH,
REPART_METIS_VERTEX,
REPART_METIS_EDGE,
REPART_METIS_VERTEX_EDGE
REPART_METIS_VERTEX_COSTS_EDGE_COSTS,
REPART_METIS_VERTEX_COUNTS,
REPART_METIS_EDGE_COSTS,
REPART_METIS_VERTEX_COUNTS_EDGE_COSTS,
REPART_METIS_VERTEX_COSTS_EDGE_TIMEBINS,
REPART_METIS_VERTEX_COUNTS_EDGE_TIMEBINS,
REPART_METIS_EDGE_TIMEBINS
};
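To connect these enum values to the new parameter-file strings documented above, here is a hedged sketch (this function is hypothetical and not part of the commit; "counts/time" is a guess for the one combination the YAML comment does not list):

#include <string.h>

/* Hypothetical mapping from DomainDecomposition:repartition_type strings
 * (vertex weights / edge weights) to the enum values above. */
static enum repartition_type repartition_type_from_string(const char *opt) {
  if (strcmp(opt, "costs/costs") == 0) return REPART_METIS_VERTEX_COSTS_EDGE_COSTS;
  if (strcmp(opt, "counts/none") == 0) return REPART_METIS_VERTEX_COUNTS;
  if (strcmp(opt, "none/costs") == 0) return REPART_METIS_EDGE_COSTS;
  if (strcmp(opt, "counts/costs") == 0) return REPART_METIS_VERTEX_COUNTS_EDGE_COSTS;
  if (strcmp(opt, "costs/time") == 0) return REPART_METIS_VERTEX_COSTS_EDGE_TIMEBINS;
  if (strcmp(opt, "counts/time") == 0) return REPART_METIS_VERTEX_COUNTS_EDGE_TIMEBINS;
  if (strcmp(opt, "none/time") == 0) return REPART_METIS_EDGE_TIMEBINS;
  return REPART_NONE; /* "none/none" or an unrecognised option */
}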
/* Repartition preferences. */
......
@@ -50,6 +50,7 @@
#include "space.h"
#include "task.h"
#include "timers.h"
#include "version.h"
/**
* @brief Re-set the list of active tasks.
@@ -111,6 +112,125 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta,
atomic_inc(&s->completed_unlock_writes);
}
/**
* @brief Write a dot file with the task dependencies.
*
* Run plot_task_dependencies.sh for an example of how to use it
* to generate the figure.
*
* @param s The #scheduler we are working in.
* @param verbose Are we verbose about this?
*/
void scheduler_write_dependencies(struct scheduler *s, int verbose) {
const ticks tic = getticks();
/* Conservative number of dependencies per task type */
const int max_nber_dep = 128;
/* Number of possible relations between tasks */
const int nber_relation =
2 * task_type_count * task_subtype_count * max_nber_dep;
/* To locate the chunk of max_nber_dep entries for a given task:
 * ind = (ta * task_subtype_count + sa) * max_nber_dep * 2
 * where ta is the value of task_type and sa is the value of
 * task_subtype. */
int *table = malloc(nber_relation * sizeof(int));
if (table == NULL)
error("Error allocating memory for task-dependency graph.");
/* Reset everything */
for (int i = 0; i < nber_relation; i++) table[i] = -1;
/* Create file */
char filename[200] = "dependency_graph.dot";
FILE *f = fopen(filename, "w");
if (f == NULL) error("Error opening dependency graph file.");
/* Write header */
fprintf(f, "digraph task_dep {\n");
fprintf(f, "label=\"Task dependencies for SWIFT %s\"", git_revision());
fprintf(f, "\t compound=true;\n");
fprintf(f, "\t ratio=0.66;\n");
fprintf(f, "\t node[nodesep=0.15];\n");
/* loop over all tasks */
for (int i = 0; i < s->nr_tasks; i++) {
const struct task *ta = &s->tasks[i];
/* and their dependencies */
for (int j = 0; j < ta->nr_unlock_tasks; j++) {
const struct task *tb = ta->unlock_tasks[j];
/* check if dependency already written */
int written = 0;
/* Current index */
int ind = ta->type * task_subtype_count + ta->subtype;