Commit 509f3158 authored by James Willis's avatar James Willis
Browse files

Merge branch 'master' into dopair2-vectorisation

parents dee27176 d408db13
......@@ -100,6 +100,7 @@ tests/testVoronoi3D
tests/testDump
tests/testLogger
tests/benchmarkInteractions
tests/testGravityDerivatives
theory/latex/swift.pdf
theory/SPH/Kernels/kernels.pdf
......@@ -132,6 +133,9 @@ m4/lt~obsolete.m4
/stamp-h1
/test-driver
# Intel compiler optimization reports
*.optrpt
# Object files
.deps/
*.o
......
......@@ -226,6 +226,18 @@ if test "$enable_debugging_checks" = "yes"; then
AC_DEFINE([SWIFT_DEBUG_CHECKS],1,[Enable expensive debugging])
fi
# Check whether we want to default to naive cell interactions
AC_ARG_ENABLE([naive-interactions],
[AS_HELP_STRING([--enable-naive-interactions],
[Activate use of naive cell interaction functions @<:@yes/no@:>@]
)],
[enable_naive_interactions="$enableval"],
[enable_naive_interactions="no"]
)
if test "$enable_naive_interactions" = "yes"; then
AC_DEFINE([SWIFT_USE_NAIVE_INTERACTIONS],1,[Enable use of naive cell interaction functions])
fi
# Check if gravity force checks are on for some particles.
AC_ARG_ENABLE([gravity-force-checks],
[AS_HELP_STRING([--enable-gravity-force-checks],
......@@ -919,6 +931,7 @@ AC_MSG_RESULT([
Task debugging : $enable_task_debugging
Threadpool debugging : $enable_threadpool_debugging
Debugging checks : $enable_debugging_checks
Naive interactions : $enable_naive_interactions
Gravity checks : $gravity_force_checks
------------------------])
......@@ -13,9 +13,6 @@ TimeIntegration:
dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units).
dt_max: 1e-4 # The maximal time-step size of the simulation (in internal units).
Scheduler:
cell_split_size: 64
# Parameters governing the snapshots
Snapshots:
basename: eagle # Common part of the name of output files
......@@ -29,8 +26,8 @@ Statistics:
# Parameters for the self-gravity scheme
Gravity:
eta: 0.025 # Constant dimensionless multiplier for time integration.
theta: 0.7 # Opening angle (Multipole acceptance criterion)
epsilon: 0.001 # Softening length (in internal units).
theta: 0.85 # Opening angle (Multipole acceptance criterion)
epsilon: 0.001 # Softening length (in internal units).
# Parameters for the hydrodynamics scheme
SPH:
......
# This file is part of SWIFT.
# tHIS FIle is part of SWIFT.
# Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
# Matthieu Schaller (matthieu.schaller@durham.ac.uk).
#
......@@ -95,9 +95,7 @@ EXTRA_DIST = BigCosmoVolume/makeIC.py \
EXTRA_DIST += parameter_example.yml
# Scripts to plot task graphs
EXTRA_DIST += plot_tasks_MPI.py plot_tasks.py \
analyse_tasks_MPI.py analyse_tasks.py \
process_plot_tasks_MPI process_plot_tasks
EXTRA_DIST += plot_tasks.py analyse_tasks.py process_plot_tasks_MPI process_plot_tasks
# Scripts to plot threadpool 'task' graphs
EXTRA_DIST += analyse_threadpool_tasks.py \
......
......@@ -361,6 +361,13 @@ int main(int argc, char *argv[]) {
message("WARNING: Debugging checks activated. Code will be slower !");
#endif
/* Do we have debugging checks ? */
#ifdef SWIFT_USE_NAIVE_INTERACTIONS
if (myrank == 0)
message(
"WARNING: Naive cell interactions activated. Code will be slower !");
#endif
/* Do we have gravity accuracy checks ? */
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
if (myrank == 0)
......
......@@ -329,9 +329,15 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
const struct part *restrict parts_i = ci->parts;
const struct part *restrict parts_j = cj->parts;
double loc[3];
loc[0] = ci->loc[0];
loc[1] = ci->loc[1];
loc[2] = ci->loc[2];
loc[0] = cj->loc[0];
loc[1] = cj->loc[1];
loc[2] = cj->loc[2];
/* Shift ci particles for boundary conditions and location of cell.*/
double total_ci_shift[3];
total_ci_shift[0] = loc[0] + shift[0];
total_ci_shift[1] = loc[1] + shift[1];
total_ci_shift[2] = loc[2] + shift[2];
/* Let the compiler know that the data is aligned and create pointers to the
* arrays inside the cache. */
......@@ -353,9 +359,9 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
* due to BCs but leave cell cj. */
for (int i = 0; i < ci_cache_count; i++) {
idx = sort_i[i + first_pi_align].i;
x[i] = (float)(parts_i[idx].x[0] - loc[0] - shift[0]);
y[i] = (float)(parts_i[idx].x[1] - loc[1] - shift[1]);
z[i] = (float)(parts_i[idx].x[2] - loc[2] - shift[2]);
x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
h[i] = parts_i[idx].h;
m[i] = parts_i[idx].mass;
......@@ -364,6 +370,43 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
vz[i] = parts_i[idx].v[2];
}
#ifdef SWIFT_DEBUG_CHECKS
const float shift_threshold_x =
2. * ci->width[0] + 2. * max(ci->dx_max_part, cj->dx_max_part);
const float shift_threshold_y =
2. * ci->width[1] + 2. * max(ci->dx_max_part, cj->dx_max_part);
const float shift_threshold_z =
2. * ci->width[2] + 2. * max(ci->dx_max_part, cj->dx_max_part);
/* Make sure that particle positions have been shifted correctly. */
for (int i = 0; i < ci_cache_count; i++) {
if (x[i] > shift_threshold_x || x[i] < -shift_threshold_x)
error(
"Error: ci->loc[%lf,%lf,%lf],cj->loc[%lf,%lf,%lf] Particle %d x pos "
"is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. x=%f, ci->width[0]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], loc[0], loc[1], loc[2], i, x[i],
ci->width[0]);
if (y[i] > shift_threshold_y || y[i] < -shift_threshold_y)
error(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d y pos "
"is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. y=%f, ci->width[1]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], loc[0], loc[1], loc[2], i, y[i],
ci->width[1]);
if (z[i] > shift_threshold_z || z[i] < -shift_threshold_z)
error(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d z pos "
"is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. z=%f, ci->width[2]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], loc[0], loc[1], loc[2], i, z[i],
ci->width[2]);
}
#endif
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
float fake_pix = 2.0f * parts_i[sort_i[ci->count - 1].i].x[0];
......@@ -404,6 +447,36 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
vzj[i] = parts_j[idx].v[2];
}
#ifdef SWIFT_DEBUG_CHECKS
/* Make sure that particle positions have been shifted correctly. */
for (int i = 0; i <= last_pj_align; i++) {
if (xj[i] > shift_threshold_x || xj[i] < -shift_threshold_x)
error(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d xj "
"pos is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. xj=%f, ci->width[0]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], loc[0], loc[1], loc[2], i, xj[i],
ci->width[0]);
if (yj[i] > shift_threshold_y || yj[i] < -shift_threshold_y)
error(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d yj "
"pos is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. yj=%f, ci->width[1]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], loc[0], loc[1], loc[2], i, yj[i],
ci->width[1]);
if (zj[i] > shift_threshold_z || zj[i] < -shift_threshold_z)
error(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d zj "
"pos is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. zj=%f, ci->width[2]=%f",
ci->loc[0], ci->loc[1], ci->loc[2], loc[0], loc[1], loc[2], i, zj[i],
ci->width[2]);
}
#endif
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
float fake_pjx = 2.0f * cj->parts[sort_j[cj->count - 1].i].x[0];
......
......@@ -82,73 +82,6 @@ int cell_getsize(struct cell *c) {
return count;
}
/**
* @brief Unpack the data of a given cell and its sub-cells.
*
* @param pc An array of packed #pcell.
* @param c The #cell in which to unpack the #pcell.
* @param s The #space in which the cells are created.
*
* @return The number of cells created.
*/
int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
#ifdef WITH_MPI
/* Unpack the current pcell. */
c->h_max = pc->h_max;
c->ti_end_min = pc->ti_end_min;
c->ti_end_max = pc->ti_end_max;
c->ti_old_part = pc->ti_old_part;
c->ti_old_gpart = pc->ti_old_gpart;
c->count = pc->count;
c->gcount = pc->gcount;
c->scount = pc->scount;
c->tag = pc->tag;
/* Number of new cells created. */
int count = 1;
/* Fill the progeny recursively, depth-first. */
for (int k = 0; k < 8; k++)
if (pc->progeny[k] >= 0) {
struct cell *temp;
space_getcells(s, 1, &temp);
temp->count = 0;
temp->gcount = 0;
temp->scount = 0;
temp->loc[0] = c->loc[0];
temp->loc[1] = c->loc[1];
temp->loc[2] = c->loc[2];
temp->width[0] = c->width[0] / 2;
temp->width[1] = c->width[1] / 2;
temp->width[2] = c->width[2] / 2;
temp->dmin = c->dmin / 2;
if (k & 4) temp->loc[0] += temp->width[0];
if (k & 2) temp->loc[1] += temp->width[1];
if (k & 1) temp->loc[2] += temp->width[2];
temp->depth = c->depth + 1;
temp->split = 0;
temp->dx_max_part = 0.f;
temp->dx_max_gpart = 0.f;
temp->dx_max_sort = 0.f;
temp->nodeID = c->nodeID;
temp->parent = c;
c->progeny[k] = temp;
c->split = 1;
count += cell_unpack(&pc[pc->progeny[k]], temp, s);
}
/* Return the total number of unpacked cells. */
c->pcell_size = count;
return count;
#else
error("SWIFT was not compiled with MPI support.");
return 0;
#endif
}
/**
* @brief Link the cells recursively to the given #part array.
*
......@@ -233,7 +166,7 @@ int cell_link_sparts(struct cell *c, struct spart *sparts) {
*
* @return The number of packed cells.
*/
int cell_pack(struct cell *c, struct pcell *pc) {
int cell_pack(struct cell *restrict c, struct pcell *restrict pc) {
#ifdef WITH_MPI
......@@ -267,26 +200,97 @@ int cell_pack(struct cell *c, struct pcell *pc) {
#endif
}
/**
* @brief Unpack the data of a given cell and its sub-cells.
*
* @param pc An array of packed #pcell.
* @param c The #cell in which to unpack the #pcell.
* @param s The #space in which the cells are created.
*
* @return The number of cells created.
*/
int cell_unpack(struct pcell *restrict pc, struct cell *restrict c,
struct space *restrict s) {
#ifdef WITH_MPI
/* Unpack the current pcell. */
c->h_max = pc->h_max;
c->ti_end_min = pc->ti_end_min;
c->ti_end_max = pc->ti_end_max;
c->ti_old_part = pc->ti_old_part;
c->ti_old_gpart = pc->ti_old_gpart;
c->count = pc->count;
c->gcount = pc->gcount;
c->scount = pc->scount;
c->tag = pc->tag;
/* Number of new cells created. */
int count = 1;
/* Fill the progeny recursively, depth-first. */
for (int k = 0; k < 8; k++)
if (pc->progeny[k] >= 0) {
struct cell *temp;
space_getcells(s, 1, &temp);
temp->count = 0;
temp->gcount = 0;
temp->scount = 0;
temp->loc[0] = c->loc[0];
temp->loc[1] = c->loc[1];
temp->loc[2] = c->loc[2];
temp->width[0] = c->width[0] / 2;
temp->width[1] = c->width[1] / 2;
temp->width[2] = c->width[2] / 2;
temp->dmin = c->dmin / 2;
if (k & 4) temp->loc[0] += temp->width[0];
if (k & 2) temp->loc[1] += temp->width[1];
if (k & 1) temp->loc[2] += temp->width[2];
temp->depth = c->depth + 1;
temp->split = 0;
temp->dx_max_part = 0.f;
temp->dx_max_gpart = 0.f;
temp->dx_max_sort = 0.f;
temp->nodeID = c->nodeID;
temp->parent = c;
c->progeny[k] = temp;
c->split = 1;
count += cell_unpack(&pc[pc->progeny[k]], temp, s);
}
/* Return the total number of unpacked cells. */
c->pcell_size = count;
return count;
#else
error("SWIFT was not compiled with MPI support.");
return 0;
#endif
}
/**
* @brief Pack the time information of the given cell and all it's sub-cells.
*
* @param c The #cell.
* @param ti_ends (output) The time information we pack into
* @param pcells (output) The end-of-timestep information we pack into
*
* @return The number of packed cells.
*/
int cell_pack_ti_ends(struct cell *c, integertime_t *ti_ends) {
int cell_pack_end_step(struct cell *restrict c,
struct pcell_step *restrict pcells) {
#ifdef WITH_MPI
/* Pack this cell's data. */
ti_ends[0] = c->ti_end_min;
pcells[0].ti_end_min = c->ti_end_min;
pcells[0].dx_max_part = c->dx_max_part;
pcells[0].dx_max_gpart = c->dx_max_gpart;
/* Fill in the progeny, depth-first recursion. */
int count = 1;
for (int k = 0; k < 8; k++)
if (c->progeny[k] != NULL) {
count += cell_pack_ti_ends(c->progeny[k], &ti_ends[count]);
count += cell_pack_end_step(c->progeny[k], &pcells[count]);
}
/* Return the number of packed values. */
......@@ -302,22 +306,25 @@ int cell_pack_ti_ends(struct cell *c, integertime_t *ti_ends) {
* @brief Unpack the time information of a given cell and its sub-cells.
*
* @param c The #cell
* @param ti_ends The time information to unpack
* @param pcells The end-of-timestep information to unpack
*
* @return The number of cells created.
*/
int cell_unpack_ti_ends(struct cell *c, integertime_t *ti_ends) {
int cell_unpack_end_step(struct cell *restrict c,
struct pcell_step *restrict pcells) {
#ifdef WITH_MPI
/* Unpack this cell's data. */
c->ti_end_min = ti_ends[0];
c->ti_end_min = pcells[0].ti_end_min;
c->dx_max_part = pcells[0].dx_max_part;
c->dx_max_gpart = pcells[0].dx_max_gpart;
/* Fill in the progeny, depth-first recursion. */
int count = 1;
for (int k = 0; k < 8; k++)
if (c->progeny[k] != NULL) {
count += cell_unpack_ti_ends(c->progeny[k], &ti_ends[count]);
count += cell_unpack_end_step(c->progeny[k], &pcells[count]);
}
/* Return the number of packed values. */
......@@ -1724,6 +1731,9 @@ void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj,
/* Anything to do here? */
if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
if (ci->ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e);
if (cj->ti_old_multipole < e->ti_current) cell_drift_multipole(cj, e);
/* Recover the multipole information */
struct gravity_tensors *const multi_i = ci->multipole;
struct gravity_tensors *const multi_j = cj->multipole;
......
......@@ -70,24 +70,63 @@ struct link {
struct link *next;
};
/* Packed cell. */
/**
* @brief Packed cell for information correct at rebuild time.
*
* Contains all the information for a tree walk in a non-local cell.
*/
struct pcell {
/* Stats on this cell's particles. */
/*! Maximal smoothing length. */
double h_max;
integertime_t ti_end_min, ti_end_max, ti_beg_max, ti_old_part, ti_old_gpart;
/* Number of particles in this cell. */
int count, gcount, scount;
/*! Minimal integer end-of-timestep in this cell */
integertime_t ti_end_min;
/*! Maximal integer end-of-timestep in this cell */
integertime_t ti_end_max;
/*! Maximal integer beginning-of-timestep in this cell */
integertime_t ti_beg_max;
/*! Integer time of the last drift of the #part in this cell */
integertime_t ti_old_part;
/*! Integer time of the last drift of the #gpart in this cell */
integertime_t ti_old_gpart;
/*! Number of #part in this cell. */
int count;
/*! Number of #gpart in this cell. */
int gcount;
/*! Number of #spart in this cell. */
int scount;
/* tag used for MPI communication. */
/*! tag used for MPI communication. */
int tag;
/* Relative indices of the cell's progeny. */
/*! Relative indices of the cell's progeny. */
int progeny[8];
} SWIFT_STRUCT_ALIGN;
/**
* @brief Cell information at the end of a time-step.
*/
struct pcell_step {
/*! Minimal integer end-of-timestep in this cell */
integertime_t ti_end_min;
/*! Maximal distance any #part has travelled since last rebuild */
float dx_max_part;
/*! Maximal distance any #gpart has travelled since last rebuild */
float dx_max_gpart;
};
/**
* @brief Cell within the tree structure.
*
......@@ -389,8 +428,8 @@ int cell_slocktree(struct cell *c);
void cell_sunlocktree(struct cell *c);
int cell_pack(struct cell *c, struct pcell *pc);
int cell_unpack(struct pcell *pc, struct cell *c, struct space *s);
int cell_pack_ti_ends(struct cell *c, integertime_t *ti_ends);
int cell_unpack_ti_ends(struct cell *c, integertime_t *ti_ends);
int cell_pack_end_step(struct cell *c, struct pcell_step *pcell);
int cell_unpack_end_step(struct cell *c, struct pcell_step *pcell);
int cell_getsize(struct cell *c);
int cell_link_parts(struct cell *c, struct part *parts);
int cell_link_gparts(struct cell *c, struct gpart *gparts);
......
......@@ -2971,7 +2971,7 @@ int engine_estimate_nr_tasks(struct engine *e) {
#endif
}
if (e->policy & engine_policy_self_gravity) {
n1 += 24;
n1 += 32;
n2 += 1;
#ifdef WITH_MPI
n2 += 2;
......@@ -3070,13 +3070,6 @@ void engine_rebuild(struct engine *e, int clean_h_values) {
/* Re-build the tasks. */
engine_maketasks(e);
/* Run through the tasks and mark as skip or not. */
if (engine_marktasks(e))
error("engine_marktasks failed after space_rebuild.");
/* Print the status of the system */
// if (e->verbose) engine_print_task_counts(e);
#ifdef SWIFT_DEBUG_CHECKS
/* Check that all cells have been drifted to the current time.
* That can include cells that have not
......@@ -3085,6 +3078,13 @@ void engine_rebuild(struct engine *e, int clean_h_values) {
e->policy & engine_policy_self_gravity);
#endif
/* Run through the tasks and mark as skip or not. */
if (engine_marktasks(e))
error("engine_marktasks failed after space_rebuild.");
/* Print the status of the system */
// if (e->verbose) engine_print_task_counts(e);
if (e->verbose)
message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
clocks_getunit());
......@@ -3692,8 +3692,8 @@ void engine_step(struct engine *e) {
if (e->policy & engine_policy_reconstruct_mpoles)
engine_reconstruct_multipoles(e);
// else
// engine_drift_top_multipoles(e);
else
engine_drift_top_multipoles(e);
}
/* Print the number of active tasks ? */
......@@ -3901,7 +3901,7 @@ void engine_do_drift_top_multipoles_mapper(void *map_data, int num_elements,
if (c != NULL && c->nodeID == e->nodeID) {
/* Drift the multipole at this level only */
cell_drift_multipole(c, e);
if (c->ti_old_multipole != e->ti_current) cell_drift_multipole(c, e);
}
}
}
......
......@@ -26,6 +26,10 @@ __attribute__((always_inline)) INLINE static void gravity_debug_particle(
"x=[%.5e,%.5e,%.5e], v_full=[%.5e,%.5e,%.5e], a=[%.5e,%.5e,%.5e]\n",
p->mass, p->epsilon, p->time_bin, p->x[0], p->x[1], p->x[2], p->v_full[0],
p->v_full[1], p->v_full[2], p->a_grav[0], p->a_grav[1], p->a_grav[2]);
#ifdef SWIFT_DEBUG_CHECKS
printf("num_interacted=%lld ti_drift=%lld ti_kick=%lld\n", p->num_interacted,
p->ti_drift, p->ti_kick);
#endif
}
#endif /* SWIFT_DEFAULT_GRAVITY_DEBUG_H */
......@@ -189,13 +189,14 @@ __attribute__((always_inline)) INLINE static void gravity_cache_populate(
* that have a reasonable magnitude. We use the cell width for this */
const float pos_padded[3] = {-2. * cell->width[0], -2. * cell->width[1],
-2. * cell->width[2]};
const float eps_padded = epsilon[0];
/* Pad the caches */
for (int i = gcount; i < gcount_padded; ++i) {
x[i] = pos_padded[0];
y[i] = pos_padded[1];
z[i] = pos_padded[2];
epsilon[i] = 0.f;
epsilon[i] = eps_padded;
m[i] = 0.f;
active[i] = 0;
use_mpole[i] = 0;
......@@ -248,12 +249,14 @@ gravity_cache_populate_no_mpole(timebin_t max_active_bin,
* that have a reasonable magnitude. We use the cell width for this */
const float pos_padded[3] = {-2. * cell->width[0], -2. * cell->width[1],
-2. * cell->width[2]};
const float eps_padded = epsilon[0];
/* Pad the caches */
for (int i = gcount; i < gcount_padded; ++i) {
x[i] = pos_padded[0];
y[i] = pos_padded[1];
z[i] = pos_padded[2];
epsilon[i] = 0.f;
epsilon[i] = eps_padded;
m[i] = 0.f;
active[i] = 0;
}
......
......@@ -78,7 +78,7 @@ __attribute__((always_inline)) INLINE static void fourier_kernel_long_grav_eval(