diff --git a/examples/plot_scaling_results.py b/examples/plot_scaling_results.py index cf9c96d41fe8f0897acb6be1f9af2269abee6174..5a76e9870bd3ec55807c7b79c475c62b14119e5c 100755 --- a/examples/plot_scaling_results.py +++ b/examples/plot_scaling_results.py @@ -48,7 +48,7 @@ threadList = [] hexcols = ['#332288', '#88CCEE', '#44AA99', '#117733', '#999933', '#DDCC77', '#CC6677', '#882255', '#AA4499', '#661100', '#6699CC', '#AA4466', '#4477AA'] -linestyle = (hexcols[0],hexcols[1],hexcols[3],hexcols[5],hexcols[6]) +linestyle = (hexcols[0],hexcols[1],hexcols[3],hexcols[5],hexcols[6],hexcols[8]) #cmdLine = './swift_fixdt -s -t 16 cosmoVolume.yml' #platform = 'KNL' @@ -68,6 +68,9 @@ elif len(sys.argv) == 5: elif len(sys.argv) == 6: inputFileNames = (sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5]) numOfSeries = 5 +elif len(sys.argv) == 7: + inputFileNames = (sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6]) + numOfSeries = 6 # Get the names of the branch, Git revision, hydro scheme and hydro kernel def parse_header(inputFile): @@ -76,7 +79,8 @@ def parse_header(inputFile): for line in f: if 'Branch:' in line: s = line.split() - branch.append(s[2]) + line = s[2:] + branch.append(" ".join(line)) elif 'Revision:' in line: s = line.split() revision.append(s[2]) @@ -129,8 +133,8 @@ def parse_files(): parse_header(file_list[0]) version.append(branch[i] + " " + revision[i] + "\n" + hydro_scheme[i] + - "\n" + hydro_kernel[i] + r", $N_{ngb}$=" + hydro_neighbours[i] + - r", $\eta$=" + hydro_eta[i]) + "\n" + hydro_kernel[i] + r", $N_{ngb}=%d$"%float(hydro_neighbours[i]) + + r", $\eta=%.3f$"%float(hydro_eta[i])) times.append([]) totalTime.append([]) speedUp.append([]) @@ -176,7 +180,7 @@ def print_results(times,totalTime,parallelEff,version): def plot_results(times,totalTime,speedUp,parallelEff): - fig, axarr = plt.subplots(2, 2, figsize=(10,10), frameon=False) + fig, axarr = plt.subplots(2, 2, figsize=(10,10), frameon=True) speedUpPlot = axarr[0, 0] 
parallelEffPlot = axarr[0, 1] totalTimePlot = axarr[1, 0] @@ -218,7 +222,7 @@ def plot_results(times,totalTime,speedUp,parallelEff): totalTimePlot.set_xlim([0.9, 10**(np.floor(np.log10(threadList[i][-1]))+0.5)]) totalTimePlot.set_ylim([10**np.floor(np.log10(np.min(totalTime)*0.6)), 1.2*10**np.floor(np.log10(np.max(totalTime) * 1.5)+1)]) - totalTimePlot.legend(bbox_to_anchor=(1.14, 1), loc=2, borderaxespad=0.,prop={'size':12}) + totalTimePlot.legend(bbox_to_anchor=(1.14, 0.97), loc=2, borderaxespad=0.,prop={'size':12}, frameon=False) emptyPlot.axis('off') for i, txt in enumerate(threadList[0]): diff --git a/src/Makefile.am b/src/Makefile.am index 0a4b2c64e85b30d9d15b9216f6135517e4217cc5..84cfd823ad4f076d3984847eb846e18067c9a6ab 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -70,8 +70,8 @@ nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel_h hydro/Gadget2/hydro_debug.h hydro/Gadget2/hydro_part.h \ hydro/Gizmo/hydro.h hydro/Gizmo/hydro_iact.h hydro/Gizmo/hydro_io.h \ hydro/Gizmo/hydro_debug.h hydro/Gizmo/hydro_part.h \ - riemann.h \ - riemann/riemann_hllc.h riemann/riemann_trrs.h riemann/riemann_exact.h \ + riemann.h riemann/riemann_hllc.h riemann/riemann_trrs.h \ + riemann/riemann_exact.h riemann/riemann_vacuum.h \ cooling/const_du/cooling.h cooling/const_lambda/cooling.h # Sources and flags for regular library diff --git a/src/cell.c b/src/cell.c index 6e0d13edcb92ee07334d151de064ed4fbec8c5a8..7ce6fb81a8fa6875884d3f5c840c36e5177cdf6b 100644 --- a/src/cell.c +++ b/src/cell.c @@ -580,7 +580,7 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset) { } /* Re-link the gparts. */ - part_relink_gparts(parts, count, parts_offset); + if (count > 0 && gcount > 0) part_relink_gparts(parts, count, parts_offset); #ifdef SWIFT_DEBUG_CHECKS /* Verify that _all_ the parts have been assigned to a cell. */ @@ -675,7 +675,8 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset) { } /* Re-link the parts. 
*/ - part_relink_parts(gparts, gcount, parts - parts_offset); + if (count > 0 && gcount > 0) + part_relink_parts(gparts, gcount, parts - parts_offset); } /** diff --git a/src/cell.h b/src/cell.h index 50da67952490b85fc8b078d325066774a32c8c22..fd836206241c55cd6b9da2e29157c11a14c5a892 100644 --- a/src/cell.h +++ b/src/cell.h @@ -128,9 +128,14 @@ struct cell { /* Parent cell. */ struct cell *parent; - /* Super cell, i.e. the highest-level supercell that has interactions. */ + /* Super cell, i.e. the highest-level supercell that has hydro interactions. + */ struct cell *super; + /* Super cell, i.e. the highest-level supercell that has gravity interactions. + */ + struct cell *gsuper; + /* The task computing this cell's sorts. */ struct task *sorts; int sortsize; diff --git a/src/debug.c b/src/debug.c index 15354b7d419544a8456543b79c38235eb3b68b2c..d73bc86a92cf5ca28c202e7b567cf7c40ba6eccb 100644 --- a/src/debug.c +++ b/src/debug.c @@ -154,8 +154,9 @@ int checkSpacehmax(struct space *s) { /* Loop over local cells. */ float cell_h_max = 0.0f; for (int k = 0; k < s->nr_cells; k++) { - if (s->cells[k].nodeID == s->e->nodeID && s->cells[k].h_max > cell_h_max) { - cell_h_max = s->cells[k].h_max; + if (s->cells_top[k].nodeID == s->e->nodeID && + s->cells_top[k].h_max > cell_h_max) { + cell_h_max = s->cells_top[k].h_max; } } @@ -172,9 +173,9 @@ int checkSpacehmax(struct space *s) { /* There is a problem. Hunt it down. 
*/ for (int k = 0; k < s->nr_cells; k++) { - if (s->cells[k].nodeID == s->e->nodeID) { - if (s->cells[k].h_max > part_h_max) { - message("cell %d is inconsistent (%f > %f)", k, s->cells[k].h_max, + if (s->cells_top[k].nodeID == s->e->nodeID) { + if (s->cells_top[k].h_max > part_h_max) { + message("cell %d is inconsistent (%f > %f)", k, s->cells_top[k].h_max, part_h_max); } } diff --git a/src/engine.c b/src/engine.c index f88005b37d99e8faa4aea11f120629748fc42204..333d3e5852881159a97dc70f836c27b8933e6af2 100644 --- a/src/engine.c +++ b/src/engine.c @@ -118,10 +118,10 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) { * * @param e The #engine. * @param c The #cell. - * @param super The super #cell. + * @param gsuper The gsuper #cell. */ void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c, - struct cell *super) { + struct cell *gsuper) { struct scheduler *s = &e->sched; const int is_with_external_gravity = @@ -130,10 +130,10 @@ void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c, const int is_fixdt = (e->policy & engine_policy_fixdt) == engine_policy_fixdt; /* Is this the super-cell? */ - if (super == NULL && (c->grav != NULL || (!c->split && c->gcount > 0))) { + if (gsuper == NULL && (c->grav != NULL || (!c->split && c->gcount > 0))) { /* This is the super cell, i.e. the first with gravity tasks attached. */ - super = c; + gsuper = c; /* Local tasks only... */ if (c->nodeID == e->nodeID) { @@ -161,13 +161,13 @@ void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c, } /* Set the super-cell. */ - c->super = super; + c->gsuper = gsuper; /* Recurse. 
*/ if (c->split) for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) - engine_make_gravity_hierarchical_tasks(e, c->progeny[k], super); + engine_make_gravity_hierarchical_tasks(e, c->progeny[k], gsuper); } /** @@ -262,7 +262,7 @@ void engine_redistribute(struct engine *e) { const int nr_nodes = e->nr_nodes; const int nodeID = e->nodeID; struct space *s = e->s; - struct cell *cells = s->cells; + struct cell *cells = s->cells_top; const int nr_cells = s->nr_cells; const int *cdim = s->cdim; const double iwidth[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]}; @@ -541,12 +541,12 @@ void engine_redistribute(struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS /* Verify that all parts are in the right place. */ - for (int k = 0; k < nr_parts; k++) { - int cid = cell_getid(cdim, parts_new[k].x[0] * iwidth[0], - parts_new[k].x[1] * iwidth[1], - parts_new[k].x[2] * iwidth[2]); + for (size_t k = 0; k < nr_parts; k++) { + const int cid = cell_getid(cdim, parts_new[k].x[0] * iwidth[0], + parts_new[k].x[1] * iwidth[1], + parts_new[k].x[2] * iwidth[2]); if (cells[cid].nodeID != nodeID) - error("Received particle (%i) that does not belong here (nodeID=%i).", k, + error("Received particle (%zu) that does not belong here (nodeID=%i).", k, cells[cid].nodeID); } @@ -567,7 +567,7 @@ void engine_redistribute(struct engine *e) { for (size_t k = 0; k < nr_parts; ++k) { if (parts_new[k].gpart != NULL && - parts_new[k].gpart->id_or_neg_offset != -k) { + parts_new[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) { error("Linking problem !"); } } @@ -857,7 +857,7 @@ void engine_exchange_cells(struct engine *e) { #ifdef WITH_MPI struct space *s = e->s; - struct cell *cells = s->cells; + struct cell *cells = s->cells_top; const int nr_cells = s->nr_cells; const int nr_proxies = e->nr_proxies; int offset[nr_cells]; @@ -1017,7 +1017,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, /* Put the parts and gparts into the corresponding proxies. 
*/ for (size_t k = 0; k < *Npart; k++) { /* Get the target node and proxy ID. */ - const int node_id = e->s->cells[ind_part[k]].nodeID; + const int node_id = e->s->cells_top[ind_part[k]].nodeID; if (node_id < 0 || node_id >= e->nr_nodes) error("Bad node ID %i.", node_id); const int pid = e->proxy_ind[node_id]; @@ -1041,7 +1041,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, &s->xparts[offset_parts + k], 1); } for (size_t k = 0; k < *Ngpart; k++) { - const int node_id = e->s->cells[ind_gpart[k]].nodeID; + const int node_id = e->s->cells_top[ind_gpart[k]].nodeID; if (node_id < 0 || node_id >= e->nr_nodes) error("Bad node ID %i.", node_id); const int pid = e->proxy_ind[node_id]; @@ -1246,7 +1246,7 @@ void engine_make_gravity_tasks(struct engine *e) { struct space *s = e->s; struct scheduler *sched = &e->sched; const int nodeID = e->nodeID; - struct cell *cells = s->cells; + struct cell *cells = s->cells_top; const int nr_cells = s->nr_cells; for (int cid = 0; cid < nr_cells; ++cid) { @@ -1301,7 +1301,7 @@ void engine_make_hydroloop_tasks(struct engine *e) { struct scheduler *sched = &e->sched; const int nodeID = e->nodeID; const int *cdim = s->cdim; - struct cell *cells = s->cells; + struct cell *cells = s->cells_top; /* Run through the highest level of cells and add pairs. 
*/ for (int i = 0; i < cdim[0]; i++) { @@ -1429,11 +1429,11 @@ static inline void engine_make_gravity_dependencies(struct scheduler *sched, struct cell *c) { /* init --> gravity --> kick */ - scheduler_addunlock(sched, c->super->init, gravity); - scheduler_addunlock(sched, gravity, c->super->kick); + scheduler_addunlock(sched, c->gsuper->init, gravity); + scheduler_addunlock(sched, gravity, c->gsuper->kick); /* grav_up --> gravity ( --> kick) */ - scheduler_addunlock(sched, c->super->grav_up, gravity); + scheduler_addunlock(sched, c->gsuper->grav_up, gravity); } /** @@ -1475,10 +1475,10 @@ void engine_link_gravity_tasks(struct engine *e) { /* Gather the multipoles --> mm interaction --> kick */ scheduler_addunlock(sched, gather, t); - scheduler_addunlock(sched, t, t->ci->super->kick); + scheduler_addunlock(sched, t, t->ci->gsuper->kick); /* init --> mm interaction */ - scheduler_addunlock(sched, t->ci->super->init, t); + scheduler_addunlock(sched, t->ci->gsuper->init, t); } /* Self-interaction? 
*/ @@ -1496,7 +1496,7 @@ void engine_link_gravity_tasks(struct engine *e) { engine_make_gravity_dependencies(sched, t, t->ci); } - if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { + if (t->cj->nodeID == nodeID && t->ci->gsuper != t->cj->gsuper) { engine_make_gravity_dependencies(sched, t, t->cj); } @@ -1518,7 +1518,7 @@ void engine_link_gravity_tasks(struct engine *e) { engine_make_gravity_dependencies(sched, t, t->ci); } - if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { + if (t->cj->nodeID == nodeID && t->ci->gsuper != t->cj->gsuper) { engine_make_gravity_dependencies(sched, t, t->cj); } @@ -1814,7 +1814,7 @@ void engine_make_gravityrecursive_tasks(struct engine *e) { struct scheduler *sched = &e->sched; const int nodeID = e->nodeID; const int nr_cells = s->nr_cells; - struct cell *cells = s->cells; + struct cell *cells = s->cells_top; for (int k = 0; k < nr_cells; k++) { @@ -1847,7 +1847,7 @@ void engine_maketasks(struct engine *e) { struct space *s = e->s; struct scheduler *sched = &e->sched; - struct cell *cells = s->cells; + struct cell *cells = s->cells_top; const int nr_cells = s->nr_cells; const ticks tic = getticks(); @@ -1971,7 +1971,7 @@ void engine_marktasks_fixdt_mapper(void *map_data, int num_elements, /* Too much particle movement? */ if (t->tight && - (fmaxf(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin || + (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin || ci->dx_max > space_maxreldx * ci->h_max || cj->dx_max > space_maxreldx * cj->h_max)) *rebuild_space = 1; @@ -2043,7 +2043,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Too much particle movement? 
*/ if (t->tight && - (fmaxf(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin || + (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin || ci->dx_max > space_maxreldx * ci->h_max || cj->dx_max > space_maxreldx * cj->h_max)) *rebuild_space = 1; @@ -2289,7 +2289,7 @@ void engine_prepare(struct engine *e) { /* First drift all particles to the current time */ e->drift_all = 1; - threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells, + threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 1, e); /* Restore the default drifting policy */ @@ -2407,8 +2407,8 @@ void engine_collect_timestep(struct engine *e) { /* Collect the cell data. */ for (int k = 0; k < s->nr_cells; k++) - if (s->cells[k].nodeID == e->nodeID) { - struct cell *c = &s->cells[k]; + if (s->cells_top[k].nodeID == e->nodeID) { + struct cell *c = &s->cells_top[k]; /* Make the top-cells recurse */ engine_collect_kick(c); @@ -2461,8 +2461,8 @@ void engine_print_stats(struct engine *e) { /* Collect the cell data. 
*/ for (int k = 0; k < s->nr_cells; k++) - if (s->cells[k].nodeID == e->nodeID) { - struct cell *c = &s->cells[k]; + if (s->cells_top[k].nodeID == e->nodeID) { + struct cell *c = &s->cells_top[k]; mass += c->mass; e_kin += c->e_kin; e_int += c->e_int; @@ -2677,7 +2677,7 @@ void engine_step(struct engine *e) { /* Drift everybody to the snapshot position */ e->drift_all = 1; - threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells, + threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 1, e); /* Restore the default drifting policy */ @@ -2717,7 +2717,7 @@ void engine_step(struct engine *e) { } /* Drift only the necessary particles */ - threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells, + threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 1, e); /* Re-distribute the particles amongst the nodes? */ @@ -2821,7 +2821,7 @@ void engine_makeproxies(struct engine *e) { #ifdef WITH_MPI const int *cdim = e->s->cdim; const struct space *s = e->s; - struct cell *cells = s->cells; + struct cell *cells = s->cells_top; struct proxy *proxies = e->proxies; ticks tic = getticks(); @@ -2952,7 +2952,8 @@ void engine_split(struct engine *e, struct partition *initial_partition) { s->xparts = xparts_new; /* Re-link the gparts. */ - part_relink_gparts(s->parts, s->nr_parts, 0); + if (s->nr_parts > 0 && s->nr_gparts > 0) + part_relink_gparts(s->parts, s->nr_parts, 0); /* Re-allocate the local gparts. */ if (e->verbose) @@ -2968,7 +2969,8 @@ void engine_split(struct engine *e, struct partition *initial_partition) { s->gparts = gparts_new; /* Re-link the parts. 
*/ - part_relink_parts(s->gparts, s->nr_gparts, s->parts); + if (s->nr_parts > 0 && s->nr_gparts > 0) + part_relink_parts(s->gparts, s->nr_gparts, s->parts); #ifdef SWIFT_DEBUG_CHECKS @@ -2988,7 +2990,8 @@ void engine_split(struct engine *e, struct partition *initial_partition) { } for (size_t k = 0; k < s->nr_parts; ++k) { - if (s->parts[k].gpart != NULL && s->parts[k].gpart->id_or_neg_offset != -k) + if (s->parts[k].gpart != NULL && + s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) error("Linking problem !"); } diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h index d4249c46a3150a357aaecfb02f9251901d97a157..2415e20ac5eb68f1b773b990bab232707166903c 100644 --- a/src/gravity/Default/gravity.h +++ b/src/gravity/Default/gravity.h @@ -21,6 +21,7 @@ #define SWIFT_DEFAULT_GRAVITY_H #include <float.h> +#include "minmax.h" #include "potentials.h" /** @@ -41,16 +42,14 @@ gravity_compute_timestep_external(const struct external_potential* potential, float dt = FLT_MAX; #ifdef EXTERNAL_POTENTIAL_POINTMASS - dt = - fminf(dt, external_gravity_pointmass_timestep(potential, phys_const, gp)); + dt = min(dt, external_gravity_pointmass_timestep(potential, phys_const, gp)); #endif #ifdef EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL - dt = fminf(dt, external_gravity_isothermalpotential_timestep(potential, - phys_const, gp)); + dt = min(dt, external_gravity_isothermalpotential_timestep(potential, + phys_const, gp)); #endif #ifdef EXTERNAL_POTENTIAL_DISK_PATCH - dt = fminf(dt, - external_gravity_disk_patch_timestep(potential, phys_const, gp)); + dt = min(dt, external_gravity_disk_patch_timestep(potential, phys_const, gp)); #endif return dt; } diff --git a/src/hydro/Default/hydro.h b/src/hydro/Default/hydro.h index f61bff55821809fe1f5da27c95d75afbecbc04cc..ccdd0cee32b9386eff54da655b75285b8e08a598 100644 --- a/src/hydro/Default/hydro.h +++ b/src/hydro/Default/hydro.h @@ -22,6 +22,7 @@ #include "adiabatic_index.h" #include "approx_math.h" #include 
"equation_of_state.h" +#include "minmax.h" #include <float.h> @@ -148,7 +149,7 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep( (p->force.u_dt != 0.0f) ? fabsf(const_max_u_change * p->u / p->force.u_dt) : FLT_MAX; - return fminf(dt_cfl, dt_u_change); + return min(dt_cfl, dt_u_change); } /** @@ -273,7 +274,7 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( const float tau = h / (2.f * const_viscosity_length * p->force.soundspeed); /* Viscosity source term */ - const float S = fmaxf(-normDiv_v, 0.f); + const float S = max(-normDiv_v, 0.f); /* Compute the particle's viscosity parameter time derivative */ const float alpha_dot = (const_viscosity_alpha_min - p->alpha) / tau + diff --git a/src/hydro/Default/hydro_iact.h b/src/hydro/Default/hydro_iact.h index 51fa7d07229f86918ef2d7019a9708110cef02e3..7b1c8c3b91ce917af46efc28f6001a4d47747e2a 100644 --- a/src/hydro/Default/hydro_iact.h +++ b/src/hydro/Default/hydro_iact.h @@ -395,7 +395,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /* Compute the relative velocity. (This is 0 if the particles move away from * each other and negative otherwise) */ - omega_ij = fminf(dvdr, 0.f); + omega_ij = min(dvdr, 0.f); /* Compute signal velocity */ v_sig = pi->force.soundspeed + pj->force.soundspeed - 2.0f * omega_ij; @@ -441,8 +441,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( pj->force.h_dt -= mi * dvdr / rhoi * wj_dr; /* Update the signal velocity. 
*/ - pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); - pj->force.v_sig = fmaxf(pj->force.v_sig, v_sig); + pi->force.v_sig = max(pi->force.v_sig, v_sig); + pj->force.v_sig = max(pj->force.v_sig, v_sig); } /** @@ -635,8 +635,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( pj[k]->force.u_dt += pju_dt.f[k]; pi[k]->force.h_dt -= pih_dt.f[k]; pj[k]->force.h_dt -= pjh_dt.f[k]; - pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]); - pj[k]->force.v_sig = fmaxf(pj[k]->force.v_sig, v_sig.f[k]); + pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]); + pj[k]->force.v_sig = max(pj[k]->force.v_sig, v_sig.f[k]); for (j = 0; j < 3; j++) { pi[k]->a_hydro[j] -= pia[j].f[k]; pj[k]->a_hydro[j] += pja[j].f[k]; @@ -696,7 +696,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Compute the relative velocity. (This is 0 if the particles move away from * each other and negative otherwise) */ - omega_ij = fminf(dvdr, 0.f); + omega_ij = min(dvdr, 0.f); /* Compute signal velocity */ v_sig = pi->force.soundspeed + pj->force.soundspeed - 2.0f * omega_ij; @@ -737,7 +737,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->force.h_dt -= mj * dvdr / rhoj * wi_dr; /* Update the signal velocity. 
*/ - pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); + pi->force.v_sig = max(pi->force.v_sig, v_sig); } /** @@ -920,7 +920,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( for (k = 0; k < VEC_SIZE; k++) { pi[k]->force.u_dt += piu_dt.f[k]; pi[k]->force.h_dt -= pih_dt.f[k]; - pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]); + pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]); for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k]; } diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index 0108e0663c0d84ff6b5698456f6be34d5ee08c14..8a4edfe62f59a3fae551fdb65f46987509f89251 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -20,6 +20,8 @@ #ifndef SWIFT_GADGET2_HYDRO_IACT_H #define SWIFT_GADGET2_HYDRO_IACT_H +#include "minmax.h" + /** * @brief SPH interaction functions following the Gadget-2 version of SPH. * @@ -641,8 +643,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( } pi[k]->force.h_dt -= pih_dt.f[k]; pj[k]->force.h_dt -= pjh_dt.f[k]; - pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]); - pj[k]->force.v_sig = fmaxf(pj[k]->force.v_sig, v_sig.f[k]); + pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]); + pj[k]->force.v_sig = max(pj[k]->force.v_sig, v_sig.f[k]); pi[k]->entropy_dt += entropy_dt.f[k] * mj.f[k]; pj[k]->entropy_dt += entropy_dt.f[k] * mi.f[k]; } @@ -900,7 +902,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( for (k = 0; k < VEC_SIZE; k++) { for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k]; pi[k]->force.h_dt -= pih_dt.f[k]; - pi[k]->force.v_sig = fmaxf(pi[k]->force.v_sig, v_sig.f[k]); + pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]); pi[k]->entropy_dt += entropy_dt.f[k]; } diff --git a/src/hydro/Gizmo/hydro.h b/src/hydro/Gizmo/hydro.h index e24a44529dc1907c9ceadeedfbfc9e49c308bcda..9dab5d7fd96a833ba9ea56889139c04000634645 100644 --- 
a/src/hydro/Gizmo/hydro.h +++ b/src/hydro/Gizmo/hydro.h @@ -21,6 +21,7 @@ #include "adiabatic_index.h" #include "approx_math.h" #include "hydro_gradients.h" +#include "minmax.h" /** * @brief Computes the hydro time-step of a given particle diff --git a/src/hydro/Gizmo/hydro_iact.h b/src/hydro/Gizmo/hydro_iact.h index 79973364617bb04855115bff9bfbf3808f46d04f..cf2b9a223b49c3ce2fbd6874b83c523e8213a5ce 100644 --- a/src/hydro/Gizmo/hydro_iact.h +++ b/src/hydro/Gizmo/hydro_iact.h @@ -242,14 +242,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common( if (dvdotdx > 0.) { vmax -= dvdotdx / r; } - pi->timestepvars.vmax = fmaxf(pi->timestepvars.vmax, vmax); + pi->timestepvars.vmax = max(pi->timestepvars.vmax, vmax); if (mode == 1) { - pj->timestepvars.vmax = fmaxf(pj->timestepvars.vmax, vmax); + pj->timestepvars.vmax = max(pj->timestepvars.vmax, vmax); } /* The flux will be exchanged using the smallest time step of the two * particles */ - mindt = fminf(dti, dtj); + mindt = min(dti, dtj); dti = mindt; dtj = mindt; diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h index a8a855d9db81f6927c1d8b45410a57d50a8366de..9e2028c978dc7cad03cfba17931f645bbfbfe1a0 100644 --- a/src/hydro/Minimal/hydro_iact.h +++ b/src/hydro/Minimal/hydro_iact.h @@ -34,6 +34,7 @@ */ #include "adiabatic_index.h" +#include "minmax.h" /** * @brief Density loop @@ -161,7 +162,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( (pi->v[2] - pj->v[2]) * dx[2]; /* Are the particles moving towards each others ? */ - const float omega_ij = fminf(dvdr, 0.f); + const float omega_ij = min(dvdr, 0.f); const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ /* Compute sound speeds and signal velocity */ @@ -212,8 +213,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( pj->force.h_dt -= mi * dvdr * r_inv / rhoi * wj_dr; /* Update the signal velocity. 
*/ - pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); - pj->force.v_sig = fmaxf(pj->force.v_sig, v_sig); + pi->force.v_sig = max(pi->force.v_sig, v_sig); + pj->force.v_sig = max(pj->force.v_sig, v_sig); } /** @@ -272,7 +273,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( (pi->v[2] - pj->v[2]) * dx[2]; /* Are the particles moving towards each others ? */ - const float omega_ij = fminf(dvdr, 0.f); + const float omega_ij = min(dvdr, 0.f); const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */ /* Compute sound speeds and signal velocity */ @@ -315,7 +316,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->force.h_dt -= mj * dvdr * r_inv / rhoj * wi_dr; /* Update the signal velocity. */ - pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); + pi->force.v_sig = max(pi->force.v_sig, v_sig); } /** diff --git a/src/minmax.h b/src/minmax.h index 8000df6edf93a30b964c578c6f25d324fed3f4cf..9d92cd71d849dba615fdb05bc342014e0593d989 100644 --- a/src/minmax.h +++ b/src/minmax.h @@ -24,11 +24,11 @@ * * This macro evaluates its arguments exactly once. */ -#define min(a, b) \ - ({ \ - __typeof__(a) _a = (a); \ - __typeof__(b) _b = (b); \ - _a < _b ? _a : _b; \ +#define min(a, b) \ + ({ \ + const __typeof__(a) _a = (a); \ + const __typeof__(b) _b = (b); \ + _a < _b ? _a : _b; \ }) /** @@ -36,11 +36,11 @@ * * This macro evaluates its arguments exactly once. */ -#define max(a, b) \ - ({ \ - __typeof__(a) _a = (a); \ - __typeof__(b) _b = (b); \ - _a > _b ? _a : _b; \ +#define max(a, b) \ + ({ \ + const __typeof__(a) _a = (a); \ + const __typeof__(b) _b = (b); \ + _a > _b ? 
_a : _b; \ }) #endif /* SWIFT_MINMAX_H */ diff --git a/src/partition.c b/src/partition.c index e216e12a5a23457b39b53070de3d84a2f257b927..8d17bedf0aaeadc64044b12ffe1bb8887b02d83e 100644 --- a/src/partition.c +++ b/src/partition.c @@ -143,7 +143,7 @@ static void split_vector(struct space *s, int nregions, int *samplecells) { select = l; } } - s->cells[n++].nodeID = select; + s->cells_top[n++].nodeID = select; } } } @@ -274,7 +274,7 @@ static void accumulate_counts(struct space *s, int *counts) { */ static void split_metis(struct space *s, int nregions, int *celllist) { - for (int i = 0; i < s->nr_cells; i++) s->cells[i].nodeID = celllist[i]; + for (int i = 0; i < s->nr_cells; i++) s->cells_top[i].nodeID = celllist[i]; } #endif @@ -419,7 +419,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, /* Create weight arrays using task ticks for vertices and edges (edges * assume the same graph structure as used in the part_ calls). */ int nr_cells = s->nr_cells; - struct cell *cells = s->cells; + struct cell *cells = s->cells_top; float wscale = 1e-3, vscale = 1e-3, wscale_buff = 0.0; int wtot = 0; int wmax = 1e9 / nr_nodes; @@ -795,7 +795,7 @@ void partition_initial_partition(struct partition *initial_partition, /* Run through the cells and set their nodeID. 
*/ // message("s->dim = [%e,%e,%e]", s->dim[0], s->dim[1], s->dim[2]); for (k = 0; k < s->nr_cells; k++) { - c = &s->cells[k]; + c = &s->cells_top[k]; for (j = 0; j < 3; j++) ind[j] = c->loc[j] / s->dim[j] * initial_partition->grid[j]; c->nodeID = ind[0] + @@ -1037,10 +1037,10 @@ static int check_complete(struct space *s, int verbose, int nregions) { int failed = 0; for (int i = 0; i < nregions; i++) present[i] = 0; for (int i = 0; i < s->nr_cells; i++) { - if (s->cells[i].nodeID <= nregions) - present[s->cells[i].nodeID]++; + if (s->cells_top[i].nodeID <= nregions) + present[s->cells_top[i].nodeID]++; else - message("Bad nodeID: %d", s->cells[i].nodeID); + message("Bad nodeID: %d", s->cells_top[i].nodeID); } for (int i = 0; i < nregions; i++) { if (!present[i]) { @@ -1085,13 +1085,13 @@ int partition_space_to_space(double *oldh, double *oldcdim, int *oldnodeIDs, for (int k = 0; k < s->cdim[2]; k++) { /* Scale indices to old cell space. */ - int ii = rint(i * s->iwidth[0] * oldh[0]); - int jj = rint(j * s->iwidth[1] * oldh[1]); - int kk = rint(k * s->iwidth[2] * oldh[2]); + const int ii = rint(i * s->iwidth[0] * oldh[0]); + const int jj = rint(j * s->iwidth[1] * oldh[1]); + const int kk = rint(k * s->iwidth[2] * oldh[2]); - int cid = cell_getid(s->cdim, i, j, k); - int oldcid = cell_getid(oldcdim, ii, jj, kk); - s->cells[cid].nodeID = oldnodeIDs[oldcid]; + const int cid = cell_getid(s->cdim, i, j, k); + const int oldcid = cell_getid(oldcdim, ii, jj, kk); + s->cells_top[cid].nodeID = oldnodeIDs[oldcid]; if (oldnodeIDs[oldcid] > nr_nodes) nr_nodes = oldnodeIDs[oldcid]; } diff --git a/src/queue.c b/src/queue.c index 38f8620fdc75d744df31513792e96323dbf83647..af4dfa3c94470814d4f6e7f53687a2fcba69d6dd 100644 --- a/src/queue.c +++ b/src/queue.c @@ -188,7 +188,7 @@ struct task *queue_gettask(struct queue *q, const struct task *prev, /* Set some pointers we will use often. 
*/ int *qtid = q->tid; struct task *qtasks = q->tasks; - const int qcount = q->count; + const int old_qcount = q->count; /* Data for the sliding window in which to try the task with the best overlap with the previous task. */ @@ -201,7 +201,7 @@ struct task *queue_gettask(struct queue *q, const struct task *prev, int ind = -1; /* Loop over the queue entries. */ - for (int k = 0; k < qcount; k++) { + for (int k = 0; k < old_qcount; k++) { if (k < queue_search_window) { window[window_count].ind = k; window[window_count].tid = qtid[k]; diff --git a/src/riemann/riemann_exact.h b/src/riemann/riemann_exact.h index 9763d9f0d12da32d9142c24481105b9f139be588..10dfe56ef35a82e721a715bbb8c7a71979b8e6ce 100644 --- a/src/riemann/riemann_exact.h +++ b/src/riemann/riemann_exact.h @@ -31,6 +31,7 @@ #include <float.h> #include "adiabatic_index.h" +#include "minmax.h" #include "riemann_vacuum.h" /** @@ -145,12 +146,12 @@ __attribute__((always_inline)) INLINE static float riemann_guess_p( float pguess, pmin, pmax, qmax; float ppv; - pmin = fminf(WL[4], WR[4]); - pmax = fmaxf(WL[4], WR[4]); + pmin = min(WL[4], WR[4]); + pmax = max(WL[4], WR[4]); qmax = pmax / pmin; ppv = 0.5f * (WL[4] + WR[4]) - 0.125f * (vR - vL) * (WL[0] + WR[0]) * (aL + aR); - ppv = fmaxf(1.e-8f, ppv); + ppv = max(1.e-8f, ppv); if (qmax <= 2.0f && pmin <= ppv && ppv <= pmax) { pguess = ppv; } else { @@ -171,7 +172,7 @@ __attribute__((always_inline)) INLINE static float riemann_guess_p( value for pressure (...). 
Thus in order to avoid negative guess values we introduce the small positive constant _tolerance" */ - pguess = fmaxf(1.e-8f, pguess); + pguess = max(1.e-8f, pguess); return pguess; } diff --git a/src/riemann/riemann_hllc.h b/src/riemann/riemann_hllc.h index fdc22ce05b8d63bdba66e530d1a5a968801a9f10..b8b1239d7799221c98522c06631aba5cabe69183 100644 --- a/src/riemann/riemann_hllc.h +++ b/src/riemann/riemann_hllc.h @@ -21,6 +21,7 @@ #define SWIFT_RIEMANN_HLLC_H #include "adiabatic_index.h" +#include "minmax.h" #include "riemann_vacuum.h" __attribute__((always_inline)) INLINE static void riemann_solve_for_flux( @@ -57,7 +58,7 @@ __attribute__((always_inline)) INLINE static void riemann_solve_for_flux( rhobar = 0.5 * (WL[0] + WR[0]); abar = 0.5 * (aL + aR); pPVRS = 0.5 * (WL[4] + WR[4]) - 0.5 * (uR - uL) * rhobar * abar; - pstar = fmaxf(0., pPVRS); + pstar = max(0., pPVRS); /* STEP 2: wave speed estimates all these speeds are along the interface normal, since uL and uR are */ diff --git a/src/runner.c b/src/runner.c index d3fc7bc9e04596bc2fb848b14c1c1cde56cd3fe7..44ab5d5d3f31b6a9b6a1d1fdbb9bfea17419536a 100644 --- a/src/runner.c +++ b/src/runner.c @@ -58,6 +58,18 @@ #include "timers.h" #include "timestep.h" +/** + * @brief Entry in a list of sorted indices. + */ +struct entry { + + /*! Distance on the axis */ + float d; + + /*! Particle index */ + int i; +}; + /* Orientation of the cell pairs */ const double runner_shift[13][3] = { {5.773502691896258e-01, 5.773502691896258e-01, 5.773502691896258e-01}, @@ -811,9 +823,8 @@ static void runner_do_drift(struct cell *c, struct engine *e) { /* Recurse. 
*/ runner_do_drift(cp, e); - /* Collect */ - dx_max = fmaxf(dx_max, cp->dx_max); - h_max = fmaxf(h_max, cp->h_max); + dx_max = max(dx_max, cp->dx_max); + h_max = max(h_max, cp->h_max); mass += cp->mass; e_kin += cp->e_kin; e_int += cp->e_int; @@ -1141,7 +1152,7 @@ void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) { // if(ti_end < ti_current) error("Received invalid particle !"); ti_end_min = min(ti_end_min, ti_end); ti_end_max = max(ti_end_max, ti_end); - h_max = fmaxf(h_max, parts[k].h); + h_max = max(h_max, parts[k].h); } for (size_t k = 0; k < nr_gparts; k++) { const int ti_end = gparts[k].ti_end; @@ -1159,7 +1170,7 @@ void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) { runner_do_recv_cell(r, c->progeny[k], 0); ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); - h_max = fmaxf(h_max, c->progeny[k]->h_max); + h_max = max(h_max, c->progeny[k]->h_max); } } } diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 376400926432a9e9b6b9c736260dc3e119c2c64d..3c968cbf7d955198ad6bb44ab70e93af17735e99 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -1739,7 +1739,7 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return; /* Get the cell dimensions. */ - const float h = fminf(ci->width[0], fminf(ci->width[1], ci->width[2])); + const float h = min(ci->width[0], min(ci->width[1], ci->width[2])); /* Get the type of pair if not specified explicitly. */ // if ( sid < 0 ) @@ -1748,7 +1748,7 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, /* Recurse? */ if (ci->split && cj->split && - fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max < + max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max < h / 2) { /* Different types of flags. 
*/ @@ -2023,7 +2023,7 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return; /* Get the cell dimensions. */ - const float h = fminf(ci->width[0], fminf(ci->width[1], ci->width[2])); + const float h = min(ci->width[0], min(ci->width[1], ci->width[2])); /* Get the type of pair if not specified explicitly. */ // if ( sid < 0 ) @@ -2032,7 +2032,7 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, /* Recurse? */ if (ci->split && cj->split && - fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max < + max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max < h / 2) { /* Different types of flags. */ @@ -2336,11 +2336,11 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, else { /* Get the cell dimensions. */ - const float h = fminf(ci->width[0], fminf(ci->width[1], ci->width[2])); + const float h = min(ci->width[0], min(ci->width[1], ci->width[2])); /* Recurse? */ if (ci->split && cj->split && - fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max < + max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max < h / 2) { /* Get the type of pair if not specified explicitly. 
*/ diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index a220ad1794d23999ff16752e797a499071fa2e65..0fcd2d2e80a72b92588acd5b8275b9dafc68df45 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -488,7 +488,7 @@ static void runner_do_grav_mm(struct runner *r, struct cell *ci, int timer) { /* Recover the list of top-level cells */ const struct engine *e = r->e; - struct cell *cells = e->s->cells; + struct cell *cells = e->s->cells_top; const int nr_cells = e->s->nr_cells; const int ti_current = e->ti_current; const double max_d = diff --git a/src/scheduler.c b/src/scheduler.c index ef4d19107fb48684ca299f286436a155a6fe0151..fa065879f7daeb60cfeeb67e52c64eb2036cf3cb 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -152,8 +152,9 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { if (ci->split) { /* Make a sub? */ - if (scheduler_dosub && (ci->count * ci->count < space_subsize || - ci->gcount * ci->gcount < space_subsize)) { + if (scheduler_dosub && + ((ci->count > 0 && ci->count < space_subsize / ci->count) || + (ci->gcount > 0 && ci->gcount < space_subsize / ci->gcount))) { /* convert to a self-subtask. */ t->type = task_type_sub_self; @@ -763,8 +764,9 @@ void scheduler_set_unlocks(struct scheduler *s) { t->unlock_tasks = &s->unlocks[offsets[k]]; } +#ifdef SWIFT_DEBUG_CHECKS /* Verify that there are no duplicate unlocks. */ - /* for (int k = 0; k < s->nr_tasks; k++) { + for (int k = 0; k < s->nr_tasks; k++) { struct task *t = &s->tasks[k]; for (int i = 0; i < t->nr_unlock_tasks; i++) { for (int j = i + 1; j < t->nr_unlock_tasks; j++) { @@ -772,7 +774,8 @@ void scheduler_set_unlocks(struct scheduler *s) { error("duplicate unlock!"); } } - } */ + } +#endif /* Clean up. */ free(counts); @@ -861,9 +864,11 @@ void scheduler_reset(struct scheduler *s, int size) { if (s->tasks_ind != NULL) free(s->tasks_ind); /* Allocate the new lists. 
*/ - if ((s->tasks = (struct task *)malloc(sizeof(struct task) * size)) == - NULL || - (s->tasks_ind = (int *)malloc(sizeof(int) * size)) == NULL) + if (posix_memalign((void *)&s->tasks, task_align, + size * sizeof(struct task)) != 0) + error("Failed to allocate task array."); + + if ((s->tasks_ind = (int *)malloc(sizeof(int) * size)) == NULL) error("Failed to allocate task lists."); } @@ -917,7 +922,7 @@ void scheduler_reweight(struct scheduler *s) { (sizeof(int) * 8 - intrinsics_clz(t->ci->count)); break; case task_type_self: - t->weight += 1 * t->ci->count * t->ci->count; + t->weight += 1 * wscale * t->ci->count * t->ci->count; break; case task_type_pair: if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) @@ -962,7 +967,7 @@ void scheduler_reweight(struct scheduler *s) { // clocks_from_ticks( getticks() - tic ), clocks_getunit()); /* int min = tasks[0].weight, max = tasks[0].weight; - for ( k = 1 ; k < nr_tasks ; k++ ) + for ( int k = 1 ; k < nr_tasks ; k++ ) if ( tasks[k].weight < min ) min = tasks[k].weight; else if ( tasks[k].weight > max ) @@ -1104,10 +1109,20 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_pair: case task_type_sub_pair: - qid = t->ci->super->owner; - if (qid < 0 || - s->queues[qid].count > s->queues[t->cj->super->owner].count) - qid = t->cj->super->owner; + if (t->subtype == task_subtype_grav) { + + qid = t->ci->gsuper->owner; + if (qid < 0 || + s->queues[qid].count > s->queues[t->cj->gsuper->owner].count) + qid = t->cj->gsuper->owner; + + } else { + + qid = t->ci->super->owner; + if (qid < 0 || + s->queues[qid].count > s->queues[t->cj->super->owner].count) + qid = t->cj->super->owner; + } break; case task_type_recv: #ifdef WITH_MPI diff --git a/src/space.c b/src/space.c index de6ac7226f01b815d39f3c689bdf23d7ad234c20..cdd5958cbc515003f4a86a41c9a7075fa3b4364f 100644 --- a/src/space.c +++ b/src/space.c @@ -88,6 +88,28 @@ const int sortlistID[27] = { /* ( 1 , 1 , 0 ) */ 1, /* ( 1 , 1 , 1 ) */ 0}; 
+/** + * @brief Interval stack necessary for parallel particle sorting. + */ +struct qstack { + volatile ptrdiff_t i, j; + volatile int min, max; + volatile int ready; +}; + +/** + * @brief Parallel particle-sorting stack + */ +struct parallel_sort { + struct part *parts; + struct gpart *gparts; + struct xpart *xparts; + int *ind; + struct qstack *stack; + unsigned int stack_size; + volatile unsigned int first, last, waiting; +}; + /** * @brief Get the shift-id of the given pair of cells, swapping them * if need be. @@ -99,7 +121,6 @@ const int sortlistID[27] = { * * @return The shift ID and set shift, may or may not swap ci and cj. */ - int space_getsid(struct space *s, struct cell **ci, struct cell **cj, double *shift) { @@ -138,8 +159,9 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj, /** * @brief Recursively dismantle a cell tree. * + * @param s The #space. + * @param c The #cell to recycle. */ - void space_rebuild_recycle(struct space *s, struct cell *c) { if (c->split) @@ -152,28 +174,27 @@ void space_rebuild_recycle(struct space *s, struct cell *c) { } /** - * @brief Re-build the cell grid. + * @brief Re-build the top-level cell grid. * * @param s The #space. * @param cell_max Maximum cell edge length. * @param verbose Print messages to stdout or not. */ - void space_regrid(struct space *s, double cell_max, int verbose) { const size_t nr_parts = s->nr_parts; - struct cell *restrict c; - ticks tic = getticks(); + const ticks tic = getticks(); const int ti_current = (s->e != NULL) ? s->e->ti_current : 0; /* Run through the cells and get the current h_max. 
*/ // tic = getticks(); float h_max = s->cell_min / kernel_gamma / space_stretch; if (nr_parts > 0) { - if (s->cells != NULL) { + if (s->cells_top != NULL) { for (int k = 0; k < s->nr_cells; k++) { - if (s->cells[k].nodeID == engine_rank && s->cells[k].h_max > h_max) { - h_max = s->cells[k].h_max; + if (s->cells_top[k].nodeID == engine_rank && + s->cells_top[k].h_max > h_max) { + h_max = s->cells_top[k].h_max; } } } else { @@ -197,10 +218,10 @@ void space_regrid(struct space *s, double cell_max, int verbose) { if (verbose) message("h_max is %.3e (cell_max=%.3e).", h_max, cell_max); /* Get the new putative cell dimensions. */ - int cdim[3]; - for (int k = 0; k < 3; k++) - cdim[k] = - floor(s->dim[k] / fmax(h_max * kernel_gamma * space_stretch, cell_max)); + const int cdim[3] = { + floor(s->dim[0] / fmax(h_max * kernel_gamma * space_stretch, cell_max)), + floor(s->dim[1] / fmax(h_max * kernel_gamma * space_stretch, cell_max)), + floor(s->dim[2] / fmax(h_max * kernel_gamma * space_stretch, cell_max))}; /* Check if we have enough cells for periodicity. */ if (s->periodic && (cdim[0] < 3 || cdim[1] < 3 || cdim[2] < 3)) @@ -239,7 +260,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) { for (int j = 0; j < s->cdim[1]; j++) { for (int k = 0; k < s->cdim[2]; k++) { cid = cell_getid(oldcdim, i, j, k); - oldnodeIDs[cid] = s->cells[cid].nodeID; + oldnodeIDs[cid] = s->cells_top[cid].nodeID; } } } @@ -249,16 +270,16 @@ void space_regrid(struct space *s, double cell_max, int verbose) { /* Do we need to re-build the upper-level cells? */ // tic = getticks(); - if (s->cells == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || + if (s->cells_top == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2]) { /* Free the old cells, if they were allocated. 
*/ - if (s->cells != NULL) { + if (s->cells_top != NULL) { for (int k = 0; k < s->nr_cells; k++) { - space_rebuild_recycle(s, &s->cells[k]); - if (s->cells[k].sort != NULL) free(s->cells[k].sort); + space_rebuild_recycle(s, &s->cells_top[k]); + if (s->cells_top[k].sort != NULL) free(s->cells_top[k].sort); } - free(s->cells); + free(s->cells_top); s->maxdepth = 0; } @@ -268,22 +289,23 @@ void space_regrid(struct space *s, double cell_max, int verbose) { s->width[k] = s->dim[k] / cdim[k]; s->iwidth[k] = 1.0 / s->width[k]; } - const float dmin = fminf(s->width[0], fminf(s->width[1], s->width[2])); + const float dmin = min(s->width[0], min(s->width[1], s->width[2])); /* Allocate the highest level of cells. */ s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2]; - if (posix_memalign((void *)&s->cells, cell_align, + if (posix_memalign((void *)&s->cells_top, cell_align, s->nr_cells * sizeof(struct cell)) != 0) error("Failed to allocate cells."); - bzero(s->cells, s->nr_cells * sizeof(struct cell)); + bzero(s->cells_top, s->nr_cells * sizeof(struct cell)); for (int k = 0; k < s->nr_cells; k++) - if (lock_init(&s->cells[k].lock) != 0) error("Failed to init spinlock."); + if (lock_init(&s->cells_top[k].lock) != 0) + error("Failed to init spinlock."); /* Set the cell location and sizes. */ for (int i = 0; i < cdim[0]; i++) for (int j = 0; j < cdim[1]; j++) for (int k = 0; k < cdim[2]; k++) { - c = &s->cells[cell_getid(cdim, i, j, k)]; + struct cell *restrict c = &s->cells_top[cell_getid(cdim, i, j, k)]; c->loc[0] = i * s->width[0]; c->loc[1] = j * s->width[1]; c->loc[2] = k * s->width[2]; @@ -295,6 +317,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) { c->count = 0; c->gcount = 0; c->super = c; + c->gsuper = c; c->ti_old = ti_current; lock_init(&c->lock); } @@ -339,33 +362,35 @@ void space_regrid(struct space *s, double cell_max, int verbose) { free(oldnodeIDs); } #endif + + // message( "rebuilding upper-level cells took %.3f %s." 
, + // clocks_from_ticks(double)(getticks() - tic), clocks_getunit()); + } /* re-build upper-level cells? */ - // message( "rebuilding upper-level cells took %.3f %s." , - // clocks_from_ticks(double)(getticks() - tic), clocks_getunit()); - /* Otherwise, just clean up the cells. */ - else { + else { /* Otherwise, just clean up the cells. */ /* Free the old cells, if they were allocated. */ for (int k = 0; k < s->nr_cells; k++) { - space_rebuild_recycle(s, &s->cells[k]); - s->cells[k].sorts = NULL; - s->cells[k].nr_tasks = 0; - s->cells[k].nr_density = 0; - s->cells[k].nr_gradient = 0; - s->cells[k].nr_force = 0; - s->cells[k].density = NULL; - s->cells[k].gradient = NULL; - s->cells[k].force = NULL; - s->cells[k].dx_max = 0.0f; - s->cells[k].sorted = 0; - s->cells[k].count = 0; - s->cells[k].gcount = 0; - s->cells[k].init = NULL; - s->cells[k].extra_ghost = NULL; - s->cells[k].ghost = NULL; - s->cells[k].kick = NULL; - s->cells[k].super = &s->cells[k]; + space_rebuild_recycle(s, &s->cells_top[k]); + s->cells_top[k].sorts = NULL; + s->cells_top[k].nr_tasks = 0; + s->cells_top[k].nr_density = 0; + s->cells_top[k].nr_gradient = 0; + s->cells_top[k].nr_force = 0; + s->cells_top[k].density = NULL; + s->cells_top[k].gradient = NULL; + s->cells_top[k].force = NULL; + s->cells_top[k].dx_max = 0.0f; + s->cells_top[k].sorted = 0; + s->cells_top[k].count = 0; + s->cells_top[k].gcount = 0; + s->cells_top[k].init = NULL; + s->cells_top[k].extra_ghost = NULL; + s->cells_top[k].ghost = NULL; + s->cells_top[k].kick = NULL; + s->cells_top[k].super = &s->cells_top[k]; + s->cells_top[k].gsuper = &s->cells_top[k]; } s->maxdepth = 0; } @@ -383,7 +408,6 @@ void space_regrid(struct space *s, double cell_max, int verbose) { * @param verbose Print messages to stdout or not * */ - void space_rebuild(struct space *s, double cell_max, int verbose) { const ticks tic = getticks(); @@ -396,7 +420,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { size_t nr_parts = 
s->nr_parts; size_t nr_gparts = s->nr_gparts; - struct cell *restrict cells = s->cells; + struct cell *restrict cells_top = s->cells_top; const int ti_current = (s->e != NULL) ? s->e->ti_current : 0; const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]}; @@ -418,7 +442,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { p->x[j] -= dim[j]; ind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); - cells[ind[k]].count++; + cells_top[ind[k]].count++; } // message( "getting particle indices took %.3f %s." , // clocks_from_ticks(getticks() - tic), clocks_getunit()): @@ -438,9 +462,9 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { gp->x[j] -= dim[j]; gind[k] = cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]); - cells[gind[k]].gcount++; + cells_top[gind[k]].gcount++; } -// message( "getting particle indices took %.3f %s." , +// message( "getting g-particle indices took %.3f %s." , // clocks_from_ticks(getticks() - tic), clocks_getunit()); #ifdef WITH_MPI @@ -448,8 +472,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* Move non-local parts to the end of the list. */ const int local_nodeID = s->e->nodeID; for (size_t k = 0; k < nr_parts;) { - if (cells[ind[k]].nodeID != local_nodeID) { - cells[ind[k]].count -= 1; + if (cells_top[ind[k]].nodeID != local_nodeID) { + cells_top[ind[k]].count -= 1; nr_parts -= 1; const struct part tp = s->parts[k]; s->parts[k] = s->parts[nr_parts]; @@ -475,12 +499,12 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { #ifdef SWIFT_DEBUG_CHECKS /* Check that all parts are in the correct places. 
*/ for (size_t k = 0; k < nr_parts; k++) { - if (cells[ind[k]].nodeID != local_nodeID) { + if (cells_top[ind[k]].nodeID != local_nodeID) { error("Failed to move all non-local parts to send list"); } } for (size_t k = nr_parts; k < s->nr_parts; k++) { - if (cells[ind[k]].nodeID == local_nodeID) { + if (cells_top[ind[k]].nodeID == local_nodeID) { error("Failed to remove local parts from send list"); } } @@ -488,8 +512,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* Move non-local gparts to the end of the list. */ for (size_t k = 0; k < nr_gparts;) { - if (cells[gind[k]].nodeID != local_nodeID) { - cells[gind[k]].gcount -= 1; + if (cells_top[gind[k]].nodeID != local_nodeID) { + cells_top[gind[k]].gcount -= 1; nr_gparts -= 1; const struct gpart tp = s->gparts[k]; s->gparts[k] = s->gparts[nr_gparts]; @@ -513,12 +537,12 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { #ifdef SWIFT_DEBUG_CHECKS /* Check that all gparts are in the correct place (untested). 
*/ for (size_t k = 0; k < nr_gparts; k++) { - if (cells[gind[k]].nodeID != local_nodeID) { + if (cells_top[gind[k]].nodeID != local_nodeID) { error("Failed to move all non-local gparts to send list"); } } for (size_t k = nr_gparts; k < s->nr_gparts; k++) { - if (cells[gind[k]].nodeID == local_nodeID) { + if (cells_top[gind[k]].nodeID == local_nodeID) { error("Failed to remove local gparts from send list"); } } @@ -550,11 +574,11 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { const struct part *const p = &s->parts[k]; ind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); - cells[ind[k]].count += 1; + cells_top[ind[k]].count += 1; #ifdef SWIFT_DEBUG_CHECKS - if (cells[ind[k]].nodeID != local_nodeID) + if (cells_top[ind[k]].nodeID != local_nodeID) error("Received part that does not belong to me (nodeID=%i).", - cells[ind[k]].nodeID); + cells_top[ind[k]].nodeID); #endif } nr_parts = s->nr_parts; @@ -565,7 +589,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose); /* Re-link the gparts. */ - part_relink_gparts(s->parts, nr_parts, 0); + if (nr_parts > 0 && nr_gparts > 0) part_relink_gparts(s->parts, nr_parts, 0); #ifdef SWIFT_DEBUG_CHECKS /* Verify space_sort_struct. */ @@ -600,20 +624,24 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { const struct gpart *const p = &s->gparts[k]; gind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); - cells[gind[k]].gcount += 1; - /* if ( cells[ ind[k] ].nodeID != nodeID ) - error( "Received part that does not belong to me (nodeID=%i)." , cells[ - ind[k] ].nodeID ); */ + cells_top[gind[k]].gcount += 1; + +#ifdef SWIFT_DEBUG_CHECKS + if (cells_top[ind[k]].nodeID != s->e->nodeID) + error("Received part that does not belong to me (nodeID=%i).", + cells_top[ind[k]].nodeID); +#endif } nr_gparts = s->nr_gparts; #endif - /* Sort the parts according to their cells. 
*/ + /* Sort the gparts according to their cells. */ space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose); /* Re-link the parts. */ - part_relink_parts(s->gparts, nr_gparts, s->parts); + if (nr_parts > 0 && nr_gparts > 0) + part_relink_parts(s->gparts, nr_gparts, s->parts); /* We no longer need the indices as of here. */ free(gind); @@ -636,7 +664,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { for (size_t k = 0; k < nr_parts; ++k) { if (s->parts[k].gpart != NULL && - s->parts[k].gpart->id_or_neg_offset != -k) { + s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) { error("Linking problem !"); } } @@ -648,7 +676,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { struct xpart *xfinger = s->xparts; struct gpart *gfinger = s->gparts; for (int k = 0; k < s->nr_cells; k++) { - struct cell *restrict c = &cells[k]; + struct cell *restrict c = &cells_top[k]; c->ti_old = ti_current; c->parts = finger; c->xparts = xfinger; @@ -662,7 +690,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* At this point, we have the upper-level cells, old or new. Now make sure that the parts in each cell are ok. */ - space_split(s, cells, verbose); + space_split(s, cells_top, verbose); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -672,6 +700,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /** * @brief Split particles between cells of a hierarchy * + * This is done in parallel using threads in the #threadpool. + * * @param s The #space. * @param cells The cell hierarchy * @param verbose Are we talkative ? @@ -690,7 +720,7 @@ void space_split(struct space *s, struct cell *cells, int verbose) { /** * @brief Sort the particles and condensed particles according to the given - *indices. + * indices. * * @param s The #space. * @param ind The indices with respect to which the parts are sorted. 
@@ -699,7 +729,6 @@ void space_split(struct space *s, struct cell *cells, int verbose) { * @param max highest index. * @param verbose Are we talkative ? */ - void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, int verbose) { @@ -734,9 +763,9 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, #ifdef SWIFT_DEBUG_CHECKS /* Verify space_sort_struct. */ - for (int i = 1; i < N; i++) + for (size_t i = 1; i < N; i++) if (ind[i - 1] > ind[i]) - error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1, + error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1, ind[i - 1], i, ind[i], min, max); message("Sorting succeeded."); #endif @@ -876,8 +905,7 @@ void space_parts_sort_mapper(void *map_data, int num_elements, } /** - * @brief Sort the g-particles and condensed particles according to the given - *indices. + * @brief Sort the g-particles according to the given indices. * * @param s The #space. * @param ind The indices with respect to which the gparts are sorted. @@ -919,9 +947,9 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max, #ifdef SWIFT_DEBUG_CHECKS /* Verify space_sort_struct. */ - for (int i = 1; i < N; i++) + for (size_t i = 1; i < N; i++) if (ind[i - 1] > ind[i]) - error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1, + error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1, ind[i - 1], i, ind[i], min, max); message("Sorting succeeded."); #endif @@ -1059,7 +1087,6 @@ void space_gparts_sort_mapper(void *map_data, int num_elements, /** * @brief Mapping function to free the sorted indices buffers. */ - void space_map_clearsort(struct cell *c, void *data) { if (c->sort != NULL) { @@ -1075,21 +1102,17 @@ void space_map_clearsort(struct cell *c, void *data) { * @param fun Function pointer to apply on the cells. * @param data Data passed to the function fun. 
*/ - static void rec_map_parts(struct cell *c, void (*fun)(struct part *p, struct cell *c, void *data), void *data) { - - int k; - /* No progeny? */ if (!c->split) - for (k = 0; k < c->count; k++) fun(&c->parts[k], c, data); + for (int k = 0; k < c->count; k++) fun(&c->parts[k], c, data); /* Otherwise, recurse. */ else - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) rec_map_parts(c->progeny[k], fun, data); } @@ -1100,16 +1123,13 @@ static void rec_map_parts(struct cell *c, * @param fun Function pointer to apply on the cells. * @param data Data passed to the function fun. */ - void space_map_parts(struct space *s, void (*fun)(struct part *p, struct cell *c, void *data), void *data) { - int cid = 0; - /* Call the recursive function on all higher-level cells. */ - for (cid = 0; cid < s->nr_cells; cid++) - rec_map_parts(&s->cells[cid], fun, data); + for (int cid = 0; cid < s->nr_cells; cid++) + rec_map_parts(&s->cells_top[cid], fun, data); } /** @@ -1118,20 +1138,17 @@ void space_map_parts(struct space *s, * @param c The #cell we are working in. * @param fun Function pointer to apply on the cells. */ - static void rec_map_parts_xparts(struct cell *c, void (*fun)(struct part *p, struct xpart *xp, struct cell *c)) { - int k; - /* No progeny? */ if (!c->split) - for (k = 0; k < c->count; k++) fun(&c->parts[k], &c->xparts[k], c); + for (int k = 0; k < c->count; k++) fun(&c->parts[k], &c->xparts[k], c); /* Otherwise, recurse. */ else - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) rec_map_parts_xparts(c->progeny[k], fun); } @@ -1141,16 +1158,13 @@ static void rec_map_parts_xparts(struct cell *c, * @param s The #space we are working in. * @param fun Function pointer to apply on the particles in the cells. */ - void space_map_parts_xparts(struct space *s, void (*fun)(struct part *p, struct xpart *xp, struct cell *c)) { - int cid = 0; - /* Call the recursive function on all higher-level cells. 
*/ - for (cid = 0; cid < s->nr_cells; cid++) - rec_map_parts_xparts(&s->cells[cid], fun); + for (int cid = 0; cid < s->nr_cells; cid++) + rec_map_parts_xparts(&s->cells_top[cid], fun); } /** @@ -1161,16 +1175,12 @@ void space_map_parts_xparts(struct space *s, * @param fun Function pointer to apply on the cells. * @param data Data passed to the function fun. */ - static void rec_map_cells_post(struct cell *c, int full, void (*fun)(struct cell *c, void *data), void *data) { - - int k; - /* Recurse. */ if (c->split) - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) rec_map_cells_post(c->progeny[k], full, fun, data); @@ -1186,29 +1196,24 @@ static void rec_map_cells_post(struct cell *c, int full, * @param fun Function pointer to apply on the cells. * @param data Data passed to the function fun. */ - void space_map_cells_post(struct space *s, int full, void (*fun)(struct cell *c, void *data), void *data) { - int cid = 0; - /* Call the recursive function on all higher-level cells. */ - for (cid = 0; cid < s->nr_cells; cid++) - rec_map_cells_post(&s->cells[cid], full, fun, data); + for (int cid = 0; cid < s->nr_cells; cid++) + rec_map_cells_post(&s->cells_top[cid], full, fun, data); } static void rec_map_cells_pre(struct cell *c, int full, void (*fun)(struct cell *c, void *data), void *data) { - int k; - /* No progeny? */ if (full || !c->split) fun(c, data); /* Recurse. */ if (c->split) - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) rec_map_cells_pre(c->progeny[k], full, fun, data); } @@ -1224,28 +1229,29 @@ static void rec_map_cells_pre(struct cell *c, int full, void space_map_cells_pre(struct space *s, int full, void (*fun)(struct cell *c, void *data), void *data) { - int cid = 0; - /* Call the recursive function on all higher-level cells. 
*/ - for (cid = 0; cid < s->nr_cells; cid++) - rec_map_cells_pre(&s->cells[cid], full, fun, data); + for (int cid = 0; cid < s->nr_cells; cid++) + rec_map_cells_pre(&s->cells_top[cid], full, fun, data); } /** * @brief #threadpool mapper function to split cells if they contain * too many particles. + * + * @param map_data Pointer towards the top-cells. + * @param num_elements The number of cells to treat. + * @param extra_data Pointers to the #space. */ - void space_split_mapper(void *map_data, int num_elements, void *extra_data) { /* Unpack the inputs. */ struct space *s = (struct space *)extra_data; - struct cell *cells = (struct cell *)map_data; + struct cell *restrict cells_top = (struct cell *)map_data; struct engine *e = s->e; for (int ind = 0; ind < num_elements; ind++) { - struct cell *c = &cells[ind]; + struct cell *c = &cells_top[ind]; const int count = c->count; const int gcount = c->gcount; @@ -1290,6 +1296,8 @@ void space_split_mapper(void *map_data, int num_elements, void *extra_data) { temp->dx_max = 0.f; temp->nodeID = c->nodeID; temp->parent = c; + temp->super = NULL; + temp->gsuper = NULL; c->progeny[k] = temp; } @@ -1303,7 +1311,7 @@ void space_split_mapper(void *map_data, int num_elements, void *extra_data) { c->progeny[k] = NULL; } else { space_split_mapper(c->progeny[k], 1, s); - h_max = fmaxf(h_max, c->progeny[k]->h_max); + h_max = max(h_max, c->progeny[k]->h_max); ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); if (c->progeny[k]->maxdepth > maxdepth) @@ -1363,12 +1371,11 @@ void space_split_mapper(void *map_data, int num_elements, void *extra_data) { } /** - * @brief Return a used cell to the cell buffer. + * @brief Return a used cell to the buffer of unused sub-cells. * * @param s The #space. * @param c The #cell. */ - void space_recycle(struct space *s, struct cell *c) { /* Lock the space. 
*/ @@ -1384,8 +1391,8 @@ void space_recycle(struct space *s, struct cell *c) { bzero(c, sizeof(struct cell)); /* Hook this cell into the buffer. */ - c->next = s->cells_new; - s->cells_new = c; + c->next = s->cells_sub; + s->cells_sub = c; s->tot_cells -= 1; /* Unlock the space. */ @@ -1393,39 +1400,42 @@ void space_recycle(struct space *s, struct cell *c) { } /** - * @brief Get a new empty cell. + * @brief Get a new empty (sub-)#cell. + * + * If there are cells in the buffer, use the one at the head of the linked list. + * If we have no cells, allocate a new chunk of memory and pick one from there. * * @param s The #space. */ - struct cell *space_getcell(struct space *s) { - struct cell *c; - int k; - /* Lock the space. */ lock_lock(&s->lock); /* Is the buffer empty? */ - if (s->cells_new == NULL) { - if (posix_memalign((void *)&s->cells_new, cell_align, + if (s->cells_sub == NULL) { + if (posix_memalign((void *)&s->cells_sub, cell_align, space_cellallocchunk * sizeof(struct cell)) != 0) error("Failed to allocate more cells."); - bzero(s->cells_new, space_cellallocchunk * sizeof(struct cell)); - for (k = 0; k < space_cellallocchunk - 1; k++) - s->cells_new[k].next = &s->cells_new[k + 1]; - s->cells_new[space_cellallocchunk - 1].next = NULL; + + /* Zero everything for good measure */ + bzero(s->cells_sub, space_cellallocchunk * sizeof(struct cell)); + + /* Construct a linked list */ + for (int k = 0; k < space_cellallocchunk - 1; k++) + s->cells_sub[k].next = &s->cells_sub[k + 1]; + s->cells_sub[space_cellallocchunk - 1].next = NULL; } /* Pick off the next cell. */ - c = s->cells_new; - s->cells_new = c->next; + struct cell *c = s->cells_sub; + s->cells_sub = c->next; s->tot_cells += 1; /* Unlock the space. */ lock_unlock_blind(&s->lock); - /* Init some things in the cell. */ + /* Init some things in the cell we just got. 
*/ bzero(c, sizeof(struct cell)); c->nodeID = -1; if (lock_init(&c->lock) != 0 || lock_init(&c->glock) != 0) @@ -1507,7 +1517,6 @@ void space_init_gparts(struct space *s) { * parts with a cutoff below half the cell width are then split * recursively. */ - void space_init(struct space *s, const struct swift_params *params, double dim[3], struct part *parts, struct gpart *gparts, size_t Npart, size_t Ngpart, int periodic, int gravity, @@ -1530,7 +1539,6 @@ void space_init(struct space *s, const struct swift_params *params, s->gparts = gparts; s->cell_min = parser_get_param_double(params, "SPH:max_smoothing_length"); s->nr_queues = 1; /* Temporary value until engine construction */ - s->size_parts_foreign = 0; /* Get the constants for the scheduler */ space_maxsize = parser_get_opt_param_int(params, "Scheduler:cell_max_size", @@ -1647,8 +1655,8 @@ void space_link_cleanup(struct space *s) { */ void space_clean(struct space *s) { - for (int i = 0; i < s->nr_cells; ++i) cell_clean(&s->cells[i]); - free(s->cells); + for (int i = 0; i < s->nr_cells; ++i) cell_clean(&s->cells_top[i]); + free(s->cells_top); free(s->parts); free(s->xparts); free(s->gparts); diff --git a/src/space.h b/src/space.h index 90313be8dbe817d65fbd0e6a8c30c156747594b1..66d82c6f78a08447851a8dfdaea1231e5778693b 100644 --- a/src/space.h +++ b/src/space.h @@ -37,7 +37,6 @@ #include "space.h" /* Some constants. */ -#define space_maxdepth 10 #define space_cellallocchunk 1000 #define space_splitsize_default 400 #define space_maxsize_default 8000000 @@ -53,84 +52,85 @@ extern int space_subsize; /* Map shift vector to sortlist. */ extern const int sortlistID[27]; -/* Entry in a list of sorted indices. */ -struct entry { - float d; - int i; -}; - -/* The space in which the cells reside. */ +/** + * @brief The space in which the cells and particles reside. + */ struct space { - /* Spatial extent. */ + /*! Spatial extent. */ double dim[3]; - /* Cell widths. */ - double width[3], iwidth[3]; + /*! 
Is the space periodic? */ + int periodic; + + /*! Are we doing gravity? */ + int gravity; + + /*! Width of the top-level cells. */ + double width[3]; + + /*! Inverse of the top-level cell width */ + double iwidth[3]; - /* The minimum cell width. */ + /*! The minimum top-level cell width allowed. */ double cell_min; - /* Current maximum displacement for particles. */ + /*! Current maximum displacement for particles. */ float dx_max; - /* Number of cells. */ - int nr_cells, tot_cells; + /*! Space dimensions in number of top-cells. */ + int cdim[3]; - /* Space dimensions in number of cells. */ - int maxdepth, cdim[3]; + /*! Maximal depth reached by the tree */ + int maxdepth; - /* The (level 0) cells themselves. */ - struct cell *cells; + /*! Number of top-level cells. */ + int nr_cells; - /* Buffer of unused cells. */ - struct cell *cells_new; + /*! Total number of cells (top- and sub-) */ + int tot_cells; - /* The particle data (cells have pointers to this). */ - struct part *parts; - struct xpart *xparts; - struct gpart *gparts; + /*! The (level 0) cells themselves. */ + struct cell *cells_top; - /* The total number of parts in the space. */ + /*! Buffer of unused cells for the sub-cells. */ + struct cell *cells_sub; + + /*! The total number of parts in the space. */ size_t nr_parts, size_parts; + + /*! The total number of g-parts in the space. */ size_t nr_gparts, size_gparts; - /* Is the space periodic? */ - int periodic; + /*! The particle data (cells have pointers to this). */ + struct part *parts; - /* Are we doing gravity? */ - int gravity; + /*! The extended particle data (cells have pointers to this). */ + struct xpart *xparts; - /* General-purpose lock for this space. */ + /*! The g-particle data (cells have pointers to this). */ + struct gpart *gparts; + + /*! General-purpose lock for this space. */ swift_lock_type lock; - /* Number of queues in the system. */ + /*! Number of queues in the system. */ int nr_queues; - /* The associated engine. */ + /*! 
The associated engine. */ struct engine *e; - /* Buffers for parts that we will receive from foreign cells. */ +#ifdef WITH_MPI + + /*! Buffers for parts that we will receive from foreign cells. */ struct part *parts_foreign; size_t nr_parts_foreign, size_parts_foreign; + + /*! Buffers for g-parts that we will receive from foreign cells. */ struct gpart *gparts_foreign; size_t nr_gparts_foreign, size_gparts_foreign; -}; -/* Interval stack necessary for parallel particle sorting. */ -struct qstack { - volatile ptrdiff_t i, j; - volatile int min, max; - volatile int ready; -}; -struct parallel_sort { - struct part *parts; - struct gpart *gparts; - struct xpart *xparts; - int *ind; - struct qstack *stack; - unsigned int stack_size; - volatile unsigned int first, last, waiting; +#endif }; /* function prototypes. */ diff --git a/src/task.h b/src/task.h index 0b28dba5fd6fac6929ee492b29cdd64ed2073dcf..f070451fe4e79e0c16dc3dcca1ce145c08841c47 100644 --- a/src/task.h +++ b/src/task.h @@ -29,6 +29,8 @@ #include "cell.h" #include "cycle.h" +#define task_align 128 + /** * @brief The different task types. */ @@ -53,7 +55,7 @@ enum task_types { task_type_grav_external, task_type_cooling, task_type_count -}; +} __attribute__((packed)); /** * @brief The different task sub-types (for pairs, selfs and sub-tasks). @@ -66,7 +68,7 @@ enum task_subtypes { task_subtype_grav, task_subtype_tend, task_subtype_count -}; +} __attribute__((packed)); /** * @brief The type of particles/objects this task acts upon in a given cell. @@ -95,48 +97,48 @@ extern const char *subtaskID_names[]; */ struct task { - /*! Type of the task */ - enum task_types type; + /*! Pointers to the cells this task acts upon */ + struct cell *ci, *cj; - /*! Sub-type of the task (for the tasks that have one */ - enum task_subtypes subtype; + /*! List of tasks unlocked by this one */ + struct task **unlock_tasks; + + /*! Start and end time of this task */ + ticks tic, toc; + +#ifdef WITH_MPI + + /*! 
Buffer for this task's communications */ + void *buff; + + /*! MPI request corresponding to this task */ + MPI_Request req; + +#endif /*! Flags used to carry additional information (e.g. sort directions) */ int flags; - /*! Number of unsatisfied dependencies */ - int wait; - /*! Rank of a task in the order */ int rank; /*! Weight of the task */ int weight; - /*! Pointers to the cells this task acts upon */ - struct cell *ci, *cj; - /*! ID of the queue or runner owning this task */ - int rid; + short int rid; /*! Number of tasks unlocked by this one */ - int nr_unlock_tasks; - - /*! List of tasks unlocked by this one */ - struct task **unlock_tasks; - -#ifdef WITH_MPI - - /*! Buffer for this task's communications */ - void *buff; + short int nr_unlock_tasks; - /*! MPI request corresponding to this task */ - MPI_Request req; + /*! Number of unsatisfied dependencies */ + short int wait; -#endif + /*! Type of the task */ + enum task_types type; - /*! Start and end time of this task */ - ticks tic, toc; + /*! Sub-type of the task (for the tasks that have one */ + enum task_subtypes subtype; /*! Should the scheduler skip this task ? */ char skip; @@ -146,7 +148,8 @@ struct task { /*! Is this task implicit (i.e. does not do anything) ? */ char implicit; -}; + +} __attribute__((aligned(32))); /* Function prototypes. 
*/ void task_unlock(struct task *t); diff --git a/tests/test125cells.c b/tests/test125cells.c index c7e01693b45f76fa21ffd397289fc06ad36af03d..a385a7c890fe27ed11d3c5d87d6903fa6d254516 100644 --- a/tests/test125cells.c +++ b/tests/test125cells.c @@ -564,7 +564,7 @@ int main(int argc, char *argv[]) { /* Start the test */ ticks time = 0; - for (size_t i = 0; i < runs; ++i) { + for (size_t n = 0; n < runs; ++n) { const ticks tic = getticks(); @@ -642,7 +642,7 @@ int main(int argc, char *argv[]) { time += toc - tic; /* Dump if necessary */ - if (i == 0) { + if (n == 0) { sprintf(outputFileName, "swift_dopair_125_%s.dat", outputFileNameExtension); dump_particle_fields(outputFileName, main_cell, solution, 0); diff --git a/tests/testInteractions.c b/tests/testInteractions.c index 9d8dca79f3f84ea06e42d61390e68467a5f1b415..52ad0c54258848883a9025bbcd9d68133eddc4b9 100644 --- a/tests/testInteractions.c +++ b/tests/testInteractions.c @@ -173,7 +173,7 @@ void test_interactions(struct part *parts, int count, /* Dump state of particles before serial interaction. */ dump_indv_particle_fields(serial_filename, &pi); - for (size_t i = 1; i < count; i++) + for (int i = 1; i < count; i++) dump_indv_particle_fields(serial_filename, &parts[i]); /* Make copy of pi to be used in vectorised version. */ @@ -206,7 +206,7 @@ void test_interactions(struct part *parts, int count, /* Dump result of serial interaction. */ dump_indv_particle_fields(serial_filename, &pi); - for (size_t i = 1; i < count; i++) + for (int i = 1; i < count; i++) dump_indv_particle_fields(serial_filename, &parts[i]); /* Setup arrays for vector interaction. */