diff --git a/src/debug.c b/src/debug.c index 15354b7d419544a8456543b79c38235eb3b68b2c..d73bc86a92cf5ca28c202e7b567cf7c40ba6eccb 100644 --- a/src/debug.c +++ b/src/debug.c @@ -154,8 +154,9 @@ int checkSpacehmax(struct space *s) { /* Loop over local cells. */ float cell_h_max = 0.0f; for (int k = 0; k < s->nr_cells; k++) { - if (s->cells[k].nodeID == s->e->nodeID && s->cells[k].h_max > cell_h_max) { - cell_h_max = s->cells[k].h_max; + if (s->cells_top[k].nodeID == s->e->nodeID && + s->cells_top[k].h_max > cell_h_max) { + cell_h_max = s->cells_top[k].h_max; } } @@ -172,9 +173,9 @@ int checkSpacehmax(struct space *s) { /* There is a problem. Hunt it down. */ for (int k = 0; k < s->nr_cells; k++) { - if (s->cells[k].nodeID == s->e->nodeID) { - if (s->cells[k].h_max > part_h_max) { - message("cell %d is inconsistent (%f > %f)", k, s->cells[k].h_max, + if (s->cells_top[k].nodeID == s->e->nodeID) { + if (s->cells_top[k].h_max > part_h_max) { + message("cell %d is inconsistent (%f > %f)", k, s->cells_top[k].h_max, part_h_max); } } diff --git a/src/engine.c b/src/engine.c index 8dde99b8eb6f6b4fe3364149635fc9aa7be89c70..64d08d64f952efc4997b62ed8b346d5a8552a259 100644 --- a/src/engine.c +++ b/src/engine.c @@ -256,7 +256,7 @@ void engine_redistribute(struct engine *e) { const int nr_nodes = e->nr_nodes; const int nodeID = e->nodeID; struct space *s = e->s; - struct cell *cells = s->cells; + struct cell *cells = s->cells_top; const int nr_cells = s->nr_cells; const int *cdim = s->cdim; const double iwidth[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]}; @@ -535,12 +535,12 @@ void engine_redistribute(struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS /* Verify that all parts are in the right place. */ - for (int k = 0; k < nr_parts; k++) { - int cid = cell_getid(cdim, parts_new[k].x[0] * iwidth[0], - parts_new[k].x[1] * iwidth[1], - parts_new[k].x[2] * iwidth[2]); + for (size_t k = 0; k < nr_parts; k++) { + const int cid = cell_getid(cdim, parts_new[k].x[0] * iwidth[0], + parts_new[k].x[1] * iwidth[1], + parts_new[k].x[2] * iwidth[2]); if (cells[cid].nodeID != nodeID) - error("Received particle (%i) that does not belong here (nodeID=%i).", k, + error("Received particle (%zu) that does not belong here (nodeID=%i).", k, cells[cid].nodeID); } @@ -561,7 +561,7 @@ void engine_redistribute(struct engine *e) { for (size_t k = 0; k < nr_parts; ++k) { if (parts_new[k].gpart != NULL && - parts_new[k].gpart->id_or_neg_offset != -k) { + parts_new[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) { error("Linking problem !"); } } @@ -852,7 +852,7 @@ void engine_exchange_cells(struct engine *e) { #ifdef WITH_MPI struct space *s = e->s; - struct cell *cells = s->cells; + struct cell *cells = s->cells_top; const int nr_cells = s->nr_cells; const int nr_proxies = e->nr_proxies; int offset[nr_cells]; @@ -1012,7 +1012,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, /* Put the parts and gparts into the corresponding proxies. */ for (size_t k = 0; k < *Npart; k++) { /* Get the target node and proxy ID. */ - const int node_id = e->s->cells[ind_part[k]].nodeID; + const int node_id = e->s->cells_top[ind_part[k]].nodeID; if (node_id < 0 || node_id >= e->nr_nodes) error("Bad node ID %i.", node_id); const int pid = e->proxy_ind[node_id]; @@ -1036,7 +1036,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, &s->xparts[offset_parts + k], 1); } for (size_t k = 0; k < *Ngpart; k++) { - const int node_id = e->s->cells[ind_gpart[k]].nodeID; + const int node_id = e->s->cells_top[ind_gpart[k]].nodeID; if (node_id < 0 || node_id >= e->nr_nodes) error("Bad node ID %i.", node_id); const int pid = e->proxy_ind[node_id]; @@ -1241,7 +1241,7 @@ void engine_make_gravity_tasks(struct engine *e) { struct space *s = e->s; struct scheduler *sched = &e->sched; const int nodeID = e->nodeID; - struct cell *cells = s->cells; + struct cell *cells = s->cells_top; const int nr_cells = s->nr_cells; for (int cid = 0; cid < nr_cells; ++cid) { @@ -1296,7 +1296,7 @@ void engine_make_hydroloop_tasks(struct engine *e) { struct scheduler *sched = &e->sched; const int nodeID = e->nodeID; const int *cdim = s->cdim; - struct cell *cells = s->cells; + struct cell *cells = s->cells_top; /* Run through the highest level of cells and add pairs. */ for (int i = 0; i < cdim[0]; i++) { @@ -1802,7 +1802,7 @@ void engine_make_gravityrecursive_tasks(struct engine *e) { struct scheduler *sched = &e->sched; const int nodeID = e->nodeID; const int nr_cells = s->nr_cells; - struct cell *cells = s->cells; + struct cell *cells = s->cells_top; for (int k = 0; k < nr_cells; k++) { @@ -1835,7 +1835,7 @@ void engine_maketasks(struct engine *e) { struct space *s = e->s; struct scheduler *sched = &e->sched; - struct cell *cells = s->cells; + struct cell *cells = s->cells_top; const int nr_cells = s->nr_cells; const ticks tic = getticks(); @@ -2278,7 +2278,7 @@ void engine_prepare(struct engine *e) { /* First drift all particles to the current time */ e->drift_all = 1; - threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells, + threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 1, e); /* Restore the default drifting policy */ @@ -2396,8 +2396,8 @@ void engine_collect_timestep(struct engine *e) { /* Collect the cell data. */ for (int k = 0; k < s->nr_cells; k++) - if (s->cells[k].nodeID == e->nodeID) { - struct cell *c = &s->cells[k]; + if (s->cells_top[k].nodeID == e->nodeID) { + struct cell *c = &s->cells_top[k]; /* Make the top-cells recurse */ engine_collect_kick(c); @@ -2450,8 +2450,8 @@ void engine_print_stats(struct engine *e) { /* Collect the cell data. */ for (int k = 0; k < s->nr_cells; k++) - if (s->cells[k].nodeID == e->nodeID) { - struct cell *c = &s->cells[k]; + if (s->cells_top[k].nodeID == e->nodeID) { + struct cell *c = &s->cells_top[k]; mass += c->mass; e_kin += c->e_kin; e_int += c->e_int; @@ -2666,7 +2666,7 @@ void engine_step(struct engine *e) { /* Drift everybody to the snapshot position */ e->drift_all = 1; - threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells, + threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 1, e); /* Restore the default drifting policy */ @@ -2706,7 +2706,7 @@ void engine_step(struct engine *e) { } /* Drift only the necessary particles */ - threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells, + threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 1, e); /* Re-distribute the particles amongst the nodes? */ @@ -2805,7 +2805,7 @@ void engine_makeproxies(struct engine *e) { #ifdef WITH_MPI const int *cdim = e->s->cdim; const struct space *s = e->s; - struct cell *cells = s->cells; + struct cell *cells = s->cells_top; struct proxy *proxies = e->proxies; ticks tic = getticks(); @@ -2972,7 +2972,8 @@ void engine_split(struct engine *e, struct partition *initial_partition) { } for (size_t k = 0; k < s->nr_parts; ++k) { - if (s->parts[k].gpart != NULL && s->parts[k].gpart->id_or_neg_offset != -k) + if (s->parts[k].gpart != NULL && + s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) error("Linking problem !"); } diff --git a/src/partition.c b/src/partition.c index e216e12a5a23457b39b53070de3d84a2f257b927..8d17bedf0aaeadc64044b12ffe1bb8887b02d83e 100644 --- a/src/partition.c +++ b/src/partition.c @@ -143,7 +143,7 @@ static void split_vector(struct space *s, int nregions, int *samplecells) { select = l; } } - s->cells[n++].nodeID = select; + s->cells_top[n++].nodeID = select; } } } @@ -274,7 +274,7 @@ static void accumulate_counts(struct space *s, int *counts) { */ static void split_metis(struct space *s, int nregions, int *celllist) { - for (int i = 0; i < s->nr_cells; i++) s->cells[i].nodeID = celllist[i]; + for (int i = 0; i < s->nr_cells; i++) s->cells_top[i].nodeID = celllist[i]; } #endif @@ -419,7 +419,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, /* Create weight arrays using task ticks for vertices and edges (edges * assume the same graph structure as used in the part_ calls). */ int nr_cells = s->nr_cells; - struct cell *cells = s->cells; + struct cell *cells = s->cells_top; float wscale = 1e-3, vscale = 1e-3, wscale_buff = 0.0; int wtot = 0; int wmax = 1e9 / nr_nodes; @@ -795,7 +795,7 @@ void partition_initial_partition(struct partition *initial_partition, /* Run through the cells and set their nodeID. */ // message("s->dim = [%e,%e,%e]", s->dim[0], s->dim[1], s->dim[2]); for (k = 0; k < s->nr_cells; k++) { - c = &s->cells[k]; + c = &s->cells_top[k]; for (j = 0; j < 3; j++) ind[j] = c->loc[j] / s->dim[j] * initial_partition->grid[j]; c->nodeID = ind[0] + @@ -1037,10 +1037,10 @@ static int check_complete(struct space *s, int verbose, int nregions) { int failed = 0; for (int i = 0; i < nregions; i++) present[i] = 0; for (int i = 0; i < s->nr_cells; i++) { - if (s->cells[i].nodeID <= nregions) - present[s->cells[i].nodeID]++; + if (s->cells_top[i].nodeID <= nregions) + present[s->cells_top[i].nodeID]++; else - message("Bad nodeID: %d", s->cells[i].nodeID); + message("Bad nodeID: %d", s->cells_top[i].nodeID); } for (int i = 0; i < nregions; i++) { if (!present[i]) { @@ -1085,13 +1085,13 @@ int partition_space_to_space(double *oldh, double *oldcdim, int *oldnodeIDs, for (int k = 0; k < s->cdim[2]; k++) { /* Scale indices to old cell space. */ - int ii = rint(i * s->iwidth[0] * oldh[0]); - int jj = rint(j * s->iwidth[1] * oldh[1]); - int kk = rint(k * s->iwidth[2] * oldh[2]); + const int ii = rint(i * s->iwidth[0] * oldh[0]); + const int jj = rint(j * s->iwidth[1] * oldh[1]); + const int kk = rint(k * s->iwidth[2] * oldh[2]); - int cid = cell_getid(s->cdim, i, j, k); - int oldcid = cell_getid(oldcdim, ii, jj, kk); - s->cells[cid].nodeID = oldnodeIDs[oldcid]; + const int cid = cell_getid(s->cdim, i, j, k); + const int oldcid = cell_getid(oldcdim, ii, jj, kk); + s->cells_top[cid].nodeID = oldnodeIDs[oldcid]; if (oldnodeIDs[oldcid] > nr_nodes) nr_nodes = oldnodeIDs[oldcid]; } diff --git a/src/runner.c b/src/runner.c index deb52cfa133cc2fa1bc0c816cd4787810fa17a35..3cc42f59c21eea03802d8797081c8a0f6866ef58 100644 --- a/src/runner.c +++ b/src/runner.c @@ -57,6 +57,18 @@ #include "timers.h" #include "timestep.h" +/** + * @brief Entry in a list of sorted indices. + */ +struct entry { + + /*! Distance on the axis */ + float d; + + /*! Particle index */ + int i; +}; + /* Orientation of the cell pairs */ const double runner_shift[13][3] = { {5.773502691896258e-01, 5.773502691896258e-01, 5.773502691896258e-01}, diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index a220ad1794d23999ff16752e797a499071fa2e65..0fcd2d2e80a72b92588acd5b8275b9dafc68df45 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -488,7 +488,7 @@ static void runner_do_grav_mm(struct runner *r, struct cell *ci, int timer) { /* Recover the list of top-level cells */ const struct engine *e = r->e; - struct cell *cells = e->s->cells; + struct cell *cells = e->s->cells_top; const int nr_cells = e->s->nr_cells; const int ti_current = e->ti_current; const double max_d = diff --git a/src/space.c b/src/space.c index cccd40ddd7ba5e1de75b1b609d77a919e0d07835..b308420f4b8a63e1a5d08e73441b121b2d416e1e 100644 --- a/src/space.c +++ b/src/space.c @@ -88,6 +88,28 @@ const int sortlistID[27] = { /* ( 1 , 1 , 0 ) */ 1, /* ( 1 , 1 , 1 ) */ 0}; +/** + * @brief Interval stack necessary for parallel particle sorting. + */ +struct qstack { + volatile ptrdiff_t i, j; + volatile int min, max; + volatile int ready; +}; + +/** + * @brief Parallel particle-sorting stack + */ +struct parallel_sort { + struct part *parts; + struct gpart *gparts; + struct xpart *xparts; + int *ind; + struct qstack *stack; + unsigned int stack_size; + volatile unsigned int first, last, waiting; +}; + /** * @brief Get the shift-id of the given pair of cells, swapping them * if need be. @@ -99,7 +121,6 @@ const int sortlistID[27] = { * * @return The shift ID and set shift, may or may not swap ci and cj. */ - int space_getsid(struct space *s, struct cell **ci, struct cell **cj, double *shift) { @@ -138,8 +159,9 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj, /** * @brief Recursively dismantle a cell tree. * + * @param s The #space. + * @param c The #cell to recycle. */ - void space_rebuild_recycle(struct space *s, struct cell *c) { if (c->split) @@ -152,28 +174,27 @@ void space_rebuild_recycle(struct space *s, struct cell *c) { } /** - * @brief Re-build the cell grid. + * @brief Re-build the top-level cell grid. * * @param s The #space. * @param cell_max Maximum cell edge length. * @param verbose Print messages to stdout or not. */ - void space_regrid(struct space *s, double cell_max, int verbose) { const size_t nr_parts = s->nr_parts; - struct cell *restrict c; - ticks tic = getticks(); + const ticks tic = getticks(); const int ti_current = (s->e != NULL) ? s->e->ti_current : 0; /* Run through the cells and get the current h_max. */ // tic = getticks(); float h_max = s->cell_min / kernel_gamma / space_stretch; if (nr_parts > 0) { - if (s->cells != NULL) { + if (s->cells_top != NULL) { for (int k = 0; k < s->nr_cells; k++) { - if (s->cells[k].nodeID == engine_rank && s->cells[k].h_max > h_max) { - h_max = s->cells[k].h_max; + if (s->cells_top[k].nodeID == engine_rank && + s->cells_top[k].h_max > h_max) { + h_max = s->cells_top[k].h_max; } } } else { @@ -197,10 +218,10 @@ void space_regrid(struct space *s, double cell_max, int verbose) { if (verbose) message("h_max is %.3e (cell_max=%.3e).", h_max, cell_max); /* Get the new putative cell dimensions. */ - int cdim[3]; - for (int k = 0; k < 3; k++) - cdim[k] = - floor(s->dim[k] / fmax(h_max * kernel_gamma * space_stretch, cell_max)); + const int cdim[3] = { + floor(s->dim[0] / fmax(h_max * kernel_gamma * space_stretch, cell_max)), + floor(s->dim[1] / fmax(h_max * kernel_gamma * space_stretch, cell_max)), + floor(s->dim[2] / fmax(h_max * kernel_gamma * space_stretch, cell_max))}; /* Check if we have enough cells for periodicity. */ if (s->periodic && (cdim[0] < 3 || cdim[1] < 3 || cdim[2] < 3)) @@ -239,7 +260,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) { for (int j = 0; j < s->cdim[1]; j++) { for (int k = 0; k < s->cdim[2]; k++) { cid = cell_getid(oldcdim, i, j, k); - oldnodeIDs[cid] = s->cells[cid].nodeID; + oldnodeIDs[cid] = s->cells_top[cid].nodeID; } } } @@ -249,16 +270,16 @@ void space_regrid(struct space *s, double cell_max, int verbose) { /* Do we need to re-build the upper-level cells? */ // tic = getticks(); - if (s->cells == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || + if (s->cells_top == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2]) { /* Free the old cells, if they were allocated. */ - if (s->cells != NULL) { + if (s->cells_top != NULL) { for (int k = 0; k < s->nr_cells; k++) { - space_rebuild_recycle(s, &s->cells[k]); - if (s->cells[k].sort != NULL) free(s->cells[k].sort); + space_rebuild_recycle(s, &s->cells_top[k]); + if (s->cells_top[k].sort != NULL) free(s->cells_top[k].sort); } - free(s->cells); + free(s->cells_top); s->maxdepth = 0; } @@ -272,18 +293,19 @@ void space_regrid(struct space *s, double cell_max, int verbose) { /* Allocate the highest level of cells. */ s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2]; - if (posix_memalign((void *)&s->cells, cell_align, + if (posix_memalign((void *)&s->cells_top, cell_align, s->nr_cells * sizeof(struct cell)) != 0) error("Failed to allocate cells."); - bzero(s->cells, s->nr_cells * sizeof(struct cell)); + bzero(s->cells_top, s->nr_cells * sizeof(struct cell)); for (int k = 0; k < s->nr_cells; k++) - if (lock_init(&s->cells[k].lock) != 0) error("Failed to init spinlock."); + if (lock_init(&s->cells_top[k].lock) != 0) + error("Failed to init spinlock."); /* Set the cell location and sizes. */ for (int i = 0; i < cdim[0]; i++) for (int j = 0; j < cdim[1]; j++) for (int k = 0; k < cdim[2]; k++) { - c = &s->cells[cell_getid(cdim, i, j, k)]; + struct cell *restrict c = &s->cells_top[cell_getid(cdim, i, j, k)]; c->loc[0] = i * s->width[0]; c->loc[1] = j * s->width[1]; c->loc[2] = k * s->width[2]; @@ -340,34 +362,35 @@ void space_regrid(struct space *s, double cell_max, int verbose) { free(oldnodeIDs); } #endif + + // message( "rebuilding upper-level cells took %.3f %s." , + // clocks_from_ticks(double)(getticks() - tic), clocks_getunit()); + } /* re-build upper-level cells? */ - // message( "rebuilding upper-level cells took %.3f %s." , - // clocks_from_ticks(double)(getticks() - tic), clocks_getunit()); - /* Otherwise, just clean up the cells. */ - else { + else { /* Otherwise, just clean up the cells. */ /* Free the old cells, if they were allocated. */ for (int k = 0; k < s->nr_cells; k++) { - space_rebuild_recycle(s, &s->cells[k]); - s->cells[k].sorts = NULL; - s->cells[k].nr_tasks = 0; - s->cells[k].nr_density = 0; - s->cells[k].nr_gradient = 0; - s->cells[k].nr_force = 0; - s->cells[k].density = NULL; - s->cells[k].gradient = NULL; - s->cells[k].force = NULL; - s->cells[k].dx_max = 0.0f; - s->cells[k].sorted = 0; - s->cells[k].count = 0; - s->cells[k].gcount = 0; - s->cells[k].init = NULL; - s->cells[k].extra_ghost = NULL; - s->cells[k].ghost = NULL; - s->cells[k].kick = NULL; - s->cells[k].super = &s->cells[k]; - s->cells[k].gsuper = &s->cells[k]; + space_rebuild_recycle(s, &s->cells_top[k]); + s->cells_top[k].sorts = NULL; + s->cells_top[k].nr_tasks = 0; + s->cells_top[k].nr_density = 0; + s->cells_top[k].nr_gradient = 0; + s->cells_top[k].nr_force = 0; + s->cells_top[k].density = NULL; + s->cells_top[k].gradient = NULL; + s->cells_top[k].force = NULL; + s->cells_top[k].dx_max = 0.0f; + s->cells_top[k].sorted = 0; + s->cells_top[k].count = 0; + s->cells_top[k].gcount = 0; + s->cells_top[k].init = NULL; + s->cells_top[k].extra_ghost = NULL; + s->cells_top[k].ghost = NULL; + s->cells_top[k].kick = NULL; + s->cells_top[k].super = &s->cells_top[k]; + s->cells_top[k].gsuper = &s->cells_top[k]; } s->maxdepth = 0; } @@ -385,7 +408,6 @@ void space_regrid(struct space *s, double cell_max, int verbose) { * @param verbose Print messages to stdout or not * */ - void space_rebuild(struct space *s, double cell_max, int verbose) { const ticks tic = getticks(); @@ -398,7 +420,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { size_t nr_parts = s->nr_parts; size_t nr_gparts = s->nr_gparts; - struct cell *restrict cells = s->cells; + struct cell *restrict cells_top = s->cells_top; const int ti_current = (s->e != NULL) ? s->e->ti_current : 0; const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]}; @@ -420,7 +442,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { p->x[j] -= dim[j]; ind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); - cells[ind[k]].count++; + cells_top[ind[k]].count++; } // message( "getting particle indices took %.3f %s." , // clocks_from_ticks(getticks() - tic), clocks_getunit()): @@ -440,9 +462,9 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { gp->x[j] -= dim[j]; gind[k] = cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]); - cells[gind[k]].gcount++; + cells_top[gind[k]].gcount++; } -// message( "getting particle indices took %.3f %s." , +// message( "getting g-particle indices took %.3f %s." , // clocks_from_ticks(getticks() - tic), clocks_getunit()); #ifdef WITH_MPI @@ -450,8 +472,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* Move non-local parts to the end of the list. */ const int local_nodeID = s->e->nodeID; for (size_t k = 0; k < nr_parts;) { - if (cells[ind[k]].nodeID != local_nodeID) { - cells[ind[k]].count -= 1; + if (cells_top[ind[k]].nodeID != local_nodeID) { + cells_top[ind[k]].count -= 1; nr_parts -= 1; const struct part tp = s->parts[k]; s->parts[k] = s->parts[nr_parts]; @@ -477,12 +499,12 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { #ifdef SWIFT_DEBUG_CHECKS /* Check that all parts are in the correct places. */ for (size_t k = 0; k < nr_parts; k++) { - if (cells[ind[k]].nodeID != local_nodeID) { + if (cells_top[ind[k]].nodeID != local_nodeID) { error("Failed to move all non-local parts to send list"); } } for (size_t k = nr_parts; k < s->nr_parts; k++) { - if (cells[ind[k]].nodeID == local_nodeID) { + if (cells_top[ind[k]].nodeID == local_nodeID) { error("Failed to remove local parts from send list"); } } @@ -490,8 +512,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* Move non-local gparts to the end of the list. */ for (size_t k = 0; k < nr_gparts;) { - if (cells[gind[k]].nodeID != local_nodeID) { - cells[gind[k]].gcount -= 1; + if (cells_top[gind[k]].nodeID != local_nodeID) { + cells_top[gind[k]].gcount -= 1; nr_gparts -= 1; const struct gpart tp = s->gparts[k]; s->gparts[k] = s->gparts[nr_gparts]; @@ -515,12 +537,12 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { #ifdef SWIFT_DEBUG_CHECKS /* Check that all gparts are in the correct place (untested). */ for (size_t k = 0; k < nr_gparts; k++) { - if (cells[gind[k]].nodeID != local_nodeID) { + if (cells_top[gind[k]].nodeID != local_nodeID) { error("Failed to move all non-local gparts to send list"); } } for (size_t k = nr_gparts; k < s->nr_gparts; k++) { - if (cells[gind[k]].nodeID == local_nodeID) { + if (cells_top[gind[k]].nodeID == local_nodeID) { error("Failed to remove local gparts from send list"); } } @@ -552,11 +574,11 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { const struct part *const p = &s->parts[k]; ind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); - cells[ind[k]].count += 1; + cells_top[ind[k]].count += 1; #ifdef SWIFT_DEBUG_CHECKS - if (cells[ind[k]].nodeID != local_nodeID) + if (cells_top[ind[k]].nodeID != local_nodeID) error("Received part that does not belong to me (nodeID=%i).", - cells[ind[k]].nodeID); + cells_top[ind[k]].nodeID); #endif } nr_parts = s->nr_parts; @@ -602,16 +624,19 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { const struct gpart *const p = &s->gparts[k]; gind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); - cells[gind[k]].gcount += 1; - /* if ( cells[ ind[k] ].nodeID != nodeID ) - error( "Received part that does not belong to me (nodeID=%i)." , cells[ - ind[k] ].nodeID ); */ + cells_top[gind[k]].gcount += 1; + +#ifdef SWIFT_DEBUG_CHECKS + if (cells_top[ind[k]].nodeID != s->e->nodeID) + error("Received part that does not belong to me (nodeID=%i).", + cells_top[ind[k]].nodeID); +#endif } nr_gparts = s->nr_gparts; #endif - /* Sort the parts according to their cells. */ + /* Sort the gparts according to their cells. */ space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose); /* Re-link the parts. */ @@ -638,7 +663,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { for (size_t k = 0; k < nr_parts; ++k) { if (s->parts[k].gpart != NULL && - s->parts[k].gpart->id_or_neg_offset != -k) { + s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) { error("Linking problem !"); } } @@ -650,7 +675,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { struct xpart *xfinger = s->xparts; struct gpart *gfinger = s->gparts; for (int k = 0; k < s->nr_cells; k++) { - struct cell *restrict c = &cells[k]; + struct cell *restrict c = &cells_top[k]; c->ti_old = ti_current; c->parts = finger; c->xparts = xfinger; @@ -664,7 +689,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* At this point, we have the upper-level cells, old or new. Now make sure that the parts in each cell are ok. */ - space_split(s, cells, verbose); + space_split(s, cells_top, verbose); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -674,6 +699,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /** * @brief Split particles between cells of a hierarchy * + * This is done in parallel using threads in the #threadpool. + * * @param s The #space. * @param cells The cell hierarchy * @param verbose Are we talkative ? @@ -692,7 +719,7 @@ void space_split(struct space *s, struct cell *cells, int verbose) { /** * @brief Sort the particles and condensed particles according to the given - *indices. + * indices. * * @param s The #space. * @param ind The indices with respect to which the parts are sorted. @@ -701,7 +728,6 @@ void space_split(struct space *s, struct cell *cells, int verbose) { * @param max highest index. * @param verbose Are we talkative ? */ - void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, int verbose) { @@ -736,9 +762,9 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, #ifdef SWIFT_DEBUG_CHECKS /* Verify space_sort_struct. */ - for (int i = 1; i < N; i++) + for (size_t i = 1; i < N; i++) if (ind[i - 1] > ind[i]) - error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1, + error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1, ind[i - 1], i, ind[i], min, max); message("Sorting succeeded."); #endif @@ -878,8 +904,7 @@ void space_parts_sort_mapper(void *map_data, int num_elements, } /** - * @brief Sort the g-particles and condensed particles according to the given - *indices. + * @brief Sort the g-particles according to the given indices. * * @param s The #space. * @param ind The indices with respect to which the gparts are sorted. @@ -921,9 +946,9 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max, #ifdef SWIFT_DEBUG_CHECKS /* Verify space_sort_struct. */ - for (int i = 1; i < N; i++) + for (size_t i = 1; i < N; i++) if (ind[i - 1] > ind[i]) - error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1, + error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1, ind[i - 1], i, ind[i], min, max); message("Sorting succeeded."); #endif @@ -1061,7 +1086,6 @@ void space_gparts_sort_mapper(void *map_data, int num_elements, /** * @brief Mapping function to free the sorted indices buffers. */ - void space_map_clearsort(struct cell *c, void *data) { if (c->sort != NULL) { @@ -1077,21 +1101,17 @@ void space_map_clearsort(struct cell *c, void *data) { * @param fun Function pointer to apply on the cells. * @param data Data passed to the function fun. */ - static void rec_map_parts(struct cell *c, void (*fun)(struct part *p, struct cell *c, void *data), void *data) { - - int k; - /* No progeny? */ if (!c->split) - for (k = 0; k < c->count; k++) fun(&c->parts[k], c, data); + for (int k = 0; k < c->count; k++) fun(&c->parts[k], c, data); /* Otherwise, recurse. */ else - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) rec_map_parts(c->progeny[k], fun, data); } @@ -1102,16 +1122,13 @@ static void rec_map_parts(struct cell *c, * @param fun Function pointer to apply on the cells. * @param data Data passed to the function fun. */ - void space_map_parts(struct space *s, void (*fun)(struct part *p, struct cell *c, void *data), void *data) { - int cid = 0; - /* Call the recursive function on all higher-level cells. */ - for (cid = 0; cid < s->nr_cells; cid++) - rec_map_parts(&s->cells[cid], fun, data); + for (int cid = 0; cid < s->nr_cells; cid++) + rec_map_parts(&s->cells_top[cid], fun, data); } /** @@ -1120,20 +1137,17 @@ void space_map_parts(struct space *s, * @param c The #cell we are working in. * @param fun Function pointer to apply on the cells. */ - static void rec_map_parts_xparts(struct cell *c, void (*fun)(struct part *p, struct xpart *xp, struct cell *c)) { - int k; - /* No progeny? */ if (!c->split) - for (k = 0; k < c->count; k++) fun(&c->parts[k], &c->xparts[k], c); + for (int k = 0; k < c->count; k++) fun(&c->parts[k], &c->xparts[k], c); /* Otherwise, recurse. */ else - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) rec_map_parts_xparts(c->progeny[k], fun); } @@ -1143,16 +1157,13 @@ static void rec_map_parts_xparts(struct cell *c, * @param s The #space we are working in. * @param fun Function pointer to apply on the particles in the cells. */ - void space_map_parts_xparts(struct space *s, void (*fun)(struct part *p, struct xpart *xp, struct cell *c)) { - int cid = 0; - /* Call the recursive function on all higher-level cells. */ - for (cid = 0; cid < s->nr_cells; cid++) - rec_map_parts_xparts(&s->cells[cid], fun); + for (int cid = 0; cid < s->nr_cells; cid++) + rec_map_parts_xparts(&s->cells_top[cid], fun); } /** @@ -1163,16 +1174,12 @@ void space_map_parts_xparts(struct space *s, * @param fun Function pointer to apply on the cells. * @param data Data passed to the function fun. */ - static void rec_map_cells_post(struct cell *c, int full, void (*fun)(struct cell *c, void *data), void *data) { - - int k; - /* Recurse. */ if (c->split) - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) rec_map_cells_post(c->progeny[k], full, fun, data); @@ -1188,29 +1195,24 @@ static void rec_map_cells_post(struct cell *c, int full, * @param fun Function pointer to apply on the cells. * @param data Data passed to the function fun. */ - void space_map_cells_post(struct space *s, int full, void (*fun)(struct cell *c, void *data), void *data) { - int cid = 0; - /* Call the recursive function on all higher-level cells. */ - for (cid = 0; cid < s->nr_cells; cid++) - rec_map_cells_post(&s->cells[cid], full, fun, data); + for (int cid = 0; cid < s->nr_cells; cid++) + rec_map_cells_post(&s->cells_top[cid], full, fun, data); } static void rec_map_cells_pre(struct cell *c, int full, void (*fun)(struct cell *c, void *data), void *data) { - int k; - /* No progeny? */ if (full || !c->split) fun(c, data); /* Recurse. */ if (c->split) - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) rec_map_cells_pre(c->progeny[k], full, fun, data); } @@ -1226,28 +1228,29 @@ static void rec_map_cells_pre(struct cell *c, int full, void space_map_cells_pre(struct space *s, int full, void (*fun)(struct cell *c, void *data), void *data) { - int cid = 0; - /* Call the recursive function on all higher-level cells. */ - for (cid = 0; cid < s->nr_cells; cid++) - rec_map_cells_pre(&s->cells[cid], full, fun, data); + for (int cid = 0; cid < s->nr_cells; cid++) + rec_map_cells_pre(&s->cells_top[cid], full, fun, data); } /** * @brief #threadpool mapper function to split cells if they contain * too many particles. + * + * @param map_data Pointer towards the top-cells. + * @param num_elements The number of cells to treat. + * @param extra_data Pointers to the #space. */ - void space_split_mapper(void *map_data, int num_elements, void *extra_data) { /* Unpack the inputs. */ struct space *s = (struct space *)extra_data; - struct cell *cells = (struct cell *)map_data; + struct cell *restrict cells_top = (struct cell *)map_data; struct engine *e = s->e; for (int ind = 0; ind < num_elements; ind++) { - struct cell *c = &cells[ind]; + struct cell *c = &cells_top[ind]; const int count = c->count; const int gcount = c->gcount; @@ -1367,12 +1370,11 @@ void space_split_mapper(void *map_data, int num_elements, void *extra_data) { } /** - * @brief Return a used cell to the cell buffer. + * @brief Return a used cell to the buffer od unused sub-cells. * * @param s The #space. * @param c The #cell. */ - void space_recycle(struct space *s, struct cell *c) { /* Lock the space. */ @@ -1388,8 +1390,8 @@ void space_recycle(struct space *s, struct cell *c) { bzero(c, sizeof(struct cell)); /* Hook this cell into the buffer. */ - c->next = s->cells_new; - s->cells_new = c; + c->next = s->cells_sub; + s->cells_sub = c; s->tot_cells -= 1; /* Unlock the space. */ @@ -1397,39 +1399,42 @@ void space_recycle(struct space *s, struct cell *c) { } /** - * @brief Get a new empty cell. + * @brief Get a new empty (sub-)#cell. + * + * If there are cells in the buffer, use the one at the end of the linked list. + * If we have no cells, allocate a new chunk of memory and pick one from there. * * @param s The #space. */ - struct cell *space_getcell(struct space *s) { - struct cell *c; - int k; - /* Lock the space. */ lock_lock(&s->lock); /* Is the buffer empty? */ - if (s->cells_new == NULL) { - if (posix_memalign((void *)&s->cells_new, cell_align, + if (s->cells_sub == NULL) { + if (posix_memalign((void *)&s->cells_sub, cell_align, space_cellallocchunk * sizeof(struct cell)) != 0) error("Failed to allocate more cells."); - bzero(s->cells_new, space_cellallocchunk * sizeof(struct cell)); - for (k = 0; k < space_cellallocchunk - 1; k++) - s->cells_new[k].next = &s->cells_new[k + 1]; - s->cells_new[space_cellallocchunk - 1].next = NULL; + + /* Zero everything for good measure */ + bzero(s->cells_sub, space_cellallocchunk * sizeof(struct cell)); + + /* Constructed a linked list */ + for (int k = 0; k < space_cellallocchunk - 1; k++) + s->cells_sub[k].next = &s->cells_sub[k + 1]; + s->cells_sub[space_cellallocchunk - 1].next = NULL; } /* Pick off the next cell. */ - c = s->cells_new; - s->cells_new = c->next; + struct cell *c = s->cells_sub; + s->cells_sub = c->next; s->tot_cells += 1; /* Unlock the space. */ lock_unlock_blind(&s->lock); - /* Init some things in the cell. */ + /* Init some things in the cell we just got. */ bzero(c, sizeof(struct cell)); c->nodeID = -1; if (lock_init(&c->lock) != 0 || lock_init(&c->glock) != 0) @@ -1511,7 +1516,6 @@ void space_init_gparts(struct space *s) { * parts with a cutoff below half the cell width are then split * recursively. */ - void space_init(struct space *s, const struct swift_params *params, double dim[3], struct part *parts, struct gpart *gparts, size_t Npart, size_t Ngpart, int periodic, int gravity, @@ -1534,7 +1538,6 @@ void space_init(struct space *s, const struct swift_params *params, s->gparts = gparts; s->cell_min = parser_get_param_double(params, "SPH:max_smoothing_length"); s->nr_queues = 1; /* Temporary value until engine construction */ - s->size_parts_foreign = 0; /* Get the constants for the scheduler */ space_maxsize = parser_get_opt_param_int(params, "Scheduler:cell_max_size", @@ -1651,8 +1654,8 @@ void space_link_cleanup(struct space *s) { */ void space_clean(struct space *s) { - for (int i = 0; i < s->nr_cells; ++i) cell_clean(&s->cells[i]); - free(s->cells); + for (int i = 0; i < s->nr_cells; ++i) cell_clean(&s->cells_top[i]); + free(s->cells_top); free(s->parts); free(s->xparts); free(s->gparts); diff --git a/src/space.h b/src/space.h index 90313be8dbe817d65fbd0e6a8c30c156747594b1..66d82c6f78a08447851a8dfdaea1231e5778693b 100644 --- a/src/space.h +++ b/src/space.h @@ -37,7 +37,6 @@ #include "space.h" /* Some constants. */ -#define space_maxdepth 10 #define space_cellallocchunk 1000 #define space_splitsize_default 400 #define space_maxsize_default 8000000 @@ -53,84 +52,85 @@ extern int space_subsize; /* Map shift vector to sortlist. */ extern const int sortlistID[27]; -/* Entry in a list of sorted indices. */ -struct entry { - float d; - int i; -}; - -/* The space in which the cells reside. */ +/** + * @brief The space in which the cells and particles reside. + */ struct space { - /* Spatial extent. */ + /*! Spatial extent. */ double dim[3]; - /* Cell widths. */ - double width[3], iwidth[3]; + /*! Is the space periodic? */ + int periodic; + + /*! Are we doing gravity? */ + int gravity; + + /*! Width of the top-level cells. */ + double width[3]; + + /*! Inverse of the top-level cell width */ + double iwidth[3]; - /* The minimum cell width. */ + /*! The minimum top-level cell width allowed. */ double cell_min; - /* Current maximum displacement for particles. */ + /*! Current maximum displacement for particles. */ float dx_max; - /* Number of cells. */ - int nr_cells, tot_cells; + /*! Space dimensions in number of top-cells. */ + int cdim[3]; - /* Space dimensions in number of cells. */ - int maxdepth, cdim[3]; + /*! Maximal depth reached by the tree */ + int maxdepth; - /* The (level 0) cells themselves. */ - struct cell *cells; + /*! Number of top-level cells. */ + int nr_cells; - /* Buffer of unused cells. */ - struct cell *cells_new; + /*! Total number of cells (top- and sub-) */ + int tot_cells; - /* The particle data (cells have pointers to this). */ - struct part *parts; - struct xpart *xparts; - struct gpart *gparts; + /*! The (level 0) cells themselves. */ + struct cell *cells_top; - /* The total number of parts in the space. */ + /*! Buffer of unused cells for the sub-cells. */ + struct cell *cells_sub; + + /*! The total number of parts in the space. */ size_t nr_parts, size_parts; + + /*! The total number of g-parts in the space. */ size_t nr_gparts, size_gparts; - /* Is the space periodic? */ - int periodic; + /*! The particle data (cells have pointers to this). */ + struct part *parts; - /* Are we doing gravity? */ - int gravity; + /*! The extended particle data (cells have pointers to this). */ + struct xpart *xparts; - /* General-purpose lock for this space. */ + /*! The g-particle data (cells have pointers to this). */ + struct gpart *gparts; + + /*! General-purpose lock for this space. */ swift_lock_type lock; - /* Number of queues in the system. */ + /*! Number of queues in the system. */ int nr_queues; - /* The associated engine. */ + /*! The associated engine. */ struct engine *e; - /* Buffers for parts that we will receive from foreign cells. */ +#ifdef WITH_MPI + + /*! Buffers for parts that we will receive from foreign cells. */ struct part *parts_foreign; size_t nr_parts_foreign, size_parts_foreign; + + /*! Buffers for g-parts that we will receive from foreign cells. */ struct gpart *gparts_foreign; size_t nr_gparts_foreign, size_gparts_foreign; -}; -/* Interval stack necessary for parallel particle sorting. */ -struct qstack { - volatile ptrdiff_t i, j; - volatile int min, max; - volatile int ready; -}; -struct parallel_sort { - struct part *parts; - struct gpart *gparts; - struct xpart *xparts; - int *ind; - struct qstack *stack; - unsigned int stack_size; - volatile unsigned int first, last, waiting; +#endif }; /* function prototypes. */