diff --git a/examples/process_cells b/examples/process_cells index b57ed9e73c76807707d158485e31a5b643d8dec0..a18148f0890285d1aebd72e5f13f6c327b202619 100755 --- a/examples/process_cells +++ b/examples/process_cells @@ -38,7 +38,7 @@ ranks=$(ls -v cells_*.dat | sed 's,cells_\(.*\)_.*.dat,\1,' | sort | uniq | wc - echo "Number of ranks = $ranks" # Now construct a list of files ordered by rank, not step. -files=$(ls cells_*.dat | sort -t "_" -k 3,3 -n | xargs -n 4) +files=$(ls cells_*.dat | sort -t "_" -k 3,3 -n | xargs -n $ranks) # Need number of steps. nfiles=$(echo $files| wc -w) @@ -48,10 +48,10 @@ echo "Number of steps = $steps" # And process them, echo "Processing cell dumps files..." -echo $files | xargs -P $NPROCS -n 4 /bin/bash -c "${SCRIPTHOME}/process_cells_helper $NX $NY $NZ \$0 \$1 \$2 \$3" +echo $files | xargs -P $NPROCS -n $ranks ${SCRIPTHOME}/process_cells_helper $NX $NY $NZ # Create summary. -grep "top cells" step*-active-cells.dat | sort -h > active_cells.log +grep "top cells" step*-active-cells.dat | sort -n -k 1.5 > active_cells.log # And plot of active cells to edge cells. stilts plot2plane ifmt=ascii in=active_cells.log xmin=-0.1 xmax=1.1 ymin=0 ymax=$steps grid=1 \ diff --git a/src/cell.c b/src/cell.c index a1f57edcca3ff1fb42a93cdf6e222d48a00797ac..118cf656cf7defd6f987b666ea8f65571c023d9a 100644 --- a/src/cell.c +++ b/src/cell.c @@ -835,11 +835,15 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, memswap(&parts[j], &part, sizeof(struct part)); memswap(&xparts[j], &xpart, sizeof(struct xpart)); memswap(&buff[j], &temp_buff, sizeof(struct cell_buff)); + if (parts[j].gpart) + parts[j].gpart->id_or_neg_offset = -(j + parts_offset); bid = temp_buff.ind; } parts[k] = part; xparts[k] = xpart; buff[k] = temp_buff; + if (parts[k].gpart) + parts[k].gpart->id_or_neg_offset = -(k + parts_offset); } bucket_count[bid]++; } @@ -852,10 +856,6 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, c->progeny[k]->xparts = &c->xparts[bucket_offset[k]]; } - /* Re-link the gparts. */ - if (count > 0 && gcount > 0) - part_relink_gparts_to_parts(parts, count, parts_offset); - #ifdef SWIFT_DEBUG_CHECKS /* Check that the buffs are OK. */ for (int k = 1; k < count; k++) { @@ -952,10 +952,14 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, } memswap(&sparts[j], &spart, sizeof(struct spart)); memswap(&sbuff[j], &temp_buff, sizeof(struct cell_buff)); + if (sparts[j].gpart) + sparts[j].gpart->id_or_neg_offset = -(j + sparts_offset); bid = temp_buff.ind; } sparts[k] = spart; sbuff[k] = temp_buff; + if (sparts[k].gpart) + sparts[k].gpart->id_or_neg_offset = -(k + sparts_offset); } bucket_count[bid]++; } @@ -967,10 +971,6 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, c->progeny[k]->sparts = &c->sparts[bucket_offset[k]]; } - /* Re-link the gparts. */ - if (scount > 0 && gcount > 0) - part_relink_gparts_to_sparts(sparts, scount, sparts_offset); - /* Finally, do the same song and dance for the gparts. */ for (int k = 0; k < 8; k++) bucket_count[k] = 0; @@ -1005,10 +1005,23 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, } memswap(&gparts[j], &gpart, sizeof(struct gpart)); memswap(&gbuff[j], &temp_buff, sizeof(struct cell_buff)); + if (gparts[j].type == swift_type_gas) { + parts[-gparts[j].id_or_neg_offset - parts_offset].gpart = + &gparts[j]; + } else if (gparts[j].type == swift_type_star) { + sparts[-gparts[j].id_or_neg_offset - sparts_offset].gpart = + &gparts[j]; + } bid = temp_buff.ind; } gparts[k] = gpart; gbuff[k] = temp_buff; + if (gparts[k].type == swift_type_gas) { + parts[-gparts[k].id_or_neg_offset - parts_offset].gpart = &gparts[k]; + } else if (gparts[k].type == swift_type_star) { + sparts[-gparts[k].id_or_neg_offset - sparts_offset].gpart = + &gparts[k]; + } } bucket_count[bid]++; } @@ -1019,14 +1032,6 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, c->progeny[k]->gcount = bucket_count[k]; c->progeny[k]->gparts = &c->gparts[bucket_offset[k]]; } - - /* Re-link the parts. */ - if (count > 0 && gcount > 0) - part_relink_parts_to_gparts(gparts, gcount, parts - parts_offset); - - /* Re-link the sparts. */ - if (scount > 0 && gcount > 0) - part_relink_sparts_to_gparts(gparts, gcount, sparts - sparts_offset); } /** diff --git a/src/debug.c b/src/debug.c index b1e2cb08bc7fa99330da3d9c9382dbef81b3215a..25301ea5caefe69b7daef2c048b2e4b68d04c289 100644 --- a/src/debug.c +++ b/src/debug.c @@ -326,8 +326,7 @@ static void dumpCells_map(struct cell *c, void *data) { /* So output local super cells that are active and have MPI * tasks as requested. */ - if (c->nodeID == e->nodeID && (!super || (super && c->super == c)) && - active && mpiactive) { + if (c->nodeID == e->nodeID && (!super ||((super && c->super == c) || (c->parent == NULL))) && active && mpiactive) { /* If requested we work out how many particles are active in this cell. */ int pactcount = 0; diff --git a/src/engine.c b/src/engine.c index d74a44b5cf041ea9c4f400e077bcfd057f45e0b7..f5d8cca878de2088dc1a7be322d2a64fd7e0122a 100644 --- a/src/engine.c +++ b/src/engine.c @@ -565,7 +565,8 @@ void engine_redistribute(struct engine *e) { /* Sort the particles according to their cell index. */ if (s->nr_parts > 0) - space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1, e->verbose); + space_parts_sort(s->parts, s->xparts, dest, &counts[nodeID * nr_nodes], + nr_nodes, 0); #ifdef SWIFT_DEBUG_CHECKS /* Verify that the part have been sorted correctly. */ @@ -656,7 +657,8 @@ void engine_redistribute(struct engine *e) { /* Sort the particles according to their cell index. */ if (s->nr_sparts > 0) - space_sparts_sort(s, s_dest, s->nr_sparts, 0, nr_nodes - 1, e->verbose); + space_sparts_sort(s->sparts, s_dest, &s_counts[nodeID * nr_nodes], nr_nodes, + 0); #ifdef SWIFT_DEBUG_CHECKS /* Verify that the spart have been sorted correctly. */ @@ -748,7 +750,8 @@ void engine_redistribute(struct engine *e) { /* Sort the gparticles according to their cell index. */ if (s->nr_gparts > 0) - space_gparts_sort(s, g_dest, s->nr_gparts, 0, nr_nodes - 1, e->verbose); + space_gparts_sort(s->gparts, s->parts, s->sparts, g_dest, + &g_counts[nodeID * nr_nodes], nr_nodes); #ifdef SWIFT_DEBUG_CHECKS /* Verify that the gpart have been sorted correctly. */ @@ -3678,6 +3681,11 @@ void engine_rebuild(struct engine *e, int clean_smoothing_length_values) { /* Re-build the space. */ space_rebuild(e->s, e->verbose); +#ifdef SWIFT_DEBUG_CHECKS + part_verify_links(e->s->parts, e->s->gparts, e->s->sparts, e->s->nr_parts, + e->s->nr_gparts, e->s->nr_sparts, e->verbose); +#endif + /* Initial cleaning up session ? */ if (clean_smoothing_length_values) space_sanitize(e->s); @@ -4151,7 +4159,7 @@ void engine_first_init_particles(struct engine *e) { const ticks tic = getticks(); - /* Set the particles in a state where they are ready for a run */ + /* Set the particles in a state where they are ready for a run. */ space_first_init_parts(e->s, e->chemistry, e->cooling_func); space_first_init_gparts(e->s, e->gravity_properties); space_first_init_sparts(e->s); diff --git a/src/part.c b/src/part.c index c43ffa4820504282b4fb02e63b5cfc4196f3be77..1b696a8cbc135fd2c128b5ad705a0e6e24a2d5c8 100644 --- a/src/part.c +++ b/src/part.c @@ -94,6 +94,26 @@ void part_relink_sparts_to_gparts(struct gpart *gparts, size_t N, } } +/** + * @brief Re-link both the #part%s and #spart%s associated with the list of + * #gpart%s. + * + * @param gparts The list of #gpart. + * @param N The number of particles to re-link; + * @param parts The global #part array in which to find the #gpart offsets. + * @param sparts The global #spart array in which to find the #gpart offsets. + */ +void part_relink_all_parts_to_gparts(struct gpart *gparts, size_t N, + struct part *parts, struct spart *sparts) { + for (size_t k = 0; k < N; k++) { + if (gparts[k].type == swift_type_gas) { + parts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; + } else if (gparts[k].type == swift_type_star) { + sparts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; + } + } +} + /** * @brief Verifies that the #gpart, #part and #spart are correctly linked * together @@ -128,19 +148,19 @@ void part_verify_links(struct part *parts, struct gpart *gparts, /* Check that it is linked */ if (gparts[k].id_or_neg_offset > 0) - error("Gas gpart not linked to anything !"); + error("Gas gpart not linked to anything!"); /* Find its link */ const struct part *part = &parts[-gparts[k].id_or_neg_offset]; /* Check the reverse link */ - if (part->gpart != &gparts[k]) error("Linking problem !"); + if (part->gpart != &gparts[k]) error("Linking problem!"); /* Check that the particles are at the same place */ if (gparts[k].x[0] != part->x[0] || gparts[k].x[1] != part->x[1] || gparts[k].x[2] != part->x[2]) error( - "Linked particles are not at the same position !\n" + "Linked particles are not at the same position!\n" "gp->x=[%e %e %e] p->x=[%e %e %e] diff=[%e %e %e]", gparts[k].x[0], gparts[k].x[1], gparts[k].x[2], part->x[0], part->x[1], part->x[2], gparts[k].x[0] - part->x[0], diff --git a/src/part.h b/src/part.h index 1b40aee0db3deb4790e07e3da9807060900d0c55..3e9ab5180e7cbfb4622b4a05d396a7f785f08b0b 100644 --- a/src/part.h +++ b/src/part.h @@ -80,6 +80,8 @@ void part_relink_parts_to_gparts(struct gpart *gparts, size_t N, struct part *parts); void part_relink_sparts_to_gparts(struct gpart *gparts, size_t N, struct spart *sparts); +void part_relink_all_parts_to_gparts(struct gpart *gparts, size_t N, + struct part *parts, struct spart *sparts); void part_verify_links(struct part *parts, struct gpart *gparts, struct spart *sparts, size_t nr_parts, size_t nr_gparts, size_t nr_sparts, int verbose); diff --git a/src/space.c b/src/space.c index b1d25fb85fd8d412d76a6900d136966b58a44552..b6aef6ea7db1b7bfe0ecbe6a39157f9be5994b50 100644 --- a/src/space.c +++ b/src/space.c @@ -101,6 +101,7 @@ struct index_data { struct space *s; struct cell *cells; int *ind; + int *cell_counts; }; /** @@ -580,26 +581,33 @@ void space_rebuild(struct space *s, int verbose) { an index that is larger than the number of particles to avoid re-allocating after shuffling. */ const size_t ind_size = s->size_parts + 100; - int *ind; - if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL) - error("Failed to allocate temporary particle indices."); - if (s->size_parts > 0) space_parts_get_cell_index(s, ind, cells_top, verbose); + int *ind = (int *)malloc(sizeof(int) * ind_size); + if (ind == NULL) error("Failed to allocate temporary particle indices."); + int *cell_part_counts = (int *)calloc(sizeof(int), s->nr_cells); + if (cell_part_counts == NULL) + error("Failed to allocate cell part count buffer."); + if (s->size_parts > 0) + space_parts_get_cell_index(s, ind, cell_part_counts, cells_top, verbose); /* Run through the gravity particles and get their cell index. */ const size_t gind_size = s->size_gparts + 100; - int *gind; - if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL) - error("Failed to allocate temporary g-particle indices."); + int *gind = (int *)malloc(sizeof(int) * gind_size); + if (gind == NULL) error("Failed to allocate temporary g-particle indices."); + int *cell_gpart_counts = (int *)calloc(sizeof(int), s->nr_cells); + if (cell_gpart_counts == NULL) + error("Failed to allocate cell gpart count buffer."); if (s->size_gparts > 0) - space_gparts_get_cell_index(s, gind, cells_top, verbose); + space_gparts_get_cell_index(s, gind, cell_gpart_counts, cells_top, verbose); /* Run through the star particles and get their cell index. */ const size_t sind_size = s->size_sparts + 100; - int *sind; - if ((sind = (int *)malloc(sizeof(int) * sind_size)) == NULL) - error("Failed to allocate temporary s-particle indices."); + int *sind = (int *)malloc(sizeof(int) * sind_size); + if (sind == NULL) error("Failed to allocate temporary s-particle indices."); + int *cell_spart_counts = (int *)calloc(sizeof(int), s->nr_cells); + if (cell_spart_counts == NULL) + error("Failed to allocate cell gpart count buffer."); if (s->size_sparts > 0) - space_sparts_get_cell_index(s, sind, cells_top, verbose); + space_sparts_get_cell_index(s, sind, cell_spart_counts, cells_top, verbose); #ifdef WITH_MPI const int local_nodeID = s->e->nodeID; @@ -609,9 +617,7 @@ void space_rebuild(struct space *s, int verbose) { if (cells_top[ind[k]].nodeID != local_nodeID) { nr_parts -= 1; /* Swap the particle */ - const struct part tp = s->parts[k]; - s->parts[k] = s->parts[nr_parts]; - s->parts[nr_parts] = tp; + memswap(&s->parts[k], &s->parts[nr_parts], sizeof(struct part)); /* Swap the link with the gpart */ if (s->parts[k].gpart != NULL) { s->parts[k].gpart->id_or_neg_offset = -k; @@ -620,13 +626,9 @@ void space_rebuild(struct space *s, int verbose) { s->parts[nr_parts].gpart->id_or_neg_offset = -nr_parts; } /* Swap the xpart */ - const struct xpart txp = s->xparts[k]; - s->xparts[k] = s->xparts[nr_parts]; - s->xparts[nr_parts] = txp; + memswap(&s->xparts[k], &s->xparts[nr_parts], sizeof(struct xpart)); /* Swap the index */ - const int t = ind[k]; - ind[k] = ind[nr_parts]; - ind[nr_parts] = t; + memswap(&ind[k], &ind[nr_parts], sizeof(int)); } else { /* Increment when not exchanging otherwise we need to retest "k".*/ k++; @@ -652,9 +654,7 @@ void space_rebuild(struct space *s, int verbose) { if (cells_top[sind[k]].nodeID != local_nodeID) { nr_sparts -= 1; /* Swap the particle */ - const struct spart tp = s->sparts[k]; - s->sparts[k] = s->sparts[nr_sparts]; - s->sparts[nr_sparts] = tp; + memswap(&s->sparts[k], &s->sparts[nr_sparts], sizeof(struct spart)); /* Swap the link with the gpart */ if (s->sparts[k].gpart != NULL) { s->sparts[k].gpart->id_or_neg_offset = -k; @@ -663,9 +663,7 @@ void space_rebuild(struct space *s, int verbose) { s->sparts[nr_sparts].gpart->id_or_neg_offset = -nr_sparts; } /* Swap the index */ - const int t = sind[k]; - sind[k] = sind[nr_sparts]; - sind[nr_sparts] = t; + memswap(&sind[k], &sind[nr_sparts], sizeof(int)); } else { /* Increment when not exchanging otherwise we need to retest "k".*/ k++; @@ -691,9 +689,7 @@ void space_rebuild(struct space *s, int verbose) { if (cells_top[gind[k]].nodeID != local_nodeID) { nr_gparts -= 1; /* Swap the particle */ - const struct gpart tp = s->gparts[k]; - s->gparts[k] = s->gparts[nr_gparts]; - s->gparts[nr_gparts] = tp; + memswap(&s->gparts[k], &s->gparts[nr_gparts], sizeof(struct gpart)); /* Swap the link with part/spart */ if (s->gparts[k].type == swift_type_gas) { s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; @@ -708,9 +704,7 @@ void space_rebuild(struct space *s, int verbose) { &s->gparts[nr_gparts]; } /* Swap the index */ - const int t = gind[k]; - gind[k] = gind[nr_gparts]; - gind[nr_gparts] = t; + memswap(&gind[k], &gind[nr_gparts], sizeof(int)); } else { /* Increment when not exchanging otherwise we need to retest "k".*/ k++; @@ -745,6 +739,15 @@ void space_rebuild(struct space *s, int verbose) { s->nr_gparts = nr_gparts + nr_gparts_exchanged; s->nr_sparts = nr_sparts + nr_sparts_exchanged; + /* Clear non-local cell counts. */ + for (int k = 0; k < s->nr_cells; k++) { + if (s->cells_top[k].nodeID != local_nodeID) { + cell_part_counts[k] = 0; + cell_spart_counts[k] = 0; + cell_gpart_counts[k] = 0; + } + } + /* Re-allocate the index array for the parts if needed.. */ if (s->nr_parts + 1 > ind_size) { int *ind_new; @@ -773,6 +776,7 @@ void space_rebuild(struct space *s, int verbose) { const struct part *const p = &s->parts[k]; ind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); + cell_part_counts[ind[k]]++; #ifdef SWIFT_DEBUG_CHECKS if (cells_top[ind[k]].nodeID != local_nodeID) error("Received part that does not belong to me (nodeID=%i).", @@ -786,6 +790,7 @@ void space_rebuild(struct space *s, int verbose) { const struct spart *const sp = &s->sparts[k]; sind[k] = cell_getid(cdim, sp->x[0] * ih[0], sp->x[1] * ih[1], sp->x[2] * ih[2]); + cell_spart_counts[sind[k]]++; #ifdef SWIFT_DEBUG_CHECKS if (cells_top[sind[k]].nodeID != local_nodeID) error("Received s-part that does not belong to me (nodeID=%i).", @@ -794,11 +799,12 @@ void space_rebuild(struct space *s, int verbose) { } nr_sparts = s->nr_sparts; -#endif /* WITH_MPI */ +#endif // WITH_MPI /* Sort the parts according to their cells. */ if (nr_parts > 0) - space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose); + space_parts_sort(s->parts, s->xparts, ind, cell_part_counts, s->nr_cells, + 0); #ifdef SWIFT_DEBUG_CHECKS /* Verify that the part have been sorted correctly. */ @@ -825,7 +831,7 @@ void space_rebuild(struct space *s, int verbose) { /* Sort the sparts according to their cells. */ if (nr_sparts > 0) - space_sparts_sort(s, sind, nr_sparts, 0, s->nr_cells - 1, verbose); + space_sparts_sort(s->sparts, sind, cell_spart_counts, s->nr_cells, 0); #ifdef SWIFT_DEBUG_CHECKS /* Verify that the spart have been sorted correctly. */ @@ -850,12 +856,6 @@ void space_rebuild(struct space *s, int verbose) { } #endif - /* Re-link the gparts to their (s-)particles. */ - if (nr_parts > 0 && nr_gparts > 0) - part_relink_gparts_to_parts(s->parts, nr_parts, 0); - if (nr_sparts > 0 && nr_gparts > 0) - part_relink_gparts_to_sparts(s->sparts, nr_sparts, 0); - /* Extract the cell counts from the sorted indices. */ size_t last_index = 0; ind[nr_parts] = s->nr_cells; // sentinel. @@ -878,7 +878,9 @@ void space_rebuild(struct space *s, int verbose) { /* We no longer need the indices as of here. */ free(ind); + free(cell_part_counts); free(sind); + free(cell_spart_counts); #ifdef WITH_MPI @@ -897,7 +899,7 @@ void space_rebuild(struct space *s, int verbose) { const struct gpart *const p = &s->gparts[k]; gind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); - + cell_gpart_counts[gind[k]]++; #ifdef SWIFT_DEBUG_CHECKS if (cells_top[gind[k]].nodeID != s->e->nodeID) error("Received g-part that does not belong to me (nodeID=%i).", @@ -906,11 +908,12 @@ void space_rebuild(struct space *s, int verbose) { } nr_gparts = s->nr_gparts; -#endif /* WITH_MPI */ +#endif // WITH_MPI /* Sort the gparts according to their cells. */ if (nr_gparts > 0) - space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose); + space_gparts_sort(s->gparts, s->parts, s->sparts, gind, cell_gpart_counts, + s->nr_cells); #ifdef SWIFT_DEBUG_CHECKS /* Verify that the gpart have been sorted correctly. */ @@ -935,14 +938,6 @@ void space_rebuild(struct space *s, int verbose) { } #endif - /* Re-link the parts. */ - if (nr_parts > 0 && nr_gparts > 0) - part_relink_parts_to_gparts(s->gparts, nr_gparts, s->parts); - - /* Re-link the sparts. */ - if (nr_sparts > 0 && nr_gparts > 0) - part_relink_sparts_to_gparts(s->gparts, nr_gparts, s->sparts); - /* Extract the cell counts from the sorted indices. */ size_t last_gindex = 0; gind[nr_gparts] = s->nr_cells; @@ -955,6 +950,7 @@ void space_rebuild(struct space *s, int verbose) { /* We no longer need the indices as of here. */ free(gind); + free(cell_gpart_counts); #ifdef SWIFT_DEBUG_CHECKS /* Verify that the links are correct */ @@ -999,6 +995,9 @@ void space_rebuild(struct space *s, int verbose) { cell_check_multipole(&s->cells_top[k], NULL); #endif + /* Clean up any stray sort indices in the cell buffer. */ + space_free_buff_sort_indices(s); + if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -1082,6 +1081,12 @@ void space_parts_get_cell_index_mapper(void *map_data, int nr_parts, const double ih_y = s->iwidth[1]; const double ih_z = s->iwidth[2]; + /* Init the local count buffer. */ + int *cell_counts = (int *)calloc(sizeof(int), s->nr_cells); + if (cell_counts == NULL) + error("Failed to allocate temporary cell count buffer."); + + /* Loop over the parts. */ for (int k = 0; k < nr_parts; k++) { /* Get the particle */ @@ -1100,6 +1105,7 @@ void space_parts_get_cell_index_mapper(void *map_data, int nr_parts, const int index = cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z); ind[k] = index; + cell_counts[index]++; #ifdef SWIFT_DEBUG_CHECKS if (index < 0 || index >= cdim[0] * cdim[1] * cdim[2]) @@ -1117,6 +1123,11 @@ void space_parts_get_cell_index_mapper(void *map_data, int nr_parts, p->x[1] = pos_y; p->x[2] = pos_z; } + + /* Write the counts back to the global array. */ + for (int k = 0; k < s->nr_cells; k++) + if (cell_counts[k]) atomic_add(&data->cell_counts[k], cell_counts[k]); + free(cell_counts); } /** @@ -1144,6 +1155,11 @@ void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts, const double ih_y = s->iwidth[1]; const double ih_z = s->iwidth[2]; + /* Init the local count buffer. */ + int *cell_counts = (int *)calloc(sizeof(int), s->nr_cells); + if (cell_counts == NULL) + error("Failed to allocate temporary cell count buffer."); + for (int k = 0; k < nr_gparts; k++) { /* Get the particle */ @@ -1162,6 +1178,7 @@ void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts, const int index = cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z); ind[k] = index; + cell_counts[index]++; #ifdef SWIFT_DEBUG_CHECKS if (index < 0 || index >= cdim[0] * cdim[1] * cdim[2]) @@ -1179,6 +1196,11 @@ void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts, gp->x[1] = pos_y; gp->x[2] = pos_z; } + + /* Write the counts back to the global array. */ + for (int k = 0; k < s->nr_cells; k++) + if (cell_counts[k]) atomic_add(&data->cell_counts[k], cell_counts[k]); + free(cell_counts); } /** @@ -1206,6 +1228,11 @@ void space_sparts_get_cell_index_mapper(void *map_data, int nr_sparts, const double ih_y = s->iwidth[1]; const double ih_z = s->iwidth[2]; + /* Init the local count buffer. */ + int *cell_counts = (int *)calloc(sizeof(int), s->nr_cells); + if (cell_counts == NULL) + error("Failed to allocate temporary cell count buffer."); + for (int k = 0; k < nr_sparts; k++) { /* Get the particle */ @@ -1224,6 +1251,7 @@ void space_sparts_get_cell_index_mapper(void *map_data, int nr_sparts, const int index = cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z); ind[k] = index; + cell_counts[index]++; #ifdef SWIFT_DEBUG_CHECKS if (index < 0 || index >= cdim[0] * cdim[1] * cdim[2]) @@ -1241,6 +1269,11 @@ void space_sparts_get_cell_index_mapper(void *map_data, int nr_sparts, sp->x[1] = pos_y; sp->x[2] = pos_z; } + + /* Write the counts back to the global array. */ + for (int k = 0; k < s->nr_cells; k++) + if (cell_counts[k]) atomic_add(&data->cell_counts[k], cell_counts[k]); + free(cell_counts); } /** @@ -1248,11 +1281,12 @@ void space_sparts_get_cell_index_mapper(void *map_data, int nr_sparts, * * @param s The #space. * @param ind The array of indices to fill. + * @param cell_counts The cell counters to update. * @param cells The array of #cell to update. * @param verbose Are we talkative ? */ -void space_parts_get_cell_index(struct space *s, int *ind, struct cell *cells, - int verbose) { +void space_parts_get_cell_index(struct space *s, int *ind, int *cell_counts, + struct cell *cells, int verbose) { const ticks tic = getticks(); @@ -1261,6 +1295,7 @@ void space_parts_get_cell_index(struct space *s, int *ind, struct cell *cells, data.s = s; data.cells = cells; data.ind = ind; + data.cell_counts = cell_counts; threadpool_map(&s->e->threadpool, space_parts_get_cell_index_mapper, s->parts, s->nr_parts, sizeof(struct part), 0, &data); @@ -1275,11 +1310,12 @@ void space_parts_get_cell_index(struct space *s, int *ind, struct cell *cells, * * @param s The #space. * @param gind The array of indices to fill. + * @param cell_counts The cell counters to update. * @param cells The array of #cell to update. * @param verbose Are we talkative ? */ -void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells, - int verbose) { +void space_gparts_get_cell_index(struct space *s, int *gind, int *cell_counts, + struct cell *cells, int verbose) { const ticks tic = getticks(); @@ -1288,6 +1324,7 @@ void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells, data.s = s; data.cells = cells; data.ind = gind; + data.cell_counts = cell_counts; threadpool_map(&s->e->threadpool, space_gparts_get_cell_index_mapper, s->gparts, s->nr_gparts, sizeof(struct gpart), 0, &data); @@ -1302,11 +1339,12 @@ void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells, * * @param s The #space. * @param sind The array of indices to fill. + * @param cell_counts The cell counters to update. * @param cells The array of #cell to update. * @param verbose Are we talkative ? */ -void space_sparts_get_cell_index(struct space *s, int *sind, struct cell *cells, - int verbose) { +void space_sparts_get_cell_index(struct space *s, int *sind, int *cell_counts, + struct cell *cells, int verbose) { const ticks tic = getticks(); @@ -1315,6 +1353,7 @@ void space_sparts_get_cell_index(struct space *s, int *sind, struct cell *cells, data.s = s; data.cells = cells; data.ind = sind; + data.cell_counts = cell_counts; threadpool_map(&s->e->threadpool, space_sparts_get_cell_index_mapper, s->sparts, s->nr_sparts, sizeof(struct spart), 0, &data); @@ -1328,551 +1367,176 @@ void space_sparts_get_cell_index(struct space *s, int *sind, struct cell *cells, * @brief Sort the particles and condensed particles according to the given * indices. * - * @param s The #space. + * @param parts The array of #part to sort. + * @param xparts The corresponding #xpart array to sort as well. * @param ind The indices with respect to which the parts are sorted. - * @param N The number of parts - * @param min Lowest index. - * @param max highest index. - * @param verbose Are we talkative ? + * @param counts Number of particles per index. + * @param num_bins Total number of bins (length of count). + * @param parts_offset Offset of the #part array from the global #part array. */ -void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, - int verbose) { - - const ticks tic = getticks(); - - /* Populate a parallel_sort structure with the input data */ - struct parallel_sort sort_struct; - sort_struct.parts = s->parts; - sort_struct.xparts = s->xparts; - sort_struct.ind = ind; - sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads; - if ((sort_struct.stack = (struct qstack *)malloc( - sizeof(struct qstack) * sort_struct.stack_size)) == NULL) - error("Failed to allocate sorting stack."); - for (unsigned int i = 0; i < sort_struct.stack_size; i++) - sort_struct.stack[i].ready = 0; - - /* Add the first interval. */ - sort_struct.stack[0].i = 0; - sort_struct.stack[0].j = N - 1; - sort_struct.stack[0].min = min; - sort_struct.stack[0].max = max; - sort_struct.stack[0].ready = 1; - sort_struct.first = 0; - sort_struct.last = 1; - sort_struct.waiting = 1; - - /* Launch the sorting tasks with a stride of zero such that the same - map data is passed to each thread. */ - threadpool_map(&s->e->threadpool, space_parts_sort_mapper, &sort_struct, - s->e->threadpool.num_threads, 0, 1, NULL); - -#ifdef SWIFT_DEBUG_CHECKS - /* Verify space_sort_struct. */ - for (size_t i = 1; i < N; i++) - if (ind[i - 1] > ind[i]) - error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1, - ind[i - 1], i, ind[i], min, max); - if (s->e->nodeID == 0 || verbose) message("Sorting succeeded."); -#endif - - /* Clean up. */ - free(sort_struct.stack); - - if (verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); -} - -void space_parts_sort_mapper(void *map_data, int num_elements, - void *extra_data) { - - /* Unpack the mapping data. */ - struct parallel_sort *sort_struct = (struct parallel_sort *)map_data; - - /* Pointers to the sorting data. */ - int *ind = sort_struct->ind; - struct part *parts = sort_struct->parts; - struct xpart *xparts = sort_struct->xparts; - - /* Main loop. */ - while (sort_struct->waiting) { - - /* Grab an interval off the queue. */ - int qid = atomic_inc(&sort_struct->first) % sort_struct->stack_size; - - /* Wait for the entry to be ready, or for the sorting do be done. */ - while (!sort_struct->stack[qid].ready) - if (!sort_struct->waiting) return; - - /* Get the stack entry. */ - ptrdiff_t i = sort_struct->stack[qid].i; - ptrdiff_t j = sort_struct->stack[qid].j; - int min = sort_struct->stack[qid].min; - int max = sort_struct->stack[qid].max; - sort_struct->stack[qid].ready = 0; - - /* Loop over sub-intervals. */ - while (1) { - - /* Bring beer. */ - const int pivot = (min + max) / 2; - /* message("Working on interval [%i,%i] with min=%i, max=%i, pivot=%i.", - i, j, min, max, pivot); */ - - /* One pass of QuickSort's partitioning. */ - ptrdiff_t ii = i; - ptrdiff_t jj = j; - while (ii < jj) { - while (ii <= j && ind[ii] <= pivot) ii++; - while (jj >= i && ind[jj] > pivot) jj--; - if (ii < jj) { - memswap(&ind[ii], &ind[jj], sizeof(int)); - memswap(&parts[ii], &parts[jj], sizeof(struct part)); - memswap(&xparts[ii], &xparts[jj], sizeof(struct xpart)); - } +void space_parts_sort(struct part *parts, struct xpart *xparts, int *ind, + int *counts, int num_bins, ptrdiff_t parts_offset) { + /* Create the offsets array. */ + size_t *offsets = (size_t *)malloc(sizeof(size_t) * (num_bins + 1)); + if (offsets == NULL) + error("Failed to allocate temporary cell offsets array."); + offsets[0] = 0; + for (int k = 1; k <= num_bins; k++) { + offsets[k] = offsets[k - 1] + counts[k - 1]; + counts[k - 1] = 0; + } + + /* Loop over local cells. */ + for (int cid = 0; cid < num_bins; cid++) { + for (size_t k = offsets[cid] + counts[cid]; k < offsets[cid + 1]; k++) { + counts[cid]++; + int target_cid = ind[k]; + if (target_cid == cid) { + continue; } - -#ifdef SWIFT_DEBUG_CHECKS - /* Verify space_sort_struct. */ - if (i != j) { - for (int k = i; k <= jj; k++) { - if (ind[k] > pivot) { - message( - "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", k, - ind[k], pivot, i, j); - error("Partition failed (<=pivot)."); - } - } - for (int k = jj + 1; k <= j; k++) { - if (ind[k] <= pivot) { - message( - "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", k, - ind[k], pivot, i, j); - error("Partition failed (>pivot)."); - } - } - } -#endif - - /* Split-off largest interval. */ - if (jj - i > j - jj + 1) { - - /* Recurse on the left? */ - if (jj > i && pivot > min) { - qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size; - while (sort_struct->stack[qid].ready) - ; - sort_struct->stack[qid].i = i; - sort_struct->stack[qid].j = jj; - sort_struct->stack[qid].min = min; - sort_struct->stack[qid].max = pivot; - if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size) - error("Qstack overflow."); - sort_struct->stack[qid].ready = 1; + struct part temp_part = parts[k]; + struct xpart temp_xpart = xparts[k]; + while (target_cid != cid) { + size_t j = offsets[target_cid] + counts[target_cid]++; + while (ind[j] == target_cid) { + j = offsets[target_cid] + counts[target_cid]++; } - - /* Recurse on the right? */ - if (jj + 1 < j && pivot + 1 < max) { - i = jj + 1; - min = pivot + 1; - } else - break; - - } else { - - /* Recurse on the right? */ - if (pivot + 1 < max) { - qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size; - while (sort_struct->stack[qid].ready) - ; - sort_struct->stack[qid].i = jj + 1; - sort_struct->stack[qid].j = j; - sort_struct->stack[qid].min = pivot + 1; - sort_struct->stack[qid].max = max; - if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size) - error("Qstack overflow."); - sort_struct->stack[qid].ready = 1; - } - - /* Recurse on the left? */ - if (jj > i && pivot > min) { - j = jj; - max = pivot; - } else - break; + memswap(&parts[j], &temp_part, sizeof(struct part)); + memswap(&xparts[j], &temp_xpart, sizeof(struct xpart)); + memswap(&ind[j], &target_cid, sizeof(int)); + if (parts[j].gpart) + parts[j].gpart->id_or_neg_offset = -(j + parts_offset); } + parts[k] = temp_part; + xparts[k] = temp_xpart; + ind[k] = target_cid; + if (parts[k].gpart) + parts[k].gpart->id_or_neg_offset = -(k + parts_offset); + } + } - } /* loop over sub-intervals. */ - - atomic_dec(&sort_struct->waiting); - - } /* main loop. */ +#ifdef SWIFT_DEBUG_CHECKS + for (int k = 0; k < num_bins; k++) + if (offsets[k + 1] != offsets[k] + counts[k]) + error("Bad offsets after shuffle."); +#endif // SWIFT_DEBUG_CHECKS } /** * @brief Sort the s-particles according to the given indices. * - * @param s The #space. + * @param sparts The array of #spart to sort. * @param ind The indices with respect to which the #spart are sorted. - * @param N The number of parts - * @param min Lowest index. - * @param max highest index. - * @param verbose Are we talkative ? + * @param counts Number of particles per index. + * @param num_bins Total number of bins (length of counts). + * @param sparts_offset Offset of the #spart array from the global #spart. + * array. */ -void space_sparts_sort(struct space *s, int *ind, size_t N, int min, int max, - int verbose) { - - const ticks tic = getticks(); - - /* Populate a parallel_sort structure with the input data */ - struct parallel_sort sort_struct; - sort_struct.sparts = s->sparts; - sort_struct.ind = ind; - sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads; - if ((sort_struct.stack = (struct qstack *)malloc( - sizeof(struct qstack) * sort_struct.stack_size)) == NULL) - error("Failed to allocate sorting stack."); - for (unsigned int i = 0; i < sort_struct.stack_size; i++) - sort_struct.stack[i].ready = 0; - - /* Add the first interval. */ - sort_struct.stack[0].i = 0; - sort_struct.stack[0].j = N - 1; - sort_struct.stack[0].min = min; - sort_struct.stack[0].max = max; - sort_struct.stack[0].ready = 1; - sort_struct.first = 0; - sort_struct.last = 1; - sort_struct.waiting = 1; - - /* Launch the sorting tasks with a stride of zero such that the same - map data is passed to each thread. */ - threadpool_map(&s->e->threadpool, space_sparts_sort_mapper, &sort_struct, - s->e->threadpool.num_threads, 0, 1, NULL); - -#ifdef SWIFT_DEBUG_CHECKS - /* Verify space_sort_struct. */ - for (size_t i = 1; i < N; i++) - if (ind[i - 1] > ind[i]) - error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1, - ind[i - 1], i, ind[i], min, max); - if (s->e->nodeID == 0 || verbose) message("Sorting succeeded."); -#endif - - /* Clean up. */ - free(sort_struct.stack); - - if (verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); -} - -void space_sparts_sort_mapper(void *map_data, int num_elements, - void *extra_data) { - - /* Unpack the mapping data. */ - struct parallel_sort *sort_struct = (struct parallel_sort *)map_data; - - /* Pointers to the sorting data. */ - int *ind = sort_struct->ind; - struct spart *sparts = sort_struct->sparts; - - /* Main loop. */ - while (sort_struct->waiting) { - - /* Grab an interval off the queue. */ - int qid = atomic_inc(&sort_struct->first) % sort_struct->stack_size; - - /* Wait for the entry to be ready, or for the sorting do be done. */ - while (!sort_struct->stack[qid].ready) - if (!sort_struct->waiting) return; - - /* Get the stack entry. */ - ptrdiff_t i = sort_struct->stack[qid].i; - ptrdiff_t j = sort_struct->stack[qid].j; - int min = sort_struct->stack[qid].min; - int max = sort_struct->stack[qid].max; - sort_struct->stack[qid].ready = 0; - - /* Loop over sub-intervals. */ - while (1) { - - /* Bring beer. */ - const int pivot = (min + max) / 2; - /* message("Working on interval [%i,%i] with min=%i, max=%i, pivot=%i.", - i, j, min, max, pivot); */ - - /* One pass of QuickSort's partitioning. */ - ptrdiff_t ii = i; - ptrdiff_t jj = j; - while (ii < jj) { - while (ii <= j && ind[ii] <= pivot) ii++; - while (jj >= i && ind[jj] > pivot) jj--; - if (ii < jj) { - memswap(&ind[ii], &ind[jj], sizeof(int)); - memswap(&sparts[ii], &sparts[jj], sizeof(struct spart)); - } - } - -#ifdef SWIFT_DEBUG_CHECKS - /* Verify space_sort_struct. */ - if (i != j) { - for (int k = i; k <= jj; k++) { - if (ind[k] > pivot) { - message( - "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li " - "min=%i max=%i.", - k, ind[k], pivot, i, j, min, max); - error("Partition failed (<=pivot)."); - } - } - for (int k = jj + 1; k <= j; k++) { - if (ind[k] <= pivot) { - message( - "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", k, - ind[k], pivot, i, j); - error("Partition failed (>pivot)."); - } - } +void space_sparts_sort(struct spart *sparts, int *ind, int *counts, + int num_bins, ptrdiff_t sparts_offset) { + /* Create the offsets array. */ + size_t *offsets = (size_t *)malloc(sizeof(size_t) * (num_bins + 1)); + if (offsets == NULL) + error("Failed to allocate temporary cell offsets array."); + offsets[0] = 0; + for (int k = 1; k <= num_bins; k++) { + offsets[k] = offsets[k - 1] + counts[k - 1]; + counts[k - 1] = 0; + } + + /* Loop over local cells. */ + for (int cid = 0; cid < num_bins; cid++) { + for (size_t k = offsets[cid] + counts[cid]; k < offsets[cid + 1]; k++) { + counts[cid]++; + int target_cid = ind[k]; + if (target_cid == cid) { + continue; } -#endif - - /* Split-off largest interval. */ - if (jj - i > j - jj + 1) { - - /* Recurse on the left? */ - if (jj > i && pivot > min) { - qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size; - while (sort_struct->stack[qid].ready) - ; - sort_struct->stack[qid].i = i; - sort_struct->stack[qid].j = jj; - sort_struct->stack[qid].min = min; - sort_struct->stack[qid].max = pivot; - if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size) - error("Qstack overflow."); - sort_struct->stack[qid].ready = 1; + struct spart temp_spart = sparts[k]; + while (target_cid != cid) { + size_t j = offsets[target_cid] + counts[target_cid]++; + while (ind[j] == target_cid) { + j = offsets[target_cid] + counts[target_cid]++; } - - /* Recurse on the right? */ - if (jj + 1 < j && pivot + 1 < max) { - i = jj + 1; - min = pivot + 1; - } else - break; - - } else { - - /* Recurse on the right? */ - if (pivot + 1 < max) { - qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size; - while (sort_struct->stack[qid].ready) - ; - sort_struct->stack[qid].i = jj + 1; - sort_struct->stack[qid].j = j; - sort_struct->stack[qid].min = pivot + 1; - sort_struct->stack[qid].max = max; - if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size) - error("Qstack overflow."); - sort_struct->stack[qid].ready = 1; - } - - /* Recurse on the left? */ - if (jj > i && pivot > min) { - j = jj; - max = pivot; - } else - break; + memswap(&sparts[j], &temp_spart, sizeof(struct spart)); + memswap(&ind[j], &target_cid, sizeof(int)); + if (sparts[j].gpart) + sparts[j].gpart->id_or_neg_offset = -(j + sparts_offset); } + sparts[k] = temp_spart; + ind[k] = target_cid; + if (sparts[k].gpart) + sparts[k].gpart->id_or_neg_offset = -(k + sparts_offset); + } + } - } /* loop over sub-intervals. */ - - atomic_dec(&sort_struct->waiting); - - } /* main loop. */ +#ifdef SWIFT_DEBUG_CHECKS + for (int k = 0; k < num_bins; k++) + if (offsets[k + 1] != offsets[k] + counts[k]) + error("Bad offsets after shuffle."); +#endif // SWIFT_DEBUG_CHECKS } /** * @brief Sort the g-particles according to the given indices. * - * @param s The #space. + * @param gparts The array of #gpart to sort. + * @param parts Global #part array for re-linking. + * @param sparts Global #spart array for re-linking. * @param ind The indices with respect to which the gparts are sorted. - * @param N The number of gparts - * @param min Lowest index. - * @param max highest index. - * @param verbose Are we talkative ? + * @param counts Number of particles per index. + * @param num_bins Total number of bins (length of counts). */ -void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max, - int verbose) { - - const ticks tic = getticks(); - - /*Populate a global parallel_sort structure with the input data */ - struct parallel_sort sort_struct; - sort_struct.gparts = s->gparts; - sort_struct.ind = ind; - sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads; - if ((sort_struct.stack = (struct qstack *)malloc( - sizeof(struct qstack) * sort_struct.stack_size)) == NULL) - error("Failed to allocate sorting stack."); - for (unsigned int i = 0; i < sort_struct.stack_size; i++) - sort_struct.stack[i].ready = 0; - - /* Add the first interval. */ - sort_struct.stack[0].i = 0; - sort_struct.stack[0].j = N - 1; - sort_struct.stack[0].min = min; - sort_struct.stack[0].max = max; - sort_struct.stack[0].ready = 1; - sort_struct.first = 0; - sort_struct.last = 1; - sort_struct.waiting = 1; - - /* Launch the sorting tasks with a stride of zero such that the same - map data is passed to each thread. */ - threadpool_map(&s->e->threadpool, space_gparts_sort_mapper, &sort_struct, - s->e->threadpool.num_threads, 0, 1, NULL); - -#ifdef SWIFT_DEBUG_CHECKS - /* Verify space_sort_struct. */ - for (size_t i = 1; i < N; i++) - if (ind[i - 1] > ind[i]) - error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1, - ind[i - 1], i, ind[i], min, max); - if (s->e->nodeID == 0 || verbose) message("Sorting succeeded."); -#endif - - /* Clean up. */ - free(sort_struct.stack); - - if (verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); -} - -void space_gparts_sort_mapper(void *map_data, int num_elements, - void *extra_data) { - - /* Unpack the mapping data. */ - struct parallel_sort *sort_struct = (struct parallel_sort *)map_data; - - /* Pointers to the sorting data. */ - int *ind = sort_struct->ind; - struct gpart *gparts = sort_struct->gparts; - - /* Main loop. */ - while (sort_struct->waiting) { - - /* Grab an interval off the queue. */ - int qid = atomic_inc(&sort_struct->first) % sort_struct->stack_size; - - /* Wait for the entry to be ready, or for the sorting do be done. */ - while (!sort_struct->stack[qid].ready) - if (!sort_struct->waiting) return; - - /* Get the stack entry. */ - ptrdiff_t i = sort_struct->stack[qid].i; - ptrdiff_t j = sort_struct->stack[qid].j; - int min = sort_struct->stack[qid].min; - int max = sort_struct->stack[qid].max; - sort_struct->stack[qid].ready = 0; - - /* Loop over sub-intervals. */ - while (1) { - - /* Bring beer. */ - const int pivot = (min + max) / 2; - /* message("Working on interval [%i,%i] with min=%i, max=%i, pivot=%i.", - i, j, min, max, pivot); */ - - /* One pass of QuickSort's partitioning. */ - ptrdiff_t ii = i; - ptrdiff_t jj = j; - while (ii < jj) { - while (ii <= j && ind[ii] <= pivot) ii++; - while (jj >= i && ind[jj] > pivot) jj--; - if (ii < jj) { - memswap(&ind[ii], &ind[jj], sizeof(int)); - memswap(&gparts[ii], &gparts[jj], sizeof(struct gpart)); - } +void space_gparts_sort(struct gpart *gparts, struct part *parts, + struct spart *sparts, int *ind, int *counts, + int num_bins) { + /* Create the offsets array. */ + size_t *offsets = (size_t *)malloc(sizeof(size_t) * (num_bins + 1)); + if (offsets == NULL) + error("Failed to allocate temporary cell offsets array."); + offsets[0] = 0; + for (int k = 1; k <= num_bins; k++) { + offsets[k] = offsets[k - 1] + counts[k - 1]; + counts[k - 1] = 0; + } + + /* Loop over local cells. */ + for (int cid = 0; cid < num_bins; cid++) { + for (size_t k = offsets[cid] + counts[cid]; k < offsets[cid + 1]; k++) { + counts[cid]++; + int target_cid = ind[k]; + if (target_cid == cid) { + continue; } - -#ifdef SWIFT_DEBUG_CHECKS - /* Verify space_sort_struct. */ - if (i != j) { - for (int k = i; k <= jj; k++) { - if (ind[k] > pivot) { - message( - "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", k, - ind[k], pivot, i, j); - error("Partition failed (<=pivot)."); - } + struct gpart temp_gpart = gparts[k]; + while (target_cid != cid) { + size_t j = offsets[target_cid] + counts[target_cid]++; + while (ind[j] == target_cid) { + j = offsets[target_cid] + counts[target_cid]++; } - for (int k = jj + 1; k <= j; k++) { - if (ind[k] <= pivot) { - message( - "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", k, - ind[k], pivot, i, j); - error("Partition failed (>pivot)."); - } + memswap(&gparts[j], &temp_gpart, sizeof(struct gpart)); + memswap(&ind[j], &target_cid, sizeof(int)); + if (gparts[j].type == swift_type_gas) { + parts[-gparts[j].id_or_neg_offset].gpart = &gparts[j]; + } else if (gparts[j].type == swift_type_star) { + sparts[-gparts[j].id_or_neg_offset].gpart = &gparts[j]; } } -#endif - - /* Split-off largest interval. */ - if (jj - i > j - jj + 1) { - - /* Recurse on the left? */ - if (jj > i && pivot > min) { - qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size; - while (sort_struct->stack[qid].ready) - ; - sort_struct->stack[qid].i = i; - sort_struct->stack[qid].j = jj; - sort_struct->stack[qid].min = min; - sort_struct->stack[qid].max = pivot; - if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size) - error("Qstack overflow."); - sort_struct->stack[qid].ready = 1; - } - - /* Recurse on the right? */ - if (jj + 1 < j && pivot + 1 < max) { - i = jj + 1; - min = pivot + 1; - } else - break; - - } else { - - /* Recurse on the right? */ - if (pivot + 1 < max) { - qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size; - while (sort_struct->stack[qid].ready) - ; - sort_struct->stack[qid].i = jj + 1; - sort_struct->stack[qid].j = j; - sort_struct->stack[qid].min = pivot + 1; - sort_struct->stack[qid].max = max; - if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size) - error("Qstack overflow."); - sort_struct->stack[qid].ready = 1; - } - - /* Recurse on the left? */ - if (jj > i && pivot > min) { - j = jj; - max = pivot; - } else - break; + gparts[k] = temp_gpart; + ind[k] = target_cid; + if (gparts[k].type == swift_type_gas) { + parts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; + } else if (gparts[k].type == swift_type_star) { + sparts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; } + } + } - } /* loop over sub-intervals. */ - - atomic_dec(&sort_struct->waiting); - - } /* main loop. */ +#ifdef SWIFT_DEBUG_CHECKS + for (int k = 0; k < num_bins; k++) + if (offsets[k + 1] != offsets[k] + counts[k]) + error("Bad offsets after shuffle."); +#endif // SWIFT_DEBUG_CHECKS } /** @@ -2410,10 +2074,6 @@ void space_recycle(struct space *s, struct cell *c) { lock_destroy(&c->mlock) != 0 || lock_destroy(&c->slock) != 0) error("Failed to destroy spinlocks."); - /* Clear this cell's sort arrays. */ - for (int i = 0; i < 13; i++) - if (c->sort[i] != NULL) free(c->sort[i]); - /* Lock the space. */ lock_lock(&s->lock); @@ -2463,10 +2123,6 @@ void space_recycle_list(struct space *s, struct cell *cell_list_begin, lock_destroy(&c->mlock) != 0 || lock_destroy(&c->slock) != 0) error("Failed to destroy spinlocks."); - /* Clear this cell's sort arrays. */ - for (int i = 0; i < 13; i++) - if (c->sort[i] != NULL) free(c->sort[i]); - /* Count this cell. */ count += 1; } @@ -2514,6 +2170,9 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) { space_cellallocchunk * sizeof(struct cell)) != 0) error("Failed to allocate more cells."); + /* Clear the newly-allocated cells. */ + bzero(s->cells_sub, sizeof(struct cell) * space_cellallocchunk); + /* Constructed a linked list */ for (int k = 0; k < space_cellallocchunk - 1; k++) s->cells_sub[k].next = &s->cells_sub[k + 1]; @@ -2550,6 +2209,8 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) { /* Init some things in the cell we just got. */ for (int j = 0; j < nr_cells; j++) { + for (int k = 0; k < 13; k++) + if (cells[j]->sort[k] != NULL) free(cells[j]->sort[k]); struct gravity_tensors *temp = cells[j]->multipole; bzero(cells[j], sizeof(struct cell)); cells[j]->multipole = temp; @@ -2560,6 +2221,22 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) { } } +/** + * @brief Free sort arrays in any cells in the cell buffer. + * + * @param s The #space. + */ +void space_free_buff_sort_indices(struct space *s) { + for (struct cell *finger = s->cells_sub; finger != NULL; + finger = finger->next) { + for (int k = 0; k < 13; k++) + if (finger->sort[k] != NULL) { + free(finger->sort[k]); + finger->sort[k] = NULL; + } + } +} + /** * @brief Construct the list of top-level cells that have any tasks in * their hierarchy. @@ -2685,6 +2362,9 @@ void space_first_init_parts(struct space *s, /* And the cooling */ cooling_first_init_part(phys_const, us, cosmo, cool_func, &p[i], &xp[i]); + /* Check part->gpart->part linkeage. */ + if (p[i].gpart) p[i].gpart->id_or_neg_offset = -i; + #ifdef SWIFT_DEBUG_CHECKS p[i].ti_drift = 0; p[i].ti_kick = 0; @@ -2762,6 +2442,9 @@ void space_first_init_sparts(struct space *s) { star_first_init_spart(&sp[i]); + /* Check spart->gpart->spart linkeage. */ + if (sp[i].gpart) sp[i].gpart->id_or_neg_offset = -i; + #ifdef SWIFT_DEBUG_CHECKS sp[i].ti_drift = 0; sp[i].ti_kick = 0; diff --git a/src/space.h b/src/space.h index b3f7f839651a81f525f7071c25a0aab5ebc067aa..db9c9edad3d7435ea619e72814e2c97094344090 100644 --- a/src/space.h +++ b/src/space.h @@ -173,12 +173,14 @@ struct space { }; /* function prototypes. */ -void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, - int verbose); -void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max, - int verbose); -void space_sparts_sort(struct space *s, int *ind, size_t N, int min, int max, - int verbose); +void space_free_buff_sort_indices(struct space *s); +void space_parts_sort(struct part *parts, struct xpart *xparts, int *ind, + int *counts, int num_bins, ptrdiff_t parts_offset); +void space_gparts_sort(struct gpart *gparts, struct part *parts, + struct spart *sparts, int *ind, int *counts, + int num_bins); +void space_sparts_sort(struct spart *sparts, int *ind, int *counts, + int num_bins, ptrdiff_t sparts_offset); void space_getcells(struct space *s, int nr_cells, struct cell **cells); int space_getsid(struct space *s, struct cell **ci, struct cell **cj, double *shift); @@ -199,12 +201,6 @@ void space_map_parts_xparts(struct space *s, struct cell *c)); void space_map_cells_post(struct space *s, int full, void (*fun)(struct cell *c, void *data), void *data); -void space_parts_sort_mapper(void *map_data, int num_elements, - void *extra_data); -void space_gparts_sort_mapper(void *map_data, int num_elements, - void *extra_data); -void space_sparts_sort_mapper(void *map_data, int num_elements, - void *extra_data); void space_rebuild(struct space *s, int verbose); void space_recycle(struct space *s, struct cell *c); void space_recycle_list(struct space *s, struct cell *cell_list_begin, @@ -215,12 +211,12 @@ void space_split(struct space *s, struct cell *cells, int nr_cells, int verbose); void space_split_mapper(void *map_data, int num_elements, void *extra_data); void space_list_cells_with_tasks(struct space *s); -void space_parts_get_cell_index(struct space *s, int *ind, struct cell *cells, - int verbose); -void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells, - int verbose); -void space_sparts_get_cell_index(struct space *s, int *sind, struct cell *cells, - int verbose); +void space_parts_get_cell_index(struct space *s, int *ind, int *cell_counts, + struct cell *cells, int verbose); +void space_gparts_get_cell_index(struct space *s, int *gind, int *cell_counts, + struct cell *cells, int verbose); +void space_sparts_get_cell_index(struct space *s, int *sind, int *cell_counts, + struct cell *cells, int verbose); void space_synchronize_particle_positions(struct space *s); void space_do_parts_sort(); void space_do_gparts_sort();