diff --git a/examples/main.c b/examples/main.c index ba0b11eae89431ac663a5f1a6cb6baa45306bfaa..b1085b73d201be92850e7e67e7b74f35b572221c 100644 --- a/examples/main.c +++ b/examples/main.c @@ -499,7 +499,6 @@ int main(int argc, char *argv[]) { if (with_star_formation && with_feedback) error("Can't run with star formation and feedback over MPI (yet)"); if (with_limiter) error("Can't run with time-step limiter over MPI (yet)"); - if (with_black_holes) error("Can't run with black holes over MPI (yet)"); #endif /* Temporary early aborts for modes not supported with hand-vec. */ @@ -799,16 +798,18 @@ int main(int argc, char *argv[]) { #if defined(HAVE_HDF5) #if defined(WITH_MPI) #if defined(HAVE_PARALLEL_HDF5) - read_ic_parallel(ICfileName, &us, dim, &parts, &gparts, &sparts, &Ngas, - &Ngpart, &Nspart, &flag_entropy_ICs, with_hydro, + read_ic_parallel(ICfileName, &us, dim, &parts, &gparts, &sparts, &bparts, &Ngas, + &Ngpart, &Nspart, &Nbpart, &flag_entropy_ICs, with_hydro, (with_external_gravity || with_self_gravity), with_stars, + with_black_holes, cleanup_h, cleanup_sqrt_a, cosmo.h, cosmo.a, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, nr_threads, dry_run); #else - read_ic_serial(ICfileName, &us, dim, &parts, &gparts, &sparts, &Ngas, - &Ngpart, &Nspart, &flag_entropy_ICs, with_hydro, + read_ic_serial(ICfileName, &us, dim, &parts, &gparts, &sparts, &bparts, &Ngas, + &Ngpart, &Nspart, &Nbpart, &flag_entropy_ICs, with_hydro, (with_external_gravity || with_self_gravity), with_stars, + with_black_holes, cleanup_h, cleanup_sqrt_a, cosmo.h, cosmo.a, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, nr_threads, dry_run); @@ -834,6 +835,10 @@ int main(int argc, char *argv[]) { for (size_t k = 0; k < Ngpart; ++k) if (gparts[k].type == swift_type_stars) error("Linking problem"); } + if (!with_black_holes && !dry_run) { + for (size_t k = 0; k < Ngpart; ++k) + if (gparts[k].type == swift_type_black_hole) error("Linking problem"); + } if (!with_hydro && !dry_run) { for (size_t k = 
0; k < Ngpart; ++k) if (gparts[k].type == swift_type_gas) error("Linking problem"); @@ -1058,9 +1063,9 @@ int main(int argc, char *argv[]) { /* Legend */ if (myrank == 0) { - printf("# %6s %14s %12s %12s %14s %9s %12s %12s %12s %16s [%s] %6s\n", + printf("# %6s %14s %12s %12s %14s %9s %12s %12s %12s %12s %16s [%s] %6s\n", "Step", "Time", "Scale-factor", "Redshift", "Time-step", "Time-bins", - "Updates", "g-Updates", "s-Updates", "Wall-clock time", + "Updates", "g-Updates", "s-Updates", "b-Updates", "Wall-clock time", clocks_getunit(), "Props"); fflush(stdout); } diff --git a/src/cell.c b/src/cell.c index 07d896db2103a6ed7549cc7737c5377c9e577680..638cd58d161cb35dee040e0ec18c0de7f8b43355 100644 --- a/src/cell.c +++ b/src/cell.c @@ -188,6 +188,39 @@ int cell_link_sparts(struct cell *c, struct spart *sparts) { return c->stars.count; } +/** + * @brief Link the cells recursively to the given #bpart array. + * + * @param c The #cell. + * @param bparts The #bpart array. + * + * @return The number of particles linked. + */ +int cell_link_bparts(struct cell *c, struct bpart *bparts) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) + error("Linking foreign particles in a local cell!"); + + if (c->black_holes.parts != NULL) + error("Linking bparts into a cell that was already linked"); +#endif + + c->black_holes.parts = bparts; + + /* Fill the progeny recursively, depth-first. */ + if (c->split) { + int offset = 0; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) + offset += cell_link_bparts(c->progeny[k], &bparts[offset]); + } + } + + /* Return the total number of linked particles. 
*/ + return c->black_holes.count; +} + /** * @brief Recurse down foreign cells until reaching one with hydro * tasks; then trigger the linking of the #part array from that diff --git a/src/cell.h b/src/cell.h index 6795e1ffa19a3f5cc651da5ef375fb7151c7c731..55456df25953462bff218dce89090bb932561c0e 100644 --- a/src/cell.h +++ b/src/cell.h @@ -824,6 +824,7 @@ int cell_getsize(struct cell *c); int cell_link_parts(struct cell *c, struct part *parts); int cell_link_gparts(struct cell *c, struct gpart *gparts); int cell_link_sparts(struct cell *c, struct spart *sparts); +int cell_link_bparts(struct cell *c, struct bpart *bparts); int cell_link_foreign_parts(struct cell *c, struct part *parts); int cell_link_foreign_gparts(struct cell *c, struct gpart *gparts); int cell_count_parts_for_tasks(const struct cell *c); diff --git a/src/collectgroup.c b/src/collectgroup.c index 5242d0d93bbca2315a91d2abc099670afb2789ed..0332e526dd11c601f39bd088e3b564544ef141c8 100644 --- a/src/collectgroup.c +++ b/src/collectgroup.c @@ -36,17 +36,20 @@ /* Local collections for MPI reduces. 
*/ struct mpicollectgroup1 { - long long updated, g_updated, s_updated; - long long inhibited, g_inhibited, s_inhibited; + long long updated, g_updated, s_updated, b_updated; + long long inhibited, g_inhibited, s_inhibited, b_inhibited; integertime_t ti_hydro_end_min; integertime_t ti_gravity_end_min; integertime_t ti_stars_end_min; + integertime_t ti_black_holes_end_min; integertime_t ti_hydro_end_max; integertime_t ti_gravity_end_max; integertime_t ti_stars_end_max; + integertime_t ti_black_holes_end_max; integertime_t ti_hydro_beg_max; integertime_t ti_gravity_beg_max; integertime_t ti_stars_beg_max; + integertime_t ti_black_holes_beg_max; int forcerebuild; long long total_nr_cells; long long total_nr_tasks; @@ -96,18 +99,23 @@ void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e) { e->ti_stars_end_min = grp1->ti_stars_end_min; e->ti_stars_end_max = grp1->ti_stars_end_max; e->ti_stars_beg_max = grp1->ti_stars_beg_max; + e->ti_black_holes_end_min = grp1->ti_black_holes_end_min; + e->ti_black_holes_end_max = grp1->ti_black_holes_end_max; + e->ti_black_holes_beg_max = grp1->ti_black_holes_beg_max; e->ti_end_min = - min3(e->ti_hydro_end_min, e->ti_gravity_end_min, e->ti_stars_end_min); + min4(e->ti_hydro_end_min, e->ti_gravity_end_min, e->ti_stars_end_min, e->ti_black_holes_end_min); e->ti_end_max = - max3(e->ti_hydro_end_max, e->ti_gravity_end_max, e->ti_stars_end_max); + max4(e->ti_hydro_end_max, e->ti_gravity_end_max, e->ti_stars_end_max , e->ti_black_holes_end_max); e->ti_beg_max = - max3(e->ti_hydro_beg_max, e->ti_gravity_beg_max, e->ti_stars_beg_max); + max4(e->ti_hydro_beg_max, e->ti_gravity_beg_max, e->ti_stars_beg_max, e->ti_black_holes_beg_max); e->updates = grp1->updated; e->g_updates = grp1->g_updated; e->s_updates = grp1->s_updated; + e->b_updates = grp1->b_updated; e->nr_inhibited_parts = grp1->inhibited; e->nr_inhibited_gparts = grp1->g_inhibited; e->nr_inhibited_sparts = grp1->s_inhibited; + e->nr_inhibited_bparts = 
grp1->b_inhibited; e->forcerebuild = grp1->forcerebuild; e->total_nr_cells = grp1->total_nr_cells; e->total_nr_tasks = grp1->total_nr_tasks; @@ -153,20 +161,24 @@ void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e) { */ void collectgroup1_init( struct collectgroup1 *grp1, size_t updated, size_t g_updated, - size_t s_updated, size_t inhibited, size_t g_inhibited, size_t s_inhibited, + size_t s_updated, size_t b_updated, size_t inhibited, size_t g_inhibited, size_t s_inhibited, + size_t b_inhibited, integertime_t ti_hydro_end_min, integertime_t ti_hydro_end_max, integertime_t ti_hydro_beg_max, integertime_t ti_gravity_end_min, integertime_t ti_gravity_end_max, integertime_t ti_gravity_beg_max, integertime_t ti_stars_end_min, integertime_t ti_stars_end_max, - integertime_t ti_stars_beg_max, int forcerebuild, long long total_nr_cells, + integertime_t ti_stars_beg_max, integertime_t ti_black_holes_end_min, integertime_t ti_black_holes_end_max, + integertime_t ti_black_holes_beg_max, int forcerebuild, long long total_nr_cells, long long total_nr_tasks, float tasks_per_cell) { grp1->updated = updated; grp1->g_updated = g_updated; grp1->s_updated = s_updated; + grp1->b_updated = b_updated; grp1->inhibited = inhibited; grp1->g_inhibited = g_inhibited; grp1->s_inhibited = s_inhibited; + grp1->b_inhibited = b_inhibited; grp1->ti_hydro_end_min = ti_hydro_end_min; grp1->ti_hydro_end_max = ti_hydro_end_max; grp1->ti_hydro_beg_max = ti_hydro_beg_max; @@ -176,6 +188,9 @@ void collectgroup1_init( grp1->ti_stars_end_min = ti_stars_end_min; grp1->ti_stars_end_max = ti_stars_end_max; grp1->ti_stars_beg_max = ti_stars_beg_max; + grp1->ti_black_holes_end_min = ti_black_holes_end_min; + grp1->ti_black_holes_end_max = ti_black_holes_end_max; + grp1->ti_black_holes_beg_max = ti_black_holes_beg_max; grp1->forcerebuild = forcerebuild; grp1->total_nr_cells = total_nr_cells; grp1->total_nr_tasks = total_nr_tasks; @@ -199,18 +214,23 @@ void collectgroup1_reduce(struct 
collectgroup1 *grp1) { mpigrp11.updated = grp1->updated; mpigrp11.g_updated = grp1->g_updated; mpigrp11.s_updated = grp1->s_updated; + mpigrp11.b_updated = grp1->b_updated; mpigrp11.inhibited = grp1->inhibited; mpigrp11.g_inhibited = grp1->g_inhibited; mpigrp11.s_inhibited = grp1->s_inhibited; + mpigrp11.b_inhibited = grp1->b_inhibited; mpigrp11.ti_hydro_end_min = grp1->ti_hydro_end_min; mpigrp11.ti_gravity_end_min = grp1->ti_gravity_end_min; mpigrp11.ti_stars_end_min = grp1->ti_stars_end_min; + mpigrp11.ti_black_holes_end_min = grp1->ti_black_holes_end_min; mpigrp11.ti_hydro_end_max = grp1->ti_hydro_end_max; mpigrp11.ti_gravity_end_max = grp1->ti_gravity_end_max; mpigrp11.ti_stars_end_max = grp1->ti_stars_end_max; + mpigrp11.ti_black_holes_end_max = grp1->ti_black_holes_end_max; mpigrp11.ti_hydro_beg_max = grp1->ti_hydro_beg_max; mpigrp11.ti_gravity_beg_max = grp1->ti_gravity_beg_max; mpigrp11.ti_stars_beg_max = grp1->ti_stars_beg_max; + mpigrp11.ti_black_holes_beg_max = grp1->ti_black_holes_beg_max; mpigrp11.forcerebuild = grp1->forcerebuild; mpigrp11.total_nr_cells = grp1->total_nr_cells; mpigrp11.total_nr_tasks = grp1->total_nr_tasks; @@ -225,18 +245,23 @@ void collectgroup1_reduce(struct collectgroup1 *grp1) { grp1->updated = mpigrp12.updated; grp1->g_updated = mpigrp12.g_updated; grp1->s_updated = mpigrp12.s_updated; + grp1->b_updated = mpigrp12.b_updated; grp1->inhibited = mpigrp12.inhibited; grp1->g_inhibited = mpigrp12.g_inhibited; grp1->s_inhibited = mpigrp12.s_inhibited; + grp1->b_inhibited = mpigrp12.b_inhibited; grp1->ti_hydro_end_min = mpigrp12.ti_hydro_end_min; grp1->ti_gravity_end_min = mpigrp12.ti_gravity_end_min; grp1->ti_stars_end_min = mpigrp12.ti_stars_end_min; + grp1->ti_black_holes_end_min = mpigrp12.ti_black_holes_end_min; grp1->ti_hydro_end_max = mpigrp12.ti_hydro_end_max; grp1->ti_gravity_end_max = mpigrp12.ti_gravity_end_max; grp1->ti_stars_end_max = mpigrp12.ti_stars_end_max; + grp1->ti_black_holes_end_max = 
mpigrp12.ti_black_holes_end_max; grp1->ti_hydro_beg_max = mpigrp12.ti_hydro_beg_max; grp1->ti_gravity_beg_max = mpigrp12.ti_gravity_beg_max; grp1->ti_stars_beg_max = mpigrp12.ti_stars_beg_max; + grp1->ti_black_holes_beg_max = mpigrp12.ti_black_holes_beg_max; grp1->forcerebuild = mpigrp12.forcerebuild; grp1->total_nr_cells = mpigrp12.total_nr_cells; grp1->total_nr_tasks = mpigrp12.total_nr_tasks; @@ -260,11 +285,13 @@ static void doreduce1(struct mpicollectgroup1 *mpigrp11, mpigrp11->updated += mpigrp12->updated; mpigrp11->g_updated += mpigrp12->g_updated; mpigrp11->s_updated += mpigrp12->s_updated; + mpigrp11->b_updated += mpigrp12->b_updated; /* Sum of inhibited */ mpigrp11->inhibited += mpigrp12->inhibited; mpigrp11->g_inhibited += mpigrp12->g_inhibited; mpigrp11->s_inhibited += mpigrp12->s_inhibited; + mpigrp11->b_inhibited += mpigrp12->b_inhibited; /* Minimum end time. */ mpigrp11->ti_hydro_end_min = @@ -273,6 +300,8 @@ static void doreduce1(struct mpicollectgroup1 *mpigrp11, min(mpigrp11->ti_gravity_end_min, mpigrp12->ti_gravity_end_min); mpigrp11->ti_stars_end_min = min(mpigrp11->ti_stars_end_min, mpigrp12->ti_stars_end_min); + mpigrp11->ti_black_holes_end_min = + min(mpigrp11->ti_black_holes_end_min, mpigrp12->ti_black_holes_end_min); /* Maximum end time. */ mpigrp11->ti_hydro_end_max = @@ -281,6 +310,8 @@ static void doreduce1(struct mpicollectgroup1 *mpigrp11, max(mpigrp11->ti_gravity_end_max, mpigrp12->ti_gravity_end_max); mpigrp11->ti_stars_end_max = max(mpigrp11->ti_stars_end_max, mpigrp12->ti_stars_end_max); + mpigrp11->ti_black_holes_end_max = + max(mpigrp11->ti_black_holes_end_max, mpigrp12->ti_black_holes_end_max); /* Maximum beg time. 
*/ mpigrp11->ti_hydro_beg_max = @@ -289,6 +320,8 @@ static void doreduce1(struct mpicollectgroup1 *mpigrp11, max(mpigrp11->ti_gravity_beg_max, mpigrp12->ti_gravity_beg_max); mpigrp11->ti_stars_beg_max = max(mpigrp11->ti_stars_beg_max, mpigrp12->ti_stars_beg_max); + mpigrp11->ti_black_holes_beg_max = + max(mpigrp11->ti_black_holes_beg_max, mpigrp12->ti_black_holes_beg_max); /* Everyone must agree to not rebuild. */ if (mpigrp11->forcerebuild || mpigrp12->forcerebuild) diff --git a/src/collectgroup.h b/src/collectgroup.h index 0c0e0898f64deb0081cdab5c6cfbcad1dc760b1c..6b6678481051c43f0d7cecc1dab1066e8e38b845 100644 --- a/src/collectgroup.h +++ b/src/collectgroup.h @@ -35,15 +35,16 @@ struct engine; struct collectgroup1 { /* Number of particles updated */ - long long updated, g_updated, s_updated; + long long updated, g_updated, s_updated, b_updated; /* Number of particles inhibited */ - long long inhibited, g_inhibited, s_inhibited; + long long inhibited, g_inhibited, s_inhibited, b_inhibited; /* Times for the time-step */ integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max; integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max; integertime_t ti_stars_end_min, ti_stars_end_max, ti_stars_beg_max; + integertime_t ti_black_holes_end_min, ti_black_holes_end_max, ti_black_holes_beg_max; /* Force the engine to rebuild? 
*/ int forcerebuild; @@ -59,14 +60,16 @@ struct collectgroup1 { void collectgroup_init(void); void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e); void collectgroup1_init( - struct collectgroup1 *grp1, size_t updated, size_t g_updated, - size_t s_updated, size_t inhibited, size_t g_inhibited, size_t s_inhibited, + struct collectgroup1 *grp1, size_t updated, size_t g_updated, + size_t s_updated, size_t b_updated, size_t inhibited, size_t g_inhibited, size_t s_inhibited, + size_t b_inhibited, integertime_t ti_hydro_end_min, integertime_t ti_hydro_end_max, integertime_t ti_hydro_beg_max, integertime_t ti_gravity_end_min, integertime_t ti_gravity_end_max, integertime_t ti_gravity_beg_max, integertime_t ti_stars_end_min, integertime_t ti_stars_end_max, - integertime_t ti_stars_beg_max, int forcerebuild, long long total_nr_cells, - long long total_nr_tasks, float tasks_per_cell); + integertime_t ti_stars_beg_max, integertime_t ti_black_holes_end_min, integertime_t ti_black_holes_end_max, + integertime_t ti_black_holes_beg_max, int forcerebuild, long long total_nr_cells, + long long total_nr_tasks, float tasks_per_cell); void collectgroup1_reduce(struct collectgroup1 *grp1); #endif /* SWIFT_COLLECTGROUP_H */ diff --git a/src/engine.c b/src/engine.c index eb064ec0dff2fde05016faee1628b0fc58a1d7a7..d575a20e5e6e3ac82ea58b6ee3b6cc36384e6d4e 100644 --- a/src/engine.c +++ b/src/engine.c @@ -131,11 +131,12 @@ int engine_current_step; */ struct end_of_step_data { - size_t updated, g_updated, s_updated; - size_t inhibited, g_inhibited, s_inhibited; + size_t updated, g_updated, s_updated, b_updated; + size_t inhibited, g_inhibited, s_inhibited, b_inhibited; integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max; integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max; integertime_t ti_stars_end_min, ti_stars_end_max, ti_stars_beg_max; + integertime_t ti_black_holes_end_min, ti_black_holes_end_max, ti_black_holes_beg_max; struct engine 
*e; }; @@ -546,7 +547,7 @@ static void engine_redistribute_relink_mapper(void *map_data, int num_elements, /* Re-link */ s->gparts[k].id_or_neg_offset = -partner_index; - s->sparts[partner_index].gpart = &s->gparts[k]; + s->bparts[partner_index].gpart = &s->gparts[k]; } } } @@ -875,7 +876,7 @@ void engine_redistribute(struct engine *e) { const struct cell *c = &s->cells_top[new_cid]; const int new_node = c->nodeID; - if (s_dest[k] != new_node) + if (b_dest[k] != new_node) error("bpart's new node index not matching sorted index."); if (bp->x[0] < c->loc[0] || bp->x[0] > c->loc[0] + c->width[0] || @@ -890,7 +891,7 @@ void engine_redistribute(struct engine *e) { struct savelink_mapper_data savelink_data; savelink_data.nr_nodes = nr_nodes; - savelink_data.counts = s_counts; + savelink_data.counts = b_counts; savelink_data.parts = (void *)bparts; savelink_data.nodeID = nodeID; threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_bpart, @@ -1389,6 +1390,11 @@ void engine_exchange_cells(struct engine *e) { * @param ind_spart The foreign #cell ID of each spart. * @param Nspart The number of stray sparts, contains the number of sparts * received on return. + * @param offset_bparts The index in the bparts array as of which the foreign + * parts reside (i.e. the current number of local #bpart). + * @param ind_bpart The foreign #cell ID of each bpart. + * @param Nbpart The number of stray bparts, contains the number of bparts + * received on return. * * Note that this function does not mess-up the linkage between parts and * gparts, i.e. the received particles have correct linkeage. 
@@ -1397,7 +1403,8 @@ void engine_exchange_strays(struct engine *e, const size_t offset_parts, const int *ind_part, size_t *Npart, const size_t offset_gparts, const int *ind_gpart, size_t *Ngpart, const size_t offset_sparts, - const int *ind_spart, size_t *Nspart) { + const int *ind_spart, size_t *Nspart, const size_t offset_bparts, + const int *ind_bpart, size_t *Nbpart) { #ifdef WITH_MPI @@ -1409,6 +1416,7 @@ void engine_exchange_strays(struct engine *e, const size_t offset_parts, e->proxies[k].nr_parts_out = 0; e->proxies[k].nr_gparts_out = 0; e->proxies[k].nr_sparts_out = 0; + e->proxies[k].nr_bparts_out = 0; } /* Put the parts into the corresponding proxies. */ @@ -1482,6 +1490,41 @@ void engine_exchange_strays(struct engine *e, const size_t offset_parts, proxy_sparts_load(&e->proxies[pid], &s->sparts[offset_sparts + k], 1); } + /* Put the bparts into the corresponding proxies. */ + for (size_t k = 0; k < *Nbpart; k++) { + + /* Ignore the particles we want to get rid of (inhibited, ...). */ + if (ind_bpart[k] == -1) continue; + + /* Get the target node and proxy ID. */ + const int node_id = e->s->cells_top[ind_bpart[k]].nodeID; + if (node_id < 0 || node_id >= e->nr_nodes) + error("Bad node ID %i.", node_id); + const int pid = e->proxy_ind[node_id]; + if (pid < 0) { + error( + "Do not have a proxy for the requested nodeID %i for part with " + "id=%lld, x=[%e,%e,%e].", + node_id, s->bparts[offset_bparts + k].id, + s->bparts[offset_bparts + k].x[0], s->bparts[offset_bparts + k].x[1], + s->bparts[offset_bparts + k].x[2]); + } + + /* Re-link the associated gpart with the buffer offset of the bpart. 
*/ + if (s->bparts[offset_bparts + k].gpart != NULL) { + s->bparts[offset_bparts + k].gpart->id_or_neg_offset = + -e->proxies[pid].nr_bparts_out; + } + +#ifdef SWIFT_DEBUG_CHECKS + if (s->bparts[offset_bparts + k].time_bin == time_bin_inhibited) + error("Attempting to exchange an inhibited particle"); +#endif + + /* Load the bpart into the proxy */ + proxy_bparts_load(&e->proxies[pid], &s->bparts[offset_bparts + k], 1); + } + /* Put the gparts into the corresponding proxies. */ for (size_t k = 0; k < *Ngpart; k++) { @@ -1512,8 +1555,8 @@ void engine_exchange_strays(struct engine *e, const size_t offset_parts, } /* Launch the proxies. */ - MPI_Request reqs_in[4 * engine_maxproxies]; - MPI_Request reqs_out[4 * engine_maxproxies]; + MPI_Request reqs_in[5 * engine_maxproxies]; + MPI_Request reqs_out[5 * engine_maxproxies]; for (int k = 0; k < e->nr_proxies; k++) { proxy_parts_exchange_first(&e->proxies[k]); reqs_in[k] = e->proxies[k].req_parts_count_in; @@ -1540,15 +1583,17 @@ void engine_exchange_strays(struct engine *e, const size_t offset_parts, int count_parts_in = 0; int count_gparts_in = 0; int count_sparts_in = 0; + int count_bparts_in = 0; for (int k = 0; k < e->nr_proxies; k++) { count_parts_in += e->proxies[k].nr_parts_in; count_gparts_in += e->proxies[k].nr_gparts_in; count_sparts_in += e->proxies[k].nr_sparts_in; + count_bparts_in += e->proxies[k].nr_bparts_in; } if (e->verbose) { - message("sent out %zu/%zu/%zu parts/gparts/sparts, got %i/%i/%i back.", - *Npart, *Ngpart, *Nspart, count_parts_in, count_gparts_in, - count_sparts_in); + message("sent out %zu/%zu/%zu/%zu parts/gparts/sparts/bparts, got %i/%i/%i/%i back.", + *Npart, *Ngpart, *Nspart, *Nbpart, count_parts_in, count_gparts_in, + count_sparts_in, count_bparts_in); } /* Reallocate the particle arrays if necessary */ @@ -1596,6 +1641,25 @@ void engine_exchange_strays(struct engine *e, const size_t offset_parts, } } + if (offset_bparts + count_bparts_in > s->size_bparts) { + message("re-allocating 
bparts array."); + s->size_bparts = (offset_bparts + count_bparts_in) * engine_parts_size_grow; + struct bpart *bparts_new = NULL; + if (swift_memalign("bparts", (void **)&bparts_new, bpart_align, + sizeof(struct bpart) * s->size_bparts) != 0) + error("Failed to allocate new bpart data."); + memcpy(bparts_new, s->bparts, sizeof(struct bpart) * offset_bparts); + swift_free("bparts", s->bparts); + s->bparts = bparts_new; + + /* Reset the links */ + for (size_t k = 0; k < offset_bparts; k++) { + if (s->bparts[k].gpart != NULL) { + s->bparts[k].gpart->id_or_neg_offset = -k; + } + } + } + if (offset_gparts + count_gparts_in > s->size_gparts) { message("re-allocating gparts array."); s->size_gparts = (offset_gparts + count_gparts_in) * engine_parts_size_grow; @@ -1613,6 +1677,8 @@ void engine_exchange_strays(struct engine *e, const size_t offset_parts, s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; } else if (s->gparts[k].type == swift_type_stars) { s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; + } else if (s->gparts[k].type == swift_type_black_hole) { + s->bparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; } } } @@ -1621,52 +1687,64 @@ void engine_exchange_strays(struct engine *e, const size_t offset_parts, int nr_in = 0, nr_out = 0; for (int k = 0; k < e->nr_proxies; k++) { if (e->proxies[k].nr_parts_in > 0) { - reqs_in[4 * k] = e->proxies[k].req_parts_in; - reqs_in[4 * k + 1] = e->proxies[k].req_xparts_in; + reqs_in[5 * k] = e->proxies[k].req_parts_in; + reqs_in[5 * k + 1] = e->proxies[k].req_xparts_in; nr_in += 2; } else { - reqs_in[4 * k] = reqs_in[4 * k + 1] = MPI_REQUEST_NULL; + reqs_in[5 * k] = reqs_in[5 * k + 1] = MPI_REQUEST_NULL; } if (e->proxies[k].nr_gparts_in > 0) { - reqs_in[4 * k + 2] = e->proxies[k].req_gparts_in; + reqs_in[5 * k + 2] = e->proxies[k].req_gparts_in; nr_in += 1; } else { - reqs_in[4 * k + 2] = MPI_REQUEST_NULL; + reqs_in[5 * k + 2] = MPI_REQUEST_NULL; } if (e->proxies[k].nr_sparts_in > 0) { - 
reqs_in[4 * k + 3] = e->proxies[k].req_sparts_in; + reqs_in[5 * k + 3] = e->proxies[k].req_sparts_in; nr_in += 1; } else { - reqs_in[4 * k + 3] = MPI_REQUEST_NULL; + reqs_in[5 * k + 3] = MPI_REQUEST_NULL; + } + if (e->proxies[k].nr_bparts_in > 0) { + reqs_in[5 * k + 4] = e->proxies[k].req_bparts_in; + nr_in += 1; + } else { + reqs_in[5 * k + 4] = MPI_REQUEST_NULL; } if (e->proxies[k].nr_parts_out > 0) { - reqs_out[4 * k] = e->proxies[k].req_parts_out; - reqs_out[4 * k + 1] = e->proxies[k].req_xparts_out; + reqs_out[5 * k] = e->proxies[k].req_parts_out; + reqs_out[5 * k + 1] = e->proxies[k].req_xparts_out; nr_out += 2; } else { - reqs_out[4 * k] = reqs_out[4 * k + 1] = MPI_REQUEST_NULL; + reqs_out[5 * k] = reqs_out[5 * k + 1] = MPI_REQUEST_NULL; } if (e->proxies[k].nr_gparts_out > 0) { - reqs_out[4 * k + 2] = e->proxies[k].req_gparts_out; + reqs_out[5 * k + 2] = e->proxies[k].req_gparts_out; nr_out += 1; } else { - reqs_out[4 * k + 2] = MPI_REQUEST_NULL; + reqs_out[5 * k + 2] = MPI_REQUEST_NULL; } if (e->proxies[k].nr_sparts_out > 0) { - reqs_out[4 * k + 3] = e->proxies[k].req_sparts_out; + reqs_out[5 * k + 3] = e->proxies[k].req_sparts_out; + nr_out += 1; + } else { + reqs_out[5 * k + 3] = MPI_REQUEST_NULL; + } + if (e->proxies[k].nr_bparts_out > 0) { + reqs_out[5 * k + 4] = e->proxies[k].req_bparts_out; nr_out += 1; } else { - reqs_out[4 * k + 3] = MPI_REQUEST_NULL; + reqs_out[5 * k + 4] = MPI_REQUEST_NULL; } } /* Wait for each part array to come in and collect the new parts from the proxies. 
*/ - int count_parts = 0, count_gparts = 0, count_sparts = 0; + int count_parts = 0, count_gparts = 0, count_sparts = 0, count_bparts = 0; for (int k = 0; k < nr_in; k++) { int err, pid; - if ((err = MPI_Waitany(4 * e->nr_proxies, reqs_in, &pid, + if ((err = MPI_Waitany(5 * e->nr_proxies, reqs_in, &pid, MPI_STATUS_IGNORE)) != MPI_SUCCESS) { char buff[MPI_MAX_ERROR_STRING]; int res; @@ -1674,16 +1752,17 @@ void engine_exchange_strays(struct engine *e, const size_t offset_parts, error("MPI_Waitany failed (%s).", buff); } if (pid == MPI_UNDEFINED) break; - // message( "request from proxy %i has arrived." , pid / 4 ); - pid = 4 * (pid / 4); + // message( "request from proxy %i has arrived." , pid / 5 ); + pid = 5 * (pid / 5); /* If all the requests for a given proxy have arrived... */ if (reqs_in[pid + 0] == MPI_REQUEST_NULL && reqs_in[pid + 1] == MPI_REQUEST_NULL && reqs_in[pid + 2] == MPI_REQUEST_NULL && - reqs_in[pid + 3] == MPI_REQUEST_NULL) { + reqs_in[pid + 3] == MPI_REQUEST_NULL && + reqs_in[pid + 4] == MPI_REQUEST_NULL) { /* Copy the particle data to the part/xpart/gpart arrays. 
*/ - struct proxy *prox = &e->proxies[pid / 4]; + struct proxy *prox = &e->proxies[pid / 5]; memcpy(&s->parts[offset_parts + count_parts], prox->parts_in, sizeof(struct part) * prox->nr_parts_in); memcpy(&s->xparts[offset_parts + count_parts], prox->xparts_in, @@ -1692,6 +1771,8 @@ void engine_exchange_strays(struct engine *e, const size_t offset_parts, sizeof(struct gpart) * prox->nr_gparts_in); memcpy(&s->sparts[offset_sparts + count_sparts], prox->sparts_in, sizeof(struct spart) * prox->nr_sparts_in); + memcpy(&s->bparts[offset_bparts + count_bparts], prox->bparts_in, + sizeof(struct bpart) * prox->nr_bparts_in); /* for (int k = offset; k < offset + count; k++) message( "received particle %lli, x=[%.3e %.3e %.3e], h=%.3e, from node %i.", @@ -1712,19 +1793,25 @@ void engine_exchange_strays(struct engine *e, const size_t offset_parts, &s->sparts[offset_sparts + count_sparts - gp->id_or_neg_offset]; gp->id_or_neg_offset = s->sparts - sp; sp->gpart = gp; - } + } else if (gp->type == swift_type_black_hole) { + struct bpart *bp = + &s->bparts[offset_bparts + count_bparts - gp->id_or_neg_offset]; + gp->id_or_neg_offset = s->bparts - bp; + bp->gpart = gp; + } } /* Advance the counters. */ count_parts += prox->nr_parts_in; count_gparts += prox->nr_gparts_in; count_sparts += prox->nr_sparts_in; + count_bparts += prox->nr_bparts_in; } } /* Wait for all the sends to have finished too. 
*/ if (nr_out > 0) - if (MPI_Waitall(4 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != + if (MPI_Waitall(5 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) error("MPI_Waitall on sends failed."); @@ -1736,6 +1823,7 @@ void engine_exchange_strays(struct engine *e, const size_t offset_parts, *Npart = count_parts; *Ngpart = count_gparts; *Nspart = count_sparts; + *Nbpart = count_bparts; #else error("SWIFT was not compiled with MPI support."); @@ -1999,7 +2087,7 @@ void engine_allocate_foreign_particles(struct engine *e) { /* Count the number of particles we need to import and re-allocate the buffer if needed. */ - size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0; + size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0, count_bparts_in = 0; for (int k = 0; k < nr_proxies; k++) { for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { @@ -2014,6 +2102,9 @@ void engine_allocate_foreign_particles(struct engine *e) { /* For stars, we just use the numbers in the top-level cells */ count_sparts_in += e->proxies[k].cells_in[j]->stars.count; + + /* For black holes, we just use the numbers in the top-level cells */ + count_bparts_in += e->proxies[k].cells_in[j]->black_holes.count; } } @@ -2055,18 +2146,31 @@ void engine_allocate_foreign_particles(struct engine *e) { error("Failed to allocate foreign spart data."); } + /* Allocate space for the foreign particles we will receive */ + if (count_bparts_in > s->size_bparts_foreign) { + if (s->bparts_foreign != NULL) + swift_free("bparts_foreign", s->bparts_foreign); + s->size_bparts_foreign = engine_foreign_alloc_margin * count_bparts_in; + if (swift_memalign("bparts_foreign", (void **)&s->bparts_foreign, + bpart_align, + sizeof(struct bpart) * s->size_bparts_foreign) != 0) + error("Failed to allocate foreign bpart data."); + } + if (e->verbose) - message("Allocating %zd/%zd/%zd foreign part/gpart/spart (%zd/%zd/%zd MB)", + message("Allocating %zd/%zd/%zd/%zd foreign 
part/gpart/spart/bpart (%zd/%zd/%zd/%zd MB)", s->size_parts_foreign, s->size_gparts_foreign, - s->size_sparts_foreign, + s->size_sparts_foreign, s->size_bparts_foreign, s->size_parts_foreign * sizeof(struct part) / (1024 * 1024), s->size_gparts_foreign * sizeof(struct gpart) / (1024 * 1024), - s->size_sparts_foreign * sizeof(struct spart) / (1024 * 1024)); + s->size_sparts_foreign * sizeof(struct spart) / (1024 * 1024), + s->size_bparts_foreign * sizeof(struct bpart) / (1024 * 1024)); /* Unpack the cells and link to the particle data. */ struct part *parts = s->parts_foreign; struct gpart *gparts = s->gparts_foreign; struct spart *sparts = s->sparts_foreign; + struct bpart *bparts = s->bparts_foreign; for (int k = 0; k < nr_proxies; k++) { for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { @@ -2087,6 +2191,10 @@ void engine_allocate_foreign_particles(struct engine *e) { /* For stars, we just use the numbers in the top-level cells */ cell_link_sparts(e->proxies[k].cells_in[j], sparts); sparts = &sparts[e->proxies[k].cells_in[j]->stars.count]; + + /* For black holes, we just use the numbers in the top-level cells */ + cell_link_bparts(e->proxies[k].cells_in[j], bparts); + bparts = &bparts[e->proxies[k].cells_in[j]->black_holes.count]; } } @@ -2094,6 +2202,7 @@ void engine_allocate_foreign_particles(struct engine *e) { s->nr_parts_foreign = parts - s->parts_foreign; s->nr_gparts_foreign = gparts - s->gparts_foreign; s->nr_sparts_foreign = sparts - s->sparts_foreign; + s->nr_bparts_foreign = bparts - s->bparts_foreign; if (e->verbose) message("Recursively linking foreign arrays took %.3f %s.", @@ -2169,6 +2278,7 @@ void engine_print_task_counts(const struct engine *e) { message("nr_parts = %zu.", e->s->nr_parts); message("nr_gparts = %zu.", e->s->nr_gparts); message("nr_sparts = %zu.", e->s->nr_sparts); + message("nr_bparts = %zu.", e->s->nr_bparts); if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -2719,6 +2829,66 @@ void 
engine_collect_end_of_step_recurse_stars(struct cell *c, c->stars.inhibited = inhibited; } +/** + * @brief Recursive function gathering end-of-step data. + * + * We recurse until we encounter a timestep or time-step MPI recv task + * as the values will have been set at that level. We then bring these + * values upwards. + * + * @param c The #cell to recurse into. + * @param e The #engine. + */ +void engine_collect_end_of_step_recurse_black_holes(struct cell *c, + const struct engine *e) { + +/* Skip super-cells (Their values are already set) */ +#ifdef WITH_MPI + // MATTHIEU + if (c->timestep != NULL) return; // || c->mpi.black_holes.recv_ti != NULL) return; +#else + if (c->timestep != NULL) return; +#endif /* WITH_MPI */ + +#ifdef SWIFT_DEBUG_CHECKS + // if (!c->split) error("Reached a leaf without finding a time-step task!"); +#endif + + /* Counters for the different quantities. */ + size_t updated = 0, inhibited = 0; + integertime_t ti_black_holes_end_min = max_nr_timesteps, ti_black_holes_end_max = 0, + ti_black_holes_beg_max = 0; + + /* Collect the values from the progeny. */ + for (int k = 0; k < 8; k++) { + struct cell *cp = c->progeny[k]; + if (cp != NULL && cp->black_holes.count > 0) { + + /* Recurse */ + engine_collect_end_of_step_recurse_black_holes(cp, e); + + /* And update */ + ti_black_holes_end_min = min(ti_black_holes_end_min, cp->black_holes.ti_end_min); + ti_black_holes_end_max = max(ti_black_holes_end_max, cp->black_holes.ti_end_max); + ti_black_holes_beg_max = max(ti_black_holes_beg_max, cp->black_holes.ti_beg_max); + + updated += cp->black_holes.updated; + inhibited += cp->black_holes.inhibited; + + /* Collected, so clear for next time. */ + cp->black_holes.updated = 0; + } + } + + /* Store the collected values in the cell. 
*/ + c->black_holes.ti_end_min = ti_black_holes_end_min; + c->black_holes.ti_end_max = ti_black_holes_end_max; + c->black_holes.ti_beg_max = ti_black_holes_beg_max; + c->black_holes.updated = updated; + c->black_holes.inhibited = inhibited; +} + + /** * @brief Mapping function to collect the data from the end of the step * @@ -2740,23 +2910,26 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, const int with_ext_grav = (e->policy & engine_policy_external_gravity); const int with_grav = (with_self_grav || with_ext_grav); const int with_stars = (e->policy & engine_policy_stars); + const int with_black_holes = (e->policy & engine_policy_black_holes); struct space *s = e->s; int *local_cells = (int *)map_data; /* Local collectible */ - size_t updated = 0, g_updated = 0, s_updated = 0; - size_t inhibited = 0, g_inhibited = 0, s_inhibited = 0; + size_t updated = 0, g_updated = 0, s_updated = 0, b_updated = 0; + size_t inhibited = 0, g_inhibited = 0, s_inhibited = 0, b_inhibited = 0; integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, ti_hydro_beg_max = 0; integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, ti_gravity_beg_max = 0; integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0, ti_stars_beg_max = 0; + integertime_t ti_black_holes_end_min = max_nr_timesteps, ti_black_holes_end_max = 0, + ti_black_holes_beg_max = 0; for (int ind = 0; ind < num_elements; ind++) { struct cell *c = &s->cells_top[local_cells[ind]]; - if (c->hydro.count > 0 || c->grav.count > 0 || c->stars.count > 0) { + if (c->hydro.count > 0 || c->grav.count > 0 || c->stars.count > 0 || c->black_holes.count > 0) { /* Make the top-cells recurse */ if (with_hydro) { @@ -2768,6 +2941,9 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, if (with_stars) { engine_collect_end_of_step_recurse_stars(c, e); } + if (with_black_holes) { + engine_collect_end_of_step_recurse_black_holes(c, e); + } /* And 
aggregate */ if (c->hydro.ti_end_min > e->ti_current) @@ -2785,18 +2961,26 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, ti_stars_end_max = max(ti_stars_end_max, c->stars.ti_end_max); ti_stars_beg_max = max(ti_stars_beg_max, c->stars.ti_beg_max); + if (c->black_holes.ti_end_min > e->ti_current) + ti_black_holes_end_min = min(ti_black_holes_end_min, c->black_holes.ti_end_min); + ti_black_holes_end_max = max(ti_black_holes_end_max, c->black_holes.ti_end_max); + ti_black_holes_beg_max = max(ti_black_holes_beg_max, c->black_holes.ti_beg_max); + updated += c->hydro.updated; g_updated += c->grav.updated; s_updated += c->stars.updated; + b_updated += c->black_holes.updated; inhibited += c->hydro.inhibited; g_inhibited += c->grav.inhibited; s_inhibited += c->stars.inhibited; + b_inhibited += c->black_holes.inhibited; /* Collected, so clear for next time. */ c->hydro.updated = 0; c->grav.updated = 0; c->stars.updated = 0; + c->black_holes.updated = 0; } } @@ -2806,10 +2990,12 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, data->updated += updated; data->g_updated += g_updated; data->s_updated += s_updated; + data->b_updated += b_updated; data->inhibited += inhibited; data->g_inhibited += g_inhibited; data->s_inhibited += s_inhibited; + data->b_inhibited += b_inhibited; if (ti_hydro_end_min > e->ti_current) data->ti_hydro_end_min = min(ti_hydro_end_min, data->ti_hydro_end_min); @@ -2828,8 +3014,13 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, data->ti_stars_end_min = min(ti_stars_end_min, data->ti_stars_end_min); data->ti_stars_end_max = max(ti_stars_end_max, data->ti_stars_end_max); data->ti_stars_beg_max = max(ti_stars_beg_max, data->ti_stars_beg_max); - } + if (ti_black_holes_end_min > e->ti_current) + data->ti_black_holes_end_min = min(ti_black_holes_end_min, data->ti_black_holes_end_min); + data->ti_black_holes_end_max = max(ti_black_holes_end_max, data->ti_black_holes_end_max); + 
data->ti_black_holes_beg_max = max(ti_black_holes_beg_max, data->ti_black_holes_beg_max); + } + if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space"); } @@ -2855,14 +3046,16 @@ void engine_collect_end_of_step(struct engine *e, int apply) { const ticks tic = getticks(); struct space *s = e->s; struct end_of_step_data data; - data.updated = 0, data.g_updated = 0, data.s_updated = 0; - data.inhibited = 0, data.g_inhibited = 0, data.s_inhibited = 0; + data.updated = 0, data.g_updated = 0, data.s_updated = 0, data.b_updated = 0; + data.inhibited = 0, data.g_inhibited = 0, data.s_inhibited = 0, data.b_inhibited = 0; data.ti_hydro_end_min = max_nr_timesteps, data.ti_hydro_end_max = 0, data.ti_hydro_beg_max = 0; data.ti_gravity_end_min = max_nr_timesteps, data.ti_gravity_end_max = 0, data.ti_gravity_beg_max = 0; data.ti_stars_end_min = max_nr_timesteps, data.ti_stars_end_max = 0, data.ti_stars_beg_max = 0; + data.ti_black_holes_end_min = max_nr_timesteps, data.ti_black_holes_end_max = 0, + data.ti_black_holes_beg_max = 0; data.e = e; /* Collect information from the local top-level cells */ @@ -2874,14 +3067,16 @@ void engine_collect_end_of_step(struct engine *e, int apply) { s->nr_inhibited_parts = data.inhibited; s->nr_inhibited_gparts = data.g_inhibited; s->nr_inhibited_sparts = data.s_inhibited; + s->nr_inhibited_bparts = data.b_inhibited; /* Store these in the temporary collection group. 
*/ collectgroup1_init( - &e->collect_group1, data.updated, data.g_updated, data.s_updated, - data.inhibited, data.g_inhibited, data.s_inhibited, data.ti_hydro_end_min, + &e->collect_group1, data.updated, data.g_updated, data.s_updated, data.b_updated, + data.inhibited, data.g_inhibited, data.s_inhibited, data.b_inhibited, data.ti_hydro_end_min, data.ti_hydro_end_max, data.ti_hydro_beg_max, data.ti_gravity_end_min, data.ti_gravity_end_max, data.ti_gravity_beg_max, data.ti_stars_end_min, - data.ti_stars_end_max, data.ti_stars_beg_max, e->forcerebuild, + data.ti_stars_end_max, data.ti_stars_beg_max, data.ti_black_holes_end_min, + data.ti_black_holes_end_max, data.ti_black_holes_beg_max, e->forcerebuild, e->s->tot_cells, e->sched.nr_tasks, (float)e->sched.nr_tasks / (float)e->s->tot_cells); @@ -3419,11 +3614,11 @@ void engine_step(struct engine *e) { /* Print some information to the screen */ printf( - " %6d %14e %12.7f %12.7f %14e %4d %4d %12lld %12lld %12lld %21.3f " - "%6d\n", + " %6d %14e %12.7f %12.7f %14e %4d %4d %12lld %12lld %12lld %12lld " + "%21.3f %6d\n", e->step, e->time, e->cosmology->a, e->cosmology->z, e->time_step, e->min_active_bin, e->max_active_bin, e->updates, e->g_updates, - e->s_updates, e->wallclock_time, e->step_props); + e->s_updates, e->b_updates, e->wallclock_time, e->step_props); #ifdef SWIFT_DEBUG_CHECKS fflush(stdout); #endif @@ -3431,11 +3626,11 @@ void engine_step(struct engine *e) { if (!e->restarting) fprintf( e->file_timesteps, - " %6d %14e %12.7f %12.7f %14e %4d %4d %12lld %12lld %12lld %21.3f " - "%6d\n", + " %6d %14e %12.7f %12.7f %14e %4d %4d %12lld %12lld %12lld %12lld " + "%21.3f %6d\n", e->step, e->time, e->cosmology->a, e->cosmology->z, e->time_step, e->min_active_bin, e->max_active_bin, e->updates, e->g_updates, - e->s_updates, e->wallclock_time, e->step_props); + e->s_updates, e->b_updates, e->wallclock_time, e->step_props); #ifdef SWIFT_DEBUG_CHECKS fflush(e->file_timesteps); #endif @@ -3570,6 +3765,7 @@ void 
engine_step(struct engine *e) { e->updates_since_rebuild += e->collect_group1.updated; e->g_updates_since_rebuild += e->collect_group1.g_updated; e->s_updates_since_rebuild += e->collect_group1.s_updated; + e->b_updates_since_rebuild += e->collect_group1.b_updated; #ifdef SWIFT_DEBUG_CHECKS if (e->ti_end_min == e->ti_current && e->ti_end_min < max_nr_timesteps) @@ -4271,6 +4467,25 @@ void engine_split(struct engine *e, struct partition *initial_partition) { if (s->nr_sparts > 0 && s->nr_gparts > 0) part_relink_gparts_to_sparts(s->sparts, s->nr_sparts, 0); + /* Re-allocate the local bparts. */ + if (e->verbose) + message("Re-allocating bparts array from %zu to %zu.", s->size_bparts, + (size_t)(s->nr_bparts * engine_redistribute_alloc_margin)); + s->size_bparts = s->nr_bparts * engine_redistribute_alloc_margin; + struct bpart *bparts_new = NULL; + if (swift_memalign("bparts", (void **)&bparts_new, bpart_align, + sizeof(struct bpart) * s->size_bparts) != 0) + error("Failed to allocate new bpart data."); + + if (s->nr_bparts > 0) + memcpy(bparts_new, s->bparts, sizeof(struct bpart) * s->nr_bparts); + swift_free("bparts", s->bparts); + s->bparts = bparts_new; + + /* Re-link the gparts to their bparts. */ + if (s->nr_bparts > 0 && s->nr_gparts > 0) + part_relink_gparts_to_bparts(s->bparts, s->nr_bparts, 0); + /* Re-allocate the local gparts. */ if (e->verbose) message("Re-allocating gparts array from %zu to %zu.", s->size_gparts, @@ -4294,6 +4509,10 @@ void engine_split(struct engine *e, struct partition *initial_partition) { if (s->nr_sparts > 0 && s->nr_gparts > 0) part_relink_sparts_to_gparts(s->gparts, s->nr_gparts, s->sparts); + /* Re-link the bparts. 
*/ + if (s->nr_bparts > 0 && s->nr_gparts > 0) + part_relink_bparts_to_gparts(s->gparts, s->nr_gparts, s->bparts); + #ifdef SWIFT_DEBUG_CHECKS /* Verify that the links are correct */ @@ -4955,9 +5174,9 @@ void engine_config(int restart, struct engine *e, struct swift_params *params, engine_step_prop_stf, engine_step_prop_logger_index); fprintf(e->file_timesteps, - "# %6s %14s %12s %12s %14s %9s %12s %12s %12s %16s [%s] %6s\n", + "# %6s %14s %12s %12s %14s %9s %12s %12s %12s %12s %16s [%s] %6s\n", "Step", "Time", "Scale-factor", "Redshift", "Time-step", - "Time-bins", "Updates", "g-Updates", "s-Updates", + "Time-bins", "Updates", "g-Updates", "s-Updates", "b-Updates", "Wall-clock time", clocks_getunit(), "Props"); fflush(e->file_timesteps); } @@ -5606,7 +5825,7 @@ void engine_recompute_displacement_constraint(struct engine *e) { FLT_MAX, FLT_MAX, e->s->min_spart_mass, - FLT_MAX}; + e->s->min_bpart_mass}; #ifdef SWIFT_DEBUG_CHECKS /* Check that the minimal mass collection worked */ float min_part_mass_check = FLT_MAX; @@ -5630,7 +5849,7 @@ void engine_recompute_displacement_constraint(struct engine *e) { 0.f, 0.f, e->s->sum_spart_vel_norm, - 0.f}; + e->s->sum_bpart_vel_norm}; #ifdef WITH_MPI MPI_Allreduce(MPI_IN_PLACE, vel_norm, swift_type_count, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD); @@ -5644,15 +5863,15 @@ void engine_recompute_displacement_constraint(struct engine *e) { 0.f, 0.f, (float)e->total_nr_sparts, - 0.f}; + (float)e->total_nr_bparts}; /* Count of particles for the two species */ const float N_dm = count_parts[1]; - const float N_b = count_parts[0] + count_parts[4]; + const float N_b = count_parts[0] + count_parts[4] + count_parts[5]; /* Peculiar motion norm for the two species */ const float vel_norm_dm = vel_norm[1]; - const float vel_norm_b = vel_norm[0] + vel_norm[4]; + const float vel_norm_b = vel_norm[0] + vel_norm[4] + vel_norm[5]; /* Mesh forces smoothing scale */ float r_s; @@ -5683,7 +5902,7 @@ void engine_recompute_displacement_constraint(struct 
engine *e) { if (N_b > 0.f) { /* Minimal mass for the baryons */ - const float min_mass_b = min(min_mass[0], min_mass[4]); + const float min_mass_b = min3(min_mass[0], min_mass[4], min_mass[5]); /* Inter-particle sepration for the baryons */ const float d_b = cbrtf(min_mass_b / (Ob * rho_crit0)); diff --git a/src/engine.h b/src/engine.h index c9557c189cae013e3c21c02b54ca65c8c9907ec8..42e7993612b55cd61fc8f3dfc87b440368c51e4a 100644 --- a/src/engine.h +++ b/src/engine.h @@ -204,6 +204,15 @@ struct engine { /* Maximal stars ti_beg for the next time-step */ integertime_t ti_stars_beg_max; + /* Minimal black holes ti_end for the next time-step */ + integertime_t ti_black_holes_end_min; + + /* Maximal black holes ti_end for the next time-step */ + integertime_t ti_black_holes_end_max; + + /* Maximal black holes ti_beg for the next time-step */ + integertime_t ti_black_holes_beg_max; + /* Minimal overall ti_end for the next time-step */ integertime_t ti_end_min; @@ -473,7 +482,8 @@ void engine_exchange_strays(struct engine *e, const size_t offset_parts, const int *ind_part, size_t *Npart, const size_t offset_gparts, const int *ind_gpart, size_t *Ngpart, const size_t offset_sparts, - const int *ind_spart, size_t *Nspart); + const int *ind_spart, size_t *Nspart, const size_t offset_bparts, + const int *ind_bpart, size_t *Nbpart); void engine_rebuild(struct engine *e, int redistributed, int clean_h_values); void engine_repartition(struct engine *e); void engine_repartition_trigger(struct engine *e); diff --git a/src/parallel_io.c b/src/parallel_io.c index 0160a53c110c8913e42e0d7cd7c8720a9ed3d331..c093f91bc063d408893e620b139d0d802e322904 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -37,6 +37,7 @@ #include "parallel_io.h" /* Local includes. 
*/ +#include "black_holes_io.h" #include "chemistry_io.h" #include "common_io.h" #include "cooling_io.h" @@ -651,14 +652,17 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, * @param parts (output) The array of #part read from the file. * @param gparts (output) The array of #gpart read from the file. * @param sparts (output) The array of #spart read from the file. + * @param bparts (output) The array of #bpart read from the file. * @param Ngas (output) The number of particles read from the file. * @param Ngparts (output) The number of particles read from the file. * @param Nstars (output) The number of particles read from the file. + * @param Nblackholes (output) The number of particles read from the file. * @param flag_entropy (output) 1 if the ICs contained Entropy in the * InternalEnergy field * @param with_hydro Are we running with hydro ? * @param with_gravity Are we running with gravity ? * @param with_stars Are we running with stars ? + * @param with_black_holes Are we running with black holes ? * @param cleanup_h Are we cleaning-up h-factors from the quantities we read? * @param cleanup_sqrt_a Are we cleaning-up the sqrt(a) factors in the Gadget * IC velocities? 
@@ -674,9 +678,9 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, */ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, double dim[3], struct part** parts, struct gpart** gparts, - struct spart** sparts, size_t* Ngas, size_t* Ngparts, - size_t* Nstars, int* flag_entropy, int with_hydro, - int with_gravity, int with_stars, int cleanup_h, + struct spart** sparts, struct bpart** bparts, size_t* Ngas, size_t* Ngparts, + size_t* Nstars, size_t* Nblackholes, int* flag_entropy, int with_hydro, + int with_gravity, int with_stars, int with_black_holes, int cleanup_h, int cleanup_sqrt_a, double h, double a, int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info, int n_threads, int dry_run) { @@ -692,6 +696,9 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, int dimension = 3; /* Assume 3D if nothing is specified */ size_t Ndm = 0; + /* Initialise counters */ + *Ngas = 0, *Ngparts = 0, *Nstars = 0, *Nblackholes = 0; + /* Open file */ /* message("Opening file '%s' as IC.", fileName); */ hid_t h_plist_id = H5Pcreate(H5P_FILE_ACCESS); @@ -831,12 +838,22 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, bzero(*sparts, *Nstars * sizeof(struct spart)); } + /* Allocate memory to store black hole particles */ + if (with_black_holes) { + *Nblackholes = N[swift_type_black_hole]; + if (swift_memalign("bparts", (void**)bparts, bpart_align, + *Nblackholes * sizeof(struct bpart)) != 0) + error("Error while allocating memory for black_holes particles"); + bzero(*bparts, *Nblackholes * sizeof(struct bpart)); + } + /* Allocate memory to store gravity particles */ if (with_gravity) { Ndm = N[1]; *Ngparts = (with_hydro ? N[swift_type_gas] : 0) + N[swift_type_dark_matter] + - (with_stars ? N[swift_type_stars] : 0); + (with_stars ? N[swift_type_stars] : 0) + + (with_black_holes ? 
N[swift_type_black_hole] : 0); if (swift_memalign("gparts", (void**)gparts, gpart_align, *Ngparts * sizeof(struct gpart)) != 0) error("Error while allocating memory for gravity particles"); @@ -892,6 +909,13 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, } break; + case swift_type_black_hole: + if (with_black_holes) { + Nparticles = *Nblackholes; + black_holes_read_particles(*bparts, list, &num_fields); + } + break; + default: if (mpi_rank == 0) message("Particle Type %d not yet supported. Particles ignored", @@ -925,6 +949,10 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, if (with_stars) io_duplicate_stars_gparts(&tp, *sparts, *gparts, *Nstars, Ndm + *Ngas); + /* Duplicate the black hole particles into gparts */ + if (with_black_holes) + io_duplicate_black_holes_gparts(&tp, *bparts, *gparts, *Nblackholes, Ndm + *Ngas + *Nstars); + threadpool_clean(&tp); } @@ -957,6 +985,7 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6], const struct xpart* xparts = e->s->xparts; const struct gpart* gparts = e->s->gparts; const struct spart* sparts = e->s->sparts; + const struct bpart* bparts = e->s->bparts; struct swift_params* params = e->parameter_file; const int with_cosmology = e->policy & engine_policy_cosmology; const int with_cooling = e->policy & engine_policy_cooling; @@ -1182,6 +1211,13 @@ void prepare_file(struct engine* e, const char* baseName, long long N_total[6], } break; + case swift_type_black_hole: + black_holes_write_particles(bparts, list, &num_fields); + if (with_stf) { + num_fields += velociraptor_write_bparts(bparts, list + num_fields); + } + break; + default: error("Particle Type %d not yet supported. 
Aborting", ptype); } @@ -1245,6 +1281,7 @@ void write_output_parallel(struct engine* e, const char* baseName, const struct xpart* xparts = e->s->xparts; const struct gpart* gparts = e->s->gparts; const struct spart* sparts = e->s->sparts; + const struct bpart* bparts = e->s->bparts; struct swift_params* params = e->parameter_file; const int with_cosmology = e->policy & engine_policy_cosmology; const int with_cooling = e->policy & engine_policy_cooling; @@ -1260,6 +1297,7 @@ void write_output_parallel(struct engine* e, const char* baseName, const size_t Ntot = e->s->nr_gparts; const size_t Ngas = e->s->nr_parts; const size_t Nstars = e->s->nr_sparts; + const size_t Nblackholes = e->s->nr_bparts; // const size_t Nbaryons = Ngas + Nstars; // const size_t Ndm = Ntot > 0 ? Ntot - Nbaryons : 0; @@ -1270,6 +1308,8 @@ void write_output_parallel(struct engine* e, const char* baseName, e->s->nr_parts - e->s->nr_inhibited_parts - e->s->nr_extra_parts; const size_t Nstars_written = e->s->nr_sparts - e->s->nr_inhibited_sparts - e->s->nr_extra_sparts; + const size_t Nblackholes_written = + e->s->nr_bparts - e->s->nr_inhibited_bparts - e->s->nr_extra_bparts; const size_t Nbaryons_written = Ngas_written + Nstars_written; const size_t Ndm_written = Ntot_written > 0 ? 
Ntot_written - Nbaryons_written : 0; @@ -1458,6 +1498,7 @@ void write_output_parallel(struct engine* e, const char* baseName, struct gpart* gparts_written = NULL; struct velociraptor_gpart_data* gpart_group_data_written = NULL; struct spart* sparts_written = NULL; + struct bpart* bparts_written = NULL; /* Write particle fields from the particle structure */ switch (ptype) { @@ -1608,6 +1649,39 @@ void write_output_parallel(struct engine* e, const char* baseName, } } break; + case swift_type_black_hole: { + if (Nblackholes == Nblackholes_written) { + + /* No inhibted particles: easy case */ + Nparticles = Nblackholes; + black_holes_write_particles(bparts, list, &num_fields); + if (with_stf) { + num_fields += velociraptor_write_bparts(bparts, list + num_fields); + } + } else { + + /* Ok, we need to fish out the particles we want */ + Nparticles = Nblackholes_written; + + /* Allocate temporary arrays */ + if (swift_memalign("bparts_written", (void**)&bparts_written, + bpart_align, + Nblackholes_written * sizeof(struct bpart)) != 0) + error("Error while allocating temporart memory for bparts"); + + /* Collect the particles we want to write */ + io_collect_bparts_to_write(bparts, bparts_written, Nblackholes, + Nblackholes_written); + + /* Select the fields to write */ + black_holes_write_particles(bparts_written, list, &num_fields); + if (with_stf) { + num_fields += + velociraptor_write_bparts(bparts_written, list + num_fields); + } + } + } break; + default: error("Particle Type %d not yet supported. 
Aborting", ptype); } @@ -1634,6 +1708,7 @@ void write_output_parallel(struct engine* e, const char* baseName, if (gpart_group_data_written) swift_free("gpart_group_written", gpart_group_data_written); if (sparts_written) swift_free("sparts_written", sparts_written); + if (bparts_written) swift_free("bparts_written", bparts_written); #ifdef IO_SPEED_MEASUREMENT MPI_Barrier(MPI_COMM_WORLD); diff --git a/src/parallel_io.h b/src/parallel_io.h index 9cd775347f0d5fbb3bc1b17664e0d5dba734d795..3bcfcdba2d25a5e29396768c046f7560ad2be739 100644 --- a/src/parallel_io.h +++ b/src/parallel_io.h @@ -35,9 +35,10 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, double dim[3], struct part** parts, struct gpart** gparts, - struct spart** sparts, size_t* Ngas, size_t* Ngparts, - size_t* Nsparts, int* flag_entropy, int with_hydro, - int with_gravity, int with_stars, int cleanup_h, + struct spart** sparts, struct bpart** bparts, size_t* Ngas, + size_t* Ngparts, + size_t* Nsparts, size_t *Nbparts, int* flag_entropy, int with_hydro, + int with_gravity, int with_stars, int with_black_holes, int cleanup_h, int cleanup_sqrt_a, double h, double a, int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info, int nr_threads, int dry_run); @@ -47,13 +48,6 @@ void write_output_parallel(struct engine* e, const char* baseName, const struct unit_system* snapshot_units, int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info); - -void writeArray(struct engine* e, hid_t grp, char* fileName, - char* partTypeGroupName, struct io_props props, size_t N, - long long N_total, int mpi_rank, long long offset, - const struct unit_system* internal_units, - const struct unit_system* snapshot_units); - #endif #endif /* SWIFT_PARALLEL_IO_H */ diff --git a/src/part.c b/src/part.c index 7fb27acae568a86e4f439fdac01dfe39374f9b54..d58a10a91d8c7cb2c4cdfd0b1df2eaffd3633390 100644 --- a/src/part.c +++ b/src/part.c @@ -64,6 +64,22 @@ void part_relink_gparts_to_sparts(struct spart *sparts, 
size_t N, } } +/** + * @brief Re-link the #gpart%s associated with the list of #bpart%s. + * + * @param bparts The list of #bpart. + * @param N The number of s-particles to re-link; + * @param offset The offset of #bpart%s relative to the global bparts list. + */ +void part_relink_gparts_to_bparts(struct bpart *bparts, size_t N, + ptrdiff_t offset) { + for (size_t k = 0; k < N; k++) { + if (bparts[k].gpart) { + bparts[k].gpart->id_or_neg_offset = -(k + offset); + } + } +} + /** * @brief Re-link the #part%s associated with the list of #gpart%s. * @@ -243,7 +259,7 @@ void part_verify_links(struct part *parts, struct gpart *gparts, gparts[k].x[2] != bpart->x[2]) error( "Linked particles are not at the same position !\n" - "gp->x=[%e %e %e] sp->x=[%e %e %e] diff=[%e %e %e]", + "gp->x=[%e %e %e] bp->x=[%e %e %e] diff=[%e %e %e]", gparts[k].x[0], gparts[k].x[1], gparts[k].x[2], bpart->x[0], bpart->x[1], bpart->x[2], gparts[k].x[0] - bpart->x[0], gparts[k].x[1] - bpart->x[1], gparts[k].x[2] - bpart->x[2]); diff --git a/src/part.h b/src/part.h index 3b2f01180d67a597b71086c46283dae2d7de49b9..d782b5b70fb2fa9a239a8760338436fbf087a0d0 100644 --- a/src/part.h +++ b/src/part.h @@ -115,6 +115,8 @@ void part_relink_gparts_to_parts(struct part *parts, size_t N, ptrdiff_t offset); void part_relink_gparts_to_sparts(struct spart *sparts, size_t N, ptrdiff_t offset); +void part_relink_gparts_to_bparts(struct bpart *bparts, size_t N, + ptrdiff_t offset); void part_relink_parts_to_gparts(struct gpart *gparts, size_t N, struct part *parts); void part_relink_sparts_to_gparts(struct gpart *gparts, size_t N, diff --git a/src/proxy.c b/src/proxy.c index d9f27ad63cc4c888c14048de83a1576000aacac6..ee2ba818541fdb874fff9c865ac4bb2ee02b371c 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -584,7 +584,8 @@ void proxy_parts_exchange_first(struct proxy *p) { p->buff_out[0] = p->nr_parts_out; p->buff_out[1] = p->nr_gparts_out; p->buff_out[2] = p->nr_sparts_out; - if (MPI_Isend(p->buff_out, 3, MPI_INT, 
p->nodeID, + p->buff_out[3] = p->nr_bparts_out; + if (MPI_Isend(p->buff_out, 4, MPI_INT, p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD, &p->req_parts_count_out) != MPI_SUCCESS) error("Failed to isend nr of parts."); @@ -612,7 +613,7 @@ void proxy_parts_exchange_first(struct proxy *p) { p->mynodeID * proxy_tag_shift + proxy_tag_gparts, MPI_COMM_WORLD, &p->req_gparts_out) != MPI_SUCCESS) error("Failed to isend gpart data."); - // message( "isent gpart data (%i) to node %i." , p->nr_parts_out , + // message( "isent gpart data (%i) to node %i." , p->nr_gparts_out , // p->nodeID ); fflush(stdout); } @@ -621,12 +622,20 @@ void proxy_parts_exchange_first(struct proxy *p) { p->mynodeID * proxy_tag_shift + proxy_tag_sparts, MPI_COMM_WORLD, &p->req_sparts_out) != MPI_SUCCESS) error("Failed to isend spart data."); - // message( "isent gpart data (%i) to node %i." , p->nr_parts_out , + // message( "isent spart data (%i) to node %i." , p->nr_sparts_out , + // p->nodeID ); fflush(stdout); + } + if (p->nr_bparts_out > 0) { + if (MPI_Isend(p->bparts_out, p->nr_bparts_out, bpart_mpi_type, p->nodeID, + p->mynodeID * proxy_tag_shift + proxy_tag_bparts, + MPI_COMM_WORLD, &p->req_bparts_out) != MPI_SUCCESS) + error("Failed to isend bpart data."); + // message( "isent bpart data (%i) to node %i." , p->nr_bparts_out , // p->nodeID ); fflush(stdout); } /* Receive the number of particles. */ - if (MPI_Irecv(p->buff_in, 3, MPI_INT, p->nodeID, + if (MPI_Irecv(p->buff_in, 4, MPI_INT, p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD, &p->req_parts_count_in) != MPI_SUCCESS) error("Failed to irecv nr of parts."); @@ -644,6 +653,7 @@ void proxy_parts_exchange_second(struct proxy *p) { p->nr_parts_in = p->buff_in[0]; p->nr_gparts_in = p->buff_in[1]; p->nr_sparts_in = p->buff_in[2]; + p->nr_bparts_in = p->buff_in[3]; /* Is there enough space in the buffers? 
*/ if (p->nr_parts_in > p->size_parts_in) { @@ -676,6 +686,15 @@ void proxy_parts_exchange_second(struct proxy *p) { "sparts_in", sizeof(struct spart) * p->size_sparts_in)) == NULL) error("Failed to re-allocate sparts_in buffers."); } + if (p->nr_bparts_in > p->size_bparts_in) { + do { + p->size_bparts_in *= proxy_buffgrow; + } while (p->nr_bparts_in > p->size_bparts_in); + swift_free("bparts_in", p->bparts_in); + if ((p->bparts_in = (struct bpart *)swift_malloc( + "bparts_in", sizeof(struct bpart) * p->size_bparts_in)) == NULL) + error("Failed to re-allocate bparts_in buffers."); + } /* Receive the particle buffers. */ if (p->nr_parts_in > 0) { @@ -702,7 +721,15 @@ void proxy_parts_exchange_second(struct proxy *p) { p->nodeID * proxy_tag_shift + proxy_tag_sparts, MPI_COMM_WORLD, &p->req_sparts_in) != MPI_SUCCESS) error("Failed to irecv spart data."); - // message( "irecv gpart data (%i) from node %i." , p->nr_gparts_in , + // message( "irecv spart data (%i) from node %i." , p->nr_sparts_in , + // p->nodeID ); fflush(stdout); + } + if (p->nr_bparts_in > 0) { + if (MPI_Irecv(p->bparts_in, p->nr_bparts_in, bpart_mpi_type, p->nodeID, + p->nodeID * proxy_tag_shift + proxy_tag_bparts, + MPI_COMM_WORLD, &p->req_bparts_in) != MPI_SUCCESS) + error("Failed to irecv bpart data."); + // message( "irecv bpart data (%i) from node %i." , p->nr_bparts_in , // p->nodeID ); fflush(stdout); } @@ -810,6 +837,36 @@ void proxy_sparts_load(struct proxy *p, const struct spart *sparts, int N) { p->nr_sparts_out += N; } +/** + * @brief Load bparts onto a proxy for exchange. + * + * @param p The #proxy. + * @param bparts Pointer to an array of #bpart to send. + * @param N The number of bparts. + */ +void proxy_bparts_load(struct proxy *p, const struct bpart *bparts, int N) { + + /* Is there enough space in the buffer? 
*/ + if (p->nr_bparts_out + N > p->size_bparts_out) { + do { + p->size_bparts_out *= proxy_buffgrow; + } while (p->nr_bparts_out + N > p->size_bparts_out); + struct bpart *tp; + if ((tp = (struct bpart *)swift_malloc( + "bparts_out", sizeof(struct bpart) * p->size_bparts_out)) == NULL) + error("Failed to re-allocate bparts_out buffers."); + memcpy(tp, p->bparts_out, sizeof(struct bpart) * p->nr_bparts_out); + swift_free("bparts_out", p->bparts_out); + p->bparts_out = tp; + } + + /* Copy the bparts data to the buffer. */ + memcpy(&p->bparts_out[p->nr_bparts_out], bparts, sizeof(struct bpart) * N); + + /* Increase the counters. */ + p->nr_bparts_out += N; +} + /** * @brief Initialize the given proxy. * @@ -896,6 +953,22 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) { error("Failed to allocate sparts_out buffers."); } p->nr_sparts_out = 0; + + /* Allocate the bpart send and receive buffers, if needed. */ + if (p->bparts_in == NULL) { + p->size_bparts_in = proxy_buffinit; + if ((p->bparts_in = (struct bpart *)swift_malloc( + "bparts_in", sizeof(struct bpart) * p->size_bparts_in)) == NULL) + error("Failed to allocate bparts_in buffers."); + } + p->nr_bparts_in = 0; + if (p->bparts_out == NULL) { + p->size_bparts_out = proxy_buffinit; + if ((p->bparts_out = (struct bpart *)swift_malloc( + "bparts_out", sizeof(struct bpart) * p->size_bparts_out)) == NULL) + error("Failed to allocate bparts_out buffers."); + } + p->nr_bparts_out = 0; } /** diff --git a/src/proxy.h b/src/proxy.h index 2e3f350333d9e6fdb09161f852cf3a143c60e7ce..c59c8ff84356188ab7935ab2151f1c8075045095 100644 --- a/src/proxy.h +++ b/src/proxy.h @@ -35,7 +35,8 @@ #define proxy_tag_xparts 2 #define proxy_tag_gparts 3 #define proxy_tag_sparts 4 -#define proxy_tag_cells 5 +#define proxy_tag_bparts 5 +#define proxy_tag_cells 6 /** * @brief The different reasons a cell can be in a proxy @@ -69,15 +70,18 @@ struct proxy { struct xpart *xparts_in, *xparts_out; struct gpart *gparts_in, 
*gparts_out; struct spart *sparts_in, *sparts_out; + struct bpart *bparts_in, *bparts_out; int size_parts_in, size_parts_out; int nr_parts_in, nr_parts_out; int size_gparts_in, size_gparts_out; int nr_gparts_in, nr_gparts_out; int size_sparts_in, size_sparts_out; int nr_sparts_in, nr_sparts_out; + int size_bparts_in, size_bparts_out; + int nr_bparts_in, nr_bparts_out; /* Buffer to hold the incomming/outgoing particle counts. */ - int buff_out[3], buff_in[3]; + int buff_out[4], buff_in[4]; /* MPI request handles. */ #ifdef WITH_MPI @@ -86,6 +90,7 @@ struct proxy { MPI_Request req_xparts_out, req_xparts_in; MPI_Request req_gparts_out, req_gparts_in; MPI_Request req_sparts_out, req_sparts_in; + MPI_Request req_bparts_out, req_bparts_in; MPI_Request req_cells_count_out, req_cells_count_in; MPI_Request req_cells_out, req_cells_in; #endif @@ -97,6 +102,7 @@ void proxy_parts_load(struct proxy *p, const struct part *parts, const struct xpart *xparts, int N); void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N); void proxy_sparts_load(struct proxy *p, const struct spart *sparts, int N); +void proxy_bparts_load(struct proxy *p, const struct bpart *bparts, int N); void proxy_parts_exchange_first(struct proxy *p); void proxy_parts_exchange_second(struct proxy *p); void proxy_addcell_in(struct proxy *p, struct cell *c, int type); diff --git a/src/serial_io.c b/src/serial_io.c index 55a468411b7b9f84d4e05b8f388a5de7873f7f59..7b8797ada40df798bc1f7c0ea1390f9301089d07 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -37,6 +37,7 @@ #include "serial_io.h" /* Local includes. */ +#include "black_holes_io.h" #include "chemistry_io.h" #include "common_io.h" #include "cooling_io.h" @@ -431,14 +432,17 @@ void writeArray(const struct engine* e, hid_t grp, char* fileName, * @param parts (output) The array of #part (gas particles) read from the file. * @param gparts (output) The array of #gpart read from the file. * @param sparts (output) Array of #spart particles. 
+ * @param bparts (output) Array of #bpart particles. * @param Ngas (output) The number of #part read from the file on that node. * @param Ngparts (output) The number of #gpart read from the file on that node. * @param Nstars (output) The number of #spart read from the file on that node. + * @param Nblackholes (output) The number of #bpart read from the file on that node. * @param flag_entropy (output) 1 if the ICs contained Entropy in the * InternalEnergy field * @param with_hydro Are we reading gas particles ? * @param with_gravity Are we reading/creating #gpart arrays ? * @param with_stars Are we reading star particles ? + * @param with_black_holes Are we reading black hole particles ? * @param cleanup_h Are we cleaning-up h-factors from the quantities we read? * @param cleanup_sqrt_a Are we cleaning-up the sqrt(a) factors in the Gadget * IC velocities? @@ -461,9 +465,9 @@ void writeArray(const struct engine* e, hid_t grp, char* fileName, */ void read_ic_serial(char* fileName, const struct unit_system* internal_units, double dim[3], struct part** parts, struct gpart** gparts, - struct spart** sparts, size_t* Ngas, size_t* Ngparts, - size_t* Nstars, int* flag_entropy, int with_hydro, - int with_gravity, int with_stars, int cleanup_h, + struct spart** sparts, struct bpart** bparts, size_t* Ngas, size_t* Ngparts, + size_t* Nstars, size_t *Nblackholes, int* flag_entropy, int with_hydro, + int with_gravity, int with_stars, int with_black_holes, int cleanup_h, int cleanup_sqrt_a, double h, double a, int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info, int n_threads, int dry_run) { @@ -482,6 +486,9 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, struct unit_system* ic_units = (struct unit_system*)malloc(sizeof(struct unit_system)); + /* Initialise counters */ + *Ngas = 0, *Ngparts = 0, *Nstars = 0, *Nblackholes = 0; + /* First read some information about the content */ if (mpi_rank == 0) { @@ -630,12 +637,22 @@ void 
read_ic_serial(char* fileName, const struct unit_system* internal_units, bzero(*sparts, *Nstars * sizeof(struct spart)); } + /* Allocate memory to store black hole particles */ + if (with_black_holes) { + *Nblackholes = N[swift_type_black_hole]; + if (swift_memalign("bparts", (void**)bparts, bpart_align, + *Nblackholes * sizeof(struct bpart)) != 0) + error("Error while allocating memory for black hole particles"); + bzero(*bparts, *Nblackholes * sizeof(struct bpart)); + } + + /* Allocate memory to store all gravity particles */ if (with_gravity) { Ndm = N[1]; *Ngparts = (with_hydro ? N[swift_type_gas] : 0) + N[swift_type_dark_matter] + - (with_stars ? N[swift_type_stars] : 0); + (with_stars ? N[swift_type_stars] : 0) + + (with_black_holes ? N[swift_type_black_hole] : 0); if (swift_memalign("gparts", (void**)gparts, gpart_align, *Ngparts * sizeof(struct gpart)) != 0) error("Error while allocating memory for gravity particles"); @@ -703,6 +720,13 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, } break; + case swift_type_black_hole: + if (with_black_holes) { + Nparticles = *Nblackholes; + black_holes_read_particles(*bparts, list, &num_fields); + } + break; + default: if (mpi_rank == 0) message("Particle Type %d not yet supported. 
Particles ignored", @@ -745,6 +769,10 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, if (with_stars) io_duplicate_stars_gparts(&tp, *sparts, *gparts, *Nstars, Ndm + *Ngas); + /* Duplicate the black holes particles into gparts */ + if (with_black_holes) + io_duplicate_black_holes_gparts(&tp, *bparts, *gparts, *Nblackholes, Ndm + *Ngas + *Nstars); + threadpool_clean(&tp); } @@ -785,6 +813,7 @@ void write_output_serial(struct engine* e, const char* baseName, const struct xpart* xparts = e->s->xparts; const struct gpart* gparts = e->s->gparts; const struct spart* sparts = e->s->sparts; + const struct bpart* bparts = e->s->bparts; struct swift_params* params = e->parameter_file; const int with_cosmology = e->policy & engine_policy_cosmology; const int with_cooling = e->policy & engine_policy_cooling; @@ -802,6 +831,7 @@ void write_output_serial(struct engine* e, const char* baseName, const size_t Ntot = e->s->nr_gparts; const size_t Ngas = e->s->nr_parts; const size_t Nstars = e->s->nr_sparts; + const size_t Nblackholes = e->s->nr_bparts; // const size_t Nbaryons = Ngas + Nstars; // const size_t Ndm = Ntot > 0 ? Ntot - Nbaryons : 0; @@ -812,7 +842,9 @@ void write_output_serial(struct engine* e, const char* baseName, e->s->nr_parts - e->s->nr_inhibited_parts - e->s->nr_extra_parts; const size_t Nstars_written = e->s->nr_sparts - e->s->nr_inhibited_sparts - e->s->nr_extra_sparts; - const size_t Nbaryons_written = Ngas_written + Nstars_written; + const size_t Nblackholes_written = + e->s->nr_bparts - e->s->nr_inhibited_bparts - e->s->nr_extra_bparts; + const size_t Nbaryons_written = Ngas_written + Nstars_written + Nblackholes_written; const size_t Ndm_written = Ntot_written > 0 ? 
Ntot_written - Nbaryons_written : 0; @@ -827,7 +859,7 @@ void write_output_serial(struct engine* e, const char* baseName, /* Compute offset in the file and total number of particles */ size_t N[swift_type_count] = { - Ngas_written, Ndm_written, 0, 0, Nstars_written, 0}; + Ngas_written, Ndm_written, 0, 0, Nstars_written, Nblackholes_written}; long long N_total[swift_type_count] = {0}; long long offset[swift_type_count] = {0}; MPI_Exscan(&N, &offset, swift_type_count, MPI_LONG_LONG_INT, MPI_SUM, comm); @@ -1103,6 +1135,7 @@ void write_output_serial(struct engine* e, const char* baseName, struct gpart* gparts_written = NULL; struct velociraptor_gpart_data* gpart_group_data_written = NULL; struct spart* sparts_written = NULL; + struct bpart* bparts_written = NULL; /* Write particle fields from the particle structure */ switch (ptype) { @@ -1256,6 +1289,40 @@ void write_output_serial(struct engine* e, const char* baseName, } } break; + case swift_type_black_hole: { + if (Nblackholes == Nblackholes_written) { + + /* No inhibted particles: easy case */ + Nparticles = Nblackholes; + black_holes_write_particles(bparts, list, &num_fields); + if (with_stf) { + num_fields += + velociraptor_write_bparts(bparts, list + num_fields); + } + } else { + + /* Ok, we need to fish out the particles we want */ + Nparticles = Nblackholes_written; + + /* Allocate temporary arrays */ + if (swift_memalign("bparts_written", (void**)&bparts_written, + bpart_align, + Nblackholes_written * sizeof(struct bpart)) != 0) + error("Error while allocating temporart memory for bparts"); + + /* Collect the particles we want to write */ + io_collect_bparts_to_write(bparts, bparts_written, Nblackholes, + Nblackholes_written); + + /* Select the fields to write */ + black_holes_write_particles(bparts_written, list, &num_fields); + if (with_stf) { + num_fields += velociraptor_write_bparts(bparts_written, + list + num_fields); + } + } + } break; + default: error("Particle Type %d not yet supported. 
Aborting", ptype); } @@ -1282,6 +1349,7 @@ void write_output_serial(struct engine* e, const char* baseName, if (gpart_group_data_written) swift_free("gpart_group_written", gpart_group_data_written); if (sparts_written) swift_free("sparts_written", sparts_written); + if (bparts_written) swift_free("bparts_written", bparts_written); /* Close particle group */ H5Gclose(h_grp); diff --git a/src/serial_io.h b/src/serial_io.h index 07df76fe869fa0612bba5cf953faadd8bc63f29e..9cf93e0b0203b0161be9133295c8511428e9bf4a 100644 --- a/src/serial_io.h +++ b/src/serial_io.h @@ -37,11 +37,11 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, double dim[3], struct part** parts, struct gpart** gparts, - struct spart** sparts, size_t* Ngas, size_t* Ngparts, - size_t* Nstars, int* flag_entropy, int with_hydro, - int with_gravity, int with_stars, int cleanup_h, + struct spart** sparts, struct bpart** bparts, size_t* Ngas, size_t* Ngparts, + size_t* Nstars, size_t *Nblackholes, int* flag_entropy, int with_hydro, + int with_gravity, int with_stars, int with_black_holes, int cleanup_h, int cleanup_sqrt_a, double h, double a, int mpi_rank, - int mpi_size, MPI_Comm comm, MPI_Info info, int nr_threads, + int mpi_size, MPI_Comm comm, MPI_Info info, int n_threads, int dry_run); void write_output_serial(struct engine* e, const char* baseName, diff --git a/src/single_io.c b/src/single_io.c index 83802fca25e7ff8a325883bbbdaa3c1827dfc328..2593dfe963157a11b9242add7e53891d0a4433c8 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -395,6 +395,9 @@ void read_ic_single(const char* fileName, int dimension = 3; /* Assume 3D if nothing is specified */ size_t Ndm = 0; + /* Initialise counters */ + *Ngas = 0, *Ngparts = 0, *Nstars = 0, *Nblackholes = 0; + /* Open file */ /* message("Opening file '%s' as IC.", fileName); */ h_file = H5Fopen(fileName, H5F_ACC_RDONLY, H5P_DEFAULT); @@ -523,7 +526,7 @@ void read_ic_single(const char* fileName, bzero(*sparts, *Nstars * 
sizeof(struct spart)); } - /* Allocate memory to store star particles */ + /* Allocate memory to store black hole particles */ if (with_black_holes) { *Nblackholes = N[swift_type_black_hole]; if (swift_memalign("bparts", (void**)bparts, bpart_align, diff --git a/src/space.c b/src/space.c index 63f4489d27dd5ea738c67be92a171283eb3916fd..618f2b6f4155c8a996e2b6121474bdf491397936 100644 --- a/src/space.c +++ b/src/space.c @@ -202,6 +202,7 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->hydro.dx_max_sort = 0.0f; c->stars.dx_max_part = 0.f; c->stars.dx_max_sort = 0.f; + c->black_holes.dx_max_part = 0.f; c->hydro.sorted = 0; c->stars.sorted = 0; c->hydro.count = 0; @@ -216,6 +217,10 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->stars.count_total = 0; c->stars.updated = 0; c->stars.inhibited = 0; + c->black_holes.count = 0; + c->black_holes.count_total = 0; + c->black_holes.updated = 0; + c->black_holes.inhibited = 0; c->grav.init = NULL; c->grav.init_out = NULL; c->hydro.extra_ghost = NULL; @@ -234,6 +239,7 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->stars.drift = NULL; c->stars.stars_in = NULL; c->stars.stars_out = NULL; + c->black_holes.drift = NULL; c->grav.drift = NULL; c->grav.drift_out = NULL; c->hydro.cooling = NULL; @@ -250,11 +256,13 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->hydro.xparts = NULL; c->grav.parts = NULL; c->stars.parts = NULL; + c->black_holes.parts = NULL; c->hydro.do_sub_sort = 0; c->stars.do_sub_sort = 0; c->hydro.do_sub_drift = 0; c->grav.do_sub_drift = 0; c->stars.do_sub_drift = 0; + c->black_holes.do_sub_drift = 0; c->hydro.do_sub_limiter = 0; c->hydro.do_limiter = 0; c->hydro.ti_end_min = -1; @@ -263,6 +271,8 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->grav.ti_end_max = -1; c->stars.ti_end_min = -1; c->stars.ti_end_max = -1; + c->black_holes.ti_end_min = -1; + c->black_holes.ti_end_max = -1; #if 
defined(SWIFT_DEBUG_CHECKS) || defined(SWIFT_CELL_GRAPH) c->cellID = 0; #endif @@ -338,6 +348,11 @@ void space_free_foreign_parts(struct space *s) { s->size_sparts_foreign = 0; s->sparts_foreign = NULL; } + if (s->bparts_foreign != NULL) { + swift_free("bparts_foreign", s->bparts_foreign); + s->size_bparts_foreign = 0; + s->bparts_foreign = NULL; + } #endif } @@ -591,16 +606,26 @@ void space_regrid(struct space *s, int verbose) { c->grav.ti_old_multipole = ti_current; #ifdef WITH_MPI c->mpi.tag = -1; - c->mpi.hydro.recv_xv = NULL; - c->mpi.hydro.recv_rho = NULL; - c->mpi.hydro.recv_gradient = NULL; - c->mpi.hydro.send_xv = NULL; - c->mpi.hydro.send_rho = NULL; - c->mpi.hydro.send_gradient = NULL; - c->mpi.stars.send = NULL; - c->mpi.stars.recv = NULL; - c->mpi.grav.recv = NULL; - c->mpi.grav.send = NULL; + + c->mpi.hydro.recv_xv = NULL; + c->mpi.hydro.recv_rho = NULL; + c->mpi.hydro.recv_gradient = NULL; + c->mpi.hydro.recv_ti = NULL; + c->mpi.grav.recv = NULL; + c->mpi.grav.recv_ti = NULL; + c->mpi.stars.recv = NULL; + c->mpi.stars.recv_ti = NULL; + c->mpi.limiter.recv = NULL; + + c->mpi.hydro.send_xv = NULL; + c->mpi.hydro.send_rho = NULL; + c->mpi.hydro.send_gradient = NULL; + c->mpi.hydro.send_ti = NULL; + c->mpi.grav.send = NULL; + c->mpi.grav.send_ti = NULL; + c->mpi.stars.send = NULL; + c->mpi.stars.send_ti = NULL; + c->mpi.limiter.send = NULL; #endif // WITH_MPI if (s->with_self_gravity) c->grav.multipole = &s->multipoles_top[cid]; #if defined(SWIFT_DEBUG_CHECKS) || defined(SWIFT_CELL_GRAPH) @@ -1486,7 +1511,8 @@ void space_rebuild(struct space *s, int repartitioned, int verbose) { engine_exchange_strays(s->e, nr_parts, &h_index[nr_parts], &nr_parts_exchanged, nr_gparts, &g_index[nr_gparts], &nr_gparts_exchanged, nr_sparts, &s_index[nr_sparts], - &nr_sparts_exchanged); + &nr_sparts_exchanged, nr_bparts, &b_index[nr_bparts], + &nr_bparts_exchanged); /* Set the new particle counts. 
*/ s->nr_parts = nr_parts + nr_parts_exchanged; @@ -5249,6 +5275,8 @@ void space_struct_restore(struct space *s, FILE *stream) { s->size_gparts_foreign = 0; s->sparts_foreign = NULL; s->size_sparts_foreign = 0; + s->bparts_foreign = NULL; + s->size_bparts_foreign = 0; #endif /* More things to read. */ diff --git a/src/space.h b/src/space.h index 19acb8bffb59e57a14e4d2b138cb9a76508b9708..266d3f4ea30e58b87d7389c8a99ed58b353cfbc3 100644 --- a/src/space.h +++ b/src/space.h @@ -275,10 +275,14 @@ struct space { struct gpart *gparts_foreign; size_t nr_gparts_foreign, size_gparts_foreign; - /*! Buffers for g-parts that we will receive from foreign cells. */ + /*! Buffers for s-parts that we will receive from foreign cells. */ struct spart *sparts_foreign; size_t nr_sparts_foreign, size_sparts_foreign; + /*! Buffers for b-parts that we will receive from foreign cells. */ + struct bpart *bparts_foreign; + size_t nr_bparts_foreign, size_bparts_foreign; + #endif };