diff --git a/src/Makefile.am b/src/Makefile.am index 480953c6aad3857d3ca8c25d61274f71cd973931..665aa4b24c94162fb8f772edd346f3c95a1d7ddb 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -44,7 +44,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ common_io.h single_io.h multipole.h map.h tools.h partition.h partition_fixed_costs.h \ clocks.h parser.h physical_constants.h physical_constants_cgs.h potential.h version.h \ hydro_properties.h riemann.h threadpool.h cooling_io.h cooling.h cooling_struct.h \ - statistics.h memswap.h cache.h runner_doiact_vec.h profiler.h entropy_floor.h \ + statistics.h memswap.h cache.h runner_doiact_hydro_vec.h profiler.h entropy_floor.h \ dump.h logger.h active.h timeline.h xmf.h gravity_properties.h gravity_derivatives.h \ gravity_softened_derivatives.h vector_power.h collectgroup.h hydro_space.h sort_part.h \ chemistry.h chemistry_io.h chemistry_struct.h cosmology.h restart.h space_getsid.h utilities.h \ @@ -69,13 +69,18 @@ EAGLE_FEEDBACK_SOURCES += feedback/EAGLE/feedback.c endif # Common source files -AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c engine_maketasks.c \ - engine_marktasks.c engine_drift.c engine_unskip.c serial_io.c timers.c debug.c scheduler.c \ +AM_SOURCES = space.c runner_main.c runner_doiact_hydro.c runner_doiact_grav.c \ + runner_doiact_stars.c runner_doiact_black_holes.c runner_ghost.c runner_recv.c \ + runner_sort.c runner_drift.c runner_black_holes.c runner_time_integration.c \ + runner_doiact_hydro_vec.c runner_others.c\ + queue.c task.c cell.c engine.c engine_maketasks.c \ + engine_marktasks.c engine_drift.c engine_unskip.c engine_collect_end_of_step.c \ + engine_redistribute.c engine_fof.c serial_io.c timers.c debug.c scheduler.c \ proxy.c parallel_io.c units.c common_io.c single_io.c multipole.c version.c map.c \ kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \ physical_constants.c potential.c hydro_properties.c \ threadpool.c cooling.c 
star_formation.c \ - statistics.c runner_doiact_vec.c profiler.c dump.c logger.c \ + statistics.c profiler.c dump.c logger.c \ part_type.c xmf.c gravity_properties.c gravity.c \ collectgroup.c hydro_space.c equation_of_state.c \ chemistry.c cosmology.c restart.c mesh_gravity.c velociraptor_interface.c \ @@ -85,8 +90,10 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c engine_maketasks.c # Include files for distribution, not installation. nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ - gravity_iact.h kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h \ - runner_doiact_nosort.h runner_doiact_stars.h runner_doiact_black_holes.h units.h intrinsics.h minmax.h \ + gravity_iact.h kernel_long_gravity.h vector.h cache.h \ + runner_doiact_nosort.h runner_doiact_hydro.h runner_doiact_stars.h runner_doiact_black_holes.h runner_doiact_grav.h \ + runner_doiact_functions_hydro.h runner_doiact_functions_stars.h runner_doiact_functions_black_holes.h \ + units.h intrinsics.h minmax.h \ kick.h timestep.h drift.h adiabatic_index.h io_properties.h dimension.h part_type.h periodic.h memswap.h \ dump.h logger.h sign.h logger_io.h timestep_limiter.h hashmap.h \ gravity.h gravity_io.h gravity_cache.h \ diff --git a/src/engine.c b/src/engine.c index 61ba7051cdee0156292289b6cfd8a504ea668747..68fa1b1d949e189a13e0f03bcc0c0379e14bc203 100644 --- a/src/engine.c +++ b/src/engine.c @@ -67,7 +67,6 @@ #include "logger.h" #include "logger_io.h" #include "map.h" -#include "memswap.h" #include "memuse.h" #include "minmax.h" #include "outputlist.h" @@ -128,22 +127,6 @@ int engine_current_step; extern int engine_max_parts_per_ghost; extern int engine_max_sparts_per_ghost; -/** - * @brief Data collected from the cells at the end of a time-step - */ -struct end_of_step_data { - - size_t updated, g_updated, s_updated, b_updated; - size_t inhibited, g_inhibited, 
s_inhibited, b_inhibited; - integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max; - integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max; - integertime_t ti_stars_end_min, ti_stars_end_max, ti_stars_beg_max; - integertime_t ti_black_holes_end_min, ti_black_holes_end_max, - ti_black_holes_beg_max; - struct engine *e; - struct star_formation_history sfh; -}; - /** * @brief Link a density/force task to a cell. * @@ -175,1007 +158,6 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) { res->next = atomic_swap(l, res); } -#ifdef WITH_MPI -/** - * Do the exchange of one type of particles with all the other nodes. - * - * @param label a label for the memory allocations of this particle type. - * @param counts 2D array with the counts of particles to exchange with - * each other node. - * @param parts the particle data to exchange - * @param new_nr_parts the number of particles this node will have after all - * exchanges have completed. - * @param sizeofparts sizeof the particle struct. - * @param alignsize the memory alignment required for this particle type. - * @param mpi_type the MPI_Datatype for these particles. - * @param nr_nodes the number of nodes to exchange with. - * @param nodeID the id of this node. - * - * @result new particle data constructed from all the exchanges with the - * given alignment. 
- */ -static void *engine_do_redistribute(const char *label, int *counts, char *parts, - size_t new_nr_parts, size_t sizeofparts, - size_t alignsize, MPI_Datatype mpi_type, - int nr_nodes, int nodeID) { - - /* Allocate a new particle array with some extra margin */ - char *parts_new = NULL; - if (swift_memalign( - label, (void **)&parts_new, alignsize, - sizeofparts * new_nr_parts * engine_redistribute_alloc_margin) != 0) - error("Failed to allocate new particle data."); - - /* Prepare MPI requests for the asynchronous communications */ - MPI_Request *reqs; - if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * nr_nodes)) == - NULL) - error("Failed to allocate MPI request list."); - - /* Only send and receive only "chunk" particles per request. So we need to - * loop as many times as necessary here. Make 2Gb/sizeofparts so we only - * send 2Gb packets. */ - const int chunk = INT_MAX / sizeofparts; - int sent = 0; - int recvd = 0; - - int activenodes = 1; - while (activenodes) { - - for (int k = 0; k < 2 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL; - - /* Emit the sends and recvs for the data. */ - size_t offset_send = sent; - size_t offset_recv = recvd; - activenodes = 0; - - for (int k = 0; k < nr_nodes; k++) { - - /* Indices in the count arrays of the node of interest */ - const int ind_send = nodeID * nr_nodes + k; - const int ind_recv = k * nr_nodes + nodeID; - - /* Are we sending any data this loop? */ - int sending = counts[ind_send] - sent; - if (sending > 0) { - activenodes++; - if (sending > chunk) sending = chunk; - - /* If the send and receive is local then just copy. */ - if (k == nodeID) { - int receiving = counts[ind_recv] - recvd; - if (receiving > chunk) receiving = chunk; - memcpy(&parts_new[offset_recv * sizeofparts], - &parts[offset_send * sizeofparts], sizeofparts * receiving); - } else { - /* Otherwise send it. 
*/ - int res = - MPI_Isend(&parts[offset_send * sizeofparts], sending, mpi_type, k, - ind_send, MPI_COMM_WORLD, &reqs[2 * k + 0]); - if (res != MPI_SUCCESS) - mpi_error(res, "Failed to isend parts to node %i.", k); - } - } - - /* If we're sending to this node, then move past it to next. */ - if (counts[ind_send] > 0) offset_send += counts[ind_send]; - - /* Are we receiving any data from this node? Note already done if coming - * from this node. */ - if (k != nodeID) { - int receiving = counts[ind_recv] - recvd; - if (receiving > 0) { - activenodes++; - if (receiving > chunk) receiving = chunk; - int res = MPI_Irecv(&parts_new[offset_recv * sizeofparts], receiving, - mpi_type, k, ind_recv, MPI_COMM_WORLD, - &reqs[2 * k + 1]); - if (res != MPI_SUCCESS) - mpi_error(res, "Failed to emit irecv of parts from node %i.", k); - } - } - - /* If we're receiving from this node, then move past it to next. */ - if (counts[ind_recv] > 0) offset_recv += counts[ind_recv]; - } - - /* Wait for all the sends and recvs to tumble in. */ - MPI_Status stats[2 * nr_nodes]; - int res; - if ((res = MPI_Waitall(2 * nr_nodes, reqs, stats)) != MPI_SUCCESS) { - for (int k = 0; k < 2 * nr_nodes; k++) { - char buff[MPI_MAX_ERROR_STRING]; - MPI_Error_string(stats[k].MPI_ERROR, buff, &res); - message("request from source %i, tag %i has error '%s'.", - stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff); - } - error("Failed during waitall for part data."); - } - - /* Move to next chunks. */ - sent += chunk; - recvd += chunk; - } - - /* Free temps. */ - free(reqs); - - /* And return new memory. */ - return parts_new; -} -#endif - -#ifdef WITH_MPI /* redist_mapper */ - -/* Support for engine_redistribute threadpool dest mappers. */ -struct redist_mapper_data { - int *counts; - int *dest; - int nodeID; - int nr_nodes; - struct cell *cells; - struct space *s; - void *base; -}; - -/* Generic function for accumulating counts for TYPE parts. 
Note - * we use a local counts array to avoid the atomic_add in the parts - * loop. */ -#define ENGINE_REDISTRIBUTE_DEST_MAPPER(TYPE) \ - engine_redistribute_dest_mapper_##TYPE(void *map_data, int num_elements, \ - void *extra_data) { \ - struct TYPE *parts = (struct TYPE *)map_data; \ - struct redist_mapper_data *mydata = \ - (struct redist_mapper_data *)extra_data; \ - struct space *s = mydata->s; \ - int *dest = \ - mydata->dest + (ptrdiff_t)(parts - (struct TYPE *)mydata->base); \ - int *lcounts = NULL; \ - if ((lcounts = (int *)calloc( \ - sizeof(int), mydata->nr_nodes * mydata->nr_nodes)) == NULL) \ - error("Failed to allocate counts thread-specific buffer"); \ - for (int k = 0; k < num_elements; k++) { \ - for (int j = 0; j < 3; j++) { \ - if (parts[k].x[j] < 0.0) \ - parts[k].x[j] += s->dim[j]; \ - else if (parts[k].x[j] >= s->dim[j]) \ - parts[k].x[j] -= s->dim[j]; \ - } \ - const int cid = cell_getid(s->cdim, parts[k].x[0] * s->iwidth[0], \ - parts[k].x[1] * s->iwidth[1], \ - parts[k].x[2] * s->iwidth[2]); \ - dest[k] = s->cells_top[cid].nodeID; \ - size_t ind = mydata->nodeID * mydata->nr_nodes + dest[k]; \ - lcounts[ind] += 1; \ - } \ - for (int k = 0; k < (mydata->nr_nodes * mydata->nr_nodes); k++) \ - atomic_add(&mydata->counts[k], lcounts[k]); \ - free(lcounts); \ - } - -/** - * @brief Accumulate the counts of particles per cell. - * Threadpool helper for accumulating the counts of particles per cell. - * - * part version. - */ -static void ENGINE_REDISTRIBUTE_DEST_MAPPER(part); - -/** - * @brief Accumulate the counts of star particles per cell. - * Threadpool helper for accumulating the counts of particles per cell. - * - * spart version. - */ -static void ENGINE_REDISTRIBUTE_DEST_MAPPER(spart); - -/** - * @brief Accumulate the counts of gravity particles per cell. - * Threadpool helper for accumulating the counts of particles per cell. - * - * gpart version. 
- */ -static void ENGINE_REDISTRIBUTE_DEST_MAPPER(gpart); - -/** - * @brief Accumulate the counts of black holes particles per cell. - * Threadpool helper for accumulating the counts of particles per cell. - * - * bpart version. - */ -static void ENGINE_REDISTRIBUTE_DEST_MAPPER(bpart); - -#endif /* redist_mapper_data */ - -#ifdef WITH_MPI /* savelink_mapper_data */ - -/* Support for saving the linkage between gparts and parts/sparts. */ -struct savelink_mapper_data { - int nr_nodes; - int *counts; - void *parts; - int nodeID; -}; - -/** - * @brief Save the offset of each gravity partner of a part or spart. - * - * The offset is from the start of the sorted particles to be sent to a node. - * This is possible as parts without gravity partners have a positive id. - * These offsets are used to restore the pointers on the receiving node. - * - * CHECKS should be eliminated as dead code when optimizing. - */ -#define ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(TYPE, CHECKS) \ - engine_redistribute_savelink_mapper_##TYPE(void *map_data, int num_elements, \ - void *extra_data) { \ - int *nodes = (int *)map_data; \ - struct savelink_mapper_data *mydata = \ - (struct savelink_mapper_data *)extra_data; \ - int nodeID = mydata->nodeID; \ - int nr_nodes = mydata->nr_nodes; \ - int *counts = mydata->counts; \ - struct TYPE *parts = (struct TYPE *)mydata->parts; \ - \ - for (int j = 0; j < num_elements; j++) { \ - int node = nodes[j]; \ - int count = 0; \ - size_t offset = 0; \ - for (int i = 0; i < node; i++) offset += counts[nodeID * nr_nodes + i]; \ - \ - for (int k = 0; k < counts[nodeID * nr_nodes + node]; k++) { \ - if (parts[k + offset].gpart != NULL) { \ - if (CHECKS) \ - if (parts[k + offset].gpart->id_or_neg_offset > 0) \ - error("Trying to link a partnerless " #TYPE "!"); \ - parts[k + offset].gpart->id_or_neg_offset = -count; \ - count++; \ - } \ - } \ - } \ - } - -/** - * @brief Save position of part-gpart links. 
- * Threadpool helper for accumulating the counts of particles per cell. - */ -#ifdef SWIFT_DEBUG_CHECKS -static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 1); -#else -static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 0); -#endif - -/** - * @brief Save position of spart-gpart links. - * Threadpool helper for accumulating the counts of particles per cell. - */ -#ifdef SWIFT_DEBUG_CHECKS -static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 1); -#else -static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 0); -#endif - -/** - * @brief Save position of bpart-gpart links. - * Threadpool helper for accumulating the counts of particles per cell. - */ -#ifdef SWIFT_DEBUG_CHECKS -static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(bpart, 1); -#else -static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(bpart, 0); -#endif - -#endif /* savelink_mapper_data */ - -#ifdef WITH_MPI /* relink_mapper_data */ - -/* Support for relinking parts, gparts, sparts and bparts after moving between - * nodes. */ -struct relink_mapper_data { - int nodeID; - int nr_nodes; - int *counts; - int *s_counts; - int *g_counts; - int *b_counts; - struct space *s; -}; - -/** - * @brief Restore the part/gpart and spart/gpart links for a list of nodes. - * - * @param map_data address of nodes to process. - * @param num_elements the number nodes to process. - * @param extra_data additional data defining the context (a - * relink_mapper_data). 
- */ -static void engine_redistribute_relink_mapper(void *map_data, int num_elements, - void *extra_data) { - - int *nodes = (int *)map_data; - struct relink_mapper_data *mydata = (struct relink_mapper_data *)extra_data; - - int nodeID = mydata->nodeID; - int nr_nodes = mydata->nr_nodes; - int *counts = mydata->counts; - int *g_counts = mydata->g_counts; - int *s_counts = mydata->s_counts; - int *b_counts = mydata->b_counts; - struct space *s = mydata->s; - - for (int i = 0; i < num_elements; i++) { - - int node = nodes[i]; - - /* Get offsets to correct parts of the counts arrays for this node. */ - size_t offset_parts = 0; - size_t offset_gparts = 0; - size_t offset_sparts = 0; - size_t offset_bparts = 0; - for (int n = 0; n < node; n++) { - int ind_recv = n * nr_nodes + nodeID; - offset_parts += counts[ind_recv]; - offset_gparts += g_counts[ind_recv]; - offset_sparts += s_counts[ind_recv]; - offset_bparts += b_counts[ind_recv]; - } - - /* Number of gparts sent from this node. */ - int ind_recv = node * nr_nodes + nodeID; - const size_t count_gparts = g_counts[ind_recv]; - - /* Loop over the gparts received from this node */ - for (size_t k = offset_gparts; k < offset_gparts + count_gparts; k++) { - - /* Does this gpart have a gas partner ? */ - if (s->gparts[k].type == swift_type_gas) { - - const ptrdiff_t partner_index = - offset_parts - s->gparts[k].id_or_neg_offset; - - /* Re-link */ - s->gparts[k].id_or_neg_offset = -partner_index; - s->parts[partner_index].gpart = &s->gparts[k]; - } - - /* Does this gpart have a star partner ? */ - else if (s->gparts[k].type == swift_type_stars) { - - const ptrdiff_t partner_index = - offset_sparts - s->gparts[k].id_or_neg_offset; - - /* Re-link */ - s->gparts[k].id_or_neg_offset = -partner_index; - s->sparts[partner_index].gpart = &s->gparts[k]; - } - - /* Does this gpart have a black hole partner ? 
*/ - else if (s->gparts[k].type == swift_type_black_hole) { - - const ptrdiff_t partner_index = - offset_bparts - s->gparts[k].id_or_neg_offset; - - /* Re-link */ - s->gparts[k].id_or_neg_offset = -partner_index; - s->bparts[partner_index].gpart = &s->gparts[k]; - } - } - } -} - -#endif /* relink_mapper_data */ - -/** - * @brief Redistribute the particles amongst the nodes according - * to their cell's node IDs. - * - * The strategy here is as follows: - * 1) Each node counts the number of particles it has to send to each other - * node. - * 2) The number of particles of each type is then exchanged. - * 3) The particles to send are placed in a temporary buffer in which the - * part-gpart links are preserved. - * 4) Each node allocates enough space for the new particles. - * 5) (Asynchronous) communications are issued to transfer the data. - * - * - * @param e The #engine. - */ -void engine_redistribute(struct engine *e) { - -#ifdef WITH_MPI - - const int nr_nodes = e->nr_nodes; - const int nodeID = e->nodeID; - struct space *s = e->s; - struct cell *cells = s->cells_top; - const int nr_cells = s->nr_cells; - struct xpart *xparts = s->xparts; - struct part *parts = s->parts; - struct gpart *gparts = s->gparts; - struct spart *sparts = s->sparts; - struct bpart *bparts = s->bparts; - ticks tic = getticks(); - - size_t nr_parts = s->nr_parts; - size_t nr_gparts = s->nr_gparts; - size_t nr_sparts = s->nr_sparts; - size_t nr_bparts = s->nr_bparts; - - /* Start by moving inhibited particles to the end of the arrays */ - for (size_t k = 0; k < nr_parts; /* void */) { - if (parts[k].time_bin == time_bin_inhibited || - parts[k].time_bin == time_bin_not_created) { - nr_parts -= 1; - - /* Swap the particle */ - memswap(&parts[k], &parts[nr_parts], sizeof(struct part)); - - /* Swap the xpart */ - memswap(&xparts[k], &xparts[nr_parts], sizeof(struct xpart)); - - /* Swap the link with the gpart */ - if (parts[k].gpart != NULL) { - parts[k].gpart->id_or_neg_offset = -k; - } - if 
(parts[nr_parts].gpart != NULL) { - parts[nr_parts].gpart->id_or_neg_offset = -nr_parts; - } - } else { - k++; - } - } - - /* Now move inhibited star particles to the end of the arrays */ - for (size_t k = 0; k < nr_sparts; /* void */) { - if (sparts[k].time_bin == time_bin_inhibited || - sparts[k].time_bin == time_bin_not_created) { - nr_sparts -= 1; - - /* Swap the particle */ - memswap(&s->sparts[k], &s->sparts[nr_sparts], sizeof(struct spart)); - - /* Swap the link with the gpart */ - if (s->sparts[k].gpart != NULL) { - s->sparts[k].gpart->id_or_neg_offset = -k; - } - if (s->sparts[nr_sparts].gpart != NULL) { - s->sparts[nr_sparts].gpart->id_or_neg_offset = -nr_sparts; - } - } else { - k++; - } - } - - /* Now move inhibited black hole particles to the end of the arrays */ - for (size_t k = 0; k < nr_bparts; /* void */) { - if (bparts[k].time_bin == time_bin_inhibited || - bparts[k].time_bin == time_bin_not_created) { - nr_bparts -= 1; - - /* Swap the particle */ - memswap(&s->bparts[k], &s->bparts[nr_bparts], sizeof(struct bpart)); - - /* Swap the link with the gpart */ - if (s->bparts[k].gpart != NULL) { - s->bparts[k].gpart->id_or_neg_offset = -k; - } - if (s->bparts[nr_bparts].gpart != NULL) { - s->bparts[nr_bparts].gpart->id_or_neg_offset = -nr_bparts; - } - } else { - k++; - } - } - - /* Finally do the same with the gravity particles */ - for (size_t k = 0; k < nr_gparts; /* void */) { - if (gparts[k].time_bin == time_bin_inhibited || - gparts[k].time_bin == time_bin_not_created) { - nr_gparts -= 1; - - /* Swap the particle */ - memswap(&s->gparts[k], &s->gparts[nr_gparts], sizeof(struct gpart)); - - /* Swap the link with part/spart */ - if (s->gparts[k].type == swift_type_gas) { - s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; - } else if (s->gparts[k].type == swift_type_stars) { - s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; - } else if (s->gparts[k].type == swift_type_black_hole) { - 
s->bparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; - } - - if (s->gparts[nr_gparts].type == swift_type_gas) { - s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = - &s->gparts[nr_gparts]; - } else if (s->gparts[nr_gparts].type == swift_type_stars) { - s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = - &s->gparts[nr_gparts]; - } else if (s->gparts[nr_gparts].type == swift_type_black_hole) { - s->bparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = - &s->gparts[nr_gparts]; - } - } else { - k++; - } - } - - /* Now we are ready to deal with real particles and can start the exchange. */ - - /* Allocate temporary arrays to store the counts of particles to be sent - * and the destination of each particle */ - int *counts; - if ((counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL) - error("Failed to allocate counts temporary buffer."); - - int *dest; - if ((dest = (int *)swift_malloc("dest", sizeof(int) * nr_parts)) == NULL) - error("Failed to allocate dest temporary buffer."); - - /* Simple index of node IDs, used for mappers over nodes. */ - int *nodes = NULL; - if ((nodes = (int *)malloc(sizeof(int) * nr_nodes)) == NULL) - error("Failed to allocate nodes temporary buffer."); - for (int k = 0; k < nr_nodes; k++) nodes[k] = k; - - /* Get destination of each particle */ - struct redist_mapper_data redist_data; - redist_data.s = s; - redist_data.nodeID = nodeID; - redist_data.nr_nodes = nr_nodes; - - redist_data.counts = counts; - redist_data.dest = dest; - redist_data.base = (void *)parts; - - threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_part, parts, - nr_parts, sizeof(struct part), 0, &redist_data); - - /* Sort the particles according to their cell index. */ - if (nr_parts > 0) - space_parts_sort(s->parts, s->xparts, dest, &counts[nodeID * nr_nodes], - nr_nodes, 0); - -#ifdef SWIFT_DEBUG_CHECKS - /* Verify that the part have been sorted correctly. 
*/ - for (size_t k = 0; k < nr_parts; k++) { - const struct part *p = &s->parts[k]; - - if (p->time_bin == time_bin_inhibited) - error("Inhibited particle found after sorting!"); - - if (p->time_bin == time_bin_not_created) - error("Inhibited particle found after sorting!"); - - /* New cell index */ - const int new_cid = - cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1], - p->x[2] * s->iwidth[2]); - - /* New cell of this part */ - const struct cell *c = &s->cells_top[new_cid]; - const int new_node = c->nodeID; - - if (dest[k] != new_node) - error("part's new node index not matching sorted index."); - - if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->width[0] || - p->x[1] < c->loc[1] || p->x[1] > c->loc[1] + c->width[1] || - p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2]) - error("part not sorted into the right top-level cell!"); - } -#endif - - /* We will need to re-link the gpart partners of parts, so save their - * relative positions in the sent lists. 
*/ - if (nr_parts > 0 && nr_gparts > 0) { - - struct savelink_mapper_data savelink_data; - savelink_data.nr_nodes = nr_nodes; - savelink_data.counts = counts; - savelink_data.parts = (void *)parts; - savelink_data.nodeID = nodeID; - threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_part, - nodes, nr_nodes, sizeof(int), 0, &savelink_data); - } - swift_free("dest", dest); - - /* Get destination of each s-particle */ - int *s_counts; - if ((s_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL) - error("Failed to allocate s_counts temporary buffer."); - - int *s_dest; - if ((s_dest = (int *)swift_malloc("s_dest", sizeof(int) * nr_sparts)) == NULL) - error("Failed to allocate s_dest temporary buffer."); - - redist_data.counts = s_counts; - redist_data.dest = s_dest; - redist_data.base = (void *)sparts; - - threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_spart, sparts, - nr_sparts, sizeof(struct spart), 0, &redist_data); - - /* Sort the particles according to their cell index. */ - if (nr_sparts > 0) - space_sparts_sort(s->sparts, s_dest, &s_counts[nodeID * nr_nodes], nr_nodes, - 0); - -#ifdef SWIFT_DEBUG_CHECKS - /* Verify that the spart have been sorted correctly. 
*/ - for (size_t k = 0; k < nr_sparts; k++) { - const struct spart *sp = &s->sparts[k]; - - if (sp->time_bin == time_bin_inhibited) - error("Inhibited particle found after sorting!"); - - if (sp->time_bin == time_bin_not_created) - error("Inhibited particle found after sorting!"); - - /* New cell index */ - const int new_cid = - cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1], - sp->x[2] * s->iwidth[2]); - - /* New cell of this spart */ - const struct cell *c = &s->cells_top[new_cid]; - const int new_node = c->nodeID; - - if (s_dest[k] != new_node) - error("spart's new node index not matching sorted index."); - - if (sp->x[0] < c->loc[0] || sp->x[0] > c->loc[0] + c->width[0] || - sp->x[1] < c->loc[1] || sp->x[1] > c->loc[1] + c->width[1] || - sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2]) - error("spart not sorted into the right top-level cell!"); - } -#endif - - /* We need to re-link the gpart partners of sparts. */ - if (nr_sparts > 0) { - - struct savelink_mapper_data savelink_data; - savelink_data.nr_nodes = nr_nodes; - savelink_data.counts = s_counts; - savelink_data.parts = (void *)sparts; - savelink_data.nodeID = nodeID; - threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_spart, - nodes, nr_nodes, sizeof(int), 0, &savelink_data); - } - swift_free("s_dest", s_dest); - - /* Get destination of each b-particle */ - int *b_counts; - if ((b_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL) - error("Failed to allocate b_counts temporary buffer."); - - int *b_dest; - if ((b_dest = (int *)swift_malloc("b_dest", sizeof(int) * nr_bparts)) == NULL) - error("Failed to allocate b_dest temporary buffer."); - - redist_data.counts = b_counts; - redist_data.dest = b_dest; - redist_data.base = (void *)bparts; - - threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_bpart, bparts, - nr_bparts, sizeof(struct bpart), 0, &redist_data); - - /* Sort the particles according to their cell index. 
*/ - if (nr_bparts > 0) - space_bparts_sort(s->bparts, b_dest, &b_counts[nodeID * nr_nodes], nr_nodes, - 0); - -#ifdef SWIFT_DEBUG_CHECKS - /* Verify that the bpart have been sorted correctly. */ - for (size_t k = 0; k < nr_bparts; k++) { - const struct bpart *bp = &s->bparts[k]; - - if (bp->time_bin == time_bin_inhibited) - error("Inhibited particle found after sorting!"); - - if (bp->time_bin == time_bin_not_created) - error("Inhibited particle found after sorting!"); - - /* New cell index */ - const int new_cid = - cell_getid(s->cdim, bp->x[0] * s->iwidth[0], bp->x[1] * s->iwidth[1], - bp->x[2] * s->iwidth[2]); - - /* New cell of this bpart */ - const struct cell *c = &s->cells_top[new_cid]; - const int new_node = c->nodeID; - - if (b_dest[k] != new_node) - error("bpart's new node index not matching sorted index."); - - if (bp->x[0] < c->loc[0] || bp->x[0] > c->loc[0] + c->width[0] || - bp->x[1] < c->loc[1] || bp->x[1] > c->loc[1] + c->width[1] || - bp->x[2] < c->loc[2] || bp->x[2] > c->loc[2] + c->width[2]) - error("bpart not sorted into the right top-level cell!"); - } -#endif - - /* We need to re-link the gpart partners of bparts. 
*/ - if (nr_bparts > 0) { - - struct savelink_mapper_data savelink_data; - savelink_data.nr_nodes = nr_nodes; - savelink_data.counts = b_counts; - savelink_data.parts = (void *)bparts; - savelink_data.nodeID = nodeID; - threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_bpart, - nodes, nr_nodes, sizeof(int), 0, &savelink_data); - } - swift_free("b_dest", b_dest); - - /* Get destination of each g-particle */ - int *g_counts; - if ((g_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL) - error("Failed to allocate g_gcount temporary buffer."); - - int *g_dest; - if ((g_dest = (int *)swift_malloc("g_dest", sizeof(int) * nr_gparts)) == NULL) - error("Failed to allocate g_dest temporary buffer."); - - redist_data.counts = g_counts; - redist_data.dest = g_dest; - redist_data.base = (void *)gparts; - - threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_gpart, gparts, - nr_gparts, sizeof(struct gpart), 0, &redist_data); - - /* Sort the gparticles according to their cell index. */ - if (nr_gparts > 0) - space_gparts_sort(s->gparts, s->parts, s->sparts, s->bparts, g_dest, - &g_counts[nodeID * nr_nodes], nr_nodes); - -#ifdef SWIFT_DEBUG_CHECKS - /* Verify that the gpart have been sorted correctly. 
*/ - for (size_t k = 0; k < nr_gparts; k++) { - const struct gpart *gp = &s->gparts[k]; - - if (gp->time_bin == time_bin_inhibited) - error("Inhibited particle found after sorting!"); - - if (gp->time_bin == time_bin_not_created) - error("Inhibited particle found after sorting!"); - - /* New cell index */ - const int new_cid = - cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1], - gp->x[2] * s->iwidth[2]); - - /* New cell of this gpart */ - const struct cell *c = &s->cells_top[new_cid]; - const int new_node = c->nodeID; - - if (g_dest[k] != new_node) - error("gpart's new node index not matching sorted index (%d != %d).", - g_dest[k], new_node); - - if (gp->x[0] < c->loc[0] || gp->x[0] > c->loc[0] + c->width[0] || - gp->x[1] < c->loc[1] || gp->x[1] > c->loc[1] + c->width[1] || - gp->x[2] < c->loc[2] || gp->x[2] > c->loc[2] + c->width[2]) - error("gpart not sorted into the right top-level cell!"); - } -#endif - - swift_free("g_dest", g_dest); - - /* Get all the counts from all the nodes. */ - if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM, - MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to allreduce particle transfer counts."); - - /* Get all the g_counts from all the nodes. */ - if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT, - MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to allreduce gparticle transfer counts."); - - /* Get all the s_counts from all the nodes. */ - if (MPI_Allreduce(MPI_IN_PLACE, s_counts, nr_nodes * nr_nodes, MPI_INT, - MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to allreduce sparticle transfer counts."); - - /* Get all the b_counts from all the nodes. */ - if (MPI_Allreduce(MPI_IN_PLACE, b_counts, nr_nodes * nr_nodes, MPI_INT, - MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to allreduce bparticle transfer counts."); - - /* Report how many particles will be moved. 
*/ - if (e->verbose) { - if (e->nodeID == 0) { - size_t total = 0, g_total = 0, s_total = 0, b_total = 0; - size_t unmoved = 0, g_unmoved = 0, s_unmoved = 0, b_unmoved = 0; - for (int p = 0, r = 0; p < nr_nodes; p++) { - for (int n = 0; n < nr_nodes; n++) { - total += counts[r]; - g_total += g_counts[r]; - s_total += s_counts[r]; - b_total += b_counts[r]; - if (p == n) { - unmoved += counts[r]; - g_unmoved += g_counts[r]; - s_unmoved += s_counts[r]; - b_unmoved += b_counts[r]; - } - r++; - } - } - if (total > 0) - message("%zu of %zu (%.2f%%) of particles moved", total - unmoved, - total, 100.0 * (double)(total - unmoved) / (double)total); - if (g_total > 0) - message("%zu of %zu (%.2f%%) of g-particles moved", g_total - g_unmoved, - g_total, - 100.0 * (double)(g_total - g_unmoved) / (double)g_total); - if (s_total > 0) - message("%zu of %zu (%.2f%%) of s-particles moved", s_total - s_unmoved, - s_total, - 100.0 * (double)(s_total - s_unmoved) / (double)s_total); - if (b_total > 0) - message("%ld of %ld (%.2f%%) of b-particles moved", b_total - b_unmoved, - b_total, - 100.0 * (double)(b_total - b_unmoved) / (double)b_total); - } - } - - /* Now each node knows how many parts, sparts, bparts, and gparts will be - * transferred to every other node. Get the new numbers of particles for this - * node. */ - size_t nr_parts_new = 0, nr_gparts_new = 0, nr_sparts_new = 0, - nr_bparts_new = 0; - for (int k = 0; k < nr_nodes; k++) - nr_parts_new += counts[k * nr_nodes + nodeID]; - for (int k = 0; k < nr_nodes; k++) - nr_gparts_new += g_counts[k * nr_nodes + nodeID]; - for (int k = 0; k < nr_nodes; k++) - nr_sparts_new += s_counts[k * nr_nodes + nodeID]; - for (int k = 0; k < nr_nodes; k++) - nr_bparts_new += b_counts[k * nr_nodes + nodeID]; - - /* Now exchange the particles, type by type to keep the memory required - * under control. */ - - /* SPH particles. 
*/ - void *new_parts = engine_do_redistribute( - "parts", counts, (char *)s->parts, nr_parts_new, sizeof(struct part), - part_align, part_mpi_type, nr_nodes, nodeID); - swift_free("parts", s->parts); - s->parts = (struct part *)new_parts; - s->nr_parts = nr_parts_new; - s->size_parts = engine_redistribute_alloc_margin * nr_parts_new; - - /* Extra SPH particle properties. */ - new_parts = engine_do_redistribute( - "xparts", counts, (char *)s->xparts, nr_parts_new, sizeof(struct xpart), - xpart_align, xpart_mpi_type, nr_nodes, nodeID); - swift_free("xparts", s->xparts); - s->xparts = (struct xpart *)new_parts; - - /* Gravity particles. */ - new_parts = engine_do_redistribute( - "gparts", g_counts, (char *)s->gparts, nr_gparts_new, - sizeof(struct gpart), gpart_align, gpart_mpi_type, nr_nodes, nodeID); - swift_free("gparts", s->gparts); - s->gparts = (struct gpart *)new_parts; - s->nr_gparts = nr_gparts_new; - s->size_gparts = engine_redistribute_alloc_margin * nr_gparts_new; - - /* Star particles. */ - new_parts = engine_do_redistribute( - "sparts", s_counts, (char *)s->sparts, nr_sparts_new, - sizeof(struct spart), spart_align, spart_mpi_type, nr_nodes, nodeID); - swift_free("sparts", s->sparts); - s->sparts = (struct spart *)new_parts; - s->nr_sparts = nr_sparts_new; - s->size_sparts = engine_redistribute_alloc_margin * nr_sparts_new; - - /* Black holes particles. */ - new_parts = engine_do_redistribute( - "bparts", b_counts, (char *)s->bparts, nr_bparts_new, - sizeof(struct bpart), bpart_align, bpart_mpi_type, nr_nodes, nodeID); - swift_free("bparts", s->bparts); - s->bparts = (struct bpart *)new_parts; - s->nr_bparts = nr_bparts_new; - s->size_bparts = engine_redistribute_alloc_margin * nr_bparts_new; - - /* All particles have now arrived. Time for some final operations on the - stuff we just received */ - - /* Restore the part<->gpart and spart<->gpart links. - * Generate indices and counts for threadpool tasks. Note we process a node - * at a time. 
*/ - struct relink_mapper_data relink_data; - relink_data.s = s; - relink_data.counts = counts; - relink_data.g_counts = g_counts; - relink_data.s_counts = s_counts; - relink_data.b_counts = b_counts; - relink_data.nodeID = nodeID; - relink_data.nr_nodes = nr_nodes; - - threadpool_map(&e->threadpool, engine_redistribute_relink_mapper, nodes, - nr_nodes, sizeof(int), 1, &relink_data); - free(nodes); - - /* Clean up the counts now we are done. */ - free(counts); - free(g_counts); - free(s_counts); - free(b_counts); - -#ifdef SWIFT_DEBUG_CHECKS - /* Verify that all parts are in the right place. */ - for (size_t k = 0; k < nr_parts_new; k++) { - const int cid = cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0], - s->parts[k].x[1] * s->iwidth[1], - s->parts[k].x[2] * s->iwidth[2]); - if (cells[cid].nodeID != nodeID) - error("Received particle (%zu) that does not belong here (nodeID=%i).", k, - cells[cid].nodeID); - } - for (size_t k = 0; k < nr_gparts_new; k++) { - const int cid = cell_getid(s->cdim, s->gparts[k].x[0] * s->iwidth[0], - s->gparts[k].x[1] * s->iwidth[1], - s->gparts[k].x[2] * s->iwidth[2]); - if (cells[cid].nodeID != nodeID) - error("Received g-particle (%zu) that does not belong here (nodeID=%i).", - k, cells[cid].nodeID); - } - for (size_t k = 0; k < nr_sparts_new; k++) { - const int cid = cell_getid(s->cdim, s->sparts[k].x[0] * s->iwidth[0], - s->sparts[k].x[1] * s->iwidth[1], - s->sparts[k].x[2] * s->iwidth[2]); - if (cells[cid].nodeID != nodeID) - error("Received s-particle (%zu) that does not belong here (nodeID=%i).", - k, cells[cid].nodeID); - } - for (size_t k = 0; k < nr_bparts_new; k++) { - const int cid = cell_getid(s->cdim, s->bparts[k].x[0] * s->iwidth[0], - s->bparts[k].x[1] * s->iwidth[1], - s->bparts[k].x[2] * s->iwidth[2]); - if (cells[cid].nodeID != nodeID) - error("Received b-particle (%zu) that does not belong here (nodeID=%i).", - k, cells[cid].nodeID); - } - - /* Verify that the links are correct */ - part_verify_links(s->parts, 
s->gparts, s->sparts, s->bparts, nr_parts_new, - nr_gparts_new, nr_sparts_new, nr_bparts_new, e->verbose); - -#endif - - /* Be verbose about what just happened. */ - if (e->verbose) { - int my_cells = 0; - for (int k = 0; k < nr_cells; k++) - if (cells[k].nodeID == nodeID) my_cells += 1; - message( - "node %i now has %zu parts, %zu sparts, %zu bparts and %zu gparts in " - "%i cells.", - nodeID, nr_parts_new, nr_sparts_new, nr_bparts_new, nr_gparts_new, - my_cells); - } - - /* Flag that we do not have any extra particles any more */ - s->nr_extra_parts = 0; - s->nr_extra_gparts = 0; - s->nr_extra_sparts = 0; - s->nr_extra_bparts = 0; - - /* Flag that a redistribute has taken place */ - e->step_props |= engine_step_prop_redistribute; - - if (e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - /** * @brief Repartition the cells amongst the nodes. * @@ -2687,544 +1669,6 @@ void engine_barrier(struct engine *e) { swift_barrier_wait(&e->run_barrier); } -/** - * @brief Recursive function gathering end-of-step data. - * - * We recurse until we encounter a timestep or time-step MPI recv task - * as the values will have been set at that level. We then bring these - * values upwards. - * - * @param c The #cell to recurse into. - * @param e The #engine. - */ -void engine_collect_end_of_step_recurse_hydro(struct cell *c, - const struct engine *e) { - - /* Skip super-cells (Their values are already set) */ - if (c->timestep != NULL) return; -#ifdef WITH_MPI - if (cell_get_recv(c, task_subtype_tend_part) != NULL) return; -#endif /* WITH_MPI */ - -#ifdef SWIFT_DEBUG_CHECKS - /* if (!c->split) error("Reached a leaf without finding a time-step task! - * c->depth=%d c->maxdepth=%d c->count=%d c->node=%d", */ - /* c->depth, c->maxdepth, c->hydro.count, c->nodeID); */ -#endif - - /* Counters for the different quantities. 
*/ - size_t updated = 0; - integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, - ti_hydro_beg_max = 0; - - /* Local Star formation history properties */ - struct star_formation_history sfh_updated; - - /* Initialize the star formation structs */ - star_formation_logger_init(&sfh_updated); - - /* Collect the values from the progeny. */ - for (int k = 0; k < 8; k++) { - struct cell *cp = c->progeny[k]; - if (cp != NULL && cp->hydro.count > 0) { - - /* Recurse */ - engine_collect_end_of_step_recurse_hydro(cp, e); - - /* And update */ - ti_hydro_end_min = min(ti_hydro_end_min, cp->hydro.ti_end_min); - ti_hydro_end_max = max(ti_hydro_end_max, cp->hydro.ti_end_max); - ti_hydro_beg_max = max(ti_hydro_beg_max, cp->hydro.ti_beg_max); - - updated += cp->hydro.updated; - - /* Check if the cell is inactive and in that case reorder the SFH */ - if (!cell_is_starting_hydro(cp, e)) { - star_formation_logger_log_inactive_cell(&cp->stars.sfh); - } - - /* Add the star formation history in this cell to sfh_updated */ - star_formation_logger_add(&sfh_updated, &cp->stars.sfh); - - /* Collected, so clear for next time. */ - cp->hydro.updated = 0; - } - } - - /* Store the collected values in the cell. */ - c->hydro.ti_end_min = ti_hydro_end_min; - c->hydro.ti_end_max = ti_hydro_end_max; - c->hydro.ti_beg_max = ti_hydro_beg_max; - c->hydro.updated = updated; - // c->hydro.inhibited = inhibited; - - /* Store the star formation history in the parent cell */ - star_formation_logger_add(&c->stars.sfh, &sfh_updated); -} - -/** - * @brief Recursive function gathering end-of-step data. - * - * We recurse until we encounter a timestep or time-step MPI recv task - * as the values will have been set at that level. We then bring these - * values upwards. - * - * @param c The #cell to recurse into. - * @param e The #engine. 
- */ -void engine_collect_end_of_step_recurse_grav(struct cell *c, - const struct engine *e) { - - /* Skip super-cells (Their values are already set) */ - if (c->timestep != NULL) return; -#ifdef WITH_MPI - if (cell_get_recv(c, task_subtype_tend_gpart) != NULL) return; -#endif /* WITH_MPI */ - -#ifdef SWIFT_DEBUG_CHECKS - // if (!c->split) error("Reached a leaf without finding a time-step - // task!"); -#endif - - /* Counters for the different quantities. */ - size_t updated = 0; - integertime_t ti_grav_end_min = max_nr_timesteps, ti_grav_end_max = 0, - ti_grav_beg_max = 0; - - /* Collect the values from the progeny. */ - for (int k = 0; k < 8; k++) { - struct cell *cp = c->progeny[k]; - if (cp != NULL && cp->grav.count > 0) { - - /* Recurse */ - engine_collect_end_of_step_recurse_grav(cp, e); - - /* And update */ - ti_grav_end_min = min(ti_grav_end_min, cp->grav.ti_end_min); - ti_grav_end_max = max(ti_grav_end_max, cp->grav.ti_end_max); - ti_grav_beg_max = max(ti_grav_beg_max, cp->grav.ti_beg_max); - - updated += cp->grav.updated; - - /* Collected, so clear for next time. */ - cp->grav.updated = 0; - } - } - - /* Store the collected values in the cell. */ - c->grav.ti_end_min = ti_grav_end_min; - c->grav.ti_end_max = ti_grav_end_max; - c->grav.ti_beg_max = ti_grav_beg_max; - c->grav.updated = updated; -} - -/** - * @brief Recursive function gathering end-of-step data. - * - * We recurse until we encounter a timestep or time-step MPI recv task - * as the values will have been set at that level. We then bring these - * values upwards. - * - * @param c The #cell to recurse into. - * @param e The #engine. 
- */ -void engine_collect_end_of_step_recurse_stars(struct cell *c, - const struct engine *e) { - - /* Skip super-cells (Their values are already set) */ - if (c->timestep != NULL) return; -#ifdef WITH_MPI - if (cell_get_recv(c, task_subtype_tend_spart) != NULL) return; -#endif /* WITH_MPI */ - -#ifdef SWIFT_DEBUG_CHECKS - // if (!c->split) error("Reached a leaf without finding a time-step task!"); -#endif - - /* Counters for the different quantities. */ - size_t updated = 0; - integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0, - ti_stars_beg_max = 0; - - /* Collect the values from the progeny. */ - for (int k = 0; k < 8; k++) { - struct cell *cp = c->progeny[k]; - if (cp != NULL && cp->stars.count > 0) { - - /* Recurse */ - engine_collect_end_of_step_recurse_stars(cp, e); - - /* And update */ - ti_stars_end_min = min(ti_stars_end_min, cp->stars.ti_end_min); - ti_stars_end_max = max(ti_stars_end_max, cp->stars.ti_end_max); - ti_stars_beg_max = max(ti_stars_beg_max, cp->stars.ti_beg_max); - - updated += cp->stars.updated; - - /* Collected, so clear for next time. */ - cp->stars.updated = 0; - } - } - - /* Store the collected values in the cell. */ - c->stars.ti_end_min = ti_stars_end_min; - c->stars.ti_end_max = ti_stars_end_max; - c->stars.ti_beg_max = ti_stars_beg_max; - c->stars.updated = updated; -} - -/** - * @brief Recursive function gathering end-of-step data. - * - * We recurse until we encounter a timestep or time-step MPI recv task - * as the values will have been set at that level. We then bring these - * values upwards. - * - * @param c The #cell to recurse into. - * @param e The #engine. 
- */ -void engine_collect_end_of_step_recurse_black_holes(struct cell *c, - const struct engine *e) { - - /* Skip super-cells (Their values are already set) */ - if (c->timestep != NULL) return; -#ifdef WITH_MPI - if (cell_get_recv(c, task_subtype_tend_bpart) != NULL) return; -#endif /* WITH_MPI */ - -#ifdef SWIFT_DEBUG_CHECKS - // if (!c->split) error("Reached a leaf without finding a time-step task!"); -#endif - - /* Counters for the different quantities. */ - size_t updated = 0; - integertime_t ti_black_holes_end_min = max_nr_timesteps, - ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0; - - /* Collect the values from the progeny. */ - for (int k = 0; k < 8; k++) { - struct cell *cp = c->progeny[k]; - if (cp != NULL && cp->black_holes.count > 0) { - - /* Recurse */ - engine_collect_end_of_step_recurse_black_holes(cp, e); - - /* And update */ - ti_black_holes_end_min = - min(ti_black_holes_end_min, cp->black_holes.ti_end_min); - ti_black_holes_end_max = - max(ti_black_holes_end_max, cp->black_holes.ti_end_max); - ti_black_holes_beg_max = - max(ti_black_holes_beg_max, cp->black_holes.ti_beg_max); - - updated += cp->black_holes.updated; - - /* Collected, so clear for next time. */ - cp->black_holes.updated = 0; - } - } - - /* Store the collected values in the cell. */ - c->black_holes.ti_end_min = ti_black_holes_end_min; - c->black_holes.ti_end_max = ti_black_holes_end_max; - c->black_holes.ti_beg_max = ti_black_holes_beg_max; - c->black_holes.updated = updated; -} - -/** - * @brief Mapping function to collect the data from the end of the step - * - * This function will call a recursive function on all the top-level cells - * to collect the information we are after. - * - * @param map_data The list of cells with tasks on this node. - * @param num_elements The number of elements in the list this thread will work - * on. - * @param extra_data The #engine. 
- */ -void engine_collect_end_of_step_mapper(void *map_data, int num_elements, - void *extra_data) { - - struct end_of_step_data *data = (struct end_of_step_data *)extra_data; - const struct engine *e = data->e; - const int with_hydro = (e->policy & engine_policy_hydro); - const int with_self_grav = (e->policy & engine_policy_self_gravity); - const int with_ext_grav = (e->policy & engine_policy_external_gravity); - const int with_grav = (with_self_grav || with_ext_grav); - const int with_stars = (e->policy & engine_policy_stars); - const int with_black_holes = (e->policy & engine_policy_black_holes); - struct space *s = e->s; - int *local_cells = (int *)map_data; - struct star_formation_history *sfh_top = &data->sfh; - - /* Local collectible */ - size_t updated = 0, g_updated = 0, s_updated = 0, b_updated = 0; - integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, - ti_hydro_beg_max = 0; - integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, - ti_gravity_beg_max = 0; - integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0, - ti_stars_beg_max = 0; - integertime_t ti_black_holes_end_min = max_nr_timesteps, - ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0; - - /* Local Star formation history properties */ - struct star_formation_history sfh_updated; - - /* Initialize the star formation structs for this engine to zero */ - star_formation_logger_init(&sfh_updated); - - for (int ind = 0; ind < num_elements; ind++) { - struct cell *c = &s->cells_top[local_cells[ind]]; - - if (c->hydro.count > 0 || c->grav.count > 0 || c->stars.count > 0 || - c->black_holes.count > 0) { - - /* Make the top-cells recurse */ - if (with_hydro) { - engine_collect_end_of_step_recurse_hydro(c, e); - } - if (with_grav) { - engine_collect_end_of_step_recurse_grav(c, e); - } - if (with_stars) { - engine_collect_end_of_step_recurse_stars(c, e); - } - if (with_black_holes) { - engine_collect_end_of_step_recurse_black_holes(c, e); - 
} - - /* And aggregate */ - if (c->hydro.ti_end_min > e->ti_current) - ti_hydro_end_min = min(ti_hydro_end_min, c->hydro.ti_end_min); - ti_hydro_end_max = max(ti_hydro_end_max, c->hydro.ti_end_max); - ti_hydro_beg_max = max(ti_hydro_beg_max, c->hydro.ti_beg_max); - - if (c->grav.ti_end_min > e->ti_current) - ti_gravity_end_min = min(ti_gravity_end_min, c->grav.ti_end_min); - ti_gravity_end_max = max(ti_gravity_end_max, c->grav.ti_end_max); - ti_gravity_beg_max = max(ti_gravity_beg_max, c->grav.ti_beg_max); - - if (c->stars.ti_end_min > e->ti_current) - ti_stars_end_min = min(ti_stars_end_min, c->stars.ti_end_min); - ti_stars_end_max = max(ti_stars_end_max, c->stars.ti_end_max); - ti_stars_beg_max = max(ti_stars_beg_max, c->stars.ti_beg_max); - - if (c->black_holes.ti_end_min > e->ti_current) - ti_black_holes_end_min = - min(ti_black_holes_end_min, c->black_holes.ti_end_min); - ti_black_holes_end_max = - max(ti_black_holes_end_max, c->black_holes.ti_end_max); - ti_black_holes_beg_max = - max(ti_black_holes_beg_max, c->black_holes.ti_beg_max); - - updated += c->hydro.updated; - g_updated += c->grav.updated; - s_updated += c->stars.updated; - b_updated += c->black_holes.updated; - - /* Check if the cell is inactive and in that case reorder the SFH */ - if (!cell_is_starting_hydro(c, e)) { - star_formation_logger_log_inactive_cell(&c->stars.sfh); - } - - /* Get the star formation history from the current cell and store it in - * the star formation history struct */ - star_formation_logger_add(&sfh_updated, &c->stars.sfh); - - /* Collected, so clear for next time. */ - c->hydro.updated = 0; - c->grav.updated = 0; - c->stars.updated = 0; - c->black_holes.updated = 0; - } - } - - /* Let's write back to the global data. 
- * We use the space lock to garanty single access*/ - if (lock_lock(&s->lock) == 0) { - data->updated += updated; - data->g_updated += g_updated; - data->s_updated += s_updated; - data->b_updated += b_updated; - - /* Add the SFH information from this engine to the global data */ - star_formation_logger_add(sfh_top, &sfh_updated); - - if (ti_hydro_end_min > e->ti_current) - data->ti_hydro_end_min = min(ti_hydro_end_min, data->ti_hydro_end_min); - data->ti_hydro_end_max = max(ti_hydro_end_max, data->ti_hydro_end_max); - data->ti_hydro_beg_max = max(ti_hydro_beg_max, data->ti_hydro_beg_max); - - if (ti_gravity_end_min > e->ti_current) - data->ti_gravity_end_min = - min(ti_gravity_end_min, data->ti_gravity_end_min); - data->ti_gravity_end_max = - max(ti_gravity_end_max, data->ti_gravity_end_max); - data->ti_gravity_beg_max = - max(ti_gravity_beg_max, data->ti_gravity_beg_max); - - if (ti_stars_end_min > e->ti_current) - data->ti_stars_end_min = min(ti_stars_end_min, data->ti_stars_end_min); - data->ti_stars_end_max = max(ti_stars_end_max, data->ti_stars_end_max); - data->ti_stars_beg_max = max(ti_stars_beg_max, data->ti_stars_beg_max); - - if (ti_black_holes_end_min > e->ti_current) - data->ti_black_holes_end_min = - min(ti_black_holes_end_min, data->ti_black_holes_end_min); - data->ti_black_holes_end_max = - max(ti_black_holes_end_max, data->ti_black_holes_end_max); - data->ti_black_holes_beg_max = - max(ti_black_holes_beg_max, data->ti_black_holes_beg_max); - } - - if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space"); -} - -/** - * @brief Collects the next time-step and rebuild flag. - * - * The next time-step is determined by making each super-cell recurse to - * collect the minimal of ti_end and the number of updated particles. When in - * MPI mode this routines reduces these across all nodes and also collects the - * forcerebuild flag -- this is so that we only use a single collective MPI - * call per step for all these values. 
- * - * Note that the results are stored in e->collect_group1 struct not in the - * engine fields, unless apply is true. These can be applied field-by-field - * or all at once using collectgroup1_copy(); - * - * @param e The #engine. - * @param apply whether to apply the results to the engine or just keep in the - * group1 struct. - */ -void engine_collect_end_of_step(struct engine *e, int apply) { - - const ticks tic = getticks(); - struct space *s = e->s; - struct end_of_step_data data; - data.updated = 0, data.g_updated = 0, data.s_updated = 0, data.b_updated = 0; - data.ti_hydro_end_min = max_nr_timesteps, data.ti_hydro_end_max = 0, - data.ti_hydro_beg_max = 0; - data.ti_gravity_end_min = max_nr_timesteps, data.ti_gravity_end_max = 0, - data.ti_gravity_beg_max = 0; - data.ti_stars_end_min = max_nr_timesteps, data.ti_stars_end_max = 0, - data.ti_stars_beg_max = 0; - data.ti_black_holes_end_min = max_nr_timesteps, - data.ti_black_holes_end_max = 0, data.ti_black_holes_beg_max = 0; - data.e = e; - - /* Initialize the total SFH of the simulation to zero */ - star_formation_logger_init(&data.sfh); - - /* Collect information from the local top-level cells */ - threadpool_map(&e->threadpool, engine_collect_end_of_step_mapper, - s->local_cells_with_tasks_top, s->nr_local_cells_with_tasks, - sizeof(int), 0, &data); - - /* Get the number of inhibited particles from the space-wide counters - * since these have been updated atomically during the time-steps. */ - data.inhibited = s->nr_inhibited_parts; - data.g_inhibited = s->nr_inhibited_gparts; - data.s_inhibited = s->nr_inhibited_sparts; - data.b_inhibited = s->nr_inhibited_bparts; - - /* Store these in the temporary collection group. 
*/ - collectgroup1_init( - &e->collect_group1, data.updated, data.g_updated, data.s_updated, - data.b_updated, data.inhibited, data.g_inhibited, data.s_inhibited, - data.b_inhibited, data.ti_hydro_end_min, data.ti_hydro_end_max, - data.ti_hydro_beg_max, data.ti_gravity_end_min, data.ti_gravity_end_max, - data.ti_gravity_beg_max, data.ti_stars_end_min, data.ti_stars_end_max, - data.ti_stars_beg_max, data.ti_black_holes_end_min, - data.ti_black_holes_end_max, data.ti_black_holes_beg_max, e->forcerebuild, - e->s->tot_cells, e->sched.nr_tasks, - (float)e->sched.nr_tasks / (float)e->s->tot_cells, data.sfh); - -/* Aggregate collective data from the different nodes for this step. */ -#ifdef WITH_MPI - collectgroup1_reduce(&e->collect_group1); - -#ifdef SWIFT_DEBUG_CHECKS - { - /* Check the above using the original MPI calls. */ - integertime_t in_i[2], out_i[2]; - in_i[0] = 0; - in_i[1] = 0; - out_i[0] = data.ti_hydro_end_min; - out_i[1] = data.ti_gravity_end_min; - if (MPI_Allreduce(out_i, in_i, 2, MPI_LONG_LONG_INT, MPI_MIN, - MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to aggregate ti_end_min."); - if (in_i[0] != (long long)e->collect_group1.ti_hydro_end_min) - error("Failed to get same ti_hydro_end_min, is %lld, should be %lld", - in_i[0], e->collect_group1.ti_hydro_end_min); - if (in_i[1] != (long long)e->collect_group1.ti_gravity_end_min) - error("Failed to get same ti_gravity_end_min, is %lld, should be %lld", - in_i[1], e->collect_group1.ti_gravity_end_min); - - long long in_ll[4], out_ll[4]; - out_ll[0] = data.updated; - out_ll[1] = data.g_updated; - out_ll[2] = data.s_updated; - out_ll[3] = data.b_updated; - if (MPI_Allreduce(out_ll, in_ll, 4, MPI_LONG_LONG_INT, MPI_SUM, - MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to aggregate particle counts."); - if (in_ll[0] != (long long)e->collect_group1.updated) - error("Failed to get same updated, is %lld, should be %lld", in_ll[0], - e->collect_group1.updated); - if (in_ll[1] != (long 
long)e->collect_group1.g_updated) - error("Failed to get same g_updated, is %lld, should be %lld", in_ll[1], - e->collect_group1.g_updated); - if (in_ll[2] != (long long)e->collect_group1.s_updated) - error("Failed to get same s_updated, is %lld, should be %lld", in_ll[2], - e->collect_group1.s_updated); - if (in_ll[3] != (long long)e->collect_group1.b_updated) - error("Failed to get same b_updated, is %lld, should be %lld", in_ll[3], - e->collect_group1.b_updated); - - out_ll[0] = data.inhibited; - out_ll[1] = data.g_inhibited; - out_ll[2] = data.s_inhibited; - out_ll[3] = data.b_inhibited; - if (MPI_Allreduce(out_ll, in_ll, 4, MPI_LONG_LONG_INT, MPI_SUM, - MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to aggregate particle counts."); - if (in_ll[0] != (long long)e->collect_group1.inhibited) - error("Failed to get same inhibited, is %lld, should be %lld", in_ll[0], - e->collect_group1.inhibited); - if (in_ll[1] != (long long)e->collect_group1.g_inhibited) - error("Failed to get same g_inhibited, is %lld, should be %lld", in_ll[1], - e->collect_group1.g_inhibited); - if (in_ll[2] != (long long)e->collect_group1.s_inhibited) - error("Failed to get same s_inhibited, is %lld, should be %lld", in_ll[2], - e->collect_group1.s_inhibited); - if (in_ll[3] != (long long)e->collect_group1.b_inhibited) - error("Failed to get same b_inhibited, is %lld, should be %lld", in_ll[3], - e->collect_group1.b_inhibited); - - int buff = 0; - if (MPI_Allreduce(&e->forcerebuild, &buff, 1, MPI_INT, MPI_MAX, - MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to aggregate the rebuild flag across nodes."); - if (!!buff != !!e->collect_group1.forcerebuild) - error( - "Failed to get same rebuild flag from all nodes, is %d," - "should be %d", - buff, e->collect_group1.forcerebuild); - } -#endif -#endif - - /* Apply to the engine, if requested. 
*/ - if (apply) collectgroup1_apply(&e->collect_group1, e); - - if (e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); -} - /** * @brief Print the conserved quantities statistics to a log file * @@ -6423,127 +4867,3 @@ void engine_struct_restore(struct engine *e, FILE *stream) { e->forcerebuild = 1; e->forcerepart = 0; } - -/** - * @brief Activate all the #gpart communications in preparation - * fof a call to FOF. - * - * @param e The #engine to act on. - */ -void engine_activate_gpart_comms(struct engine *e) { - -#ifdef WITH_MPI - - const ticks tic = getticks(); - - struct scheduler *s = &e->sched; - const int nr_tasks = s->nr_tasks; - struct task *tasks = s->tasks; - - for (int k = 0; k < nr_tasks; ++k) { - - struct task *t = &tasks[k]; - - if ((t->type == task_type_send) && (t->subtype == task_subtype_gpart)) { - scheduler_activate(s, t); - } else if ((t->type == task_type_recv) && - (t->subtype == task_subtype_gpart)) { - scheduler_activate(s, t); - } else { - t->skip = 1; - } - } - - if (e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); - -#else - error("Calling an MPI function in non-MPI mode."); -#endif -} - -/** - * @brief Activate all the FOF tasks. - * - * Marks all the other task types to be skipped. - * - * @param e The #engine to act on. - */ -void engine_activate_fof_tasks(struct engine *e) { - - const ticks tic = getticks(); - - struct scheduler *s = &e->sched; - const int nr_tasks = s->nr_tasks; - struct task *tasks = s->tasks; - - for (int k = 0; k < nr_tasks; k++) { - - struct task *t = &tasks[k]; - - if (t->type == task_type_fof_self || t->type == task_type_fof_pair) - scheduler_activate(s, t); - else - t->skip = 1; - } - - if (e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); -} - -/** - * @brief Run a FOF search. 
- * - * @param e the engine - * @param dump_results Are we writing group catalogues to output files? - * @param seed_black_holes Are we seeding black holes? - */ -void engine_fof(struct engine *e, const int dump_results, - const int seed_black_holes) { - -#ifdef WITH_FOF - - ticks tic = getticks(); - - /* Compute number of DM particles */ - const long long total_nr_baryons = - e->total_nr_parts + e->total_nr_sparts + e->total_nr_bparts; - const long long total_nr_dmparts = - e->total_nr_gparts - e->total_nr_DM_background_gparts - total_nr_baryons; - - /* Initialise FOF parameters and allocate FOF arrays. */ - fof_allocate(e->s, total_nr_dmparts, e->fof_properties); - - /* Make FOF tasks */ - engine_make_fof_tasks(e); - - /* and activate them. */ - engine_activate_fof_tasks(e); - - /* Perform local FOF tasks. */ - engine_launch(e); - - /* Perform FOF search over foreign particles and - * find groups which require black hole seeding. */ - fof_search_tree(e->fof_properties, e->black_holes_properties, - e->physical_constants, e->cosmology, e->s, dump_results, - seed_black_holes); - - /* Reset flag. */ - e->run_fof = 0; - - /* Flag that a FOF has taken place */ - e->step_props |= engine_step_prop_fof; - - /* ... 
and find the next FOF time */ - if (seed_black_holes) engine_compute_next_fof_time(e); - - if (engine_rank == 0) - message("Complete FOF search took: %.3f %s.", - clocks_from_ticks(getticks() - tic), clocks_getunit()); -#else - error("SWIFT was not compiled with FOF enabled!"); -#endif -} diff --git a/src/engine.h b/src/engine.h index 3484336039c64baa43469b1152f1856f70ee2823..72d528969553b6e24ab939ce05acea69c7cb1b0c 100644 --- a/src/engine.h +++ b/src/engine.h @@ -490,6 +490,7 @@ void engine_reconstruct_multipoles(struct engine *e); void engine_allocate_foreign_particles(struct engine *e); void engine_print_stats(struct engine *e); void engine_check_for_dumps(struct engine *e); +void engine_collect_end_of_step(struct engine *e, int apply); void engine_dump_snapshot(struct engine *e); void engine_init_output_lists(struct engine *e, struct swift_params *params); void engine_init(struct engine *e, struct space *s, struct swift_params *params, diff --git a/src/engine_collect_end_of_step.c b/src/engine_collect_end_of_step.c new file mode 100644 index 0000000000000000000000000000000000000000..ec02acfefdf65aca13d44a7cf90d48f31b99778f --- /dev/null +++ b/src/engine_collect_end_of_step.c @@ -0,0 +1,584 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "engine.h" + +/* Local headers. */ +#include "active.h" +#include "timeline.h" + +/** + * @brief Counts, time-line bounds and SFH collected at the end of a time-step + */ +struct end_of_step_data { + + size_t updated, g_updated, s_updated, b_updated; + size_t inhibited, g_inhibited, s_inhibited, b_inhibited; + integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max; + integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max; + integertime_t ti_stars_end_min, ti_stars_end_max, ti_stars_beg_max; + integertime_t ti_black_holes_end_min, ti_black_holes_end_max, + ti_black_holes_beg_max; + struct engine *e; + struct star_formation_history sfh; +}; + +/** + * @brief Recursive function gathering the hydro end-of-step data. + * + * We recurse until we encounter a timestep or time-step MPI recv task + * as the values will have been set at that level. We then bring these + * values upwards. + * + * @param c The #cell to recurse into. + * @param e The #engine (used to check whether cells are starting hydro). + */ +void engine_collect_end_of_step_recurse_hydro(struct cell *c, + const struct engine *e) { + + /* Skip super-cells (Their values are already set) */ + if (c->timestep != NULL) return; +#ifdef WITH_MPI + if (cell_get_recv(c, task_subtype_tend_part) != NULL) return; +#endif /* WITH_MPI */ + +#ifdef SWIFT_DEBUG_CHECKS + /* if (!c->split) error("Reached a leaf without finding a time-step task! + * c->depth=%d c->maxdepth=%d c->count=%d c->node=%d", */ + /* c->depth, c->maxdepth, c->hydro.count, c->nodeID); */ +#endif + + /* Counters for the different quantities. 
*/ + size_t updated = 0; + integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, + ti_hydro_beg_max = 0; + + /* Star formation history accumulated over the progeny of this cell */ + struct star_formation_history sfh_updated; + + /* Initialize the local star formation history */ + star_formation_logger_init(&sfh_updated); + + /* Collect the values from the progeny with a non-zero hydro count. */ + for (int k = 0; k < 8; k++) { + struct cell *cp = c->progeny[k]; + if (cp != NULL && cp->hydro.count > 0) { + + /* Recurse first so that the child's values are up to date */ + engine_collect_end_of_step_recurse_hydro(cp, e); + + /* And fold the child's time-line bounds into the running min/max */ + ti_hydro_end_min = min(ti_hydro_end_min, cp->hydro.ti_end_min); + ti_hydro_end_max = max(ti_hydro_end_max, cp->hydro.ti_end_max); + ti_hydro_beg_max = max(ti_hydro_beg_max, cp->hydro.ti_beg_max); + + updated += cp->hydro.updated; + + /* If the cell is not starting hydro, log it as inactive in its SFH */ + if (!cell_is_starting_hydro(cp, e)) { + star_formation_logger_log_inactive_cell(&cp->stars.sfh); + } + + /* Add the star formation history in this cell to sfh_updated */ + star_formation_logger_add(&sfh_updated, &cp->stars.sfh); + + /* Collected, so clear for next time. */ + cp->hydro.updated = 0; + } + } + + /* Store the collected values in the cell. */ + c->hydro.ti_end_min = ti_hydro_end_min; + c->hydro.ti_end_max = ti_hydro_end_max; + c->hydro.ti_beg_max = ti_hydro_beg_max; + c->hydro.updated = updated; + // c->hydro.inhibited = inhibited; + + /* Add the progeny's star formation history to this cell's own */ + star_formation_logger_add(&c->stars.sfh, &sfh_updated); +} + +/** + * @brief Recursive function gathering the gravity end-of-step data. + * + * We recurse until we encounter a timestep or time-step MPI recv task + * as the values will have been set at that level. We then bring these + * values upwards. + * + * @param c The #cell to recurse into. 
+ * @param e The #engine (only passed on to the recursive calls). 
+ */ +void engine_collect_end_of_step_recurse_grav(struct cell *c, + const struct engine *e) { + + /* Skip super-cells (Their values are already set) */ + if (c->timestep != NULL) return; +#ifdef WITH_MPI + if (cell_get_recv(c, task_subtype_tend_gpart) != NULL) return; +#endif /* WITH_MPI */ + +#ifdef SWIFT_DEBUG_CHECKS + // if (!c->split) error("Reached a leaf without finding a time-step + // task!"); +#endif + + /* Counters for the different quantities. */ + size_t updated = 0; + integertime_t ti_grav_end_min = max_nr_timesteps, ti_grav_end_max = 0, + ti_grav_beg_max = 0; + + /* Collect the values from the progeny. */ + for (int k = 0; k < 8; k++) { + struct cell *cp = c->progeny[k]; + if (cp != NULL && cp->grav.count > 0) { + + /* Recurse */ + engine_collect_end_of_step_recurse_grav(cp, e); + + /* And update */ + ti_grav_end_min = min(ti_grav_end_min, cp->grav.ti_end_min); + ti_grav_end_max = max(ti_grav_end_max, cp->grav.ti_end_max); + ti_grav_beg_max = max(ti_grav_beg_max, cp->grav.ti_beg_max); + + updated += cp->grav.updated; + + /* Collected, so clear for next time. */ + cp->grav.updated = 0; + } + } + + /* Store the collected values in the cell. */ + c->grav.ti_end_min = ti_grav_end_min; + c->grav.ti_end_max = ti_grav_end_max; + c->grav.ti_beg_max = ti_grav_beg_max; + c->grav.updated = updated; +} + +/** + * @brief Recursive function gathering end-of-step data. + * + * We recurse until we encounter a timestep or time-step MPI recv task + * as the values will have been set at that level. We then bring these + * values upwards. + * + * @param c The #cell to recurse into. + * @param e The #engine. 
+ */ +void engine_collect_end_of_step_recurse_stars(struct cell *c, + const struct engine *e) { + + /* Skip super-cells (Their values are already set) */ + if (c->timestep != NULL) return; +#ifdef WITH_MPI + if (cell_get_recv(c, task_subtype_tend_spart) != NULL) return; +#endif /* WITH_MPI */ + +#ifdef SWIFT_DEBUG_CHECKS + // if (!c->split) error("Reached a leaf without finding a time-step task!"); +#endif + + /* Counters for the different quantities. */ + size_t updated = 0; + integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0, + ti_stars_beg_max = 0; + + /* Collect the values from the progeny. */ + for (int k = 0; k < 8; k++) { + struct cell *cp = c->progeny[k]; + if (cp != NULL && cp->stars.count > 0) { + + /* Recurse */ + engine_collect_end_of_step_recurse_stars(cp, e); + + /* And update */ + ti_stars_end_min = min(ti_stars_end_min, cp->stars.ti_end_min); + ti_stars_end_max = max(ti_stars_end_max, cp->stars.ti_end_max); + ti_stars_beg_max = max(ti_stars_beg_max, cp->stars.ti_beg_max); + + updated += cp->stars.updated; + + /* Collected, so clear for next time. */ + cp->stars.updated = 0; + } + } + + /* Store the collected values in the cell. */ + c->stars.ti_end_min = ti_stars_end_min; + c->stars.ti_end_max = ti_stars_end_max; + c->stars.ti_beg_max = ti_stars_beg_max; + c->stars.updated = updated; +} + +/** + * @brief Recursive function gathering end-of-step data. + * + * We recurse until we encounter a timestep or time-step MPI recv task + * as the values will have been set at that level. We then bring these + * values upwards. + * + * @param c The #cell to recurse into. + * @param e The #engine. 
+ */ +void engine_collect_end_of_step_recurse_black_holes(struct cell *c, + const struct engine *e) { + + /* Skip super-cells (Their values are already set) */ + if (c->timestep != NULL) return; +#ifdef WITH_MPI + if (cell_get_recv(c, task_subtype_tend_bpart) != NULL) return; +#endif /* WITH_MPI */ + +#ifdef SWIFT_DEBUG_CHECKS + // if (!c->split) error("Reached a leaf without finding a time-step task!"); +#endif + + /* Counters for the different quantities. */ + size_t updated = 0; + integertime_t ti_black_holes_end_min = max_nr_timesteps, + ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0; + + /* Collect the values from the progeny. */ + for (int k = 0; k < 8; k++) { + struct cell *cp = c->progeny[k]; + if (cp != NULL && cp->black_holes.count > 0) { + + /* Recurse */ + engine_collect_end_of_step_recurse_black_holes(cp, e); + + /* And update */ + ti_black_holes_end_min = + min(ti_black_holes_end_min, cp->black_holes.ti_end_min); + ti_black_holes_end_max = + max(ti_black_holes_end_max, cp->black_holes.ti_end_max); + ti_black_holes_beg_max = + max(ti_black_holes_beg_max, cp->black_holes.ti_beg_max); + + updated += cp->black_holes.updated; + + /* Collected, so clear for next time. */ + cp->black_holes.updated = 0; + } + } + + /* Store the collected values in the cell. */ + c->black_holes.ti_end_min = ti_black_holes_end_min; + c->black_holes.ti_end_max = ti_black_holes_end_max; + c->black_holes.ti_beg_max = ti_black_holes_beg_max; + c->black_holes.updated = updated; +} + +/** + * @brief Mapping function to collect the data from the end of the step + * + * This function will call a recursive function on all the top-level cells + * to collect the information we are after. + * + * @param map_data The list of cells with tasks on this node. + * @param num_elements The number of elements in the list this thread will work + * on. + * @param extra_data The #engine. 
+ */
+void engine_collect_end_of_step_mapper(void *map_data, int num_elements,
+                                       void *extra_data) {
+
+  /* Unpack the shared accumulator and read the engine policies once. */
+  struct end_of_step_data *data = (struct end_of_step_data *)extra_data;
+  const struct engine *e = data->e;
+  const int with_hydro = (e->policy & engine_policy_hydro);
+  const int with_self_grav = (e->policy & engine_policy_self_gravity);
+  const int with_ext_grav = (e->policy & engine_policy_external_gravity);
+  const int with_grav = (with_self_grav || with_ext_grav);
+  const int with_stars = (e->policy & engine_policy_stars);
+  const int with_black_holes = (e->policy & engine_policy_black_holes);
+  struct space *s = e->s;
+  /* map_data holds indices into the array of top-level cells. */
+  int *local_cells = (int *)map_data;
+  struct star_formation_history *sfh_top = &data->sfh;
+
+  /* Local collectible: accumulated without locking, merged at the end. */
+  size_t updated = 0, g_updated = 0, s_updated = 0, b_updated = 0;
+  integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0,
+                ti_hydro_beg_max = 0;
+  integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0,
+                ti_gravity_beg_max = 0;
+  integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0,
+                ti_stars_beg_max = 0;
+  integertime_t ti_black_holes_end_min = max_nr_timesteps,
+                ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0;
+
+  /* Local Star formation history properties */
+  struct star_formation_history sfh_updated;
+
+  /* Initialize the local star formation history to zero */
+  star_formation_logger_init(&sfh_updated);
+
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct cell *c = &s->cells_top[local_cells[ind]];
+
+    /* Only cells holding at least one particle of any type contribute. */
+    if (c->hydro.count > 0 || c->grav.count > 0 || c->stars.count > 0 ||
+        c->black_holes.count > 0) {
+
+      /* Make the top-cells recurse */
+      if (with_hydro) {
+        engine_collect_end_of_step_recurse_hydro(c, e);
+      }
+      if (with_grav) {
+        engine_collect_end_of_step_recurse_grav(c, e);
+      }
+      if (with_stars) {
+        engine_collect_end_of_step_recurse_stars(c, e);
+      }
+      if (with_black_holes) {
+        engine_collect_end_of_step_recurse_black_holes(c, e);
+      }
+
+      /* And aggregate. An end_min only takes part in the minimum when it
+       * lies strictly after the current time; the maxima are always
+       * updated. */
+      if (c->hydro.ti_end_min > e->ti_current)
+        ti_hydro_end_min = min(ti_hydro_end_min, c->hydro.ti_end_min);
+      ti_hydro_end_max = max(ti_hydro_end_max, c->hydro.ti_end_max);
+      ti_hydro_beg_max = max(ti_hydro_beg_max, c->hydro.ti_beg_max);
+
+      if (c->grav.ti_end_min > e->ti_current)
+        ti_gravity_end_min = min(ti_gravity_end_min, c->grav.ti_end_min);
+      ti_gravity_end_max = max(ti_gravity_end_max, c->grav.ti_end_max);
+      ti_gravity_beg_max = max(ti_gravity_beg_max, c->grav.ti_beg_max);
+
+      if (c->stars.ti_end_min > e->ti_current)
+        ti_stars_end_min = min(ti_stars_end_min, c->stars.ti_end_min);
+      ti_stars_end_max = max(ti_stars_end_max, c->stars.ti_end_max);
+      ti_stars_beg_max = max(ti_stars_beg_max, c->stars.ti_beg_max);
+
+      if (c->black_holes.ti_end_min > e->ti_current)
+        ti_black_holes_end_min =
+            min(ti_black_holes_end_min, c->black_holes.ti_end_min);
+      ti_black_holes_end_max =
+          max(ti_black_holes_end_max, c->black_holes.ti_end_max);
+      ti_black_holes_beg_max =
+          max(ti_black_holes_beg_max, c->black_holes.ti_beg_max);
+
+      updated += c->hydro.updated;
+      g_updated += c->grav.updated;
+      s_updated += c->stars.updated;
+      b_updated += c->black_holes.updated;
+
+      /* Check if the cell is inactive and in that case reorder the SFH */
+      if (!cell_is_starting_hydro(c, e)) {
+        star_formation_logger_log_inactive_cell(&c->stars.sfh);
+      }
+
+      /* Get the star formation history from the current cell and store it in
+       * the star formation history struct */
+      star_formation_logger_add(&sfh_updated, &c->stars.sfh);
+
+      /* Collected, so clear for next time. */
+      c->hydro.updated = 0;
+      c->grav.updated = 0;
+      c->stars.updated = 0;
+      c->black_holes.updated = 0;
+    }
+  }
+
+  /* Let's write back to the global data.
+   * We use the space lock to guarantee single access.
+   * NOTE(review): if lock_lock() does not return 0 the local results are
+   * dropped silently and the unlock below is still attempted - confirm this
+   * is intended. */
+  if (lock_lock(&s->lock) == 0) {
+    data->updated += updated;
+    data->g_updated += g_updated;
+    data->s_updated += s_updated;
+    data->b_updated += b_updated;
+
+    /* Add the local SFH information to the global data */
+    star_formation_logger_add(sfh_top, &sfh_updated);
+
+    /* Same rule as above: minima only if strictly after the current time. */
+    if (ti_hydro_end_min > e->ti_current)
+      data->ti_hydro_end_min = min(ti_hydro_end_min, data->ti_hydro_end_min);
+    data->ti_hydro_end_max = max(ti_hydro_end_max, data->ti_hydro_end_max);
+    data->ti_hydro_beg_max = max(ti_hydro_beg_max, data->ti_hydro_beg_max);
+
+    if (ti_gravity_end_min > e->ti_current)
+      data->ti_gravity_end_min =
+          min(ti_gravity_end_min, data->ti_gravity_end_min);
+    data->ti_gravity_end_max =
+        max(ti_gravity_end_max, data->ti_gravity_end_max);
+    data->ti_gravity_beg_max =
+        max(ti_gravity_beg_max, data->ti_gravity_beg_max);
+
+    if (ti_stars_end_min > e->ti_current)
+      data->ti_stars_end_min = min(ti_stars_end_min, data->ti_stars_end_min);
+    data->ti_stars_end_max = max(ti_stars_end_max, data->ti_stars_end_max);
+    data->ti_stars_beg_max = max(ti_stars_beg_max, data->ti_stars_beg_max);
+
+    if (ti_black_holes_end_min > e->ti_current)
+      data->ti_black_holes_end_min =
+          min(ti_black_holes_end_min, data->ti_black_holes_end_min);
+    data->ti_black_holes_end_max =
+        max(ti_black_holes_end_max, data->ti_black_holes_end_max);
+    data->ti_black_holes_beg_max =
+        max(ti_black_holes_beg_max, data->ti_black_holes_beg_max);
+  }
+
+  if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space");
+}
+
+/**
+ * @brief Collects the next time-step and rebuild flag.
+ *
+ * The next time-step is determined by making each super-cell recurse to
+ * collect the minimal of ti_end and the number of updated particles. When in
+ * MPI mode this routine reduces these across all nodes and also collects the
+ * forcerebuild flag -- this is so that we only use a single collective MPI
+ * call per step for all these values.
+ *
+ * Note that the results are stored in e->collect_group1 struct not in the
+ * engine fields, unless apply is true. These can be applied field-by-field
+ * or all at once using collectgroup1_copy();
+ *
+ * @param e The #engine.
+ * @param apply whether to apply the results to the engine or just keep in the
+ * group1 struct.
+ */
+void engine_collect_end_of_step(struct engine *e, int apply) {
+
+  const ticks tic = getticks();
+  struct space *s = e->s;
+
+  /* Start from neutral values: zero counters, end_min at the largest
+   * possible value and the maxima at zero. */
+  struct end_of_step_data data;
+  data.updated = 0, data.g_updated = 0, data.s_updated = 0, data.b_updated = 0;
+  data.ti_hydro_end_min = max_nr_timesteps, data.ti_hydro_end_max = 0,
+  data.ti_hydro_beg_max = 0;
+  data.ti_gravity_end_min = max_nr_timesteps, data.ti_gravity_end_max = 0,
+  data.ti_gravity_beg_max = 0;
+  data.ti_stars_end_min = max_nr_timesteps, data.ti_stars_end_max = 0,
+  data.ti_stars_beg_max = 0;
+  data.ti_black_holes_end_min = max_nr_timesteps,
+  data.ti_black_holes_end_max = 0, data.ti_black_holes_beg_max = 0;
+  data.e = e;
+
+  /* Initialize the total SFH of the simulation to zero */
+  star_formation_logger_init(&data.sfh);
+
+  /* Collect information from the local top-level cells. The mapper merges
+   * its results into data. */
+  threadpool_map(&e->threadpool, engine_collect_end_of_step_mapper,
+                 s->local_cells_with_tasks_top, s->nr_local_cells_with_tasks,
+                 sizeof(int), 0, &data);
+
+  /* Get the number of inhibited particles from the space-wide counters
+   * since these have been updated atomically during the time-steps. */
+  data.inhibited = s->nr_inhibited_parts;
+  data.g_inhibited = s->nr_inhibited_gparts;
+  data.s_inhibited = s->nr_inhibited_sparts;
+  data.b_inhibited = s->nr_inhibited_bparts;
+
+  /* Store these in the temporary collection group. The last numerical
+   * argument is the ratio of the number of tasks to the number of cells. */
+  collectgroup1_init(
+      &e->collect_group1, data.updated, data.g_updated, data.s_updated,
+      data.b_updated, data.inhibited, data.g_inhibited, data.s_inhibited,
+      data.b_inhibited, data.ti_hydro_end_min, data.ti_hydro_end_max,
+      data.ti_hydro_beg_max, data.ti_gravity_end_min, data.ti_gravity_end_max,
+      data.ti_gravity_beg_max, data.ti_stars_end_min, data.ti_stars_end_max,
+      data.ti_stars_beg_max, data.ti_black_holes_end_min,
+      data.ti_black_holes_end_max, data.ti_black_holes_beg_max, e->forcerebuild,
+      e->s->tot_cells, e->sched.nr_tasks,
+      (float)e->sched.nr_tasks / (float)e->s->tot_cells, data.sfh);
+
+/* Aggregate collective data from the different nodes for this step. */
+#ifdef WITH_MPI
+  collectgroup1_reduce(&e->collect_group1);
+
+#ifdef SWIFT_DEBUG_CHECKS
+  {
+    /* Check the above using the original MPI calls. Only the hydro and
+     * gravity end_min values are cross-checked here. */
+    integertime_t in_i[2], out_i[2];
+    in_i[0] = 0;
+    in_i[1] = 0;
+    out_i[0] = data.ti_hydro_end_min;
+    out_i[1] = data.ti_gravity_end_min;
+    if (MPI_Allreduce(out_i, in_i, 2, MPI_LONG_LONG_INT, MPI_MIN,
+                      MPI_COMM_WORLD) != MPI_SUCCESS)
+      error("Failed to aggregate ti_end_min.");
+    if (in_i[0] != (long long)e->collect_group1.ti_hydro_end_min)
+      error("Failed to get same ti_hydro_end_min, is %lld, should be %lld",
+            in_i[0], e->collect_group1.ti_hydro_end_min);
+    if (in_i[1] != (long long)e->collect_group1.ti_gravity_end_min)
+      error("Failed to get same ti_gravity_end_min, is %lld, should be %lld",
+            in_i[1], e->collect_group1.ti_gravity_end_min);
+
+    /* Cross-check the summed numbers of updated particles. */
+    long long in_ll[4], out_ll[4];
+    out_ll[0] = data.updated;
+    out_ll[1] = data.g_updated;
+    out_ll[2] = data.s_updated;
+    out_ll[3] = data.b_updated;
+    if (MPI_Allreduce(out_ll, in_ll, 4, MPI_LONG_LONG_INT, MPI_SUM,
+                      MPI_COMM_WORLD) != MPI_SUCCESS)
+      error("Failed to aggregate particle counts.");
+    if (in_ll[0] != (long long)e->collect_group1.updated)
+      error("Failed to get same updated, is %lld, should be %lld", in_ll[0],
+            e->collect_group1.updated);
+    if (in_ll[1] != (long long)e->collect_group1.g_updated)
+      error("Failed to get same g_updated, is %lld, should be %lld", in_ll[1],
+            e->collect_group1.g_updated);
+    if (in_ll[2] != (long long)e->collect_group1.s_updated)
+      error("Failed to get same s_updated, is %lld, should be %lld", in_ll[2],
+            e->collect_group1.s_updated);
+    if (in_ll[3] != (long long)e->collect_group1.b_updated)
+      error("Failed to get same b_updated, is %lld, should be %lld", in_ll[3],
+            e->collect_group1.b_updated);
+
+    /* Cross-check the summed numbers of inhibited particles. */
+    out_ll[0] = data.inhibited;
+    out_ll[1] = data.g_inhibited;
+    out_ll[2] = data.s_inhibited;
+    out_ll[3] = data.b_inhibited;
+    if (MPI_Allreduce(out_ll, in_ll, 4, MPI_LONG_LONG_INT, MPI_SUM,
+                      MPI_COMM_WORLD) != MPI_SUCCESS)
+      error("Failed to aggregate particle counts.");
+    if (in_ll[0] != (long long)e->collect_group1.inhibited)
+      error("Failed to get same inhibited, is %lld, should be %lld", in_ll[0],
+            e->collect_group1.inhibited);
+    if (in_ll[1] != (long long)e->collect_group1.g_inhibited)
+      error("Failed to get same g_inhibited, is %lld, should be %lld", in_ll[1],
+            e->collect_group1.g_inhibited);
+    if (in_ll[2] != (long long)e->collect_group1.s_inhibited)
+      error("Failed to get same s_inhibited, is %lld, should be %lld", in_ll[2],
+            e->collect_group1.s_inhibited);
+    if (in_ll[3] != (long long)e->collect_group1.b_inhibited)
+      error("Failed to get same b_inhibited, is %lld, should be %lld", in_ll[3],
+            e->collect_group1.b_inhibited);
+
+    /* Cross-check the rebuild flag; both sides are compared as booleans. */
+    int buff = 0;
+    if (MPI_Allreduce(&e->forcerebuild, &buff, 1, MPI_INT, MPI_MAX,
+                      MPI_COMM_WORLD) != MPI_SUCCESS)
+      error("Failed to aggregate the rebuild flag across nodes.");
+    if (!!buff != !!e->collect_group1.forcerebuild)
+      error(
+          "Failed to get same rebuild flag from all nodes, is %d,"
+          "should be %d",
+          buff, e->collect_group1.forcerebuild);
+  }
+#endif
+#endif
+
+  /* Apply to the engine, if requested. */
+  if (apply) collectgroup1_apply(&e->collect_group1, e);
+
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+}
diff --git a/src/engine_fof.c b/src/engine_fof.c
new file mode 100644
index 0000000000000000000000000000000000000000..f1bb5b452104642f68b4a9987a1ab8d8e3b0162b
--- /dev/null
+++ b/src/engine_fof.c
@@ -0,0 +1,150 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ * Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ * 2015 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* This object's header. */
+#include "engine.h"
+
+/**
+ * @brief Activate all the #gpart communications in preparation
+ * for a call to FOF.
+ *
+ * @param e The #engine to act on.
+ */ +void engine_activate_gpart_comms(struct engine *e) { + +#ifdef WITH_MPI + + const ticks tic = getticks(); + + struct scheduler *s = &e->sched; + const int nr_tasks = s->nr_tasks; + struct task *tasks = s->tasks; + + for (int k = 0; k < nr_tasks; ++k) { + + struct task *t = &tasks[k]; + + if ((t->type == task_type_send) && (t->subtype == task_subtype_gpart)) { + scheduler_activate(s, t); + } else if ((t->type == task_type_recv) && + (t->subtype == task_subtype_gpart)) { + scheduler_activate(s, t); + } else { + t->skip = 1; + } + } + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); + +#else + error("Calling an MPI function in non-MPI mode."); +#endif +} + +/** + * @brief Activate all the FOF tasks. + * + * Marks all the other task types to be skipped. + * + * @param e The #engine to act on. + */ +void engine_activate_fof_tasks(struct engine *e) { + + const ticks tic = getticks(); + + struct scheduler *s = &e->sched; + const int nr_tasks = s->nr_tasks; + struct task *tasks = s->tasks; + + for (int k = 0; k < nr_tasks; k++) { + + struct task *t = &tasks[k]; + + if (t->type == task_type_fof_self || t->type == task_type_fof_pair) + scheduler_activate(s, t); + else + t->skip = 1; + } + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + +/** + * @brief Run a FOF search. + * + * @param e the engine + * @param dump_results Are we writing group catalogues to output files? + * @param seed_black_holes Are we seeding black holes? 
+ */
+void engine_fof(struct engine *e, const int dump_results,
+                const int seed_black_holes) {
+
+#ifdef WITH_FOF
+
+  ticks tic = getticks();
+
+  /* Compute number of DM particles: all gparts minus the background DM
+   * gparts and minus the gas, star and black hole particles. */
+  const long long total_nr_baryons =
+      e->total_nr_parts + e->total_nr_sparts + e->total_nr_bparts;
+  const long long total_nr_dmparts =
+      e->total_nr_gparts - e->total_nr_DM_background_gparts - total_nr_baryons;
+
+  /* Initialise FOF parameters and allocate FOF arrays. */
+  fof_allocate(e->s, total_nr_dmparts, e->fof_properties);
+
+  /* Make FOF tasks */
+  engine_make_fof_tasks(e);
+
+  /* and activate them (all other tasks get marked as skipped). */
+  engine_activate_fof_tasks(e);
+
+  /* Perform local FOF tasks. */
+  engine_launch(e);
+
+  /* Perform FOF search over foreign particles and
+   * find groups which require black hole seeding. */
+  fof_search_tree(e->fof_properties, e->black_holes_properties,
+                  e->physical_constants, e->cosmology, e->s, dump_results,
+                  seed_black_holes);
+
+  /* Reset flag. */
+  e->run_fof = 0;
+
+  /* Flag that a FOF has taken place */
+  e->step_props |= engine_step_prop_fof;
+
+  /* ... and find the next FOF time (only done when seeding black holes) */
+  if (seed_black_holes) engine_compute_next_fof_time(e);
+
+  /* Only rank 0 reports the timing. */
+  if (engine_rank == 0)
+    message("Complete FOF search took: %.3f %s.",
+            clocks_from_ticks(getticks() - tic), clocks_getunit());
+#else
+  error("SWIFT was not compiled with FOF enabled!");
+#endif
+}
diff --git a/src/engine_redistribute.c b/src/engine_redistribute.c
new file mode 100644
index 0000000000000000000000000000000000000000..3132ad2665c67cd244ae1ec9ece75726788c1506
--- /dev/null
+++ b/src/engine_redistribute.c
@@ -0,0 +1,1031 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ * Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ * 2015 Peter W.
Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "engine.h" + +/* Local headers. */ +#include "memswap.h" + +#ifdef WITH_MPI + +/** + * Do the exchange of one type of particles with all the other nodes. + * + * @param label a label for the memory allocations of this particle type. + * @param counts 2D array with the counts of particles to exchange with + * each other node. + * @param parts the particle data to exchange + * @param new_nr_parts the number of particles this node will have after all + * exchanges have completed. + * @param sizeofparts sizeof the particle struct. + * @param alignsize the memory alignment required for this particle type. + * @param mpi_type the MPI_Datatype for these particles. + * @param nr_nodes the number of nodes to exchange with. + * @param nodeID the id of this node. + * + * @result new particle data constructed from all the exchanges with the + * given alignment. 
+ */
+static void *engine_do_redistribute(const char *label, int *counts, char *parts,
+                                    size_t new_nr_parts, size_t sizeofparts,
+                                    size_t alignsize, MPI_Datatype mpi_type,
+                                    int nr_nodes, int nodeID) {
+
+  /* Allocate a new particle array with some extra margin */
+  char *parts_new = NULL;
+  if (swift_memalign(
+          label, (void **)&parts_new, alignsize,
+          sizeofparts * new_nr_parts * engine_redistribute_alloc_margin) != 0)
+    error("Failed to allocate new particle data.");
+
+  /* Prepare MPI requests for the asynchronous communications: one send slot
+   * and one recv slot per node. */
+  MPI_Request *reqs;
+  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * nr_nodes)) ==
+      NULL)
+    error("Failed to allocate MPI request list.");
+
+  /* Send and receive at most "chunk" particles per request. So we need to
+   * loop as many times as necessary here. Make 2Gb/sizeofparts so we only
+   * send 2Gb packets. */
+  const int chunk = INT_MAX / sizeofparts;
+  int sent = 0;
+  int recvd = 0;
+
+  /* Keep looping for as long as at least one transfer was issued. */
+  int activenodes = 1;
+  while (activenodes) {
+
+    for (int k = 0; k < 2 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
+
+    /* Emit the sends and recvs for the data. The offsets start at the
+     * number of particles already handled in the previous rounds. */
+    size_t offset_send = sent;
+    size_t offset_recv = recvd;
+    activenodes = 0;
+
+    for (int k = 0; k < nr_nodes; k++) {
+
+      /* Indices in the count arrays of the node of interest */
+      const int ind_send = nodeID * nr_nodes + k;
+      const int ind_recv = k * nr_nodes + nodeID;
+
+      /* Are we sending any data this loop? */
+      int sending = counts[ind_send] - sent;
+      if (sending > 0) {
+        activenodes++;
+        if (sending > chunk) sending = chunk;
+
+        /* If the send and receive is local then just copy. Note that for
+         * k == nodeID the indices ind_send and ind_recv coincide. */
+        if (k == nodeID) {
+          int receiving = counts[ind_recv] - recvd;
+          if (receiving > chunk) receiving = chunk;
+          memcpy(&parts_new[offset_recv * sizeofparts],
+                 &parts[offset_send * sizeofparts], sizeofparts * receiving);
+        } else {
+          /* Otherwise send it. */
+          int res =
+              MPI_Isend(&parts[offset_send * sizeofparts], sending, mpi_type, k,
+                        ind_send, MPI_COMM_WORLD, &reqs[2 * k + 0]);
+          if (res != MPI_SUCCESS)
+            mpi_error(res, "Failed to isend parts to node %i.", k);
+        }
+      }
+
+      /* If we're sending to this node, then move past it to next. */
+      if (counts[ind_send] > 0) offset_send += counts[ind_send];
+
+      /* Are we receiving any data from this node? Note already done if coming
+       * from this node. */
+      if (k != nodeID) {
+        int receiving = counts[ind_recv] - recvd;
+        if (receiving > 0) {
+          activenodes++;
+          if (receiving > chunk) receiving = chunk;
+          int res = MPI_Irecv(&parts_new[offset_recv * sizeofparts], receiving,
+                              mpi_type, k, ind_recv, MPI_COMM_WORLD,
+                              &reqs[2 * k + 1]);
+          if (res != MPI_SUCCESS)
+            mpi_error(res, "Failed to emit irecv of parts from node %i.", k);
+        }
+      }
+
+      /* If we're receiving from this node, then move past it to next. */
+      if (counts[ind_recv] > 0) offset_recv += counts[ind_recv];
+    }
+
+    /* Wait for all the sends and recvs to tumble in. */
+    MPI_Status stats[2 * nr_nodes];
+    int res;
+    if ((res = MPI_Waitall(2 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
+      /* Report the status of every request before aborting. */
+      for (int k = 0; k < 2 * nr_nodes; k++) {
+        char buff[MPI_MAX_ERROR_STRING];
+        MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
+        message("request from source %i, tag %i has error '%s'.",
+                stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff);
+      }
+      error("Failed during waitall for part data.");
+    }
+
+    /* Move to next chunks. */
+    sent += chunk;
+    recvd += chunk;
+  }
+
+  /* Free temps. */
+  free(reqs);
+
+  /* And return new memory. */
+  return parts_new;
+}
+#endif
+
+#ifdef WITH_MPI /* redist_mapper */
+
+/* Support for engine_redistribute threadpool dest mappers. */
+struct redist_mapper_data {
+  /* Shared counts array, indexed as [sending node * nr_nodes + dest node]. */
+  int *counts;
+  /* Destination node of each particle. */
+  int *dest;
+  int nodeID;
+  int nr_nodes;
+  struct cell *cells;
+  struct space *s;
+  /* Start of the full particle array the mapped chunks belong to. */
+  void *base;
+};
+
+/* Generic function for accumulating counts for TYPE parts.
Note
+ * we use a local counts array to avoid the atomic_add in the parts
+ * loop. */
+/* For each particle of the chunk the macro body wraps the position into
+ * [0, dim), looks up the top-level cell containing it, records the nodeID of
+ * that cell in dest[] and counts it in a local array. The local array is
+ * added to the shared counts with atomic_add once the chunk is done. */
+#define ENGINE_REDISTRIBUTE_DEST_MAPPER(TYPE) \
+  engine_redistribute_dest_mapper_##TYPE(void *map_data, int num_elements, \
+                                         void *extra_data) { \
+    struct TYPE *parts = (struct TYPE *)map_data; \
+    struct redist_mapper_data *mydata = \
+        (struct redist_mapper_data *)extra_data; \
+    struct space *s = mydata->s; \
+    int *dest = \
+        mydata->dest + (ptrdiff_t)(parts - (struct TYPE *)mydata->base); \
+    int *lcounts = NULL; \
+    if ((lcounts = (int *)calloc( \
+             sizeof(int), mydata->nr_nodes * mydata->nr_nodes)) == NULL) \
+      error("Failed to allocate counts thread-specific buffer"); \
+    for (int k = 0; k < num_elements; k++) { \
+      for (int j = 0; j < 3; j++) { \
+        if (parts[k].x[j] < 0.0) \
+          parts[k].x[j] += s->dim[j]; \
+        else if (parts[k].x[j] >= s->dim[j]) \
+          parts[k].x[j] -= s->dim[j]; \
+      } \
+      const int cid = cell_getid(s->cdim, parts[k].x[0] * s->iwidth[0], \
+                                 parts[k].x[1] * s->iwidth[1], \
+                                 parts[k].x[2] * s->iwidth[2]); \
+      dest[k] = s->cells_top[cid].nodeID; \
+      size_t ind = mydata->nodeID * mydata->nr_nodes + dest[k]; \
+      lcounts[ind] += 1; \
+    } \
+    for (int k = 0; k < (mydata->nr_nodes * mydata->nr_nodes); k++) \
+      atomic_add(&mydata->counts[k], lcounts[k]); \
+    free(lcounts); \
+  }
+
+/**
+ * @brief Accumulate the counts of particles per destination node.
+ * Threadpool helper that also records the destination node of each particle.
+ *
+ * part version.
+ */
+static void ENGINE_REDISTRIBUTE_DEST_MAPPER(part);
+
+/**
+ * @brief Accumulate the counts of star particles per destination node.
+ * Threadpool helper that also records the destination node of each particle.
+ *
+ * spart version.
+ */
+static void ENGINE_REDISTRIBUTE_DEST_MAPPER(spart);
+
+/**
+ * @brief Accumulate the counts of gravity particles per destination node.
+ * Threadpool helper that also records the destination node of each particle.
+ *
+ * gpart version.
+ */
+static void ENGINE_REDISTRIBUTE_DEST_MAPPER(gpart);
+
+/**
+ * @brief Accumulate the counts of black holes particles per destination node.
+ * Threadpool helper that also records the destination node of each particle.
+ *
+ * bpart version.
+ */
+static void ENGINE_REDISTRIBUTE_DEST_MAPPER(bpart);
+
+#endif /* redist_mapper_data */
+
+#ifdef WITH_MPI /* savelink_mapper_data */
+
+/* Support for saving the linkage between gparts and parts/sparts/bparts. */
+struct savelink_mapper_data {
+  int nr_nodes;
+  /* Counts array, indexed as [sending node * nr_nodes + dest node]. */
+  int *counts;
+  /* The particle array (cast to the relevant type by the mapper). */
+  void *parts;
+  int nodeID;
+};
+
+/**
+ * @brief Save the offset of each gravity partner of a part or spart.
+ *
+ * The offset is from the start of the sorted particles to be sent to a node.
+ * This is possible as parts without gravity partners have a positive id.
+ * These offsets are used to restore the pointers on the receiving node.
+ *
+ * The offset is stored negated in the id_or_neg_offset field of the gpart
+ * and only counts the particles that have a gpart.
+ *
+ * CHECKS should be eliminated as dead code when optimizing.
+ */
+#define ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(TYPE, CHECKS) \
+  engine_redistribute_savelink_mapper_##TYPE(void *map_data, int num_elements, \
+                                             void *extra_data) { \
+    int *nodes = (int *)map_data; \
+    struct savelink_mapper_data *mydata = \
+        (struct savelink_mapper_data *)extra_data; \
+    int nodeID = mydata->nodeID; \
+    int nr_nodes = mydata->nr_nodes; \
+    int *counts = mydata->counts; \
+    struct TYPE *parts = (struct TYPE *)mydata->parts; \
+ \
+    for (int j = 0; j < num_elements; j++) { \
+      int node = nodes[j]; \
+      int count = 0; \
+      size_t offset = 0; \
+      for (int i = 0; i < node; i++) offset += counts[nodeID * nr_nodes + i]; \
+ \
+      for (int k = 0; k < counts[nodeID * nr_nodes + node]; k++) { \
+        if (parts[k + offset].gpart != NULL) { \
+          if (CHECKS) \
+            if (parts[k + offset].gpart->id_or_neg_offset > 0) \
+              error("Trying to link a partnerless " #TYPE "!"); \
+          parts[k + offset].gpart->id_or_neg_offset = -count; \
+          count++; \
+        } \
+      } \
+    } \
+  }
+
+/**
+ * @brief Save position of part-gpart links.
+ * Threadpool helper storing in each linked gpart the offset of its part.
+ */
+#ifdef SWIFT_DEBUG_CHECKS
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 1);
+#else
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 0);
+#endif
+
+/**
+ * @brief Save position of spart-gpart links.
+ * Threadpool helper storing in each linked gpart the offset of its spart.
+ */
+#ifdef SWIFT_DEBUG_CHECKS
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 1);
+#else
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 0);
+#endif
+
+/**
+ * @brief Save position of bpart-gpart links.
+ * Threadpool helper storing in each linked gpart the offset of its bpart.
+ */
+#ifdef SWIFT_DEBUG_CHECKS
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(bpart, 1);
+#else
+static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(bpart, 0);
+#endif
+
+#endif /* savelink_mapper_data */
+
+#ifdef WITH_MPI /* relink_mapper_data */
+
+/* Support for relinking parts, gparts, sparts and bparts after moving between
+ * nodes. */
+struct relink_mapper_data {
+  int nodeID;
+  int nr_nodes;
+  /* Exchange counts of the parts, sparts, gparts and bparts respectively. */
+  int *counts;
+  int *s_counts;
+  int *g_counts;
+  int *b_counts;
+  struct space *s;
+};
+
+/**
+ * @brief Restore the part/gpart and spart/gpart links for a list of nodes.
+ *
+ * @param map_data address of nodes to process.
+ * @param num_elements the number nodes to process.
+ * @param extra_data additional data defining the context (a
+ * relink_mapper_data).
+ */ +static void engine_redistribute_relink_mapper(void *map_data, int num_elements, + void *extra_data) { + + int *nodes = (int *)map_data; + struct relink_mapper_data *mydata = (struct relink_mapper_data *)extra_data; + + int nodeID = mydata->nodeID; + int nr_nodes = mydata->nr_nodes; + int *counts = mydata->counts; + int *g_counts = mydata->g_counts; + int *s_counts = mydata->s_counts; + int *b_counts = mydata->b_counts; + struct space *s = mydata->s; + + for (int i = 0; i < num_elements; i++) { + + int node = nodes[i]; + + /* Get offsets to correct parts of the counts arrays for this node. */ + size_t offset_parts = 0; + size_t offset_gparts = 0; + size_t offset_sparts = 0; + size_t offset_bparts = 0; + for (int n = 0; n < node; n++) { + int ind_recv = n * nr_nodes + nodeID; + offset_parts += counts[ind_recv]; + offset_gparts += g_counts[ind_recv]; + offset_sparts += s_counts[ind_recv]; + offset_bparts += b_counts[ind_recv]; + } + + /* Number of gparts sent from this node. */ + int ind_recv = node * nr_nodes + nodeID; + const size_t count_gparts = g_counts[ind_recv]; + + /* Loop over the gparts received from this node */ + for (size_t k = offset_gparts; k < offset_gparts + count_gparts; k++) { + + /* Does this gpart have a gas partner ? */ + if (s->gparts[k].type == swift_type_gas) { + + const ptrdiff_t partner_index = + offset_parts - s->gparts[k].id_or_neg_offset; + + /* Re-link */ + s->gparts[k].id_or_neg_offset = -partner_index; + s->parts[partner_index].gpart = &s->gparts[k]; + } + + /* Does this gpart have a star partner ? */ + else if (s->gparts[k].type == swift_type_stars) { + + const ptrdiff_t partner_index = + offset_sparts - s->gparts[k].id_or_neg_offset; + + /* Re-link */ + s->gparts[k].id_or_neg_offset = -partner_index; + s->sparts[partner_index].gpart = &s->gparts[k]; + } + + /* Does this gpart have a black hole partner ? 
*/ + else if (s->gparts[k].type == swift_type_black_hole) { + + const ptrdiff_t partner_index = + offset_bparts - s->gparts[k].id_or_neg_offset; + + /* Re-link */ + s->gparts[k].id_or_neg_offset = -partner_index; + s->bparts[partner_index].gpart = &s->gparts[k]; + } + } + } +} + +#endif /* relink_mapper_data */ + +/** + * @brief Redistribute the particles amongst the nodes according + * to their cell's node IDs. + * + * The strategy here is as follows: + * 1) Each node counts the number of particles it has to send to each other + * node. + * 2) The number of particles of each type is then exchanged. + * 3) The particles to send are placed in a temporary buffer in which the + * part-gpart links are preserved. + * 4) Each node allocates enough space for the new particles. + * 5) (Asynchronous) communications are issued to transfer the data. + * + * + * @param e The #engine. + */ +void engine_redistribute(struct engine *e) { + +#ifdef WITH_MPI + + const int nr_nodes = e->nr_nodes; + const int nodeID = e->nodeID; + struct space *s = e->s; + struct cell *cells = s->cells_top; + const int nr_cells = s->nr_cells; + struct xpart *xparts = s->xparts; + struct part *parts = s->parts; + struct gpart *gparts = s->gparts; + struct spart *sparts = s->sparts; + struct bpart *bparts = s->bparts; + ticks tic = getticks(); + + size_t nr_parts = s->nr_parts; + size_t nr_gparts = s->nr_gparts; + size_t nr_sparts = s->nr_sparts; + size_t nr_bparts = s->nr_bparts; + + /* Start by moving inhibited particles to the end of the arrays */ + for (size_t k = 0; k < nr_parts; /* void */) { + if (parts[k].time_bin == time_bin_inhibited || + parts[k].time_bin == time_bin_not_created) { + nr_parts -= 1; + + /* Swap the particle */ + memswap(&parts[k], &parts[nr_parts], sizeof(struct part)); + + /* Swap the xpart */ + memswap(&xparts[k], &xparts[nr_parts], sizeof(struct xpart)); + + /* Swap the link with the gpart */ + if (parts[k].gpart != NULL) { + parts[k].gpart->id_or_neg_offset = -k; + } + if 
(parts[nr_parts].gpart != NULL) { + parts[nr_parts].gpart->id_or_neg_offset = -nr_parts; + } + } else { + k++; + } + } + + /* Now move inhibited star particles to the end of the arrays */ + for (size_t k = 0; k < nr_sparts; /* void */) { + if (sparts[k].time_bin == time_bin_inhibited || + sparts[k].time_bin == time_bin_not_created) { + nr_sparts -= 1; + + /* Swap the particle */ + memswap(&s->sparts[k], &s->sparts[nr_sparts], sizeof(struct spart)); + + /* Swap the link with the gpart */ + if (s->sparts[k].gpart != NULL) { + s->sparts[k].gpart->id_or_neg_offset = -k; + } + if (s->sparts[nr_sparts].gpart != NULL) { + s->sparts[nr_sparts].gpart->id_or_neg_offset = -nr_sparts; + } + } else { + k++; + } + } + + /* Now move inhibited black hole particles to the end of the arrays */ + for (size_t k = 0; k < nr_bparts; /* void */) { + if (bparts[k].time_bin == time_bin_inhibited || + bparts[k].time_bin == time_bin_not_created) { + nr_bparts -= 1; + + /* Swap the particle */ + memswap(&s->bparts[k], &s->bparts[nr_bparts], sizeof(struct bpart)); + + /* Swap the link with the gpart */ + if (s->bparts[k].gpart != NULL) { + s->bparts[k].gpart->id_or_neg_offset = -k; + } + if (s->bparts[nr_bparts].gpart != NULL) { + s->bparts[nr_bparts].gpart->id_or_neg_offset = -nr_bparts; + } + } else { + k++; + } + } + + /* Finally do the same with the gravity particles */ + for (size_t k = 0; k < nr_gparts; /* void */) { + if (gparts[k].time_bin == time_bin_inhibited || + gparts[k].time_bin == time_bin_not_created) { + nr_gparts -= 1; + + /* Swap the particle */ + memswap(&s->gparts[k], &s->gparts[nr_gparts], sizeof(struct gpart)); + + /* Swap the link with part/spart */ + if (s->gparts[k].type == swift_type_gas) { + s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; + } else if (s->gparts[k].type == swift_type_stars) { + s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; + } else if (s->gparts[k].type == swift_type_black_hole) { + 
s->bparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; + } + + if (s->gparts[nr_gparts].type == swift_type_gas) { + s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = + &s->gparts[nr_gparts]; + } else if (s->gparts[nr_gparts].type == swift_type_stars) { + s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = + &s->gparts[nr_gparts]; + } else if (s->gparts[nr_gparts].type == swift_type_black_hole) { + s->bparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = + &s->gparts[nr_gparts]; + } + } else { + k++; + } + } + + /* Now we are ready to deal with real particles and can start the exchange. */ + + /* Allocate temporary arrays to store the counts of particles to be sent + * and the destination of each particle */ + int *counts; + if ((counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL) + error("Failed to allocate counts temporary buffer."); + + int *dest; + if ((dest = (int *)swift_malloc("dest", sizeof(int) * nr_parts)) == NULL) + error("Failed to allocate dest temporary buffer."); + + /* Simple index of node IDs, used for mappers over nodes. */ + int *nodes = NULL; + if ((nodes = (int *)malloc(sizeof(int) * nr_nodes)) == NULL) + error("Failed to allocate nodes temporary buffer."); + for (int k = 0; k < nr_nodes; k++) nodes[k] = k; + + /* Get destination of each particle */ + struct redist_mapper_data redist_data; + redist_data.s = s; + redist_data.nodeID = nodeID; + redist_data.nr_nodes = nr_nodes; + + redist_data.counts = counts; + redist_data.dest = dest; + redist_data.base = (void *)parts; + + threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_part, parts, + nr_parts, sizeof(struct part), 0, &redist_data); + + /* Sort the particles according to their cell index. */ + if (nr_parts > 0) + space_parts_sort(s->parts, s->xparts, dest, &counts[nodeID * nr_nodes], + nr_nodes, 0); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that the part have been sorted correctly. 
*/ + for (size_t k = 0; k < nr_parts; k++) { + const struct part *p = &s->parts[k]; + + if (p->time_bin == time_bin_inhibited) + error("Inhibited particle found after sorting!"); + + if (p->time_bin == time_bin_not_created) + error("Inhibited particle found after sorting!"); + + /* New cell index */ + const int new_cid = + cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1], + p->x[2] * s->iwidth[2]); + + /* New cell of this part */ + const struct cell *c = &s->cells_top[new_cid]; + const int new_node = c->nodeID; + + if (dest[k] != new_node) + error("part's new node index not matching sorted index."); + + if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->width[0] || + p->x[1] < c->loc[1] || p->x[1] > c->loc[1] + c->width[1] || + p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2]) + error("part not sorted into the right top-level cell!"); + } +#endif + + /* We will need to re-link the gpart partners of parts, so save their + * relative positions in the sent lists. 
*/ + if (nr_parts > 0 && nr_gparts > 0) { + + struct savelink_mapper_data savelink_data; + savelink_data.nr_nodes = nr_nodes; + savelink_data.counts = counts; + savelink_data.parts = (void *)parts; + savelink_data.nodeID = nodeID; + threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_part, + nodes, nr_nodes, sizeof(int), 0, &savelink_data); + } + swift_free("dest", dest); + + /* Get destination of each s-particle */ + int *s_counts; + if ((s_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL) + error("Failed to allocate s_counts temporary buffer."); + + int *s_dest; + if ((s_dest = (int *)swift_malloc("s_dest", sizeof(int) * nr_sparts)) == NULL) + error("Failed to allocate s_dest temporary buffer."); + + redist_data.counts = s_counts; + redist_data.dest = s_dest; + redist_data.base = (void *)sparts; + + threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_spart, sparts, + nr_sparts, sizeof(struct spart), 0, &redist_data); + + /* Sort the particles according to their cell index. */ + if (nr_sparts > 0) + space_sparts_sort(s->sparts, s_dest, &s_counts[nodeID * nr_nodes], nr_nodes, + 0); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that the spart have been sorted correctly. 
*/ + for (size_t k = 0; k < nr_sparts; k++) { + const struct spart *sp = &s->sparts[k]; + + if (sp->time_bin == time_bin_inhibited) + error("Inhibited particle found after sorting!"); + + if (sp->time_bin == time_bin_not_created) + error("Inhibited particle found after sorting!"); + + /* New cell index */ + const int new_cid = + cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1], + sp->x[2] * s->iwidth[2]); + + /* New cell of this spart */ + const struct cell *c = &s->cells_top[new_cid]; + const int new_node = c->nodeID; + + if (s_dest[k] != new_node) + error("spart's new node index not matching sorted index."); + + if (sp->x[0] < c->loc[0] || sp->x[0] > c->loc[0] + c->width[0] || + sp->x[1] < c->loc[1] || sp->x[1] > c->loc[1] + c->width[1] || + sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2]) + error("spart not sorted into the right top-level cell!"); + } +#endif + + /* We need to re-link the gpart partners of sparts. */ + if (nr_sparts > 0) { + + struct savelink_mapper_data savelink_data; + savelink_data.nr_nodes = nr_nodes; + savelink_data.counts = s_counts; + savelink_data.parts = (void *)sparts; + savelink_data.nodeID = nodeID; + threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_spart, + nodes, nr_nodes, sizeof(int), 0, &savelink_data); + } + swift_free("s_dest", s_dest); + + /* Get destination of each b-particle */ + int *b_counts; + if ((b_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL) + error("Failed to allocate b_counts temporary buffer."); + + int *b_dest; + if ((b_dest = (int *)swift_malloc("b_dest", sizeof(int) * nr_bparts)) == NULL) + error("Failed to allocate b_dest temporary buffer."); + + redist_data.counts = b_counts; + redist_data.dest = b_dest; + redist_data.base = (void *)bparts; + + threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_bpart, bparts, + nr_bparts, sizeof(struct bpart), 0, &redist_data); + + /* Sort the particles according to their cell index. 
*/ + if (nr_bparts > 0) + space_bparts_sort(s->bparts, b_dest, &b_counts[nodeID * nr_nodes], nr_nodes, + 0); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that the bpart have been sorted correctly. */ + for (size_t k = 0; k < nr_bparts; k++) { + const struct bpart *bp = &s->bparts[k]; + + if (bp->time_bin == time_bin_inhibited) + error("Inhibited particle found after sorting!"); + + if (bp->time_bin == time_bin_not_created) + error("Inhibited particle found after sorting!"); + + /* New cell index */ + const int new_cid = + cell_getid(s->cdim, bp->x[0] * s->iwidth[0], bp->x[1] * s->iwidth[1], + bp->x[2] * s->iwidth[2]); + + /* New cell of this bpart */ + const struct cell *c = &s->cells_top[new_cid]; + const int new_node = c->nodeID; + + if (b_dest[k] != new_node) + error("bpart's new node index not matching sorted index."); + + if (bp->x[0] < c->loc[0] || bp->x[0] > c->loc[0] + c->width[0] || + bp->x[1] < c->loc[1] || bp->x[1] > c->loc[1] + c->width[1] || + bp->x[2] < c->loc[2] || bp->x[2] > c->loc[2] + c->width[2]) + error("bpart not sorted into the right top-level cell!"); + } +#endif + + /* We need to re-link the gpart partners of bparts. 
*/ + if (nr_bparts > 0) { + + struct savelink_mapper_data savelink_data; + savelink_data.nr_nodes = nr_nodes; + savelink_data.counts = b_counts; + savelink_data.parts = (void *)bparts; + savelink_data.nodeID = nodeID; + threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_bpart, + nodes, nr_nodes, sizeof(int), 0, &savelink_data); + } + swift_free("b_dest", b_dest); + + /* Get destination of each g-particle */ + int *g_counts; + if ((g_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL) + error("Failed to allocate g_counts temporary buffer."); + + int *g_dest; + if ((g_dest = (int *)swift_malloc("g_dest", sizeof(int) * nr_gparts)) == NULL) + error("Failed to allocate g_dest temporary buffer."); + + redist_data.counts = g_counts; + redist_data.dest = g_dest; + redist_data.base = (void *)gparts; + + threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_gpart, gparts, + nr_gparts, sizeof(struct gpart), 0, &redist_data); + + /* Sort the gparticles according to their cell index. */ + if (nr_gparts > 0) + space_gparts_sort(s->gparts, s->parts, s->sparts, s->bparts, g_dest, + &g_counts[nodeID * nr_nodes], nr_nodes); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that the gpart have been sorted correctly. 
*/ + for (size_t k = 0; k < nr_gparts; k++) { + const struct gpart *gp = &s->gparts[k]; + + if (gp->time_bin == time_bin_inhibited) + error("Inhibited particle found after sorting!"); + + if (gp->time_bin == time_bin_not_created) + error("Inhibited particle found after sorting!"); + + /* New cell index */ + const int new_cid = + cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1], + gp->x[2] * s->iwidth[2]); + + /* New cell of this gpart */ + const struct cell *c = &s->cells_top[new_cid]; + const int new_node = c->nodeID; + + if (g_dest[k] != new_node) + error("gpart's new node index not matching sorted index (%d != %d).", + g_dest[k], new_node); + + if (gp->x[0] < c->loc[0] || gp->x[0] > c->loc[0] + c->width[0] || + gp->x[1] < c->loc[1] || gp->x[1] > c->loc[1] + c->width[1] || + gp->x[2] < c->loc[2] || gp->x[2] > c->loc[2] + c->width[2]) + error("gpart not sorted into the right top-level cell!"); + } +#endif + + swift_free("g_dest", g_dest); + + /* Get all the counts from all the nodes. */ + if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM, + MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to allreduce particle transfer counts."); + + /* Get all the g_counts from all the nodes. */ + if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT, + MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to allreduce gparticle transfer counts."); + + /* Get all the s_counts from all the nodes. */ + if (MPI_Allreduce(MPI_IN_PLACE, s_counts, nr_nodes * nr_nodes, MPI_INT, + MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to allreduce sparticle transfer counts."); + + /* Get all the b_counts from all the nodes. */ + if (MPI_Allreduce(MPI_IN_PLACE, b_counts, nr_nodes * nr_nodes, MPI_INT, + MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to allreduce bparticle transfer counts."); + + /* Report how many particles will be moved. 
*/ + if (e->verbose) { + if (e->nodeID == 0) { + size_t total = 0, g_total = 0, s_total = 0, b_total = 0; + size_t unmoved = 0, g_unmoved = 0, s_unmoved = 0, b_unmoved = 0; + for (int p = 0, r = 0; p < nr_nodes; p++) { + for (int n = 0; n < nr_nodes; n++) { + total += counts[r]; + g_total += g_counts[r]; + s_total += s_counts[r]; + b_total += b_counts[r]; + if (p == n) { + unmoved += counts[r]; + g_unmoved += g_counts[r]; + s_unmoved += s_counts[r]; + b_unmoved += b_counts[r]; + } + r++; + } + } + if (total > 0) + message("%zu of %zu (%.2f%%) of particles moved", total - unmoved, + total, 100.0 * (double)(total - unmoved) / (double)total); + if (g_total > 0) + message("%zu of %zu (%.2f%%) of g-particles moved", g_total - g_unmoved, + g_total, + 100.0 * (double)(g_total - g_unmoved) / (double)g_total); + if (s_total > 0) + message("%zu of %zu (%.2f%%) of s-particles moved", s_total - s_unmoved, + s_total, + 100.0 * (double)(s_total - s_unmoved) / (double)s_total); + if (b_total > 0) + message("%zu of %zu (%.2f%%) of b-particles moved", b_total - b_unmoved, + b_total, + 100.0 * (double)(b_total - b_unmoved) / (double)b_total); + } + } + + /* Now each node knows how many parts, sparts, bparts, and gparts will be + * transferred to every other node. Get the new numbers of particles for this + * node. */ + size_t nr_parts_new = 0, nr_gparts_new = 0, nr_sparts_new = 0, + nr_bparts_new = 0; + for (int k = 0; k < nr_nodes; k++) + nr_parts_new += counts[k * nr_nodes + nodeID]; + for (int k = 0; k < nr_nodes; k++) + nr_gparts_new += g_counts[k * nr_nodes + nodeID]; + for (int k = 0; k < nr_nodes; k++) + nr_sparts_new += s_counts[k * nr_nodes + nodeID]; + for (int k = 0; k < nr_nodes; k++) + nr_bparts_new += b_counts[k * nr_nodes + nodeID]; + + /* Now exchange the particles, type by type to keep the memory required + * under control. */ + + /* SPH particles. 
*/ + void *new_parts = engine_do_redistribute( + "parts", counts, (char *)s->parts, nr_parts_new, sizeof(struct part), + part_align, part_mpi_type, nr_nodes, nodeID); + swift_free("parts", s->parts); + s->parts = (struct part *)new_parts; + s->nr_parts = nr_parts_new; + s->size_parts = engine_redistribute_alloc_margin * nr_parts_new; + + /* Extra SPH particle properties. */ + new_parts = engine_do_redistribute( + "xparts", counts, (char *)s->xparts, nr_parts_new, sizeof(struct xpart), + xpart_align, xpart_mpi_type, nr_nodes, nodeID); + swift_free("xparts", s->xparts); + s->xparts = (struct xpart *)new_parts; + + /* Gravity particles. */ + new_parts = engine_do_redistribute( + "gparts", g_counts, (char *)s->gparts, nr_gparts_new, + sizeof(struct gpart), gpart_align, gpart_mpi_type, nr_nodes, nodeID); + swift_free("gparts", s->gparts); + s->gparts = (struct gpart *)new_parts; + s->nr_gparts = nr_gparts_new; + s->size_gparts = engine_redistribute_alloc_margin * nr_gparts_new; + + /* Star particles. */ + new_parts = engine_do_redistribute( + "sparts", s_counts, (char *)s->sparts, nr_sparts_new, + sizeof(struct spart), spart_align, spart_mpi_type, nr_nodes, nodeID); + swift_free("sparts", s->sparts); + s->sparts = (struct spart *)new_parts; + s->nr_sparts = nr_sparts_new; + s->size_sparts = engine_redistribute_alloc_margin * nr_sparts_new; + + /* Black holes particles. */ + new_parts = engine_do_redistribute( + "bparts", b_counts, (char *)s->bparts, nr_bparts_new, + sizeof(struct bpart), bpart_align, bpart_mpi_type, nr_nodes, nodeID); + swift_free("bparts", s->bparts); + s->bparts = (struct bpart *)new_parts; + s->nr_bparts = nr_bparts_new; + s->size_bparts = engine_redistribute_alloc_margin * nr_bparts_new; + + /* All particles have now arrived. Time for some final operations on the + stuff we just received */ + + /* Restore the part<->gpart and spart<->gpart links. + * Generate indices and counts for threadpool tasks. Note we process a node + * at a time. 
*/ + struct relink_mapper_data relink_data; + relink_data.s = s; + relink_data.counts = counts; + relink_data.g_counts = g_counts; + relink_data.s_counts = s_counts; + relink_data.b_counts = b_counts; + relink_data.nodeID = nodeID; + relink_data.nr_nodes = nr_nodes; + + threadpool_map(&e->threadpool, engine_redistribute_relink_mapper, nodes, + nr_nodes, sizeof(int), 1, &relink_data); + free(nodes); + + /* Clean up the counts now we are done. */ + free(counts); + free(g_counts); + free(s_counts); + free(b_counts); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that all parts are in the right place. */ + for (size_t k = 0; k < nr_parts_new; k++) { + const int cid = cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0], + s->parts[k].x[1] * s->iwidth[1], + s->parts[k].x[2] * s->iwidth[2]); + if (cells[cid].nodeID != nodeID) + error("Received particle (%zu) that does not belong here (nodeID=%i).", k, + cells[cid].nodeID); + } + for (size_t k = 0; k < nr_gparts_new; k++) { + const int cid = cell_getid(s->cdim, s->gparts[k].x[0] * s->iwidth[0], + s->gparts[k].x[1] * s->iwidth[1], + s->gparts[k].x[2] * s->iwidth[2]); + if (cells[cid].nodeID != nodeID) + error("Received g-particle (%zu) that does not belong here (nodeID=%i).", + k, cells[cid].nodeID); + } + for (size_t k = 0; k < nr_sparts_new; k++) { + const int cid = cell_getid(s->cdim, s->sparts[k].x[0] * s->iwidth[0], + s->sparts[k].x[1] * s->iwidth[1], + s->sparts[k].x[2] * s->iwidth[2]); + if (cells[cid].nodeID != nodeID) + error("Received s-particle (%zu) that does not belong here (nodeID=%i).", + k, cells[cid].nodeID); + } + for (size_t k = 0; k < nr_bparts_new; k++) { + const int cid = cell_getid(s->cdim, s->bparts[k].x[0] * s->iwidth[0], + s->bparts[k].x[1] * s->iwidth[1], + s->bparts[k].x[2] * s->iwidth[2]); + if (cells[cid].nodeID != nodeID) + error("Received b-particle (%zu) that does not belong here (nodeID=%i).", + k, cells[cid].nodeID); + } + + /* Verify that the links are correct */ + part_verify_links(s->parts, 
s->gparts, s->sparts, s->bparts, nr_parts_new, + nr_gparts_new, nr_sparts_new, nr_bparts_new, e->verbose); + +#endif + + /* Be verbose about what just happened. */ + if (e->verbose) { + int my_cells = 0; + for (int k = 0; k < nr_cells; k++) + if (cells[k].nodeID == nodeID) my_cells += 1; + message( + "node %i now has %zu parts, %zu sparts, %zu bparts and %zu gparts in " + "%i cells.", + nodeID, nr_parts_new, nr_sparts_new, nr_bparts_new, nr_gparts_new, + my_cells); + } + + /* Flag that we do not have any extra particles any more */ + s->nr_extra_parts = 0; + s->nr_extra_gparts = 0; + s->nr_extra_sparts = 0; + s->nr_extra_bparts = 0; + + /* Flag that a redistribute has taken place */ + e->step_props |= engine_step_prop_redistribute; + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +#else + error("SWIFT was not compiled with MPI support."); +#endif +} diff --git a/src/runner.c b/src/runner.c deleted file mode 100644 index 38c31971555a16e01f6b5f3d056a018ee2c299a2..0000000000000000000000000000000000000000 --- a/src/runner.c +++ /dev/null @@ -1,4885 +0,0 @@ -/******************************************************************************* - * This file is part of SWIFT. - * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) - * Matthieu Schaller (matthieu.schaller@durham.ac.uk) - * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) - * 2016 John A. Regan (john.a.regan@durham.ac.uk) - * Tom Theuns (tom.theuns@durham.ac.uk) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - ******************************************************************************/ - -/* Config parameters. */ -#include "../config.h" - -/* Some standard headers. */ -#include <float.h> -#include <limits.h> -#include <stdlib.h> - -/* MPI headers. */ -#ifdef WITH_MPI -#include <mpi.h> -#endif - -/* This object's header. */ -#include "runner.h" - -/* Local headers. */ -#include "active.h" -#include "approx_math.h" -#include "atomic.h" -#include "black_holes.h" -#include "black_holes_properties.h" -#include "cell.h" -#include "chemistry.h" -#include "const.h" -#include "cooling.h" -#include "debug.h" -#include "drift.h" -#include "engine.h" -#include "entropy_floor.h" -#include "error.h" -#include "feedback.h" -#include "gravity.h" -#include "hydro.h" -#include "hydro_properties.h" -#include "kick.h" -#include "logger.h" -#include "memuse.h" -#include "minmax.h" -#include "pressure_floor.h" -#include "pressure_floor_iact.h" -#include "runner_doiact_vec.h" -#include "scheduler.h" -#include "sort_part.h" -#include "space.h" -#include "space_getsid.h" -#include "star_formation.h" -#include "star_formation_logger.h" -#include "stars.h" -#include "task.h" -#include "timers.h" -#include "timestep.h" -#include "timestep_limiter.h" -#include "tracers.h" - -/* Unique identifier of loop types */ -#define TASK_LOOP_DENSITY 0 -#define TASK_LOOP_GRADIENT 1 -#define TASK_LOOP_FORCE 2 -#define TASK_LOOP_LIMITER 3 -#define TASK_LOOP_FEEDBACK 4 -#define TASK_LOOP_SWALLOW 5 - -/* Import the density loop functions. */ -#define FUNCTION density -#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY -#include "runner_doiact.h" -#undef FUNCTION -#undef FUNCTION_TASK_LOOP - -/* Import the gradient loop functions (if required). 
*/ -#ifdef EXTRA_HYDRO_LOOP -#define FUNCTION gradient -#define FUNCTION_TASK_LOOP TASK_LOOP_GRADIENT -#include "runner_doiact.h" -#undef FUNCTION -#undef FUNCTION_TASK_LOOP -#endif - -/* Import the force loop functions. */ -#define FUNCTION force -#define FUNCTION_TASK_LOOP TASK_LOOP_FORCE -#include "runner_doiact.h" -#undef FUNCTION -#undef FUNCTION_TASK_LOOP - -/* Import the limiter loop functions. */ -#define FUNCTION limiter -#define FUNCTION_TASK_LOOP TASK_LOOP_LIMITER -#include "runner_doiact.h" -#undef FUNCTION -#undef FUNCTION_TASK_LOOP - -/* Import the gravity loop functions. */ -#include "runner_doiact_grav.h" - -/* Import the stars density loop functions. */ -#define FUNCTION density -#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY -#include "runner_doiact_stars.h" -#undef FUNCTION_TASK_LOOP -#undef FUNCTION - -/* Import the stars feedback loop functions. */ -#define FUNCTION feedback -#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK -#include "runner_doiact_stars.h" -#undef FUNCTION_TASK_LOOP -#undef FUNCTION - -/* Import the black hole density loop functions. */ -#define FUNCTION density -#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY -#include "runner_doiact_black_holes.h" -#undef FUNCTION_TASK_LOOP -#undef FUNCTION - -/* Import the black hole feedback loop functions. */ -#define FUNCTION swallow -#define FUNCTION_TASK_LOOP TASK_LOOP_SWALLOW -#include "runner_doiact_black_holes.h" -#undef FUNCTION_TASK_LOOP -#undef FUNCTION - -/* Import the black hole feedback loop functions. */ -#define FUNCTION feedback -#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK -#include "runner_doiact_black_holes.h" -#undef FUNCTION_TASK_LOOP -#undef FUNCTION - -/** - * @brief Intermediate task after the density to check that the smoothing - * lengths are correct. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? 
- */ -void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) { - - struct spart *restrict sparts = c->stars.parts; - const struct engine *e = r->e; - const struct unit_system *us = e->internal_units; - const int with_cosmology = (e->policy & engine_policy_cosmology); - const struct cosmology *cosmo = e->cosmology; - const struct feedback_props *feedback_props = e->feedback_props; - const float stars_h_max = e->hydro_properties->h_max; - const float stars_h_min = e->hydro_properties->h_min; - const float eps = e->stars_properties->h_tolerance; - const float stars_eta_dim = - pow_dimension(e->stars_properties->eta_neighbours); - const int max_smoothing_iter = e->stars_properties->max_smoothing_iterations; - int redo = 0, scount = 0; - - /* Running value of the maximal smoothing length */ - double h_max = c->stars.h_max; - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != e->nodeID) - error("Running the star ghost on a foreign node!"); -#endif - - /* Anything to do here? */ - if (c->stars.count == 0) return; - if (!cell_is_active_stars(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - runner_do_stars_ghost(r, c->progeny[k], 0); - - /* Update h_max */ - h_max = max(h_max, c->progeny[k]->stars.h_max); - } - } - } else { - - /* Init the list of active particles that have to be updated. 
*/ - int *sid = NULL; - float *h_0 = NULL; - float *left = NULL; - float *right = NULL; - if ((sid = (int *)malloc(sizeof(int) * c->stars.count)) == NULL) - error("Can't allocate memory for sid."); - if ((h_0 = (float *)malloc(sizeof(float) * c->stars.count)) == NULL) - error("Can't allocate memory for h_0."); - if ((left = (float *)malloc(sizeof(float) * c->stars.count)) == NULL) - error("Can't allocate memory for left."); - if ((right = (float *)malloc(sizeof(float) * c->stars.count)) == NULL) - error("Can't allocate memory for right."); - for (int k = 0; k < c->stars.count; k++) - if (spart_is_active(&sparts[k], e) && - feedback_is_active(&sparts[k], e->time, cosmo, with_cosmology)) { - sid[scount] = k; - h_0[scount] = sparts[k].h; - left[scount] = 0.f; - right[scount] = stars_h_max; - ++scount; - } - - /* While there are particles that need to be updated... */ - for (int num_reruns = 0; scount > 0 && num_reruns < max_smoothing_iter; - num_reruns++) { - - /* Reset the redo-count. */ - redo = 0; - - /* Loop over the remaining active parts in this cell. */ - for (int i = 0; i < scount; i++) { - - /* Get a direct pointer on the part. */ - struct spart *sp = &sparts[sid[i]]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Is this part within the timestep? 
*/ - if (!spart_is_active(sp, e)) - error("Ghost applied to inactive particle"); -#endif - - /* Get some useful values */ - const float h_init = h_0[i]; - const float h_old = sp->h; - const float h_old_dim = pow_dimension(h_old); - const float h_old_dim_minus_one = pow_dimension_minus_one(h_old); - - float h_new; - int has_no_neighbours = 0; - - if (sp->density.wcount == 0.f) { /* No neighbours case */ - - /* Flag that there were no neighbours */ - has_no_neighbours = 1; - - /* Double h and try again */ - h_new = 2.f * h_old; - - } else { - - /* Finish the density calculation */ - stars_end_density(sp, cosmo); - - /* Compute one step of the Newton-Raphson scheme */ - const float n_sum = sp->density.wcount * h_old_dim; - const float n_target = stars_eta_dim; - const float f = n_sum - n_target; - const float f_prime = - sp->density.wcount_dh * h_old_dim + - hydro_dimension * sp->density.wcount * h_old_dim_minus_one; - - /* Improve the bisection bounds */ - if (n_sum < n_target) - left[i] = max(left[i], h_old); - else if (n_sum > n_target) - right[i] = min(right[i], h_old); - -#ifdef SWIFT_DEBUG_CHECKS - /* Check the validity of the left and right bounds */ - if (left[i] > right[i]) - error("Invalid left (%e) and right (%e)", left[i], right[i]); -#endif - - /* Skip if h is already h_max and we don't have enough neighbours */ - /* Same if we are below h_min */ - if (((sp->h >= stars_h_max) && (f < 0.f)) || - ((sp->h <= stars_h_min) && (f > 0.f))) { - - stars_reset_feedback(sp); - - /* Only do feedback if stars have a reasonable birth time */ - if (feedback_do_feedback(sp)) { - - const integertime_t ti_step = get_integer_timestep(sp->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(e->ti_current - 1, sp->time_bin); - - /* Get particle time-step */ - double dt; - if (with_cosmology) { - dt = cosmology_get_delta_time(e->cosmology, ti_begin, - ti_begin + ti_step); - } else { - dt = get_timestep(sp->time_bin, e->time_base); - } - - /* Calculate age of 
the star at current time */ - double star_age_end_of_step; - if (with_cosmology) { - star_age_end_of_step = - cosmology_get_delta_time_from_scale_factors( - cosmo, (double)sp->birth_scale_factor, cosmo->a); - } else { - star_age_end_of_step = (float)e->time - sp->birth_time; - } - - /* Has this star been around for a while ? */ - if (star_age_end_of_step > 0.) { - - /* Age of the star at the start of the step */ - const double star_age_beg_of_step = - max(star_age_end_of_step - dt, 0.); - - /* Compute the stellar evolution */ - feedback_evolve_spart(sp, feedback_props, cosmo, us, - star_age_beg_of_step, dt); - } else { - - /* Reset the feedback fields of the star particle */ - feedback_reset_feedback(sp, feedback_props); - } - } else { - - feedback_reset_feedback(sp, feedback_props); - } - - /* Ok, we are done with this particle */ - continue; - } - - /* Normal case: Use Newton-Raphson to get a better value of h */ - - /* Avoid floating point exception from f_prime = 0 */ - h_new = h_old - f / (f_prime + FLT_MIN); - - /* Be verbose about the particles that struggle to converge */ - if (num_reruns > max_smoothing_iter - 10) { - - message( - "Smoothing length convergence problem: iter=%d p->id=%lld " - "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f " - "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e", - num_reruns, sp->id, h_init, h_old, h_new, f, f_prime, n_sum, - n_target, left[i], right[i]); - } - - /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. 
*/ - h_new = min(h_new, 2.f * h_old); - h_new = max(h_new, 0.5f * h_old); - - /* Verify that we are actually progrssing towards the answer */ - h_new = max(h_new, left[i]); - h_new = min(h_new, right[i]); - } - - /* Check whether the particle has an inappropriate smoothing length */ - if (fabsf(h_new - h_old) > eps * h_old) { - - /* Ok, correct then */ - - /* Case where we have been oscillating around the solution */ - if ((h_new == left[i] && h_old == right[i]) || - (h_old == left[i] && h_new == right[i])) { - - /* Bissect the remaining interval */ - sp->h = pow_inv_dimension( - 0.5f * (pow_dimension(left[i]) + pow_dimension(right[i]))); - - } else { - - /* Normal case */ - sp->h = h_new; - } - - /* If below the absolute maximum, try again */ - if (sp->h < stars_h_max && sp->h > stars_h_min) { - - /* Flag for another round of fun */ - sid[redo] = sid[i]; - h_0[redo] = h_0[i]; - left[redo] = left[i]; - right[redo] = right[i]; - redo += 1; - - /* Re-initialise everything */ - stars_init_spart(sp); - feedback_init_spart(sp); - - /* Off we go ! */ - continue; - - } else if (sp->h <= stars_h_min) { - - /* Ok, this particle is a lost cause... */ - sp->h = stars_h_min; - - } else if (sp->h >= stars_h_max) { - - /* Ok, this particle is a lost cause... 
*/ - sp->h = stars_h_max; - - /* Do some damage control if no neighbours at all were found */ - if (has_no_neighbours) { - stars_spart_has_no_neighbours(sp, cosmo); - } - - } else { - error( - "Fundamental problem with the smoothing length iteration " - "logic."); - } - } - - /* We now have a particle whose smoothing length has converged */ - - /* Check if h_max has increased */ - h_max = max(h_max, sp->h); - - stars_reset_feedback(sp); - - /* Only do feedback if stars have a reasonable birth time */ - if (feedback_do_feedback(sp)) { - - const integertime_t ti_step = get_integer_timestep(sp->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(e->ti_current - 1, sp->time_bin); - - /* Get particle time-step */ - double dt; - if (with_cosmology) { - dt = cosmology_get_delta_time(e->cosmology, ti_begin, - ti_begin + ti_step); - } else { - dt = get_timestep(sp->time_bin, e->time_base); - } - - /* Calculate age of the star at current time */ - double star_age_end_of_step; - if (with_cosmology) { - star_age_end_of_step = cosmology_get_delta_time_from_scale_factors( - cosmo, sp->birth_scale_factor, (float)cosmo->a); - } else { - star_age_end_of_step = (float)e->time - sp->birth_time; - } - - /* Has this star been around for a while ? */ - if (star_age_end_of_step > 0.) { - - /* Age of the star at the start of the step */ - const double star_age_beg_of_step = - max(star_age_end_of_step - dt, 0.); - - /* Compute the stellar evolution */ - feedback_evolve_spart(sp, feedback_props, cosmo, us, - star_age_beg_of_step, dt); - } else { - - /* Reset the feedback fields of the star particle */ - feedback_reset_feedback(sp, feedback_props); - } - } else { - - /* Reset the feedback fields of the star particle */ - feedback_reset_feedback(sp, feedback_props); - } - } - - /* We now need to treat the particles whose smoothing length had not - * converged again */ - - /* Re-set the counter for the next loop (potentially). 
*/ - scount = redo; - if (scount > 0) { - - /* Climb up the cell hierarchy. */ - for (struct cell *finger = c; finger != NULL; finger = finger->parent) { - - /* Run through this cell's density interactions. */ - for (struct link *l = finger->stars.density; l != NULL; l = l->next) { - -#ifdef SWIFT_DEBUG_CHECKS - if (l->t->ti_run < r->e->ti_current) - error("Density task should have been run."); -#endif - - /* Self-interaction? */ - if (l->t->type == task_type_self) - runner_doself_subset_branch_stars_density(r, finger, sparts, sid, - scount); - - /* Otherwise, pair interaction? */ - else if (l->t->type == task_type_pair) { - - /* Left or right? */ - if (l->t->ci == finger) - runner_dopair_subset_branch_stars_density( - r, finger, sparts, sid, scount, l->t->cj); - else - runner_dopair_subset_branch_stars_density( - r, finger, sparts, sid, scount, l->t->ci); - } - - /* Otherwise, sub-self interaction? */ - else if (l->t->type == task_type_sub_self) - runner_dosub_subset_stars_density(r, finger, sparts, sid, scount, - NULL, 1); - - /* Otherwise, sub-pair interaction? */ - else if (l->t->type == task_type_sub_pair) { - - /* Left or right? */ - if (l->t->ci == finger) - runner_dosub_subset_stars_density(r, finger, sparts, sid, - scount, l->t->cj, 1); - else - runner_dosub_subset_stars_density(r, finger, sparts, sid, - scount, l->t->ci, 1); - } - } - } - } - } - - if (scount) { - error("Smoothing length failed to converge on %i particles.", scount); - } - - /* Be clean */ - free(left); - free(right); - free(sid); - free(h_0); - } - - /* Update h_max */ - c->stars.h_max = h_max; - - /* The ghost may not always be at the top level. 
- * Therefore we need to update h_max between the super- and top-levels */ - if (c->stars.ghost) { - for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { - atomic_max_d(&tmp->stars.h_max, h_max); - } - } - - if (timer) TIMER_TOC(timer_do_stars_ghost); -} - -/** - * @brief Intermediate task after the density to check that the smoothing - * lengths are correct. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_black_holes_density_ghost(struct runner *r, struct cell *c, - int timer) { - - struct bpart *restrict bparts = c->black_holes.parts; - const struct engine *e = r->e; - const struct cosmology *cosmo = e->cosmology; - const float black_holes_h_max = e->hydro_properties->h_max; - const float black_holes_h_min = e->hydro_properties->h_min; - const float eps = e->black_holes_properties->h_tolerance; - const float black_holes_eta_dim = - pow_dimension(e->black_holes_properties->eta_neighbours); - const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations; - int redo = 0, bcount = 0; - - /* Running value of the maximal smoothing length */ - double h_max = c->black_holes.h_max; - - TIMER_TIC; - - /* Anything to do here? */ - if (c->black_holes.count == 0) return; - if (!cell_is_active_black_holes(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - runner_do_black_holes_density_ghost(r, c->progeny[k], 0); - - /* Update h_max */ - h_max = max(h_max, c->progeny[k]->black_holes.h_max); - } - } - } else { - - /* Init the list of active particles that have to be updated. 
*/ - int *sid = NULL; - float *h_0 = NULL; - float *left = NULL; - float *right = NULL; - if ((sid = (int *)malloc(sizeof(int) * c->black_holes.count)) == NULL) - error("Can't allocate memory for sid."); - if ((h_0 = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL) - error("Can't allocate memory for h_0."); - if ((left = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL) - error("Can't allocate memory for left."); - if ((right = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL) - error("Can't allocate memory for right."); - for (int k = 0; k < c->black_holes.count; k++) - if (bpart_is_active(&bparts[k], e)) { - sid[bcount] = k; - h_0[bcount] = bparts[k].h; - left[bcount] = 0.f; - right[bcount] = black_holes_h_max; - ++bcount; - } - - /* While there are particles that need to be updated... */ - for (int num_reruns = 0; bcount > 0 && num_reruns < max_smoothing_iter; - num_reruns++) { - - /* Reset the redo-count. */ - redo = 0; - - /* Loop over the remaining active parts in this cell. */ - for (int i = 0; i < bcount; i++) { - - /* Get a direct pointer on the part. */ - struct bpart *bp = &bparts[sid[i]]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Is this part within the timestep? 
*/ - if (!bpart_is_active(bp, e)) - error("Ghost applied to inactive particle"); -#endif - - /* Get some useful values */ - const float h_init = h_0[i]; - const float h_old = bp->h; - const float h_old_dim = pow_dimension(h_old); - const float h_old_dim_minus_one = pow_dimension_minus_one(h_old); - - float h_new; - int has_no_neighbours = 0; - - if (bp->density.wcount == 0.f) { /* No neighbours case */ - - /* Flag that there were no neighbours */ - has_no_neighbours = 1; - - /* Double h and try again */ - h_new = 2.f * h_old; - - } else { - - /* Finish the density calculation */ - black_holes_end_density(bp, cosmo); - - /* Compute one step of the Newton-Raphson scheme */ - const float n_sum = bp->density.wcount * h_old_dim; - const float n_target = black_holes_eta_dim; - const float f = n_sum - n_target; - const float f_prime = - bp->density.wcount_dh * h_old_dim + - hydro_dimension * bp->density.wcount * h_old_dim_minus_one; - - /* Improve the bisection bounds */ - if (n_sum < n_target) - left[i] = max(left[i], h_old); - else if (n_sum > n_target) - right[i] = min(right[i], h_old); - -#ifdef SWIFT_DEBUG_CHECKS - /* Check the validity of the left and right bounds */ - if (left[i] > right[i]) - error("Invalid left (%e) and right (%e)", left[i], right[i]); -#endif - - /* Skip if h is already h_max and we don't have enough neighbours */ - /* Same if we are below h_min */ - if (((bp->h >= black_holes_h_max) && (f < 0.f)) || - ((bp->h <= black_holes_h_min) && (f > 0.f))) { - - black_holes_reset_feedback(bp); - - /* Ok, we are done with this particle */ - continue; - } - - /* Normal case: Use Newton-Raphson to get a better value of h */ - - /* Avoid floating point exception from f_prime = 0 */ - h_new = h_old - f / (f_prime + FLT_MIN); - - /* Be verbose about the particles that struggle to converge */ - if (num_reruns > max_smoothing_iter - 10) { - - message( - "Smoothing length convergence problem: iter=%d p->id=%lld " - "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f 
f_prime=%f " - "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e", - num_reruns, bp->id, h_init, h_old, h_new, f, f_prime, n_sum, - n_target, left[i], right[i]); - } - - /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */ - h_new = min(h_new, 2.f * h_old); - h_new = max(h_new, 0.5f * h_old); - - /* Verify that we are actually progrssing towards the answer */ - h_new = max(h_new, left[i]); - h_new = min(h_new, right[i]); - } - - /* Check whether the particle has an inappropriate smoothing length */ - if (fabsf(h_new - h_old) > eps * h_old) { - - /* Ok, correct then */ - - /* Case where we have been oscillating around the solution */ - if ((h_new == left[i] && h_old == right[i]) || - (h_old == left[i] && h_new == right[i])) { - - /* Bissect the remaining interval */ - bp->h = pow_inv_dimension( - 0.5f * (pow_dimension(left[i]) + pow_dimension(right[i]))); - - } else { - - /* Normal case */ - bp->h = h_new; - } - - /* If below the absolute maximum, try again */ - if (bp->h < black_holes_h_max && bp->h > black_holes_h_min) { - - /* Flag for another round of fun */ - sid[redo] = sid[i]; - h_0[redo] = h_0[i]; - left[redo] = left[i]; - right[redo] = right[i]; - redo += 1; - - /* Re-initialise everything */ - black_holes_init_bpart(bp); - - /* Off we go ! */ - continue; - - } else if (bp->h <= black_holes_h_min) { - - /* Ok, this particle is a lost cause... */ - bp->h = black_holes_h_min; - - } else if (bp->h >= black_holes_h_max) { - - /* Ok, this particle is a lost cause... 
*/ - bp->h = black_holes_h_max; - - /* Do some damage control if no neighbours at all were found */ - if (has_no_neighbours) { - black_holes_bpart_has_no_neighbours(bp, cosmo); - } - - } else { - error( - "Fundamental problem with the smoothing length iteration " - "logic."); - } - } - - /* We now have a particle whose smoothing length has converged */ - - black_holes_reset_feedback(bp); - - /* Check if h_max has increased */ - h_max = max(h_max, bp->h); - } - - /* We now need to treat the particles whose smoothing length had not - * converged again */ - - /* Re-set the counter for the next loop (potentially). */ - bcount = redo; - if (bcount > 0) { - - /* Climb up the cell hierarchy. */ - for (struct cell *finger = c; finger != NULL; finger = finger->parent) { - - /* Run through this cell's density interactions. */ - for (struct link *l = finger->black_holes.density; l != NULL; - l = l->next) { - -#ifdef SWIFT_DEBUG_CHECKS - if (l->t->ti_run < r->e->ti_current) - error("Density task should have been run."); -#endif - - /* Self-interaction? */ - if (l->t->type == task_type_self) - runner_doself_subset_branch_bh_density(r, finger, bparts, sid, - bcount); - - /* Otherwise, pair interaction? */ - else if (l->t->type == task_type_pair) { - - /* Left or right? */ - if (l->t->ci == finger) - runner_dopair_subset_branch_bh_density(r, finger, bparts, sid, - bcount, l->t->cj); - else - runner_dopair_subset_branch_bh_density(r, finger, bparts, sid, - bcount, l->t->ci); - } - - /* Otherwise, sub-self interaction? */ - else if (l->t->type == task_type_sub_self) - runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount, - NULL, 1); - - /* Otherwise, sub-pair interaction? */ - else if (l->t->type == task_type_sub_pair) { - - /* Left or right? 
*/ - if (l->t->ci == finger) - runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount, - l->t->cj, 1); - else - runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount, - l->t->ci, 1); - } - } - } - } - } - - if (bcount) { - error("Smoothing length failed to converge on %i particles.", bcount); - } - - /* Be clean */ - free(left); - free(right); - free(sid); - free(h_0); - } - - /* Update h_max */ - c->black_holes.h_max = h_max; - - /* The ghost may not always be at the top level. - * Therefore we need to update h_max between the super- and top-levels */ - if (c->black_holes.density_ghost) { - for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { - atomic_max_d(&tmp->black_holes.h_max, h_max); - } - } - - if (timer) TIMER_TOC(timer_do_black_holes_ghost); -} - -/** - * @brief Intermediate task after the BHs have done their swallowing step. - * This is used to update the BH quantities if necessary. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_black_holes_swallow_ghost(struct runner *r, struct cell *c, - int timer) { - - struct bpart *restrict bparts = c->black_holes.parts; - const int count = c->black_holes.count; - const struct engine *e = r->e; - const int with_cosmology = e->policy & engine_policy_cosmology; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_hydro(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) - runner_do_black_holes_swallow_ghost(r, c->progeny[k], 0); - } else { - - /* Loop over the parts in this cell. */ - for (int i = 0; i < count; i++) { - - /* Get a direct pointer on the part. 
*/ - struct bpart *bp = &bparts[i]; - - if (bpart_is_active(bp, e)) { - - /* Compute the final operations for repositioning of this BH */ - black_holes_end_reposition(bp, e->black_holes_properties, - e->physical_constants, e->cosmology); - - /* Get particle time-step */ - double dt; - if (with_cosmology) { - const integertime_t ti_step = get_integer_timestep(bp->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(e->ti_current - 1, bp->time_bin); - - dt = cosmology_get_delta_time(e->cosmology, ti_begin, - ti_begin + ti_step); - } else { - dt = get_timestep(bp->time_bin, e->time_base); - } - - /* Compute variables required for the feedback loop */ - black_holes_prepare_feedback(bp, e->black_holes_properties, - e->physical_constants, e->cosmology, dt); - } - } - } - - if (timer) TIMER_TOC(timer_do_black_holes_ghost); -} - -/** - * @brief Calculate gravity acceleration from external potential - * - * @param r runner task - * @param c cell - * @param timer 1 if the time is to be recorded. - */ -void runner_do_grav_external(struct runner *r, struct cell *c, int timer) { - - struct gpart *restrict gparts = c->grav.parts; - const int gcount = c->grav.count; - const struct engine *e = r->e; - const struct external_potential *potential = e->external_potential; - const struct phys_const *constants = e->physical_constants; - const double time = r->e->time; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_gravity(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_grav_external(r, c->progeny[k], 0); - } else { - - /* Loop over the gparts in this cell. */ - for (int i = 0; i < gcount; i++) { - - /* Get a direct pointer on the part. */ - struct gpart *restrict gp = &gparts[i]; - - /* Is this part within the time step? 
*/ - if (gpart_is_active(gp, e)) { - external_gravity_acceleration(time, potential, constants, gp); - } - } - } - - if (timer) TIMER_TOC(timer_dograv_external); -} - -/** - * @brief Calculate gravity accelerations from the periodic mesh - * - * @param r runner task - * @param c cell - * @param timer 1 if the time is to be recorded. - */ -void runner_do_grav_mesh(struct runner *r, struct cell *c, int timer) { - - struct gpart *restrict gparts = c->grav.parts; - const int gcount = c->grav.count; - const struct engine *e = r->e; - -#ifdef SWIFT_DEBUG_CHECKS - if (!e->s->periodic) error("Calling mesh forces in non-periodic mode."); -#endif - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_gravity(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_grav_mesh(r, c->progeny[k], 0); - } else { - - /* Get the forces from the gravity mesh */ - pm_mesh_interpolate_forces(e->mesh, e, gparts, gcount); - } - - if (timer) TIMER_TOC(timer_dograv_mesh); -} - -/** - * @brief Calculate change in thermal state of particles induced - * by radiative cooling and heating. - * - * @param r runner task - * @param c cell - * @param timer 1 if the time is to be recorded. 
- */ -void runner_do_cooling(struct runner *r, struct cell *c, int timer) { - - const struct engine *e = r->e; - const struct cosmology *cosmo = e->cosmology; - const int with_cosmology = (e->policy & engine_policy_cosmology); - const struct cooling_function_data *cooling_func = e->cooling_func; - const struct phys_const *constants = e->physical_constants; - const struct unit_system *us = e->internal_units; - const struct hydro_props *hydro_props = e->hydro_properties; - const struct entropy_floor_properties *entropy_floor_props = e->entropy_floor; - const double time_base = e->time_base; - const integertime_t ti_current = e->ti_current; - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - const int count = c->hydro.count; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_hydro(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_cooling(r, c->progeny[k], 0); - } else { - - /* Loop over the parts in this cell. */ - for (int i = 0; i < count; i++) { - - /* Get a direct pointer on the part. */ - struct part *restrict p = &parts[i]; - struct xpart *restrict xp = &xparts[i]; - - if (part_is_active(p, e)) { - - double dt_cool, dt_therm; - if (with_cosmology) { - const integertime_t ti_step = get_integer_timestep(p->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current - 1, p->time_bin); - - dt_cool = - cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); - dt_therm = cosmology_get_therm_kick_factor(e->cosmology, ti_begin, - ti_begin + ti_step); - - } else { - dt_cool = get_timestep(p->time_bin, time_base); - dt_therm = get_timestep(p->time_bin, time_base); - } - - /* Let's cool ! 
*/ - cooling_cool_part(constants, us, cosmo, hydro_props, - entropy_floor_props, cooling_func, p, xp, dt_cool, - dt_therm); - } - } - } - - if (timer) TIMER_TOC(timer_do_cooling); -} - -/** - * - */ -void runner_do_star_formation(struct runner *r, struct cell *c, int timer) { - - struct engine *e = r->e; - const struct cosmology *cosmo = e->cosmology; - const struct star_formation *sf_props = e->star_formation; - const struct phys_const *phys_const = e->physical_constants; - const int count = c->hydro.count; - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - const int with_cosmology = (e->policy & engine_policy_cosmology); - const int with_feedback = (e->policy & engine_policy_feedback); - const struct hydro_props *restrict hydro_props = e->hydro_properties; - const struct unit_system *restrict us = e->internal_units; - struct cooling_function_data *restrict cooling = e->cooling_func; - const struct entropy_floor_properties *entropy_floor = e->entropy_floor; - const double time_base = e->time_base; - const integertime_t ti_current = e->ti_current; - const int current_stars_count = c->stars.count; - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != e->nodeID) - error("Running star formation task on a foreign node!"); -#endif - - /* Anything to do here? */ - if (c->hydro.count == 0 || !cell_is_active_hydro(c, e)) { - star_formation_logger_log_inactive_cell(&c->stars.sfh); - return; - } - - /* Reset the SFR */ - star_formation_logger_init(&c->stars.sfh); - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) { - /* Load the child cell */ - struct cell *restrict cp = c->progeny[k]; - - /* Do the recursion */ - runner_do_star_formation(r, cp, 0); - - /* Update current cell using child cells */ - star_formation_logger_add(&c->stars.sfh, &cp->stars.sfh); - } - } else { - - /* Loop over the gas particles in this cell. 
*/ - for (int k = 0; k < count; k++) { - - /* Get a handle on the part. */ - struct part *restrict p = &parts[k]; - struct xpart *restrict xp = &xparts[k]; - - /* Only work on active particles */ - if (part_is_active(p, e)) { - - /* Is this particle star forming? */ - if (star_formation_is_star_forming(p, xp, sf_props, phys_const, cosmo, - hydro_props, us, cooling, - entropy_floor)) { - - /* Time-step size for this particle */ - double dt_star; - if (with_cosmology) { - const integertime_t ti_step = get_integer_timestep(p->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current - 1, p->time_bin); - - dt_star = - cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); - - } else { - dt_star = get_timestep(p->time_bin, time_base); - } - - /* Compute the SF rate of the particle */ - star_formation_compute_SFR(p, xp, sf_props, phys_const, cosmo, - dt_star); - - /* Add the SFR and SFR*dt to the SFH struct of this cell */ - star_formation_logger_log_active_part(p, xp, &c->stars.sfh, dt_star); - - /* Are we forming a star particle from this SF rate? */ - if (star_formation_should_convert_to_star(p, xp, sf_props, e, - dt_star)) { - - /* Convert the gas particle to a star particle */ - struct spart *sp = cell_convert_part_to_spart(e, c, p, xp); - - /* Did we get a star? (Or did we run out of spare ones?) */ - if (sp != NULL) { - - /* message("We formed a star id=%lld cellID=%d", sp->id, - * c->cellID); */ - - /* Copy the properties of the gas particle to the star particle */ - star_formation_copy_properties(p, xp, sp, e, sf_props, cosmo, - with_cosmology, phys_const, - hydro_props, us, cooling); - - /* Update the Star formation history */ - star_formation_logger_log_new_spart(sp, &c->stars.sfh); - } - } - - } else { /* Are we not star-forming? */ - - /* Update the particle to flag it as not star-forming */ - star_formation_update_part_not_SFR(p, xp, e, sf_props, - with_cosmology); - - } /* Not Star-forming? */ - - } else { /* is active? 
*/ - - /* Check if the particle is not inhibited */ - if (!part_is_inhibited(p, e)) { - star_formation_logger_log_inactive_part(p, xp, &c->stars.sfh); - } - } - } /* Loop over particles */ - } - - /* If we formed any stars, the star sorts are now invalid. We need to - * re-compute them. */ - if (with_feedback && (c == c->top) && - (current_stars_count != c->stars.count)) { - cell_set_star_resort_flag(c); - } - - if (timer) TIMER_TOC(timer_do_star_formation); -} - -/** - * @brief Sorts again all the stars in a given cell hierarchy. - * - * This is intended to be used after the star formation task has been run - * to get the cells back into a state where self/pair star tasks can be run. - * - * @param r The thread #runner. - * @param c The top-level cell to run on. - * @param timer Are we timing this? - */ -void runner_do_stars_resort(struct runner *r, struct cell *c, const int timer) { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != r->e->nodeID) error("Task must be run locally!"); -#endif - - TIMER_TIC; - - /* Did we demand a recalculation of the stars'sorts? */ - if (cell_get_flag(c, cell_flag_do_stars_resort)) { - runner_do_all_stars_sort(r, c); - cell_clear_flag(c, cell_flag_do_stars_resort); - } - - if (timer) TIMER_TOC(timer_do_stars_resort); -} - -/** - * @brief Sort the entries in ascending order using QuickSort. - * - * @param sort The entries - * @param N The number of entries. 
- */ -void runner_do_sort_ascending(struct sort_entry *sort, int N) { - - struct { - short int lo, hi; - } qstack[10]; - int qpos, i, j, lo, hi, imin; - struct sort_entry temp; - float pivot; - - /* Sort parts in cell_i in decreasing order with quicksort */ - qstack[0].lo = 0; - qstack[0].hi = N - 1; - qpos = 0; - while (qpos >= 0) { - lo = qstack[qpos].lo; - hi = qstack[qpos].hi; - qpos -= 1; - if (hi - lo < 15) { - for (i = lo; i < hi; i++) { - imin = i; - for (j = i + 1; j <= hi; j++) - if (sort[j].d < sort[imin].d) imin = j; - if (imin != i) { - temp = sort[imin]; - sort[imin] = sort[i]; - sort[i] = temp; - } - } - } else { - pivot = sort[(lo + hi) / 2].d; - i = lo; - j = hi; - while (i <= j) { - while (sort[i].d < pivot) i++; - while (sort[j].d > pivot) j--; - if (i <= j) { - if (i < j) { - temp = sort[i]; - sort[i] = sort[j]; - sort[j] = temp; - } - i += 1; - j -= 1; - } - } - if (j > (lo + hi) / 2) { - if (lo < j) { - qpos += 1; - qstack[qpos].lo = lo; - qstack[qpos].hi = j; - } - if (i < hi) { - qpos += 1; - qstack[qpos].lo = i; - qstack[qpos].hi = hi; - } - } else { - if (i < hi) { - qpos += 1; - qstack[qpos].lo = i; - qstack[qpos].hi = hi; - } - if (lo < j) { - qpos += 1; - qstack[qpos].lo = lo; - qstack[qpos].hi = j; - } - } - } - } -} - -#ifdef SWIFT_DEBUG_CHECKS -/** - * @brief Recursively checks that the flags are consistent in a cell hierarchy. - * - * Debugging function. Exists in two flavours: hydro & stars. 
- */ -#define RUNNER_CHECK_SORTS(TYPE) \ - void runner_check_sorts_##TYPE(struct cell *c, int flags) { \ - \ - if (flags & ~c->TYPE.sorted) error("Inconsistent sort flags (downward)!"); \ - if (c->split) \ - for (int k = 0; k < 8; k++) \ - if (c->progeny[k] != NULL && c->progeny[k]->TYPE.count > 0) \ - runner_check_sorts_##TYPE(c->progeny[k], c->TYPE.sorted); \ - } -#else -#define RUNNER_CHECK_SORTS(TYPE) \ - void runner_check_sorts_##TYPE(struct cell *c, int flags) { \ - error("Calling debugging code without debugging flag activated."); \ - } -#endif - -RUNNER_CHECK_SORTS(hydro) -RUNNER_CHECK_SORTS(stars) - -/** - * @brief Sort the particles in the given cell along all cardinal directions. - * - * @param r The #runner. - * @param c The #cell. - * @param flags Cell flag. - * @param cleanup If true, re-build the sorts for the selected flags instead - * of just adding them. - * @param clock Flag indicating whether to record the timing or not, needed - * for recursive calls. - */ -void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags, - int cleanup, int clock) { - - struct sort_entry *fingers[8]; - const int count = c->hydro.count; - const struct part *parts = c->hydro.parts; - struct xpart *xparts = c->hydro.xparts; - float buff[8]; - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->hydro.super == NULL) error("Task called above the super level!!!"); -#endif - - /* We need to do the local sorts plus whatever was requested further up. */ - flags |= c->hydro.do_sort; - if (cleanup) { - c->hydro.sorted = 0; - } else { - flags &= ~c->hydro.sorted; - } - if (flags == 0 && !cell_get_flag(c, cell_flag_do_hydro_sub_sort)) return; - - /* Check that the particles have been moved to the current time */ - if (flags && !cell_are_part_drifted(c, r->e)) - error("Sorting un-drifted cell c->nodeID=%d", c->nodeID); - -#ifdef SWIFT_DEBUG_CHECKS - /* Make sure the sort flags are consistent (downward). 
*/ - runner_check_sorts_hydro(c, c->hydro.sorted); - - /* Make sure the sort flags are consistent (upard). */ - for (struct cell *finger = c->parent; finger != NULL; - finger = finger->parent) { - if (finger->hydro.sorted & ~c->hydro.sorted) - error("Inconsistent sort flags (upward)."); - } - - /* Update the sort timer which represents the last time the sorts - were re-set. */ - if (c->hydro.sorted == 0) c->hydro.ti_sort = r->e->ti_current; -#endif - - /* Allocate memory for sorting. */ - cell_malloc_hydro_sorts(c, flags); - - /* Does this cell have any progeny? */ - if (c->split) { - - /* Fill in the gaps within the progeny. */ - float dx_max_sort = 0.0f; - float dx_max_sort_old = 0.0f; - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - - if (c->progeny[k]->hydro.count > 0) { - - /* Only propagate cleanup if the progeny is stale. */ - runner_do_hydro_sort( - r, c->progeny[k], flags, - cleanup && (c->progeny[k]->hydro.dx_max_sort_old > - space_maxreldx * c->progeny[k]->dmin), - 0); - dx_max_sort = max(dx_max_sort, c->progeny[k]->hydro.dx_max_sort); - dx_max_sort_old = - max(dx_max_sort_old, c->progeny[k]->hydro.dx_max_sort_old); - } else { - - /* We need to clean up the unused flags that were in case the - number of particles in the cell would change */ - cell_clear_hydro_sort_flags(c->progeny[k], /*clear_unused_flags=*/1); - } - } - } - c->hydro.dx_max_sort = dx_max_sort; - c->hydro.dx_max_sort_old = dx_max_sort_old; - - /* Loop over the 13 different sort arrays. */ - for (int j = 0; j < 13; j++) { - - /* Has this sort array been flagged? */ - if (!(flags & (1 << j))) continue; - - /* Init the particle index offsets. */ - int off[8]; - off[0] = 0; - for (int k = 1; k < 8; k++) - if (c->progeny[k - 1] != NULL) - off[k] = off[k - 1] + c->progeny[k - 1]->hydro.count; - else - off[k] = off[k - 1]; - - /* Init the entries and indices. 
*/ - int inds[8]; - for (int k = 0; k < 8; k++) { - inds[k] = k; - if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) { - fingers[k] = c->progeny[k]->hydro.sort[j]; - buff[k] = fingers[k]->d; - off[k] = off[k]; - } else - buff[k] = FLT_MAX; - } - - /* Sort the buffer. */ - for (int i = 0; i < 7; i++) - for (int k = i + 1; k < 8; k++) - if (buff[inds[k]] < buff[inds[i]]) { - int temp_i = inds[i]; - inds[i] = inds[k]; - inds[k] = temp_i; - } - - /* For each entry in the new sort list. */ - struct sort_entry *finger = c->hydro.sort[j]; - for (int ind = 0; ind < count; ind++) { - - /* Copy the minimum into the new sort array. */ - finger[ind].d = buff[inds[0]]; - finger[ind].i = fingers[inds[0]]->i + off[inds[0]]; - - /* Update the buffer. */ - fingers[inds[0]] += 1; - buff[inds[0]] = fingers[inds[0]]->d; - - /* Find the smallest entry. */ - for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) { - int temp_i = inds[k - 1]; - inds[k - 1] = inds[k]; - inds[k] = temp_i; - } - - } /* Merge. */ - - /* Add a sentinel. */ - c->hydro.sort[j][count].d = FLT_MAX; - c->hydro.sort[j][count].i = 0; - - /* Mark as sorted. */ - atomic_or(&c->hydro.sorted, 1 << j); - - } /* loop over sort arrays. */ - - } /* progeny? */ - - /* Otherwise, just sort. */ - else { - - /* Reset the sort distance */ - if (c->hydro.sorted == 0) { -#ifdef SWIFT_DEBUG_CHECKS - if (xparts != NULL && c->nodeID != engine_rank) - error("Have non-NULL xparts in foreign cell"); -#endif - - /* And the individual sort distances if we are a local cell */ - if (xparts != NULL) { - for (int k = 0; k < count; k++) { - xparts[k].x_diff_sort[0] = 0.0f; - xparts[k].x_diff_sort[1] = 0.0f; - xparts[k].x_diff_sort[2] = 0.0f; - } - } - c->hydro.dx_max_sort_old = 0.f; - c->hydro.dx_max_sort = 0.f; - } - - /* Fill the sort array. 
*/ - for (int k = 0; k < count; k++) { - const double px[3] = {parts[k].x[0], parts[k].x[1], parts[k].x[2]}; - for (int j = 0; j < 13; j++) - if (flags & (1 << j)) { - c->hydro.sort[j][k].i = k; - c->hydro.sort[j][k].d = px[0] * runner_shift[j][0] + - px[1] * runner_shift[j][1] + - px[2] * runner_shift[j][2]; - } - } - - /* Add the sentinel and sort. */ - for (int j = 0; j < 13; j++) - if (flags & (1 << j)) { - c->hydro.sort[j][count].d = FLT_MAX; - c->hydro.sort[j][count].i = 0; - runner_do_sort_ascending(c->hydro.sort[j], count); - atomic_or(&c->hydro.sorted, 1 << j); - } - } - -#ifdef SWIFT_DEBUG_CHECKS - /* Verify the sorting. */ - for (int j = 0; j < 13; j++) { - if (!(flags & (1 << j))) continue; - struct sort_entry *finger = c->hydro.sort[j]; - for (int k = 1; k < count; k++) { - if (finger[k].d < finger[k - 1].d) - error("Sorting failed, ascending array."); - if (finger[k].i >= count) error("Sorting failed, indices borked."); - } - } - - /* Make sure the sort flags are consistent (downward). */ - runner_check_sorts_hydro(c, flags); - - /* Make sure the sort flags are consistent (upward). */ - for (struct cell *finger = c->parent; finger != NULL; - finger = finger->parent) { - if (finger->hydro.sorted & ~c->hydro.sorted) - error("Inconsistent sort flags."); - } -#endif - - /* Clear the cell's sort flags. */ - c->hydro.do_sort = 0; - cell_clear_flag(c, cell_flag_do_hydro_sub_sort); - c->hydro.requires_sorts = 0; - - if (clock) TIMER_TOC(timer_dosort); -} - -/** - * @brief Sort the stars particles in the given cell along all cardinal - * directions. - * - * @param r The #runner. - * @param c The #cell. - * @param flags Cell flag. - * @param cleanup If true, re-build the sorts for the selected flags instead - * of just adding them. - * @param clock Flag indicating whether to record the timing or not, needed - * for recursive calls. 
- */ -void runner_do_stars_sort(struct runner *r, struct cell *c, int flags, - int cleanup, int clock) { - - struct sort_entry *fingers[8]; - const int count = c->stars.count; - struct spart *sparts = c->stars.parts; - float buff[8]; - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->hydro.super == NULL) error("Task called above the super level!!!"); -#endif - - /* We need to do the local sorts plus whatever was requested further up. */ - flags |= c->stars.do_sort; - if (cleanup) { - c->stars.sorted = 0; - } else { - flags &= ~c->stars.sorted; - } - if (flags == 0 && !cell_get_flag(c, cell_flag_do_stars_sub_sort)) return; - - /* Check that the particles have been moved to the current time */ - if (flags && !cell_are_spart_drifted(c, r->e)) { - error("Sorting un-drifted cell c->nodeID=%d", c->nodeID); - } - -#ifdef SWIFT_DEBUG_CHECKS - /* Make sure the sort flags are consistent (downward). */ - runner_check_sorts_stars(c, c->stars.sorted); - - /* Make sure the sort flags are consistent (upward). */ - for (struct cell *finger = c->parent; finger != NULL; - finger = finger->parent) { - if (finger->stars.sorted & ~c->stars.sorted) - error("Inconsistent sort flags (upward)."); - } - - /* Update the sort timer which represents the last time the sorts - were re-set. */ - if (c->stars.sorted == 0) c->stars.ti_sort = r->e->ti_current; -#endif - - /* start by allocating the entry arrays in the requested dimensions. */ - cell_malloc_stars_sorts(c, flags); - - /* Does this cell have any progeny? */ - if (c->split) { - - /* Fill in the gaps within the progeny. */ - float dx_max_sort = 0.0f; - float dx_max_sort_old = 0.0f; - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - - if (c->progeny[k]->stars.count > 0) { - - /* Only propagate cleanup if the progeny is stale. 
*/ - const int cleanup_prog = - cleanup && (c->progeny[k]->stars.dx_max_sort_old > - space_maxreldx * c->progeny[k]->dmin); - runner_do_stars_sort(r, c->progeny[k], flags, cleanup_prog, 0); - dx_max_sort = max(dx_max_sort, c->progeny[k]->stars.dx_max_sort); - dx_max_sort_old = - max(dx_max_sort_old, c->progeny[k]->stars.dx_max_sort_old); - } else { - - /* We need to clean up the unused flags that were in case the - number of particles in the cell would change */ - cell_clear_stars_sort_flags(c->progeny[k], /*clear_unused_flags=*/1); - } - } - } - c->stars.dx_max_sort = dx_max_sort; - c->stars.dx_max_sort_old = dx_max_sort_old; - - /* Loop over the 13 different sort arrays. */ - for (int j = 0; j < 13; j++) { - - /* Has this sort array been flagged? */ - if (!(flags & (1 << j))) continue; - - /* Init the particle index offsets. */ - int off[8]; - off[0] = 0; - for (int k = 1; k < 8; k++) - if (c->progeny[k - 1] != NULL) - off[k] = off[k - 1] + c->progeny[k - 1]->stars.count; - else - off[k] = off[k - 1]; - - /* Init the entries and indices. */ - int inds[8]; - for (int k = 0; k < 8; k++) { - inds[k] = k; - if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) { - fingers[k] = c->progeny[k]->stars.sort[j]; - buff[k] = fingers[k]->d; - off[k] = off[k]; - } else - buff[k] = FLT_MAX; - } - - /* Sort the buffer. */ - for (int i = 0; i < 7; i++) - for (int k = i + 1; k < 8; k++) - if (buff[inds[k]] < buff[inds[i]]) { - int temp_i = inds[i]; - inds[i] = inds[k]; - inds[k] = temp_i; - } - - /* For each entry in the new sort list. */ - struct sort_entry *finger = c->stars.sort[j]; - for (int ind = 0; ind < count; ind++) { - - /* Copy the minimum into the new sort array. */ - finger[ind].d = buff[inds[0]]; - finger[ind].i = fingers[inds[0]]->i + off[inds[0]]; - - /* Update the buffer. */ - fingers[inds[0]] += 1; - buff[inds[0]] = fingers[inds[0]]->d; - - /* Find the smallest entry. 
*/ - for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) { - int temp_i = inds[k - 1]; - inds[k - 1] = inds[k]; - inds[k] = temp_i; - } - - } /* Merge. */ - - /* Add a sentinel. */ - c->stars.sort[j][count].d = FLT_MAX; - c->stars.sort[j][count].i = 0; - - /* Mark as sorted. */ - atomic_or(&c->stars.sorted, 1 << j); - - } /* loop over sort arrays. */ - - } /* progeny? */ - - /* Otherwise, just sort. */ - else { - - /* Reset the sort distance */ - if (c->stars.sorted == 0) { - - /* And the individual sort distances if we are a local cell */ - for (int k = 0; k < count; k++) { - sparts[k].x_diff_sort[0] = 0.0f; - sparts[k].x_diff_sort[1] = 0.0f; - sparts[k].x_diff_sort[2] = 0.0f; - } - c->stars.dx_max_sort_old = 0.f; - c->stars.dx_max_sort = 0.f; - } - - /* Fill the sort array. */ - for (int k = 0; k < count; k++) { - const double px[3] = {sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]}; - for (int j = 0; j < 13; j++) - if (flags & (1 << j)) { - c->stars.sort[j][k].i = k; - c->stars.sort[j][k].d = px[0] * runner_shift[j][0] + - px[1] * runner_shift[j][1] + - px[2] * runner_shift[j][2]; - } - } - - /* Add the sentinel and sort. */ - for (int j = 0; j < 13; j++) - if (flags & (1 << j)) { - c->stars.sort[j][count].d = FLT_MAX; - c->stars.sort[j][count].i = 0; - runner_do_sort_ascending(c->stars.sort[j], count); - atomic_or(&c->stars.sorted, 1 << j); - } - } - -#ifdef SWIFT_DEBUG_CHECKS - /* Verify the sorting. */ - for (int j = 0; j < 13; j++) { - if (!(flags & (1 << j))) continue; - struct sort_entry *finger = c->stars.sort[j]; - for (int k = 1; k < count; k++) { - if (finger[k].d < finger[k - 1].d) - error("Sorting failed, ascending array."); - if (finger[k].i >= count) error("Sorting failed, indices borked."); - } - } - - /* Make sure the sort flags are consistent (downward). */ - runner_check_sorts_stars(c, flags); - - /* Make sure the sort flags are consistent (upward). 
*/ - for (struct cell *finger = c->parent; finger != NULL; - finger = finger->parent) { - if (finger->stars.sorted & ~c->stars.sorted) - error("Inconsistent sort flags."); - } -#endif - - /* Clear the cell's sort flags. */ - c->stars.do_sort = 0; - cell_clear_flag(c, cell_flag_do_stars_sub_sort); - c->stars.requires_sorts = 0; - - if (clock) TIMER_TOC(timer_do_stars_sort); -} - -/** - * @brief Recurse into a cell until reaching the super level and call - * the hydro sorting function there. - * - * This function must be called at or above the super level! - * - * This function will sort the particles in all 13 directions. - * - * @param r the #runner. - * @param c the #cell. - */ -void runner_do_all_hydro_sort(struct runner *r, struct cell *c) { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != engine_rank) error("Function called on a foreign cell!"); -#endif - - if (!cell_is_active_hydro(c, r->e)) return; - - /* Shall we sort at this level? */ - if (c->hydro.super == c) { - - /* Sort everything */ - runner_do_hydro_sort(r, c, 0x1FFF, /*cleanup=*/0, /*timer=*/0); - - } else { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->hydro.super != NULL) error("Function called below the super level!"); -#endif - - /* Ok, then, let's try lower */ - if (c->split) { - for (int k = 0; k < 8; ++k) { - if (c->progeny[k] != NULL) runner_do_all_hydro_sort(r, c->progeny[k]); - } - } else { -#ifdef SWIFT_DEBUG_CHECKS - error("Reached a leaf without encountering a hydro super cell!"); -#endif - } - } -} - -/** - * @brief Recurse into a cell until reaching the super level and call - * the star sorting function there. - * - * This function must be called at or above the super level! - * - * This function will sort the particles in all 13 directions. - * - * @param r the #runner. - * @param c the #cell. 
- */ -void runner_do_all_stars_sort(struct runner *r, struct cell *c) { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != engine_rank) error("Function called on a foreign cell!"); -#endif - - if (!cell_is_active_stars(c, r->e) && !cell_is_active_hydro(c, r->e)) return; - - /* Shall we sort at this level? */ - if (c->hydro.super == c) { - - /* Sort everything */ - runner_do_stars_sort(r, c, 0x1FFF, /*cleanup=*/0, /*timer=*/0); - - } else { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->hydro.super != NULL) error("Function called below the super level!"); -#endif - - /* Ok, then, let's try lower */ - if (c->split) { - for (int k = 0; k < 8; ++k) { - if (c->progeny[k] != NULL) runner_do_all_stars_sort(r, c->progeny[k]); - } - } else { -#ifdef SWIFT_DEBUG_CHECKS - error("Reached a leaf without encountering a hydro super cell!"); -#endif - } - } -} - -/** - * @brief Initialize the multipoles before the gravity calculation. - * - * @param r The runner thread. - * @param c The cell. - * @param timer 1 if the time is to be recorded. - */ -void runner_do_init_grav(struct runner *r, struct cell *c, int timer) { - - const struct engine *e = r->e; - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - if (!(e->policy & engine_policy_self_gravity)) - error("Grav-init task called outside of self-gravity calculation"); -#endif - - /* Anything to do here? */ - if (!cell_is_active_gravity(c, e)) return; - - /* Reset the gravity acceleration tensors */ - gravity_field_tensors_init(&c->grav.multipole->pot, e->ti_current); - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) runner_do_init_grav(r, c->progeny[k], 0); - } - } - - if (timer) TIMER_TOC(timer_init_grav); -} - -/** - * @brief Intermediate task after the gradient loop that does final operations - * on the gradient quantities and optionally slope limits the gradients - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? 
- */ -void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer) { - -#ifdef EXTRA_HYDRO_LOOP - - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - const int count = c->hydro.count; - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const int with_cosmology = (e->policy & engine_policy_cosmology); - const double time_base = e->time_base; - const struct cosmology *cosmo = e->cosmology; - const struct hydro_props *hydro_props = e->hydro_properties; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_hydro(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_extra_ghost(r, c->progeny[k], 0); - } else { - - /* Loop over the parts in this cell. */ - for (int i = 0; i < count; i++) { - - /* Get a direct pointer on the part. */ - struct part *restrict p = &parts[i]; - struct xpart *restrict xp = &xparts[i]; - - if (part_is_active(p, e)) { - - /* Finish the gradient calculation */ - hydro_end_gradient(p); - - /* As of here, particle force variables will be set. */ - - /* Calculate the time-step for passing to hydro_prepare_force. - * This is the physical time between the start and end of the time-step - * without any scale-factor powers. */ - double dt_alpha; - - if (with_cosmology) { - const integertime_t ti_step = get_integer_timestep(p->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current - 1, p->time_bin); - - dt_alpha = - cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); - } else { - dt_alpha = get_timestep(p->time_bin, time_base); - } - - /* Compute variables required for the force loop */ - hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha); - - /* The particle force values are now set. Do _NOT_ - try to read any particle density variables! 
*/ - - /* Prepare the particle for the force loop over neighbours */ - hydro_reset_acceleration(p); - } - } - } - - if (timer) TIMER_TOC(timer_do_extra_ghost); - -#else - error("SWIFT was not compiled with the extra hydro loop activated."); -#endif -} - -/** - * @brief Intermediate task after the density to check that the smoothing - * lengths are correct. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_ghost(struct runner *r, struct cell *c, int timer) { - - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - const struct engine *e = r->e; - const struct space *s = e->s; - const struct hydro_space *hs = &s->hs; - const struct cosmology *cosmo = e->cosmology; - const struct chemistry_global_data *chemistry = e->chemistry; - - const int with_cosmology = (e->policy & engine_policy_cosmology); - - const float hydro_h_max = e->hydro_properties->h_max; - const float hydro_h_min = e->hydro_properties->h_min; - const float eps = e->hydro_properties->h_tolerance; - const float hydro_eta_dim = - pow_dimension(e->hydro_properties->eta_neighbours); - const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations; - int redo = 0, count = 0; - - /* Running value of the maximal smoothing length */ - double h_max = c->hydro.h_max; - - TIMER_TIC; - - /* Anything to do here? */ - if (c->hydro.count == 0) return; - if (!cell_is_active_hydro(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - runner_do_ghost(r, c->progeny[k], 0); - - /* Update h_max */ - h_max = max(h_max, c->progeny[k]->hydro.h_max); - } - } - } else { - - /* Init the list of active particles that have to be updated and their - * current smoothing lengths. 
*/ - int *pid = NULL; - float *h_0 = NULL; - float *left = NULL; - float *right = NULL; - if ((pid = (int *)malloc(sizeof(int) * c->hydro.count)) == NULL) - error("Can't allocate memory for pid."); - if ((h_0 = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL) - error("Can't allocate memory for h_0."); - if ((left = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL) - error("Can't allocate memory for left."); - if ((right = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL) - error("Can't allocate memory for right."); - for (int k = 0; k < c->hydro.count; k++) - if (part_is_active(&parts[k], e)) { - pid[count] = k; - h_0[count] = parts[k].h; - left[count] = 0.f; - right[count] = hydro_h_max; - ++count; - } - - /* While there are particles that need to be updated... */ - for (int num_reruns = 0; count > 0 && num_reruns < max_smoothing_iter; - num_reruns++) { - - /* Reset the redo-count. */ - redo = 0; - - /* Loop over the remaining active parts in this cell. */ - for (int i = 0; i < count; i++) { - - /* Get a direct pointer on the part. */ - struct part *p = &parts[pid[i]]; - struct xpart *xp = &xparts[pid[i]]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Is this part within the timestep? 
*/ - if (!part_is_active(p, e)) error("Ghost applied to inactive particle"); -#endif - - /* Get some useful values */ - const float h_init = h_0[i]; - const float h_old = p->h; - const float h_old_dim = pow_dimension(h_old); - const float h_old_dim_minus_one = pow_dimension_minus_one(h_old); - - float h_new; - int has_no_neighbours = 0; - - if (p->density.wcount == 0.f) { /* No neighbours case */ - - /* Flag that there were no neighbours */ - has_no_neighbours = 1; - - /* Double h and try again */ - h_new = 2.f * h_old; - - } else { - - /* Finish the density calculation */ - hydro_end_density(p, cosmo); - chemistry_end_density(p, chemistry, cosmo); - pressure_floor_end_density(p, cosmo); - - /* Compute one step of the Newton-Raphson scheme */ - const float n_sum = p->density.wcount * h_old_dim; - const float n_target = hydro_eta_dim; - const float f = n_sum - n_target; - const float f_prime = - p->density.wcount_dh * h_old_dim + - hydro_dimension * p->density.wcount * h_old_dim_minus_one; - - /* Improve the bisection bounds */ - if (n_sum < n_target) - left[i] = max(left[i], h_old); - else if (n_sum > n_target) - right[i] = min(right[i], h_old); - -#ifdef SWIFT_DEBUG_CHECKS - /* Check the validity of the left and right bounds */ - if (left[i] > right[i]) - error("Invalid left (%e) and right (%e)", left[i], right[i]); -#endif - - /* Skip if h is already h_max and we don't have enough neighbours */ - /* Same if we are below h_min */ - if (((p->h >= hydro_h_max) && (f < 0.f)) || - ((p->h <= hydro_h_min) && (f > 0.f))) { - - /* We have a particle whose smoothing length is already set (wants - * to be larger but has already hit the maximum OR wants to be - * smaller but has already reached the minimum). So, just tidy up as - * if the smoothing length had converged correctly */ - -#ifdef EXTRA_HYDRO_LOOP - - /* As of here, particle gradient variables will be set. */ - /* The force variables are set in the extra ghost. 
*/ - - /* Compute variables required for the gradient loop */ - hydro_prepare_gradient(p, xp, cosmo); - - /* The particle gradient values are now set. Do _NOT_ - try to read any particle density variables! */ - - /* Prepare the particle for the gradient loop over neighbours */ - hydro_reset_gradient(p); - -#else - const struct hydro_props *hydro_props = e->hydro_properties; - - /* Calculate the time-step for passing to hydro_prepare_force, used - * for the evolution of alpha factors (i.e. those involved in the - * artificial viscosity and thermal conduction terms) */ - const double time_base = e->time_base; - const integertime_t ti_current = e->ti_current; - double dt_alpha; - - if (with_cosmology) { - const integertime_t ti_step = get_integer_timestep(p->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current - 1, p->time_bin); - - dt_alpha = - cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); - } else { - dt_alpha = get_timestep(p->time_bin, time_base); - } - - /* As of here, particle force variables will be set. */ - - /* Compute variables required for the force loop */ - hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha); - - /* The particle force values are now set. Do _NOT_ - try to read any particle density variables! 
*/ - - /* Prepare the particle for the force loop over neighbours */ - hydro_reset_acceleration(p); - -#endif /* EXTRA_HYDRO_LOOP */ - - /* Ok, we are done with this particle */ - continue; - } - - /* Normal case: Use Newton-Raphson to get a better value of h */ - - /* Avoid floating point exception from f_prime = 0 */ - h_new = h_old - f / (f_prime + FLT_MIN); - - /* Be verbose about the particles that struggle to converge */ - if (num_reruns > max_smoothing_iter - 10) { - - message( - "Smoothing length convergence problem: iter=%d p->id=%lld " - "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f " - "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e", - num_reruns, p->id, h_init, h_old, h_new, f, f_prime, n_sum, - n_target, left[i], right[i]); - } - -#ifdef SWIFT_DEBUG_CHECKS - if ((f > 0.f && h_new > h_old) || (f < 0.f && h_new < h_old)) - error( - "Smoothing length correction not going in the right direction"); -#endif - - /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. 
*/ - h_new = min(h_new, 2.f * h_old); - h_new = max(h_new, 0.5f * h_old); - - /* Verify that we are actually progrssing towards the answer */ - h_new = max(h_new, left[i]); - h_new = min(h_new, right[i]); - } - - /* Check whether the particle has an inappropriate smoothing length */ - if (fabsf(h_new - h_old) > eps * h_old) { - - /* Ok, correct then */ - - /* Case where we have been oscillating around the solution */ - if ((h_new == left[i] && h_old == right[i]) || - (h_old == left[i] && h_new == right[i])) { - - /* Bissect the remaining interval */ - p->h = pow_inv_dimension( - 0.5f * (pow_dimension(left[i]) + pow_dimension(right[i]))); - - } else { - - /* Normal case */ - p->h = h_new; - } - - /* If within the allowed range, try again */ - if (p->h < hydro_h_max && p->h > hydro_h_min) { - - /* Flag for another round of fun */ - pid[redo] = pid[i]; - h_0[redo] = h_0[i]; - left[redo] = left[i]; - right[redo] = right[i]; - redo += 1; - - /* Re-initialise everything */ - hydro_init_part(p, hs); - chemistry_init_part(p, chemistry); - pressure_floor_init_part(p, xp); - tracers_after_init(p, xp, e->internal_units, e->physical_constants, - with_cosmology, e->cosmology, - e->hydro_properties, e->cooling_func, e->time); - - /* Off we go ! */ - continue; - - } else if (p->h <= hydro_h_min) { - - /* Ok, this particle is a lost cause... */ - p->h = hydro_h_min; - - } else if (p->h >= hydro_h_max) { - - /* Ok, this particle is a lost cause... 
*/ - p->h = hydro_h_max; - - /* Do some damage control if no neighbours at all were found */ - if (has_no_neighbours) { - hydro_part_has_no_neighbours(p, xp, cosmo); - chemistry_part_has_no_neighbours(p, xp, chemistry, cosmo); - pressure_floor_part_has_no_neighbours(p, xp, cosmo); - } - - } else { - error( - "Fundamental problem with the smoothing length iteration " - "logic."); - } - } - - /* We now have a particle whose smoothing length has converged */ - - /* Check if h_max is increased */ - h_max = max(h_max, p->h); - -#ifdef EXTRA_HYDRO_LOOP - - /* As of here, particle gradient variables will be set. */ - /* The force variables are set in the extra ghost. */ - - /* Compute variables required for the gradient loop */ - hydro_prepare_gradient(p, xp, cosmo); - - /* The particle gradient values are now set. Do _NOT_ - try to read any particle density variables! */ - - /* Prepare the particle for the gradient loop over neighbours */ - hydro_reset_gradient(p); - -#else - const struct hydro_props *hydro_props = e->hydro_properties; - - /* Calculate the time-step for passing to hydro_prepare_force, used for - * the evolution of alpha factors (i.e. those involved in the artificial - * viscosity and thermal conduction terms) */ - const double time_base = e->time_base; - const integertime_t ti_current = e->ti_current; - double dt_alpha; - - if (with_cosmology) { - const integertime_t ti_step = get_integer_timestep(p->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current - 1, p->time_bin); - - dt_alpha = - cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); - } else { - dt_alpha = get_timestep(p->time_bin, time_base); - } - - /* As of here, particle force variables will be set. */ - - /* Compute variables required for the force loop */ - hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha); - - /* The particle force values are now set. Do _NOT_ - try to read any particle density variables! 
*/ - - /* Prepare the particle for the force loop over neighbours */ - hydro_reset_acceleration(p); - -#endif /* EXTRA_HYDRO_LOOP */ - } - - /* We now need to treat the particles whose smoothing length had not - * converged again */ - - /* Re-set the counter for the next loop (potentially). */ - count = redo; - if (count > 0) { - - /* Climb up the cell hierarchy. */ - for (struct cell *finger = c; finger != NULL; finger = finger->parent) { - - /* Run through this cell's density interactions. */ - for (struct link *l = finger->hydro.density; l != NULL; l = l->next) { - -#ifdef SWIFT_DEBUG_CHECKS - if (l->t->ti_run < r->e->ti_current) - error("Density task should have been run."); -#endif - - /* Self-interaction? */ - if (l->t->type == task_type_self) - runner_doself_subset_branch_density(r, finger, parts, pid, count); - - /* Otherwise, pair interaction? */ - else if (l->t->type == task_type_pair) { - - /* Left or right? */ - if (l->t->ci == finger) - runner_dopair_subset_branch_density(r, finger, parts, pid, - count, l->t->cj); - else - runner_dopair_subset_branch_density(r, finger, parts, pid, - count, l->t->ci); - } - - /* Otherwise, sub-self interaction? */ - else if (l->t->type == task_type_sub_self) - runner_dosub_subset_density(r, finger, parts, pid, count, NULL, - 1); - - /* Otherwise, sub-pair interaction? */ - else if (l->t->type == task_type_sub_pair) { - - /* Left or right? */ - if (l->t->ci == finger) - runner_dosub_subset_density(r, finger, parts, pid, count, - l->t->cj, 1); - else - runner_dosub_subset_density(r, finger, parts, pid, count, - l->t->ci, 1); - } - } - } - } - } - - if (count) { - error("Smoothing length failed to converge on %i particles.", count); - } - - /* Be clean */ - free(left); - free(right); - free(pid); - free(h_0); - } - - /* Update h_max */ - c->hydro.h_max = h_max; - - /* The ghost may not always be at the top level. 
- * Therefore we need to update h_max between the super- and top-levels */ - if (c->hydro.ghost) { - for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { - atomic_max_d(&tmp->hydro.h_max, h_max); - } - } - - if (timer) TIMER_TOC(timer_do_ghost); -} - -/** - * @brief Drift all part in a cell. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_drift_part(struct runner *r, struct cell *c, int timer) { - - TIMER_TIC; - - cell_drift_part(c, r->e, 0); - - if (timer) TIMER_TOC(timer_drift_part); -} - -/** - * @brief Drift all gpart in a cell. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer) { - - TIMER_TIC; - - cell_drift_gpart(c, r->e, 0); - - if (timer) TIMER_TOC(timer_drift_gpart); -} - -/** - * @brief Drift all spart in a cell. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_drift_spart(struct runner *r, struct cell *c, int timer) { - - TIMER_TIC; - - cell_drift_spart(c, r->e, 0); - - if (timer) TIMER_TOC(timer_drift_spart); -} - -/** - * @brief Drift all bpart in a cell. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_drift_bpart(struct runner *r, struct cell *c, int timer) { - - TIMER_TIC; - - cell_drift_bpart(c, r->e, 0); - - if (timer) TIMER_TOC(timer_drift_bpart); -} - -/** - * @brief Perform the first half-kick on all the active particles in a cell. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? 
- */ -void runner_do_kick1(struct runner *r, struct cell *c, int timer) { - - const struct engine *e = r->e; - const struct cosmology *cosmo = e->cosmology; - const struct hydro_props *hydro_props = e->hydro_properties; - const struct entropy_floor_properties *entropy_floor = e->entropy_floor; - const int with_cosmology = (e->policy & engine_policy_cosmology); - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - struct gpart *restrict gparts = c->grav.parts; - struct spart *restrict sparts = c->stars.parts; - const int count = c->hydro.count; - const int gcount = c->grav.count; - const int scount = c->stars.count; - const integertime_t ti_current = e->ti_current; - const double time_base = e->time_base; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_starting_hydro(c, e) && !cell_is_starting_gravity(c, e) && - !cell_is_starting_stars(c, e) && !cell_is_starting_black_holes(c, e)) - return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_kick1(r, c->progeny[k], 0); - } else { - - /* Loop over the parts in this cell. */ - for (int k = 0; k < count; k++) { - - /* Get a handle on the part. */ - struct part *restrict p = &parts[k]; - struct xpart *restrict xp = &xparts[k]; - - /* If particle needs to be kicked */ - if (part_is_starting(p, e)) { - -#ifdef SWIFT_DEBUG_CHECKS - if (p->wakeup == time_bin_awake) - error("Woken-up particle that has not been processed in kick1"); -#endif - - /* Skip particles that have been woken up and treated by the limiter. 
*/ - if (p->wakeup != time_bin_not_awake) continue; - - const integertime_t ti_step = get_integer_timestep(p->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current + 1, p->time_bin); - -#ifdef SWIFT_DEBUG_CHECKS - const integertime_t ti_end = ti_begin + ti_step; - - if (ti_begin != ti_current) - error( - "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, " - "ti_step=%lld time_bin=%d wakeup=%d ti_current=%lld", - ti_end, ti_begin, ti_step, p->time_bin, p->wakeup, ti_current); -#endif - - /* Time interval for this half-kick */ - double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr; - if (with_cosmology) { - dt_kick_hydro = cosmology_get_hydro_kick_factor( - cosmo, ti_begin, ti_begin + ti_step / 2); - dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin, - ti_begin + ti_step / 2); - dt_kick_therm = cosmology_get_therm_kick_factor( - cosmo, ti_begin, ti_begin + ti_step / 2); - dt_kick_corr = cosmology_get_corr_kick_factor(cosmo, ti_begin, - ti_begin + ti_step / 2); - } else { - dt_kick_hydro = (ti_step / 2) * time_base; - dt_kick_grav = (ti_step / 2) * time_base; - dt_kick_therm = (ti_step / 2) * time_base; - dt_kick_corr = (ti_step / 2) * time_base; - } - - /* do the kick */ - kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm, - dt_kick_corr, cosmo, hydro_props, entropy_floor, ti_begin, - ti_begin + ti_step / 2); - - /* Update the accelerations to be used in the drift for hydro */ - if (p->gpart != NULL) { - - xp->a_grav[0] = p->gpart->a_grav[0]; - xp->a_grav[1] = p->gpart->a_grav[1]; - xp->a_grav[2] = p->gpart->a_grav[2]; - } - } - } - - /* Loop over the gparts in this cell. */ - for (int k = 0; k < gcount; k++) { - - /* Get a handle on the part. 
*/ - struct gpart *restrict gp = &gparts[k]; - - /* If the g-particle has no counterpart and needs to be kicked */ - if ((gp->type == swift_type_dark_matter || - gp->type == swift_type_dark_matter_background) && - gpart_is_starting(gp, e)) { - - const integertime_t ti_step = get_integer_timestep(gp->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current + 1, gp->time_bin); - -#ifdef SWIFT_DEBUG_CHECKS - const integertime_t ti_end = - get_integer_time_end(ti_current + 1, gp->time_bin); - - if (ti_begin != ti_current) - error( - "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, " - "ti_step=%lld time_bin=%d ti_current=%lld", - ti_end, ti_begin, ti_step, gp->time_bin, ti_current); -#endif - - /* Time interval for this half-kick */ - double dt_kick_grav; - if (with_cosmology) { - dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin, - ti_begin + ti_step / 2); - } else { - dt_kick_grav = (ti_step / 2) * time_base; - } - - /* do the kick */ - kick_gpart(gp, dt_kick_grav, ti_begin, ti_begin + ti_step / 2); - } - } - - /* Loop over the stars particles in this cell. */ - for (int k = 0; k < scount; k++) { - - /* Get a handle on the s-part. 
*/ - struct spart *restrict sp = &sparts[k]; - - /* If particle needs to be kicked */ - if (spart_is_starting(sp, e)) { - - const integertime_t ti_step = get_integer_timestep(sp->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current + 1, sp->time_bin); - -#ifdef SWIFT_DEBUG_CHECKS - const integertime_t ti_end = - get_integer_time_end(ti_current + 1, sp->time_bin); - - if (ti_begin != ti_current) - error( - "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, " - "ti_step=%lld time_bin=%d ti_current=%lld", - ti_end, ti_begin, ti_step, sp->time_bin, ti_current); -#endif - - /* Time interval for this half-kick */ - double dt_kick_grav; - if (with_cosmology) { - dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin, - ti_begin + ti_step / 2); - } else { - dt_kick_grav = (ti_step / 2) * time_base; - } - - /* do the kick */ - kick_spart(sp, dt_kick_grav, ti_begin, ti_begin + ti_step / 2); - } - } - } - - if (timer) TIMER_TOC(timer_kick1); -} - -/** - * @brief Perform the second half-kick on all the active particles in a cell. - * - * Also prepares particles to be drifted. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? 
- */ -void runner_do_kick2(struct runner *r, struct cell *c, int timer) { - - const struct engine *e = r->e; - const struct cosmology *cosmo = e->cosmology; - const struct hydro_props *hydro_props = e->hydro_properties; - const struct entropy_floor_properties *entropy_floor = e->entropy_floor; - const int with_cosmology = (e->policy & engine_policy_cosmology); - const int count = c->hydro.count; - const int gcount = c->grav.count; - const int scount = c->stars.count; - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - struct gpart *restrict gparts = c->grav.parts; - struct spart *restrict sparts = c->stars.parts; - const integertime_t ti_current = e->ti_current; - const double time_base = e->time_base; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e) && - !cell_is_active_stars(c, e) && !cell_is_active_black_holes(c, e)) - return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_kick2(r, c->progeny[k], 0); - } else { - - /* Loop over the particles in this cell. */ - for (int k = 0; k < count; k++) { - - /* Get a handle on the part. 
*/ - struct part *restrict p = &parts[k]; - struct xpart *restrict xp = &xparts[k]; - - /* If particle needs to be kicked */ - if (part_is_active(p, e)) { - - integertime_t ti_begin, ti_end, ti_step; - -#ifdef SWIFT_DEBUG_CHECKS - if (p->wakeup == time_bin_awake) - error("Woken-up particle that has not been processed in kick1"); -#endif - - if (p->wakeup == time_bin_not_awake) { - - /* Time-step from a regular kick */ - ti_step = get_integer_timestep(p->time_bin); - ti_begin = get_integer_time_begin(ti_current, p->time_bin); - ti_end = ti_begin + ti_step; - - } else { - - /* Time-step that follows a wake-up call */ - ti_begin = get_integer_time_begin(ti_current, p->wakeup); - ti_end = get_integer_time_end(ti_current, p->time_bin); - ti_step = ti_end - ti_begin; - - /* Reset the flag. Everything is back to normal from now on. */ - p->wakeup = time_bin_awake; - } - -#ifdef SWIFT_DEBUG_CHECKS - if (ti_begin + ti_step != ti_current) - error( - "Particle in wrong time-bin, ti_begin=%lld, ti_step=%lld " - "time_bin=%d wakeup=%d ti_current=%lld", - ti_begin, ti_step, p->time_bin, p->wakeup, ti_current); -#endif - /* Time interval for this half-kick */ - double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr; - if (with_cosmology) { - dt_kick_hydro = cosmology_get_hydro_kick_factor( - cosmo, ti_begin + ti_step / 2, ti_end); - dt_kick_grav = cosmology_get_grav_kick_factor( - cosmo, ti_begin + ti_step / 2, ti_end); - dt_kick_therm = cosmology_get_therm_kick_factor( - cosmo, ti_begin + ti_step / 2, ti_end); - dt_kick_corr = cosmology_get_corr_kick_factor( - cosmo, ti_begin + ti_step / 2, ti_end); - } else { - dt_kick_hydro = (ti_end - (ti_begin + ti_step / 2)) * time_base; - dt_kick_grav = (ti_end - (ti_begin + ti_step / 2)) * time_base; - dt_kick_therm = (ti_end - (ti_begin + ti_step / 2)) * time_base; - dt_kick_corr = (ti_end - (ti_begin + ti_step / 2)) * time_base; - } - - /* Finish the time-step with a second half-kick */ - kick_part(p, xp, dt_kick_hydro, 
dt_kick_grav, dt_kick_therm, - dt_kick_corr, cosmo, hydro_props, entropy_floor, - ti_begin + ti_step / 2, ti_end); - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that kick and the drift are synchronized */ - if (p->ti_drift != p->ti_kick) error("Error integrating part in time."); -#endif - - /* Prepare the values to be drifted */ - hydro_reset_predicted_values(p, xp, cosmo); - } - } - - /* Loop over the g-particles in this cell. */ - for (int k = 0; k < gcount; k++) { - - /* Get a handle on the part. */ - struct gpart *restrict gp = &gparts[k]; - - /* If the g-particle has no counterpart and needs to be kicked */ - if ((gp->type == swift_type_dark_matter || - gp->type == swift_type_dark_matter_background) && - gpart_is_active(gp, e)) { - - const integertime_t ti_step = get_integer_timestep(gp->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current, gp->time_bin); - -#ifdef SWIFT_DEBUG_CHECKS - if (ti_begin + ti_step != ti_current) - error("Particle in wrong time-bin"); -#endif - - /* Time interval for this half-kick */ - double dt_kick_grav; - if (with_cosmology) { - dt_kick_grav = cosmology_get_grav_kick_factor( - cosmo, ti_begin + ti_step / 2, ti_begin + ti_step); - } else { - dt_kick_grav = (ti_step / 2) * time_base; - } - - /* Finish the time-step with a second half-kick */ - kick_gpart(gp, dt_kick_grav, ti_begin + ti_step / 2, - ti_begin + ti_step); - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that kick and the drift are synchronized */ - if (gp->ti_drift != gp->ti_kick) - error("Error integrating g-part in time."); -#endif - - /* Prepare the values to be drifted */ - gravity_reset_predicted_values(gp); - } - } - - /* Loop over the particles in this cell. */ - for (int k = 0; k < scount; k++) { - - /* Get a handle on the part. 
*/ - struct spart *restrict sp = &sparts[k]; - - /* If particle needs to be kicked */ - if (spart_is_active(sp, e)) { - - const integertime_t ti_step = get_integer_timestep(sp->time_bin); - const integertime_t ti_begin = - get_integer_time_begin(ti_current, sp->time_bin); - -#ifdef SWIFT_DEBUG_CHECKS - if (ti_begin + ti_step != ti_current) - error("Particle in wrong time-bin"); -#endif - - /* Time interval for this half-kick */ - double dt_kick_grav; - if (with_cosmology) { - dt_kick_grav = cosmology_get_grav_kick_factor( - cosmo, ti_begin + ti_step / 2, ti_begin + ti_step); - } else { - dt_kick_grav = (ti_step / 2) * time_base; - } - - /* Finish the time-step with a second half-kick */ - kick_spart(sp, dt_kick_grav, ti_begin + ti_step / 2, - ti_begin + ti_step); - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that kick and the drift are synchronized */ - if (sp->ti_drift != sp->ti_kick) - error("Error integrating s-part in time."); -#endif - - /* Prepare the values to be drifted */ - stars_reset_predicted_values(sp); - } - } - } - if (timer) TIMER_TOC(timer_kick2); -} - -/** - * @brief Computes the next time-step of all active particles in this cell - * and update the cell's statistics. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_timestep(struct runner *r, struct cell *c, int timer) { - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const int with_cosmology = (e->policy & engine_policy_cosmology); - const int count = c->hydro.count; - const int gcount = c->grav.count; - const int scount = c->stars.count; - const int bcount = c->black_holes.count; - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - struct gpart *restrict gparts = c->grav.parts; - struct spart *restrict sparts = c->stars.parts; - struct bpart *restrict bparts = c->black_holes.parts; - - TIMER_TIC; - - /* Anything to do here? 
*/ - if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e) && - !cell_is_active_stars(c, e) && !cell_is_active_black_holes(c, e)) { - c->hydro.updated = 0; - c->grav.updated = 0; - c->stars.updated = 0; - c->black_holes.updated = 0; - return; - } - - int updated = 0, g_updated = 0, s_updated = 0, b_updated = 0; - integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, - ti_hydro_beg_max = 0; - integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, - ti_gravity_beg_max = 0; - integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0, - ti_stars_beg_max = 0; - integertime_t ti_black_holes_end_min = max_nr_timesteps, - ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0; - - /* No children? */ - if (!c->split) { - - /* Loop over the particles in this cell. */ - for (int k = 0; k < count; k++) { - - /* Get a handle on the part. */ - struct part *restrict p = &parts[k]; - struct xpart *restrict xp = &xparts[k]; - - /* If particle needs updating */ - if (part_is_active(p, e)) { - -#ifdef SWIFT_DEBUG_CHECKS - /* Current end of time-step */ - const integertime_t ti_end = - get_integer_time_end(ti_current, p->time_bin); - - if (ti_end != ti_current) - error("Computing time-step of rogue particle."); -#endif - - /* Get new time-step */ - const integertime_t ti_new_step = get_part_timestep(p, xp, e); - - /* Update particle */ - p->time_bin = get_time_bin(ti_new_step); - if (p->gpart != NULL) p->gpart->time_bin = p->time_bin; - - /* Update the tracers properties */ - tracers_after_timestep(p, xp, e->internal_units, e->physical_constants, - with_cosmology, e->cosmology, - e->hydro_properties, e->cooling_func, e->time); - - /* Number of updated particles */ - updated++; - if (p->gpart != NULL) g_updated++; - - /* What is the next sync-point ? 
*/ - ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min); - ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max); - - /* What is the next starting point for this cell ? */ - ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max); - - if (p->gpart != NULL) { - - /* What is the next sync-point ? */ - ti_gravity_end_min = - min(ti_current + ti_new_step, ti_gravity_end_min); - ti_gravity_end_max = - max(ti_current + ti_new_step, ti_gravity_end_max); - - /* What is the next starting point for this cell ? */ - ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); - } - } - - else { /* part is inactive */ - - if (!part_is_inhibited(p, e)) { - - const integertime_t ti_end = - get_integer_time_end(ti_current, p->time_bin); - - const integertime_t ti_beg = - get_integer_time_begin(ti_current + 1, p->time_bin); - - /* What is the next sync-point ? */ - ti_hydro_end_min = min(ti_end, ti_hydro_end_min); - ti_hydro_end_max = max(ti_end, ti_hydro_end_max); - - /* What is the next starting point for this cell ? */ - ti_hydro_beg_max = max(ti_beg, ti_hydro_beg_max); - - if (p->gpart != NULL) { - - /* What is the next sync-point ? */ - ti_gravity_end_min = min(ti_end, ti_gravity_end_min); - ti_gravity_end_max = max(ti_end, ti_gravity_end_max); - - /* What is the next starting point for this cell ? */ - ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max); - } - } - } - } - - /* Loop over the g-particles in this cell. */ - for (int k = 0; k < gcount; k++) { - - /* Get a handle on the part. */ - struct gpart *restrict gp = &gparts[k]; - - /* If the g-particle has no counterpart */ - if (gp->type == swift_type_dark_matter || - gp->type == swift_type_dark_matter_background) { - - /* need to be updated ? 
*/ - if (gpart_is_active(gp, e)) { - -#ifdef SWIFT_DEBUG_CHECKS - /* Current end of time-step */ - const integertime_t ti_end = - get_integer_time_end(ti_current, gp->time_bin); - - if (ti_end != ti_current) - error("Computing time-step of rogue particle."); -#endif - - /* Get new time-step */ - const integertime_t ti_new_step = get_gpart_timestep(gp, e); - - /* Update particle */ - gp->time_bin = get_time_bin(ti_new_step); - - /* Number of updated g-particles */ - g_updated++; - - /* What is the next sync-point ? */ - ti_gravity_end_min = - min(ti_current + ti_new_step, ti_gravity_end_min); - ti_gravity_end_max = - max(ti_current + ti_new_step, ti_gravity_end_max); - - /* What is the next starting point for this cell ? */ - ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); - - } else { /* gpart is inactive */ - - if (!gpart_is_inhibited(gp, e)) { - - const integertime_t ti_end = - get_integer_time_end(ti_current, gp->time_bin); - - /* What is the next sync-point ? */ - ti_gravity_end_min = min(ti_end, ti_gravity_end_min); - ti_gravity_end_max = max(ti_end, ti_gravity_end_max); - - const integertime_t ti_beg = - get_integer_time_begin(ti_current + 1, gp->time_bin); - - /* What is the next starting point for this cell ? */ - ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max); - } - } - } - } - - /* Loop over the star particles in this cell. */ - for (int k = 0; k < scount; k++) { - - /* Get a handle on the part. */ - struct spart *restrict sp = &sparts[k]; - - /* need to be updated ? 
*/ - if (spart_is_active(sp, e)) { - -#ifdef SWIFT_DEBUG_CHECKS - /* Current end of time-step */ - const integertime_t ti_end = - get_integer_time_end(ti_current, sp->time_bin); - - if (ti_end != ti_current) - error("Computing time-step of rogue particle."); -#endif - /* Get new time-step */ - const integertime_t ti_new_step = get_spart_timestep(sp, e); - - /* Update particle */ - sp->time_bin = get_time_bin(ti_new_step); - sp->gpart->time_bin = get_time_bin(ti_new_step); - - /* Number of updated s-particles */ - s_updated++; - g_updated++; - - ti_stars_end_min = min(ti_current + ti_new_step, ti_stars_end_min); - ti_stars_end_max = max(ti_current + ti_new_step, ti_stars_end_max); - ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min); - ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max); - - /* What is the next starting point for this cell ? */ - ti_stars_beg_max = max(ti_current, ti_stars_beg_max); - ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); - - /* star particle is inactive but not inhibited */ - } else { - - if (!spart_is_inhibited(sp, e)) { - - const integertime_t ti_end = - get_integer_time_end(ti_current, sp->time_bin); - - const integertime_t ti_beg = - get_integer_time_begin(ti_current + 1, sp->time_bin); - - ti_stars_end_min = min(ti_end, ti_stars_end_min); - ti_stars_end_max = max(ti_end, ti_stars_end_max); - ti_gravity_end_min = min(ti_end, ti_gravity_end_min); - ti_gravity_end_max = max(ti_end, ti_gravity_end_max); - - /* What is the next starting point for this cell ? */ - ti_stars_beg_max = max(ti_beg, ti_stars_beg_max); - ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max); - } - } - } - - /* Loop over the star particles in this cell. */ - for (int k = 0; k < bcount; k++) { - - /* Get a handle on the part. */ - struct bpart *restrict bp = &bparts[k]; - - /* need to be updated ? 
*/ - if (bpart_is_active(bp, e)) { - -#ifdef SWIFT_DEBUG_CHECKS - /* Current end of time-step */ - const integertime_t ti_end = - get_integer_time_end(ti_current, bp->time_bin); - - if (ti_end != ti_current) - error("Computing time-step of rogue particle."); -#endif - /* Get new time-step */ - const integertime_t ti_new_step = get_bpart_timestep(bp, e); - - /* Update particle */ - bp->time_bin = get_time_bin(ti_new_step); - bp->gpart->time_bin = get_time_bin(ti_new_step); - - /* Number of updated s-particles */ - b_updated++; - g_updated++; - - ti_black_holes_end_min = - min(ti_current + ti_new_step, ti_black_holes_end_min); - ti_black_holes_end_max = - max(ti_current + ti_new_step, ti_black_holes_end_max); - ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min); - ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max); - - /* What is the next starting point for this cell ? */ - ti_black_holes_beg_max = max(ti_current, ti_black_holes_beg_max); - ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); - - /* star particle is inactive but not inhibited */ - } else { - - if (!bpart_is_inhibited(bp, e)) { - - const integertime_t ti_end = - get_integer_time_end(ti_current, bp->time_bin); - - const integertime_t ti_beg = - get_integer_time_begin(ti_current + 1, bp->time_bin); - - ti_black_holes_end_min = min(ti_end, ti_black_holes_end_min); - ti_black_holes_end_max = max(ti_end, ti_black_holes_end_max); - ti_gravity_end_min = min(ti_end, ti_gravity_end_min); - ti_gravity_end_max = max(ti_end, ti_gravity_end_max); - - /* What is the next starting point for this cell ? */ - ti_black_holes_beg_max = max(ti_beg, ti_black_holes_beg_max); - ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max); - } - } - } - - } else { - - /* Loop over the progeny. 
*/ - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - struct cell *restrict cp = c->progeny[k]; - - /* Recurse */ - runner_do_timestep(r, cp, 0); - - /* And aggregate */ - updated += cp->hydro.updated; - g_updated += cp->grav.updated; - s_updated += cp->stars.updated; - b_updated += cp->black_holes.updated; - - ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min); - ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max); - ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max); - - ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min); - ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max); - ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max); - - ti_stars_end_min = min(cp->stars.ti_end_min, ti_stars_end_min); - ti_stars_end_max = max(cp->grav.ti_end_max, ti_stars_end_max); - ti_stars_beg_max = max(cp->grav.ti_beg_max, ti_stars_beg_max); - - ti_black_holes_end_min = - min(cp->black_holes.ti_end_min, ti_black_holes_end_min); - ti_black_holes_end_max = - max(cp->grav.ti_end_max, ti_black_holes_end_max); - ti_black_holes_beg_max = - max(cp->grav.ti_beg_max, ti_black_holes_beg_max); - } - } - } - - /* Store the values. 
*/ - c->hydro.updated = updated; - c->grav.updated = g_updated; - c->stars.updated = s_updated; - c->black_holes.updated = b_updated; - - c->hydro.ti_end_min = ti_hydro_end_min; - c->hydro.ti_end_max = ti_hydro_end_max; - c->hydro.ti_beg_max = ti_hydro_beg_max; - c->grav.ti_end_min = ti_gravity_end_min; - c->grav.ti_end_max = ti_gravity_end_max; - c->grav.ti_beg_max = ti_gravity_beg_max; - c->stars.ti_end_min = ti_stars_end_min; - c->stars.ti_end_max = ti_stars_end_max; - c->stars.ti_beg_max = ti_stars_beg_max; - c->black_holes.ti_end_min = ti_black_holes_end_min; - c->black_holes.ti_end_max = ti_black_holes_end_max; - c->black_holes.ti_beg_max = ti_black_holes_beg_max; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->hydro.ti_end_min == e->ti_current && - c->hydro.ti_end_min < max_nr_timesteps) - error("End of next hydro step is current time!"); - if (c->grav.ti_end_min == e->ti_current && - c->grav.ti_end_min < max_nr_timesteps) - error("End of next gravity step is current time!"); - if (c->stars.ti_end_min == e->ti_current && - c->stars.ti_end_min < max_nr_timesteps) - error("End of next stars step is current time!"); - if (c->black_holes.ti_end_min == e->ti_current && - c->black_holes.ti_end_min < max_nr_timesteps) - error("End of next black holes step is current time!"); -#endif - - if (timer) TIMER_TOC(timer_timestep); -} - -/** - * @brief Apply the time-step limiter to all awaken particles in a cell - * hierarchy. - * - * @param r The task #runner. - * @param c The #cell. - * @param force Limit the particles irrespective of the #cell flags. - * @param timer Are we timing this ? - */ -void runner_do_limiter(struct runner *r, struct cell *c, int force, int timer) { - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const int count = c->hydro.count; - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that we only limit local cells. 
*/ - if (c->nodeID != engine_rank) error("Limiting dt of a foreign cell is nope."); -#endif - - integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, - ti_hydro_beg_max = 0; - integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, - ti_gravity_beg_max = 0; - - /* Limit irrespective of cell flags? */ - force = (force || cell_get_flag(c, cell_flag_do_hydro_limiter)); - - /* Early abort? */ - if (c->hydro.count == 0) { - - /* Clear the limiter flags. */ - cell_clear_flag( - c, cell_flag_do_hydro_limiter | cell_flag_do_hydro_sub_limiter); - return; - } - - /* Loop over the progeny ? */ - if (c->split && (force || cell_get_flag(c, cell_flag_do_hydro_sub_limiter))) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - struct cell *restrict cp = c->progeny[k]; - - /* Recurse */ - runner_do_limiter(r, cp, force, 0); - - /* And aggregate */ - ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min); - ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max); - ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max); - ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min); - ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max); - ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max); - } - } - - /* Store the updated values */ - c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min); - c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max); - c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max); - c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min); - c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max); - c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max); - - } else if (!c->split && force) { - - ti_hydro_end_min = c->hydro.ti_end_min; - ti_hydro_end_max = c->hydro.ti_end_max; - ti_hydro_beg_max = c->hydro.ti_beg_max; - ti_gravity_end_min = c->grav.ti_end_min; - ti_gravity_end_max = 
c->grav.ti_end_max; - ti_gravity_beg_max = c->grav.ti_beg_max; - - /* Loop over the gas particles in this cell. */ - for (int k = 0; k < count; k++) { - - /* Get a handle on the part. */ - struct part *restrict p = &parts[k]; - struct xpart *restrict xp = &xparts[k]; - - /* Avoid inhibited particles */ - if (part_is_inhibited(p, e)) continue; - - /* If the particle will be active no need to wake it up */ - if (part_is_active(p, e) && p->wakeup != time_bin_not_awake) - p->wakeup = time_bin_not_awake; - - /* Bip, bip, bip... wake-up time */ - if (p->wakeup <= time_bin_awake) { - - /* Apply the limiter and get the new time-step size */ - const integertime_t ti_new_step = timestep_limit_part(p, xp, e); - - /* What is the next sync-point ? */ - ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min); - ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max); - - /* What is the next starting point for this cell ? */ - ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max); - - /* Also limit the gpart counter-part */ - if (p->gpart != NULL) { - - /* Register the time-bin */ - p->gpart->time_bin = p->time_bin; - - /* What is the next sync-point ? */ - ti_gravity_end_min = - min(ti_current + ti_new_step, ti_gravity_end_min); - ti_gravity_end_max = - max(ti_current + ti_new_step, ti_gravity_end_max); - - /* What is the next starting point for this cell ? */ - ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); - } - } - } - - /* Store the updated values */ - c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min); - c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max); - c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max); - c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min); - c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max); - c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max); - } - - /* Clear the limiter flags. 
*/ - cell_clear_flag(c, - cell_flag_do_hydro_limiter | cell_flag_do_hydro_sub_limiter); - - if (timer) TIMER_TOC(timer_do_limiter); -} - -/** - * @brief End the hydro force calculation of all active particles in a cell - * by multiplying the acccelerations by the relevant constants - * - * @param r The #runner thread. - * @param c The #cell. - * @param timer Are we timing this ? - */ -void runner_do_end_hydro_force(struct runner *r, struct cell *c, int timer) { - - const struct engine *e = r->e; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_hydro(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_end_hydro_force(r, c->progeny[k], 0); - } else { - - const struct cosmology *cosmo = e->cosmology; - const int count = c->hydro.count; - struct part *restrict parts = c->hydro.parts; - - /* Loop over the gas particles in this cell. */ - for (int k = 0; k < count; k++) { - - /* Get a handle on the part. */ - struct part *restrict p = &parts[k]; - - if (part_is_active(p, e)) { - - /* Finish the force loop */ - hydro_end_force(p, cosmo); - chemistry_end_force(p, cosmo); - -#ifdef SWIFT_BOUNDARY_PARTICLES - - /* Get the ID of the part */ - const long long id = p->id; - - /* Cancel hdyro forces of these particles */ - if (id < SWIFT_BOUNDARY_PARTICLES) { - - /* Don't move ! */ - hydro_reset_acceleration(p); - -#if defined(GIZMO_MFV_SPH) || defined(GIZMO_MFM_SPH) - - /* Some values need to be reset in the Gizmo case. */ - hydro_prepare_force(p, &c->hydro.xparts[k], cosmo, - e->hydro_properties, 0); -#endif - } -#endif - } - } - } - - if (timer) TIMER_TOC(timer_end_hydro_force); -} - -/** - * @brief End the gravity force calculation of all active particles in a cell - * by multiplying the acccelerations by the relevant constants - * - * @param r The #runner thread. - * @param c The #cell. - * @param timer Are we timing this ? 
- */ -void runner_do_end_grav_force(struct runner *r, struct cell *c, int timer) { - - const struct engine *e = r->e; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_gravity(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_end_grav_force(r, c->progeny[k], 0); - } else { - - const struct space *s = e->s; - const int periodic = s->periodic; - const float G_newton = e->physical_constants->const_newton_G; - - /* Potential normalisation in the case of periodic gravity */ - float potential_normalisation = 0.; - if (periodic && (e->policy & engine_policy_self_gravity)) { - const double volume = s->dim[0] * s->dim[1] * s->dim[2]; - const double r_s = e->mesh->r_s; - potential_normalisation = 4. * M_PI * e->total_mass * r_s * r_s / volume; - } - - const int gcount = c->grav.count; - struct gpart *restrict gparts = c->grav.parts; - - /* Loop over the g-particles in this cell. */ - for (int k = 0; k < gcount; k++) { - - /* Get a handle on the gpart. */ - struct gpart *restrict gp = &gparts[k]; - - if (gpart_is_active(gp, e)) { - - /* Finish the force calculation */ - gravity_end_force(gp, G_newton, potential_normalisation, periodic); - -#ifdef SWIFT_MAKE_GRAVITY_GLASS - - /* Negate the gravity forces */ - gp->a_grav[0] *= -1.f; - gp->a_grav[1] *= -1.f; - gp->a_grav[2] *= -1.f; -#endif - -#ifdef SWIFT_NO_GRAVITY_BELOW_ID - - /* Get the ID of the gpart */ - long long id = 0; - if (gp->type == swift_type_gas) - id = e->s->parts[-gp->id_or_neg_offset].id; - else if (gp->type == swift_type_stars) - id = e->s->sparts[-gp->id_or_neg_offset].id; - else if (gp->type == swift_type_black_hole) - error("Unexisting type"); - else - id = gp->id_or_neg_offset; - - /* Cancel gravity forces of these particles */ - if (id < SWIFT_NO_GRAVITY_BELOW_ID) { - - /* Don't move ! 
*/ - gp->a_grav[0] = 0.f; - gp->a_grav[1] = 0.f; - gp->a_grav[2] = 0.f; - } -#endif - -#ifdef SWIFT_DEBUG_CHECKS - if ((e->policy & engine_policy_self_gravity) && - !(e->policy & engine_policy_black_holes)) { - - /* Let's add a self interaction to simplify the count */ - gp->num_interacted++; - - /* Check that this gpart has interacted with all the other - * particles (via direct or multipoles) in the box */ - if (gp->num_interacted != - e->total_nr_gparts - e->count_inhibited_gparts) { - - /* Get the ID of the gpart */ - long long my_id = 0; - if (gp->type == swift_type_gas) - my_id = e->s->parts[-gp->id_or_neg_offset].id; - else if (gp->type == swift_type_stars) - my_id = e->s->sparts[-gp->id_or_neg_offset].id; - else if (gp->type == swift_type_black_hole) - error("Unexisting type"); - else - my_id = gp->id_or_neg_offset; - - error( - "g-particle (id=%lld, type=%s) did not interact " - "gravitationally with all other gparts " - "gp->num_interacted=%lld, total_gparts=%lld (local " - "num_gparts=%zd inhibited_gparts=%lld)", - my_id, part_type_names[gp->type], gp->num_interacted, - e->total_nr_gparts, e->s->nr_gparts, e->count_inhibited_gparts); - } - } -#endif - } - } - } - if (timer) TIMER_TOC(timer_end_grav_force); -} - -/** - * @brief Process all the gas particles in a cell that have been flagged for - * swallowing by a black hole. - * - * This is done by recursing down to the leaf-level and skipping the sub-cells - * that have not been drifted as they would not have any particles with - * swallowing flag. We then loop over the particles with a flag and look into - * the space-wide list of black holes for the particle with the corresponding - * ID. If found, the BH swallows the gas particle and the gas particle is - * removed. If the cell is local, we may be looking for a foreign BH, in which - * case, we do not update the BH (that will be done on its node) but just remove - * the gas particle. - * - * @param r The thread #runner. - * @param c The #cell. 
- * @param timer Are we timing this? - */ -void runner_do_gas_swallow(struct runner *r, struct cell *c, int timer) { - - struct engine *e = r->e; - struct space *s = e->s; - struct bpart *bparts = s->bparts; - const size_t nr_bpart = s->nr_bparts; -#ifdef WITH_MPI - struct bpart *bparts_foreign = s->bparts_foreign; - const size_t nr_bparts_foreign = s->nr_bparts_foreign; -#endif - - struct part *parts = c->hydro.parts; - struct xpart *xparts = c->hydro.xparts; - - /* Early abort? - * (We only want cells for which we drifted the gas as these are - * the only ones that could have gas particles that have been flagged - * for swallowing) */ - if (c->hydro.count == 0 || c->hydro.ti_old_part != e->ti_current) { - return; - } - - /* Loop over the progeny ? */ - if (c->split) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - struct cell *restrict cp = c->progeny[k]; - - runner_do_gas_swallow(r, cp, 0); - } - } - } else { - - /* Loop over all the gas particles in the cell - * Note that the cell (and hence the parts) may be local or foreign. */ - const size_t nr_parts = c->hydro.count; - for (size_t k = 0; k < nr_parts; k++) { - - /* Get a handle on the part. */ - struct part *const p = &parts[k]; - struct xpart *const xp = &xparts[k]; - - /* Ignore inhibited particles (they have already been removed!) */ - if (part_is_inhibited(p, e)) continue; - - /* Get the ID of the black holes that will swallow this part */ - const long long swallow_id = - black_holes_get_part_swallow_id(&p->black_holes_data); - - /* Has this particle been flagged for swallowing? */ - if (swallow_id >= 0) { - -#ifdef SWIFT_DEBUG_CHECKS - if (p->ti_drift != e->ti_current) - error("Trying to swallow an un-drifted particle."); -#endif - - /* ID of the BH swallowing this particle */ - const long long BH_id = swallow_id; - - /* Have we found this particle's BH already? 
*/ - int found = 0; - - /* Let's look for the hungry black hole in the local list */ - for (size_t i = 0; i < nr_bpart; ++i) { - - /* Get a handle on the bpart. */ - struct bpart *bp = &bparts[i]; - - if (bp->id == BH_id) { - - /* Lock the space as we are going to work directly on the bpart list - */ - lock_lock(&s->lock); - - /* Swallow the gas particle (i.e. update the BH properties) */ - black_holes_swallow_part(bp, p, xp, e->cosmology); - - /* Release the space as we are done updating the bpart */ - if (lock_unlock(&s->lock) != 0) - error("Failed to unlock the space."); - - message("BH %lld swallowing gas particle %lld", bp->id, p->id); - - /* If the gas particle is local, remove it */ - if (c->nodeID == e->nodeID) { - - message("BH %lld removing gas particle %lld", bp->id, p->id); - - lock_lock(&e->s->lock); - - /* Re-check that the particle has not been removed - * by another thread before we do the deed. */ - if (!part_is_inhibited(p, e)) { - - /* Finally, remove the gas particle from the system - * Recall that the gpart associated with it is also removed - * at the same time. */ - cell_remove_part(e, c, p, xp); - } - - if (lock_unlock(&e->s->lock) != 0) - error("Failed to unlock the space!"); - } - - /* In any case, prevent the particle from being re-swallowed */ - black_holes_mark_part_as_swallowed(&p->black_holes_data); - - found = 1; - break; - } - - } /* Loop over local BHs */ - -#ifdef WITH_MPI - - /* We could also be in the case of a local gas particle being - * swallowed by a foreign BH. In this case, we won't update the - * BH but just remove the particle from the local list. */ - if (c->nodeID == e->nodeID && !found) { - - /* Let's look for the foreign hungry black hole */ - for (size_t i = 0; i < nr_bparts_foreign; ++i) { - - /* Get a handle on the bpart. 
*/ - struct bpart *bp = &bparts_foreign[i]; - - if (bp->id == BH_id) { - - message("BH %lld removing gas particle %lld (foreign BH case)", - bp->id, p->id); - - lock_lock(&e->s->lock); - - /* Re-check that the particle has not been removed - * by another thread before we do the deed. */ - if (!part_is_inhibited(p, e)) { - - /* Finally, remove the gas particle from the system */ - cell_remove_part(e, c, p, xp); - } - - if (lock_unlock(&e->s->lock) != 0) - error("Failed to unlock the space!"); - - found = 1; - break; - } - } /* Loop over foreign BHs */ - } /* Is the cell local? */ -#endif - - /* If we have a local particle, we must have found the BH in one - * of our list of black holes. */ - if (c->nodeID == e->nodeID && !found) { - error("Gas particle %lld could not find BH %lld to be swallowed", - p->id, swallow_id); - } - } /* Part was flagged for swallowing */ - } /* Loop over the parts */ - } /* Cell is not split */ -} - -/** - * @brief Processing of gas particles to swallow - self task case. - * - * @param r The thread #runner. - * @param c The #cell. - * @param timer Are we timing this? - */ -void runner_do_gas_swallow_self(struct runner *r, struct cell *c, int timer) { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != r->e->nodeID) error("Running self task on foreign node"); - if (!cell_is_active_black_holes(c, r->e)) - error("Running self task on inactive cell"); -#endif - - runner_do_gas_swallow(r, c, timer); -} - -/** - * @brief Processing of gas particles to swallow - pair task case. - * - * @param r The thread #runner. - * @param ci First #cell. - * @param cj Second #cell. - * @param timer Are we timing this? 
- */ -void runner_do_gas_swallow_pair(struct runner *r, struct cell *ci, - struct cell *cj, int timer) { - - const struct engine *e = r->e; - -#ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != e->nodeID && cj->nodeID != e->nodeID) - error("Running pair task on foreign node"); -#endif - - /* Run the swallowing loop only in the cell that is the neighbour of the - * active BH */ - if (cell_is_active_black_holes(cj, e)) runner_do_gas_swallow(r, ci, timer); - if (cell_is_active_black_holes(ci, e)) runner_do_gas_swallow(r, cj, timer); -} - -/** - * @brief Process all the BH particles in a cell that have been flagged for - * swallowing by a black hole. - * - * This is done by recursing down to the leaf-level and skipping the sub-cells - * that have not been drifted as they would not have any particles with - * swallowing flag. We then loop over the particles with a flag and look into - * the space-wide list of black holes for the particle with the corresponding - * ID. If found, the BH swallows the BH particle and the BH particle is - * removed. If the cell is local, we may be looking for a foreign BH, in which - * case, we do not update the BH (that will be done on its node) but just remove - * the BH particle. - * - * @param r The thread #runner. - * @param c The #cell. - * @param timer Are we timing this? - */ -void runner_do_bh_swallow(struct runner *r, struct cell *c, int timer) { - - struct engine *e = r->e; - struct space *s = e->s; - struct bpart *bparts = s->bparts; - const size_t nr_bpart = s->nr_bparts; -#ifdef WITH_MPI - struct bpart *bparts_foreign = s->bparts_foreign; - const size_t nr_bparts_foreign = s->nr_bparts_foreign; -#endif - - struct bpart *cell_bparts = c->black_holes.parts; - - /* Early abort? 
- * (We only want cells for which we drifted the BH as these are - * the only ones that could have BH particles that have been flagged - * for swallowing) */ - if (c->black_holes.count == 0 || - c->black_holes.ti_old_part != e->ti_current) { - return; - } - - /* Loop over the progeny ? */ - if (c->split) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - struct cell *restrict cp = c->progeny[k]; - - runner_do_bh_swallow(r, cp, 0); - } - } - } else { - - /* Loop over all the gas particles in the cell - * Note that the cell (and hence the bparts) may be local or foreign. */ - const size_t nr_cell_bparts = c->black_holes.count; - for (size_t k = 0; k < nr_cell_bparts; k++) { - - /* Get a handle on the part. */ - struct bpart *const cell_bp = &cell_bparts[k]; - - /* Ignore inhibited particles (they have already been removed!) */ - if (bpart_is_inhibited(cell_bp, e)) continue; - - /* Get the ID of the black holes that will swallow this part */ - const long long swallow_id = - black_holes_get_bpart_swallow_id(&cell_bp->merger_data); - - /* message("OO id=%lld swallow_id = %lld", cell_bp->id, */ - /* swallow_id); */ - - /* Has this particle been flagged for swallowing? */ - if (swallow_id >= 0) { - -#ifdef SWIFT_DEBUG_CHECKS - if (cell_bp->ti_drift != e->ti_current) - error("Trying to swallow an un-drifted particle."); -#endif - - /* ID of the BH swallowing this particle */ - const long long BH_id = swallow_id; - - /* Have we found this particle's BH already? */ - int found = 0; - - /* Let's look for the hungry black hole in the local list */ - for (size_t i = 0; i < nr_bpart; ++i) { - - /* Get a handle on the bpart. */ - struct bpart *bp = &bparts[i]; - - if (bp->id == BH_id) { - - /* Lock the space as we are going to work directly on the bpart list - */ - lock_lock(&s->lock); - - /* Swallow the gas particle (i.e. 
update the BH properties) */ - black_holes_swallow_bpart(bp, cell_bp, e->cosmology); - - /* Release the space as we are done updating the bpart */ - if (lock_unlock(&s->lock) != 0) - error("Failed to unlock the space."); - - message("BH %lld swallowing BH particle %lld", bp->id, cell_bp->id); - - /* If the gas particle is local, remove it */ - if (c->nodeID == e->nodeID) { - - message("BH %lld removing BH particle %lld", bp->id, cell_bp->id); - - /* Finally, remove the gas particle from the system - * Recall that the gpart associated with it is also removed - * at the same time. */ - cell_remove_bpart(e, c, cell_bp); - } - - /* In any case, prevent the particle from being re-swallowed */ - black_holes_mark_bpart_as_merged(&cell_bp->merger_data); - - found = 1; - break; - } - - } /* Loop over local BHs */ - -#ifdef WITH_MPI - - /* We could also be in the case of a local BH particle being - * swallowed by a foreign BH. In this case, we won't update the - * foreign BH but just remove the particle from the local list. */ - if (c->nodeID == e->nodeID && !found) { - - /* Let's look for the foreign hungry black hole */ - for (size_t i = 0; i < nr_bparts_foreign; ++i) { - - /* Get a handle on the bpart. */ - struct bpart *bp = &bparts_foreign[i]; - - if (bp->id == BH_id) { - - message("BH %lld removing BH particle %lld (foreign BH case)", - bp->id, cell_bp->id); - - /* Finally, remove the gas particle from the system */ - cell_remove_bpart(e, c, cell_bp); - - found = 1; - break; - } - } /* Loop over foreign BHs */ - } /* Is the cell local? */ -#endif - - /* If we have a local particle, we must have found the BH in one - * of our list of black holes. */ - if (c->nodeID == e->nodeID && !found) { - error("BH particle %lld could not find BH %lld to be swallowed", - cell_bp->id, swallow_id); - } - } /* Part was flagged for swallowing */ - } /* Loop over the parts */ - } /* Cell is not split */ -} - -/** - * @brief Processing of bh particles to swallow - self task case. 
- * - * @param r The thread #runner. - * @param c The #cell. - * @param timer Are we timing this? - */ -void runner_do_bh_swallow_self(struct runner *r, struct cell *c, int timer) { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != r->e->nodeID) error("Running self task on foreign node"); - if (!cell_is_active_black_holes(c, r->e)) - error("Running self task on inactive cell"); -#endif - - runner_do_bh_swallow(r, c, timer); -} - -/** - * @brief Processing of bh particles to swallow - pair task case. - * - * @param r The thread #runner. - * @param ci First #cell. - * @param cj Second #cell. - * @param timer Are we timing this? - */ -void runner_do_bh_swallow_pair(struct runner *r, struct cell *ci, - struct cell *cj, int timer) { - - const struct engine *e = r->e; - -#ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != e->nodeID && cj->nodeID != e->nodeID) - error("Running pair task on foreign node"); -#endif - - /* Run the swallowing loop only in the cell that is the neighbour of the - * active BH */ - if (cell_is_active_black_holes(cj, e)) runner_do_bh_swallow(r, ci, timer); - if (cell_is_active_black_holes(ci, e)) runner_do_bh_swallow(r, cj, timer); -} - -/** - * @brief Construct the cell properties from the received #part. - * - * @param r The runner thread. - * @param c The cell. - * @param clear_sorts Should we clear the sort flag and hence trigger a sort ? - * @param timer Are we timing this ? 
- */ -void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts, - int timer) { -#ifdef WITH_MPI - - const struct part *restrict parts = c->hydro.parts; - const size_t nr_parts = c->hydro.count; - const integertime_t ti_current = r->e->ti_current; - - TIMER_TIC; - - integertime_t ti_hydro_end_min = max_nr_timesteps; - integertime_t ti_hydro_end_max = 0; - timebin_t time_bin_min = num_time_bins; - timebin_t time_bin_max = 0; - float h_max = 0.f; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID == engine_rank) error("Updating a local cell!"); -#endif - - /* Clear this cell's sorted mask. */ - if (clear_sorts) c->hydro.sorted = 0; - - /* If this cell is a leaf, collect the particle data. */ - if (!c->split) { - - /* Collect everything... */ - for (size_t k = 0; k < nr_parts; k++) { - if (parts[k].time_bin == time_bin_inhibited) continue; - time_bin_min = min(time_bin_min, parts[k].time_bin); - time_bin_max = max(time_bin_max, parts[k].time_bin); - h_max = max(h_max, parts[k].h); - } - - /* Convert into a time */ - ti_hydro_end_min = get_integer_time_end(ti_current, time_bin_min); - ti_hydro_end_max = get_integer_time_end(ti_current, time_bin_max); - } - - /* Otherwise, recurse and collect. */ - else { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) { - runner_do_recv_part(r, c->progeny[k], clear_sorts, 0); - ti_hydro_end_min = - min(ti_hydro_end_min, c->progeny[k]->hydro.ti_end_min); - ti_hydro_end_max = - max(ti_hydro_end_max, c->progeny[k]->hydro.ti_end_max); - h_max = max(h_max, c->progeny[k]->hydro.h_max); - } - } - } - -#ifdef SWIFT_DEBUG_CHECKS - if (ti_hydro_end_min < ti_current) - error( - "Received a cell at an incorrect time c->ti_end_min=%lld, " - "e->ti_current=%lld.", - ti_hydro_end_min, ti_current); -#endif - - /* ... and store. 
*/ - // c->hydro.ti_end_min = ti_hydro_end_min; - // c->hydro.ti_end_max = ti_hydro_end_max; - c->hydro.ti_old_part = ti_current; - c->hydro.h_max = h_max; - - if (timer) TIMER_TOC(timer_dorecv_part); - -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - -/** - * @brief Construct the cell properties from the received #gpart. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? - */ -void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { - -#ifdef WITH_MPI - - const struct gpart *restrict gparts = c->grav.parts; - const size_t nr_gparts = c->grav.count; - const integertime_t ti_current = r->e->ti_current; - - TIMER_TIC; - - integertime_t ti_gravity_end_min = max_nr_timesteps; - integertime_t ti_gravity_end_max = 0; - timebin_t time_bin_min = num_time_bins; - timebin_t time_bin_max = 0; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID == engine_rank) error("Updating a local cell!"); -#endif - - /* If this cell is a leaf, collect the particle data. */ - if (!c->split) { - - /* Collect everything... */ - for (size_t k = 0; k < nr_gparts; k++) { - if (gparts[k].time_bin == time_bin_inhibited) continue; - time_bin_min = min(time_bin_min, gparts[k].time_bin); - time_bin_max = max(time_bin_max, gparts[k].time_bin); - } - - /* Convert into a time */ - ti_gravity_end_min = get_integer_time_end(ti_current, time_bin_min); - ti_gravity_end_max = get_integer_time_end(ti_current, time_bin_max); - } - - /* Otherwise, recurse and collect. 
*/ - else { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL && c->progeny[k]->grav.count > 0) { - runner_do_recv_gpart(r, c->progeny[k], 0); - ti_gravity_end_min = - min(ti_gravity_end_min, c->progeny[k]->grav.ti_end_min); - ti_gravity_end_max = - max(ti_gravity_end_max, c->progeny[k]->grav.ti_end_max); - } - } - } - -#ifdef SWIFT_DEBUG_CHECKS - if (ti_gravity_end_min < ti_current) - error( - "Received a cell at an incorrect time c->ti_end_min=%lld, " - "e->ti_current=%lld.", - ti_gravity_end_min, ti_current); -#endif - - /* ... and store. */ - // c->grav.ti_end_min = ti_gravity_end_min; - // c->grav.ti_end_max = ti_gravity_end_max; - c->grav.ti_old_part = ti_current; - - if (timer) TIMER_TOC(timer_dorecv_gpart); - -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - -/** - * @brief Construct the cell properties from the received #spart. - * - * @param r The runner thread. - * @param c The cell. - * @param clear_sorts Should we clear the sort flag and hence trigger a sort ? - * @param timer Are we timing this ? - */ -void runner_do_recv_spart(struct runner *r, struct cell *c, int clear_sorts, - int timer) { - -#ifdef WITH_MPI - - struct spart *restrict sparts = c->stars.parts; - const size_t nr_sparts = c->stars.count; - const integertime_t ti_current = r->e->ti_current; - - TIMER_TIC; - - integertime_t ti_stars_end_min = max_nr_timesteps; - integertime_t ti_stars_end_max = 0; - timebin_t time_bin_min = num_time_bins; - timebin_t time_bin_max = 0; - float h_max = 0.f; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID == engine_rank) error("Updating a local cell!"); -#endif - - /* Clear this cell's sorted mask. */ - if (clear_sorts) c->stars.sorted = 0; - - /* If this cell is a leaf, collect the particle data. */ - if (!c->split) { - - /* Collect everything... 
*/ - for (size_t k = 0; k < nr_sparts; k++) { -#ifdef DEBUG_INTERACTIONS_STARS - sparts[k].num_ngb_force = 0; -#endif - if (sparts[k].time_bin == time_bin_inhibited) continue; - time_bin_min = min(time_bin_min, sparts[k].time_bin); - time_bin_max = max(time_bin_max, sparts[k].time_bin); - h_max = max(h_max, sparts[k].h); - } - - /* Convert into a time */ - ti_stars_end_min = get_integer_time_end(ti_current, time_bin_min); - ti_stars_end_max = get_integer_time_end(ti_current, time_bin_max); - } - - /* Otherwise, recurse and collect. */ - else { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) { - runner_do_recv_spart(r, c->progeny[k], clear_sorts, 0); - ti_stars_end_min = - min(ti_stars_end_min, c->progeny[k]->stars.ti_end_min); - ti_stars_end_max = - max(ti_stars_end_max, c->progeny[k]->stars.ti_end_max); - h_max = max(h_max, c->progeny[k]->stars.h_max); - } - } - } - -#ifdef SWIFT_DEBUG_CHECKS - if (ti_stars_end_min < ti_current && - !(r->e->policy & engine_policy_star_formation)) - error( - "Received a cell at an incorrect time c->ti_end_min=%lld, " - "e->ti_current=%lld.", - ti_stars_end_min, ti_current); -#endif - - /* ... and store. */ - // c->grav.ti_end_min = ti_gravity_end_min; - // c->grav.ti_end_max = ti_gravity_end_max; - c->stars.ti_old_part = ti_current; - c->stars.h_max = h_max; - - if (timer) TIMER_TOC(timer_dorecv_spart); - -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - -/** - * @brief Construct the cell properties from the received #bpart. - * - * Note that we do not need to clear the sorts since we do not sort - * the black holes. - * - * @param r The runner thread. - * @param c The cell. - * @param clear_sorts Should we clear the sort flag and hence trigger a sort ? - * @param timer Are we timing this ? 
- */ -void runner_do_recv_bpart(struct runner *r, struct cell *c, int clear_sorts, - int timer) { - -#ifdef WITH_MPI - - struct bpart *restrict bparts = c->black_holes.parts; - const size_t nr_bparts = c->black_holes.count; - const integertime_t ti_current = r->e->ti_current; - - TIMER_TIC; - - integertime_t ti_black_holes_end_min = max_nr_timesteps; - integertime_t ti_black_holes_end_max = 0; - timebin_t time_bin_min = num_time_bins; - timebin_t time_bin_max = 0; - float h_max = 0.f; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID == engine_rank) error("Updating a local cell!"); -#endif - - /* If this cell is a leaf, collect the particle data. */ - if (!c->split) { - - /* Collect everything... */ - for (size_t k = 0; k < nr_bparts; k++) { -#ifdef DEBUG_INTERACTIONS_BLACK_HOLES - bparts[k].num_ngb_force = 0; -#endif - - /* message("Receiving bparts id=%lld time_bin=%d", */ - /* bparts[k].id, bparts[k].time_bin); */ - - if (bparts[k].time_bin == time_bin_inhibited) continue; - time_bin_min = min(time_bin_min, bparts[k].time_bin); - time_bin_max = max(time_bin_max, bparts[k].time_bin); - h_max = max(h_max, bparts[k].h); - } - - /* Convert into a time */ - ti_black_holes_end_min = get_integer_time_end(ti_current, time_bin_min); - ti_black_holes_end_max = get_integer_time_end(ti_current, time_bin_max); - } - - /* Otherwise, recurse and collect. 
*/ - else { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL && c->progeny[k]->black_holes.count > 0) { - runner_do_recv_bpart(r, c->progeny[k], clear_sorts, 0); - ti_black_holes_end_min = - min(ti_black_holes_end_min, c->progeny[k]->black_holes.ti_end_min); - ti_black_holes_end_max = - max(ti_black_holes_end_max, c->progeny[k]->black_holes.ti_end_max); - h_max = max(h_max, c->progeny[k]->black_holes.h_max); - } - } - } - -#ifdef SWIFT_DEBUG_CHECKS - if (ti_black_holes_end_min < ti_current) - error( - "Received a cell at an incorrect time c->ti_end_min=%lld, " - "e->ti_current=%lld.", - ti_black_holes_end_min, ti_current); -#endif - - /* ... and store. */ - // c->grav.ti_end_min = ti_gravity_end_min; - // c->grav.ti_end_max = ti_gravity_end_max; - c->black_holes.ti_old_part = ti_current; - c->black_holes.h_max = h_max; - - if (timer) TIMER_TOC(timer_dorecv_bpart); - -#else - error("SWIFT was not compiled with MPI support."); -#endif -} - -/** - * @brief The #runner main thread routine. - * - * @param data A pointer to this thread's data. - */ -void *runner_main(void *data) { - - struct runner *r = (struct runner *)data; - struct engine *e = r->e; - struct scheduler *sched = &e->sched; - unsigned int seed = r->id; - pthread_setspecific(sched->local_seed_pointer, &seed); - /* Main loop. */ - while (1) { - - /* Wait at the barrier. */ - engine_barrier(e); - - /* Can we go home yet? */ - if (e->step_props & engine_step_prop_done) break; - - /* Re-set the pointer to the previous task, as there is none. */ - struct task *t = NULL; - struct task *prev = NULL; - - /* Loop while there are tasks... */ - while (1) { - - /* If there's no old task, try to get a new one. */ - if (t == NULL) { - - /* Get the task. */ - TIMER_TIC - t = scheduler_gettask(sched, r->qid, prev); - TIMER_TOC(timer_gettask); - - /* Did I get anything? */ - if (t == NULL) break; - } - - /* Get the cells. 
*/ - struct cell *ci = t->ci; - struct cell *cj = t->cj; - -#ifdef SWIFT_DEBUG_TASKS - /* Mark the thread we run on */ - t->rid = r->cpuid; - - /* And recover the pair direction */ - if (t->type == task_type_pair || t->type == task_type_sub_pair) { - struct cell *ci_temp = ci; - struct cell *cj_temp = cj; - double shift[3]; - t->sid = space_getsid(e->s, &ci_temp, &cj_temp, shift); - } else { - t->sid = -1; - } -#endif - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that we haven't scheduled an inactive task */ - t->ti_run = e->ti_current; - /* Store the task that will be running (for debugging only) */ - r->t = t; -#endif - - /* Different types of tasks... */ - switch (t->type) { - case task_type_self: - if (t->subtype == task_subtype_density) - runner_doself1_branch_density(r, ci); -#ifdef EXTRA_HYDRO_LOOP - else if (t->subtype == task_subtype_gradient) - runner_doself1_branch_gradient(r, ci); -#endif - else if (t->subtype == task_subtype_force) - runner_doself2_branch_force(r, ci); - else if (t->subtype == task_subtype_limiter) - runner_doself2_branch_limiter(r, ci); - else if (t->subtype == task_subtype_grav) - runner_doself_recursive_grav(r, ci, 1); - else if (t->subtype == task_subtype_external_grav) - runner_do_grav_external(r, ci, 1); - else if (t->subtype == task_subtype_stars_density) - runner_doself_branch_stars_density(r, ci); - else if (t->subtype == task_subtype_stars_feedback) - runner_doself_branch_stars_feedback(r, ci); - else if (t->subtype == task_subtype_bh_density) - runner_doself_branch_bh_density(r, ci); - else if (t->subtype == task_subtype_bh_swallow) - runner_doself_branch_bh_swallow(r, ci); - else if (t->subtype == task_subtype_do_gas_swallow) - runner_do_gas_swallow_self(r, ci, 1); - else if (t->subtype == task_subtype_do_bh_swallow) - runner_do_bh_swallow_self(r, ci, 1); - else if (t->subtype == task_subtype_bh_feedback) - runner_doself_branch_bh_feedback(r, ci); - else - error("Unknown/invalid task subtype (%s).", - 
subtaskID_names[t->subtype]); - break; - - case task_type_pair: - if (t->subtype == task_subtype_density) - runner_dopair1_branch_density(r, ci, cj); -#ifdef EXTRA_HYDRO_LOOP - else if (t->subtype == task_subtype_gradient) - runner_dopair1_branch_gradient(r, ci, cj); -#endif - else if (t->subtype == task_subtype_force) - runner_dopair2_branch_force(r, ci, cj); - else if (t->subtype == task_subtype_limiter) - runner_dopair2_branch_limiter(r, ci, cj); - else if (t->subtype == task_subtype_grav) - runner_dopair_recursive_grav(r, ci, cj, 1); - else if (t->subtype == task_subtype_stars_density) - runner_dopair_branch_stars_density(r, ci, cj); - else if (t->subtype == task_subtype_stars_feedback) - runner_dopair_branch_stars_feedback(r, ci, cj); - else if (t->subtype == task_subtype_bh_density) - runner_dopair_branch_bh_density(r, ci, cj); - else if (t->subtype == task_subtype_bh_swallow) - runner_dopair_branch_bh_swallow(r, ci, cj); - else if (t->subtype == task_subtype_do_gas_swallow) - runner_do_gas_swallow_pair(r, ci, cj, 1); - else if (t->subtype == task_subtype_do_bh_swallow) - runner_do_bh_swallow_pair(r, ci, cj, 1); - else if (t->subtype == task_subtype_bh_feedback) - runner_dopair_branch_bh_feedback(r, ci, cj); - else - error("Unknown/invalid task subtype (%s/%s).", - taskID_names[t->type], subtaskID_names[t->subtype]); - break; - - case task_type_sub_self: - if (t->subtype == task_subtype_density) - runner_dosub_self1_density(r, ci, 1); -#ifdef EXTRA_HYDRO_LOOP - else if (t->subtype == task_subtype_gradient) - runner_dosub_self1_gradient(r, ci, 1); -#endif - else if (t->subtype == task_subtype_force) - runner_dosub_self2_force(r, ci, 1); - else if (t->subtype == task_subtype_limiter) - runner_dosub_self2_limiter(r, ci, 1); - else if (t->subtype == task_subtype_stars_density) - runner_dosub_self_stars_density(r, ci, 1); - else if (t->subtype == task_subtype_stars_feedback) - runner_dosub_self_stars_feedback(r, ci, 1); - else if (t->subtype == 
task_subtype_bh_density) - runner_dosub_self_bh_density(r, ci, 1); - else if (t->subtype == task_subtype_bh_swallow) - runner_dosub_self_bh_swallow(r, ci, 1); - else if (t->subtype == task_subtype_do_gas_swallow) - runner_do_gas_swallow_self(r, ci, 1); - else if (t->subtype == task_subtype_do_bh_swallow) - runner_do_bh_swallow_self(r, ci, 1); - else if (t->subtype == task_subtype_bh_feedback) - runner_dosub_self_bh_feedback(r, ci, 1); - else - error("Unknown/invalid task subtype (%s/%s).", - taskID_names[t->type], subtaskID_names[t->subtype]); - break; - - case task_type_sub_pair: - if (t->subtype == task_subtype_density) - runner_dosub_pair1_density(r, ci, cj, 1); -#ifdef EXTRA_HYDRO_LOOP - else if (t->subtype == task_subtype_gradient) - runner_dosub_pair1_gradient(r, ci, cj, 1); -#endif - else if (t->subtype == task_subtype_force) - runner_dosub_pair2_force(r, ci, cj, 1); - else if (t->subtype == task_subtype_limiter) - runner_dosub_pair2_limiter(r, ci, cj, 1); - else if (t->subtype == task_subtype_stars_density) - runner_dosub_pair_stars_density(r, ci, cj, 1); - else if (t->subtype == task_subtype_stars_feedback) - runner_dosub_pair_stars_feedback(r, ci, cj, 1); - else if (t->subtype == task_subtype_bh_density) - runner_dosub_pair_bh_density(r, ci, cj, 1); - else if (t->subtype == task_subtype_bh_swallow) - runner_dosub_pair_bh_swallow(r, ci, cj, 1); - else if (t->subtype == task_subtype_do_gas_swallow) - runner_do_gas_swallow_pair(r, ci, cj, 1); - else if (t->subtype == task_subtype_do_bh_swallow) - runner_do_bh_swallow_pair(r, ci, cj, 1); - else if (t->subtype == task_subtype_bh_feedback) - runner_dosub_pair_bh_feedback(r, ci, cj, 1); - else - error("Unknown/invalid task subtype (%s/%s).", - taskID_names[t->type], subtaskID_names[t->subtype]); - break; - - case task_type_sort: - /* Cleanup only if any of the indices went stale. 
*/ - runner_do_hydro_sort( - r, ci, t->flags, - ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin, 1); - /* Reset the sort flags as our work here is done. */ - t->flags = 0; - break; - case task_type_stars_sort: - /* Cleanup only if any of the indices went stale. */ - runner_do_stars_sort( - r, ci, t->flags, - ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin, 1); - /* Reset the sort flags as our work here is done. */ - t->flags = 0; - break; - case task_type_init_grav: - runner_do_init_grav(r, ci, 1); - break; - case task_type_ghost: - runner_do_ghost(r, ci, 1); - break; -#ifdef EXTRA_HYDRO_LOOP - case task_type_extra_ghost: - runner_do_extra_ghost(r, ci, 1); - break; -#endif - case task_type_stars_ghost: - runner_do_stars_ghost(r, ci, 1); - break; - case task_type_bh_density_ghost: - runner_do_black_holes_density_ghost(r, ci, 1); - break; - case task_type_bh_swallow_ghost3: - runner_do_black_holes_swallow_ghost(r, ci, 1); - break; - case task_type_drift_part: - runner_do_drift_part(r, ci, 1); - break; - case task_type_drift_spart: - runner_do_drift_spart(r, ci, 1); - break; - case task_type_drift_bpart: - runner_do_drift_bpart(r, ci, 1); - break; - case task_type_drift_gpart: - runner_do_drift_gpart(r, ci, 1); - break; - case task_type_kick1: - runner_do_kick1(r, ci, 1); - break; - case task_type_kick2: - runner_do_kick2(r, ci, 1); - break; - case task_type_end_hydro_force: - runner_do_end_hydro_force(r, ci, 1); - break; - case task_type_end_grav_force: - runner_do_end_grav_force(r, ci, 1); - break; - case task_type_logger: - runner_do_logger(r, ci, 1); - break; - case task_type_timestep: - runner_do_timestep(r, ci, 1); - break; - case task_type_timestep_limiter: - runner_do_limiter(r, ci, 0, 1); - break; -#ifdef WITH_MPI - case task_type_send: - if (t->subtype == task_subtype_tend_part) { - free(t->buff); - } else if (t->subtype == task_subtype_tend_gpart) { - free(t->buff); - } else if (t->subtype == task_subtype_tend_spart) { - free(t->buff); - } else 
if (t->subtype == task_subtype_tend_bpart) { - free(t->buff); - } else if (t->subtype == task_subtype_sf_counts) { - free(t->buff); - } else if (t->subtype == task_subtype_part_swallow) { - free(t->buff); - } else if (t->subtype == task_subtype_bpart_merger) { - free(t->buff); - } - break; - case task_type_recv: - if (t->subtype == task_subtype_tend_part) { - cell_unpack_end_step_hydro(ci, (struct pcell_step_hydro *)t->buff); - free(t->buff); - } else if (t->subtype == task_subtype_tend_gpart) { - cell_unpack_end_step_grav(ci, (struct pcell_step_grav *)t->buff); - free(t->buff); - } else if (t->subtype == task_subtype_tend_spart) { - cell_unpack_end_step_stars(ci, (struct pcell_step_stars *)t->buff); - free(t->buff); - } else if (t->subtype == task_subtype_tend_bpart) { - cell_unpack_end_step_black_holes( - ci, (struct pcell_step_black_holes *)t->buff); - free(t->buff); - } else if (t->subtype == task_subtype_sf_counts) { - cell_unpack_sf_counts(ci, (struct pcell_sf *)t->buff); - cell_clear_stars_sort_flags(ci, /*clear_unused_flags=*/0); - free(t->buff); - } else if (t->subtype == task_subtype_xv) { - runner_do_recv_part(r, ci, 1, 1); - } else if (t->subtype == task_subtype_rho) { - runner_do_recv_part(r, ci, 0, 1); - } else if (t->subtype == task_subtype_gradient) { - runner_do_recv_part(r, ci, 0, 1); - } else if (t->subtype == task_subtype_part_swallow) { - cell_unpack_part_swallow(ci, - (struct black_holes_part_data *)t->buff); - free(t->buff); - } else if (t->subtype == task_subtype_bpart_merger) { - cell_unpack_bpart_swallow(ci, - (struct black_holes_bpart_data *)t->buff); - free(t->buff); - } else if (t->subtype == task_subtype_limiter) { - runner_do_recv_part(r, ci, 0, 1); - } else if (t->subtype == task_subtype_gpart) { - runner_do_recv_gpart(r, ci, 1); - } else if (t->subtype == task_subtype_spart) { - runner_do_recv_spart(r, ci, 1, 1); - } else if (t->subtype == task_subtype_bpart_rho) { - runner_do_recv_bpart(r, ci, 1, 1); - } else if (t->subtype == 
task_subtype_bpart_swallow) { - runner_do_recv_bpart(r, ci, 0, 1); - } else if (t->subtype == task_subtype_bpart_feedback) { - runner_do_recv_bpart(r, ci, 0, 1); - } else if (t->subtype == task_subtype_multipole) { - cell_unpack_multipoles(ci, (struct gravity_tensors *)t->buff); - free(t->buff); - } else { - error("Unknown/invalid task subtype (%d).", t->subtype); - } - break; -#endif - case task_type_grav_down: - runner_do_grav_down(r, t->ci, 1); - break; - case task_type_grav_mesh: - runner_do_grav_mesh(r, t->ci, 1); - break; - case task_type_grav_long_range: - runner_do_grav_long_range(r, t->ci, 1); - break; - case task_type_grav_mm: - runner_dopair_grav_mm_progenies(r, t->flags, t->ci, t->cj); - break; - case task_type_cooling: - runner_do_cooling(r, t->ci, 1); - break; - case task_type_star_formation: - runner_do_star_formation(r, t->ci, 1); - break; - case task_type_stars_resort: - runner_do_stars_resort(r, t->ci, 1); - break; - case task_type_fof_self: - runner_do_fof_self(r, t->ci, 1); - break; - case task_type_fof_pair: - runner_do_fof_pair(r, t->ci, t->cj, 1); - break; - default: - error("Unknown/invalid task type (%d).", t->type); - } - -/* Mark that we have run this task on these cells */ -#ifdef SWIFT_DEBUG_CHECKS - if (ci != NULL) { - ci->tasks_executed[t->type]++; - ci->subtasks_executed[t->subtype]++; - } - if (cj != NULL) { - cj->tasks_executed[t->type]++; - cj->subtasks_executed[t->subtype]++; - } - - /* This runner is not doing a task anymore */ - r->t = NULL; -#endif - - /* We're done with this task, see if we get a next one. */ - prev = t; - t = scheduler_done(sched, t); - - } /* main loop. */ - } - - /* Be kind, rewind. */ - return NULL; -} - -/** - * @brief Write the required particles through the logger. - * - * @param r The runner thread. - * @param c The cell. - * @param timer Are we timing this ? 
- */ -void runner_do_logger(struct runner *r, struct cell *c, int timer) { - -#ifdef WITH_LOGGER - TIMER_TIC; - - const struct engine *e = r->e; - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - const int count = c->hydro.count; - - /* Anything to do here? */ - if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e)) return; - - /* Recurse? Avoid spending too much time in useless cells. */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_logger(r, c->progeny[k], 0); - } else { - - /* Loop over the parts in this cell. */ - for (int k = 0; k < count; k++) { - - /* Get a handle on the part. */ - struct part *restrict p = &parts[k]; - struct xpart *restrict xp = &xparts[k]; - - /* If particle needs to be log */ - /* This is the same function than part_is_active, except for - * debugging checks */ - if (part_is_active(p, e)) { - - if (logger_should_write(&xp->logger_data, e->logger)) { - /* Write particle */ - /* Currently writing everything, should adapt it through time */ - logger_log_part(e->logger, p, - logger_mask_data[logger_x].mask | - logger_mask_data[logger_v].mask | - logger_mask_data[logger_a].mask | - logger_mask_data[logger_u].mask | - logger_mask_data[logger_h].mask | - logger_mask_data[logger_rho].mask | - logger_mask_data[logger_consts].mask, - &xp->logger_data.last_offset); - - /* Set counter back to zero */ - xp->logger_data.steps_since_last_output = 0; - } else - /* Update counter */ - xp->logger_data.steps_since_last_output += 1; - } - } - } - - if (c->grav.count > 0) error("gparts not implemented"); - - if (c->stars.count > 0) error("sparts not implemented"); - - if (timer) TIMER_TOC(timer_logger); - -#else - error("Logger disabled, please enable it during configuration"); -#endif -} - -/** - * @brief Recursively search for FOF groups in a single cell. - * - * @param r runner task - * @param c cell - * @param timer 1 if the time is to be recorded. 
- */ -void runner_do_fof_self(struct runner *r, struct cell *c, int timer) { - -#ifdef WITH_FOF - - TIMER_TIC; - - const struct engine *e = r->e; - struct space *s = e->s; - const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; - const int periodic = s->periodic; - const struct gpart *const gparts = s->gparts; - const double search_r2 = e->fof_properties->l_x2; - - rec_fof_search_self(e->fof_properties, dim, search_r2, periodic, gparts, c); - - if (timer) TIMER_TOC(timer_fof_self); - -#else - error("SWIFT was not compiled with FOF enabled!"); -#endif -} - -/** - * @brief Recursively search for FOF groups between a pair of cells. - * - * @param r runner task - * @param ci cell i - * @param cj cell j - * @param timer 1 if the time is to be recorded. - */ -void runner_do_fof_pair(struct runner *r, struct cell *ci, struct cell *cj, - int timer) { - -#ifdef WITH_FOF - - TIMER_TIC; - - const struct engine *e = r->e; - struct space *s = e->s; - const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; - const int periodic = s->periodic; - const struct gpart *const gparts = s->gparts; - const double search_r2 = e->fof_properties->l_x2; - - rec_fof_search_pair(e->fof_properties, dim, search_r2, periodic, gparts, ci, - cj); - - if (timer) TIMER_TOC(timer_fof_pair); -#else - error("SWIFT was not compiled with FOF enabled!"); -#endif -} diff --git a/src/runner.h b/src/runner.h index be175eef423faee23ef97ba86a7faf2f43e8ef5d..7e8d0459efb5485ea1301c923e8c7a3396b6fc7e 100644 --- a/src/runner.h +++ b/src/runner.h @@ -26,13 +26,21 @@ /* Config parameters. */ #include "../config.h" -/* Includes. */ +/* Local headers. 
*/ #include "cache.h" #include "gravity_cache.h" -#include "task.h" struct cell; struct engine; +struct task; + +/* Unique identifier of loop types */ +#define TASK_LOOP_DENSITY 0 +#define TASK_LOOP_GRADIENT 1 +#define TASK_LOOP_FORCE 2 +#define TASK_LOOP_LIMITER 3 +#define TASK_LOOP_FEEDBACK 4 +#define TASK_LOOP_SWALLOW 5 /** * @brief A struct representing a runner's thread and its data. @@ -75,6 +83,12 @@ struct runner { /* Function prototypes. */ void runner_do_ghost(struct runner *r, struct cell *c, int timer); void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer); +void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer); +void runner_do_black_holes_density_ghost(struct runner *r, struct cell *c, + int timer); +void runner_do_black_holes_swallow_ghost(struct runner *r, struct cell *c, + int timer); +void runner_do_init_grav(struct runner *r, struct cell *c, int timer); void runner_do_hydro_sort(struct runner *r, struct cell *c, int flag, int cleanup, int clock); void runner_do_stars_sort(struct runner *r, struct cell *c, int flag, @@ -84,19 +98,38 @@ void runner_do_all_stars_sort(struct runner *r, struct cell *c); void runner_do_drift_part(struct runner *r, struct cell *c, int timer); void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer); void runner_do_drift_spart(struct runner *r, struct cell *c, int timer); +void runner_do_drift_bpart(struct runner *r, struct cell *c, int timer); void runner_do_kick1(struct runner *r, struct cell *c, int timer); void runner_do_kick2(struct runner *r, struct cell *c, int timer); +void runner_do_timestep(struct runner *r, struct cell *c, int timer); void runner_do_end_hydro_force(struct runner *r, struct cell *c, int timer); +void runner_do_end_grav_force(struct runner *r, struct cell *c, int timer); void runner_do_init(struct runner *r, struct cell *c, int timer); void runner_do_cooling(struct runner *r, struct cell *c, int timer); +void runner_do_limiter(struct runner *r, 
struct cell *c, int force, int timer); +void runner_do_grav_mesh(struct runner *r, struct cell *c, int timer); void runner_do_grav_external(struct runner *r, struct cell *c, int timer); void runner_do_grav_fft(struct runner *r, int timer); void runner_do_logger(struct runner *r, struct cell *c, int timer); void runner_do_fof_self(struct runner *r, struct cell *c, int timer); void runner_do_fof_pair(struct runner *r, struct cell *ci, struct cell *cj, int timer); +void runner_do_gas_swallow_self(struct runner *r, struct cell *c, int timer); +void runner_do_bh_swallow_self(struct runner *r, struct cell *c, int timer); +void runner_do_gas_swallow_pair(struct runner *r, struct cell *ci, + struct cell *cj, int timer); +void runner_do_bh_swallow_pair(struct runner *r, struct cell *ci, + struct cell *cj, int timer); +void runner_do_star_formation(struct runner *r, struct cell *c, int timer); +void runner_do_stars_resort(struct runner *r, struct cell *c, const int timer); + +void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer); +void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts, + int timer); +void runner_do_recv_spart(struct runner *r, struct cell *c, int clear_sorts, + int timer); +void runner_do_recv_bpart(struct runner *r, struct cell *c, int clear_sorts, + int timer); void *runner_main(void *data); -void runner_do_drift_all_mapper(void *map_data, int num_elements, - void *extra_data); #endif /* SWIFT_RUNNER_H */ diff --git a/src/runner_black_holes.c b/src/runner_black_holes.c new file mode 100644 index 0000000000000000000000000000000000000000..d9bb62201d7b087670aef0ce2346a51bf61a3868 --- /dev/null +++ b/src/runner_black_holes.c @@ -0,0 +1,459 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. 
Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "runner.h" + +/* Local headers. */ +#include "active.h" +#include "black_holes.h" +#include "cell.h" +#include "engine.h" +#include "timers.h" + +/** + * @brief Process all the gas particles in a cell that have been flagged for + * swallowing by a black hole. + * + * This is done by recursing down to the leaf-level and skipping the sub-cells + * that have not been drifted as they would not have any particles with + * swallowing flag. We then loop over the particles with a flag and look into + * the space-wide list of black holes for the particle with the corresponding + * ID. If found, the BH swallows the gas particle and the gas particle is + * removed. If the cell is local, we may be looking for a foreign BH, in which + * case, we do not update the BH (that will be done on its node) but just remove + * the gas particle. + * + * @param r The thread #runner. + * @param c The #cell. + * @param timer Are we timing this? 
+ */ +void runner_do_gas_swallow(struct runner *r, struct cell *c, int timer) { + + struct engine *e = r->e; + struct space *s = e->s; + struct bpart *bparts = s->bparts; + const size_t nr_bpart = s->nr_bparts; +#ifdef WITH_MPI + struct bpart *bparts_foreign = s->bparts_foreign; + const size_t nr_bparts_foreign = s->nr_bparts_foreign; +#endif + + struct part *parts = c->hydro.parts; + struct xpart *xparts = c->hydro.xparts; + + /* Early abort? + * (We only want cells for which we drifted the gas as these are + * the only ones that could have gas particles that have been flagged + * for swallowing) */ + if (c->hydro.count == 0 || c->hydro.ti_old_part != e->ti_current) { + return; + } + + /* Loop over the progeny ? */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *restrict cp = c->progeny[k]; + + runner_do_gas_swallow(r, cp, 0); + } + } + } else { + + /* Loop over all the gas particles in the cell + * Note that the cell (and hence the parts) may be local or foreign. */ + const size_t nr_parts = c->hydro.count; + for (size_t k = 0; k < nr_parts; k++) { + + /* Get a handle on the part. */ + struct part *const p = &parts[k]; + struct xpart *const xp = &xparts[k]; + + /* Ignore inhibited particles (they have already been removed!) */ + if (part_is_inhibited(p, e)) continue; + + /* Get the ID of the black holes that will swallow this part */ + const long long swallow_id = + black_holes_get_part_swallow_id(&p->black_holes_data); + + /* Has this particle been flagged for swallowing? */ + if (swallow_id >= 0) { + +#ifdef SWIFT_DEBUG_CHECKS + if (p->ti_drift != e->ti_current) + error("Trying to swallow an un-drifted particle."); +#endif + + /* ID of the BH swallowing this particle */ + const long long BH_id = swallow_id; + + /* Have we found this particle's BH already? */ + int found = 0; + + /* Let's look for the hungry black hole in the local list */ + for (size_t i = 0; i < nr_bpart; ++i) { + + /* Get a handle on the bpart. 
*/ + struct bpart *bp = &bparts[i]; + + if (bp->id == BH_id) { + + /* Lock the space as we are going to work directly on the bpart list + */ + lock_lock(&s->lock); + + /* Swallow the gas particle (i.e. update the BH properties) */ + black_holes_swallow_part(bp, p, xp, e->cosmology); + + /* Release the space as we are done updating the bpart */ + if (lock_unlock(&s->lock) != 0) + error("Failed to unlock the space."); + + message("BH %lld swallowing gas particle %lld", bp->id, p->id); + + /* If the gas particle is local, remove it */ + if (c->nodeID == e->nodeID) { + + message("BH %lld removing gas particle %lld", bp->id, p->id); + + lock_lock(&e->s->lock); + + /* Re-check that the particle has not been removed + * by another thread before we do the deed. */ + if (!part_is_inhibited(p, e)) { + + /* Finally, remove the gas particle from the system + * Recall that the gpart associated with it is also removed + * at the same time. */ + cell_remove_part(e, c, p, xp); + } + + if (lock_unlock(&e->s->lock) != 0) + error("Failed to unlock the space!"); + } + + /* In any case, prevent the particle from being re-swallowed */ + black_holes_mark_part_as_swallowed(&p->black_holes_data); + + found = 1; + break; + } + + } /* Loop over local BHs */ + +#ifdef WITH_MPI + + /* We could also be in the case of a local gas particle being + * swallowed by a foreign BH. In this case, we won't update the + * BH but just remove the particle from the local list. */ + if (c->nodeID == e->nodeID && !found) { + + /* Let's look for the foreign hungry black hole */ + for (size_t i = 0; i < nr_bparts_foreign; ++i) { + + /* Get a handle on the bpart. */ + struct bpart *bp = &bparts_foreign[i]; + + if (bp->id == BH_id) { + + message("BH %lld removing gas particle %lld (foreign BH case)", + bp->id, p->id); + + lock_lock(&e->s->lock); + + /* Re-check that the particle has not been removed + * by another thread before we do the deed. 
*/ + if (!part_is_inhibited(p, e)) { + + /* Finally, remove the gas particle from the system */ + cell_remove_part(e, c, p, xp); + } + + if (lock_unlock(&e->s->lock) != 0) + error("Failed to unlock the space!"); + + found = 1; + break; + } + } /* Loop over foreign BHs */ + } /* Is the cell local? */ +#endif + + /* If we have a local particle, we must have found the BH in one + * of our list of black holes. */ + if (c->nodeID == e->nodeID && !found) { + error("Gas particle %lld could not find BH %lld to be swallowed", + p->id, swallow_id); + } + } /* Part was flagged for swallowing */ + } /* Loop over the parts */ + } /* Cell is not split */ +} + +/** + * @brief Processing of gas particles to swallow - self task case. + * + * @param r The thread #runner. + * @param c The #cell. + * @param timer Are we timing this? + */ +void runner_do_gas_swallow_self(struct runner *r, struct cell *c, int timer) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != r->e->nodeID) error("Running self task on foreign node"); + if (!cell_is_active_black_holes(c, r->e)) + error("Running self task on inactive cell"); +#endif + + runner_do_gas_swallow(r, c, timer); +} + +/** + * @brief Processing of gas particles to swallow - pair task case. + * + * @param r The thread #runner. + * @param ci First #cell. + * @param cj Second #cell. + * @param timer Are we timing this? 
+ */ +void runner_do_gas_swallow_pair(struct runner *r, struct cell *ci, + struct cell *cj, int timer) { + + const struct engine *e = r->e; + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != e->nodeID && cj->nodeID != e->nodeID) + error("Running pair task on foreign node"); +#endif + + /* Run the swallowing loop only in the cell that is the neighbour of the + * active BH */ + if (cell_is_active_black_holes(cj, e)) runner_do_gas_swallow(r, ci, timer); + if (cell_is_active_black_holes(ci, e)) runner_do_gas_swallow(r, cj, timer); +} + +/** + * @brief Process all the BH particles in a cell that have been flagged for + * swallowing by a black hole. + * + * This is done by recursing down to the leaf-level and skipping the sub-cells + * that have not been drifted as they would not have any particles with + * swallowing flag. We then loop over the particles with a flag and look into + * the space-wide list of black holes for the particle with the corresponding + * ID. If found, the BH swallows the BH particle and the BH particle is + * removed. If the cell is local, we may be looking for a foreign BH, in which + * case, we do not update the BH (that will be done on its node) but just remove + * the BH particle. + * + * @param r The thread #runner. + * @param c The #cell. + * @param timer Are we timing this? + */ +void runner_do_bh_swallow(struct runner *r, struct cell *c, int timer) { + + struct engine *e = r->e; + struct space *s = e->s; + struct bpart *bparts = s->bparts; + const size_t nr_bpart = s->nr_bparts; +#ifdef WITH_MPI + struct bpart *bparts_foreign = s->bparts_foreign; + const size_t nr_bparts_foreign = s->nr_bparts_foreign; +#endif + + struct bpart *cell_bparts = c->black_holes.parts; + + /* Early abort? 
+ * (We only want cells for which we drifted the BH as these are + * the only ones that could have BH particles that have been flagged + * for swallowing) */ + if (c->black_holes.count == 0 || + c->black_holes.ti_old_part != e->ti_current) { + return; + } + + /* Loop over the progeny ? */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *restrict cp = c->progeny[k]; + + runner_do_bh_swallow(r, cp, 0); + } + } + } else { + + /* Loop over all the BH particles in the cell + * Note that the cell (and hence the bparts) may be local or foreign. */ + const size_t nr_cell_bparts = c->black_holes.count; + for (size_t k = 0; k < nr_cell_bparts; k++) { + + /* Get a handle on the part. */ + struct bpart *const cell_bp = &cell_bparts[k]; + + /* Ignore inhibited particles (they have already been removed!) */ + if (bpart_is_inhibited(cell_bp, e)) continue; + + /* Get the ID of the black holes that will swallow this part */ + const long long swallow_id = + black_holes_get_bpart_swallow_id(&cell_bp->merger_data); + + /* message("OO id=%lld swallow_id = %lld", cell_bp->id, */ + /* swallow_id); */ + + /* Has this particle been flagged for swallowing? */ + if (swallow_id >= 0) { + +#ifdef SWIFT_DEBUG_CHECKS + if (cell_bp->ti_drift != e->ti_current) + error("Trying to swallow an un-drifted particle."); +#endif + + /* ID of the BH swallowing this particle */ + const long long BH_id = swallow_id; + + /* Have we found this particle's BH already? */ + int found = 0; + + /* Let's look for the hungry black hole in the local list */ + for (size_t i = 0; i < nr_bpart; ++i) { + + /* Get a handle on the bpart. */ + struct bpart *bp = &bparts[i]; + + if (bp->id == BH_id) { + + /* Lock the space as we are going to work directly on the bpart list + */ + lock_lock(&s->lock); + + /* Swallow the BH particle (i.e. 
update the BH properties) */ + black_holes_swallow_bpart(bp, cell_bp, e->cosmology); + + /* Release the space as we are done updating the bpart */ + if (lock_unlock(&s->lock) != 0) + error("Failed to unlock the space."); + + message("BH %lld swallowing BH particle %lld", bp->id, cell_bp->id); + + /* If the BH particle is local, remove it */ + if (c->nodeID == e->nodeID) { + + message("BH %lld removing BH particle %lld", bp->id, cell_bp->id); + + /* Finally, remove the BH particle from the system + * Recall that the gpart associated with it is also removed + * at the same time. */ + cell_remove_bpart(e, c, cell_bp); + } + + /* In any case, prevent the particle from being re-swallowed */ + black_holes_mark_bpart_as_merged(&cell_bp->merger_data); + + found = 1; + break; + } + + } /* Loop over local BHs */ + +#ifdef WITH_MPI + + /* We could also be in the case of a local BH particle being + * swallowed by a foreign BH. In this case, we won't update the + * foreign BH but just remove the particle from the local list. */ + if (c->nodeID == e->nodeID && !found) { + + /* Let's look for the foreign hungry black hole */ + for (size_t i = 0; i < nr_bparts_foreign; ++i) { + + /* Get a handle on the bpart. */ + struct bpart *bp = &bparts_foreign[i]; + + if (bp->id == BH_id) { + + message("BH %lld removing BH particle %lld (foreign BH case)", + bp->id, cell_bp->id); + + /* Finally, remove the BH particle from the system */ + cell_remove_bpart(e, c, cell_bp); + + found = 1; + break; + } + } /* Loop over foreign BHs */ + } /* Is the cell local? */ +#endif + + /* If we have a local particle, we must have found the BH in one + * of our list of black holes. */ + if (c->nodeID == e->nodeID && !found) { + error("BH particle %lld could not find BH %lld to be swallowed", + cell_bp->id, swallow_id); + } + } /* Part was flagged for swallowing */ + } /* Loop over the parts */ + } /* Cell is not split */ +} + +/** + * @brief Processing of bh particles to swallow - self task case. 
+ * + * @param r The thread #runner. + * @param c The #cell. + * @param timer Are we timing this? + */ +void runner_do_bh_swallow_self(struct runner *r, struct cell *c, int timer) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != r->e->nodeID) error("Running self task on foreign node"); + if (!cell_is_active_black_holes(c, r->e)) + error("Running self task on inactive cell"); +#endif + + runner_do_bh_swallow(r, c, timer); +} + +/** + * @brief Processing of bh particles to swallow - pair task case. + * + * @param r The thread #runner. + * @param ci First #cell. + * @param cj Second #cell. + * @param timer Are we timing this? + */ +void runner_do_bh_swallow_pair(struct runner *r, struct cell *ci, + struct cell *cj, int timer) { + + const struct engine *e = r->e; + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != e->nodeID && cj->nodeID != e->nodeID) + error("Running pair task on foreign node"); +#endif + + /* Run the swallowing loop only in the cell that is the neighbour of the + * active BH */ + if (cell_is_active_black_holes(cj, e)) runner_do_bh_swallow(r, ci, timer); + if (cell_is_active_black_holes(ci, e)) runner_do_bh_swallow(r, cj, timer); +} diff --git a/src/runner_doiact_black_holes.c b/src/runner_doiact_black_holes.c new file mode 100644 index 0000000000000000000000000000000000000000..5c139eada6cf7403076194c42261948db5e0f7f4 --- /dev/null +++ b/src/runner_doiact_black_holes.c @@ -0,0 +1,53 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Local headers. */ +#include "active.h" +#include "black_holes.h" +#include "cell.h" +#include "engine.h" +#include "runner.h" +#include "space_getsid.h" +#include "timers.h" + +/* Import the black hole density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_functions_black_holes.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/* Import the black hole swallow loop functions. */ +#define FUNCTION swallow +#define FUNCTION_TASK_LOOP TASK_LOOP_SWALLOW +#include "runner_doiact_functions_black_holes.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/* Import the black hole feedback loop functions. */ +#define FUNCTION feedback +#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK +#include "runner_doiact_functions_black_holes.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION diff --git a/src/runner_doiact_black_holes.h b/src/runner_doiact_black_holes.h index ce159c7ac24a508bc625070ed50b3aad7dd9fa8d..763e557babb9ca94a05a28d1ea5ed0f1141684ff 100644 --- a/src/runner_doiact_black_holes.h +++ b/src/runner_doiact_black_holes.h @@ -85,852 +85,20 @@ #define _IACT_BH_BH(f) PASTE(runner_iact_nonsym_bh_bh, f) #define IACT_BH_BH _IACT_BH_BH(FUNCTION) -/** - * @brief Calculate the number density of #part around the #bpart - * - * @param r runner task - * @param c cell - * @param timer 1 if the time is to be recorded. 
- */ -void DOSELF1_BH(struct runner *r, struct cell *c, int timer) { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != engine_rank) error("Should be run on a different node"); -#endif - - TIMER_TIC; - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - /* Anything to do here? */ - if (c->black_holes.count == 0) return; - if (!cell_is_active_black_holes(c, e)) return; - - const int bcount = c->black_holes.count; - const int count = c->hydro.count; - struct bpart *restrict bparts = c->black_holes.parts; - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - - /* Do we actually have any gas neighbours? */ - if (c->hydro.count != 0) { - - /* Loop over the bparts in ci. */ - for (int bid = 0; bid < bcount; bid++) { - - /* Get a hold of the ith bpart in ci. */ - struct bpart *restrict bi = &bparts[bid]; - - /* Skip inactive particles */ - if (!bpart_is_active(bi, e)) continue; - - const float hi = bi->h; - const float hig2 = hi * hi * kernel_gamma2; - const float bix[3] = {(float)(bi->x[0] - c->loc[0]), - (float)(bi->x[1] - c->loc[1]), - (float)(bi->x[2] - c->loc[2])}; - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts[pjd]; - struct xpart *restrict xpj = &xparts[pjd]; - const float hj = pj->h; - - /* Early abort? */ - if (part_is_inhibited(pj, e)) continue; - - /* Compute the pairwise distance. 
*/ - const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), - (float)(pj->x[1] - c->loc[1]), - (float)(pj->x[2] - c->loc[2])}; - float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (bi->ti_drift != e->ti_current) - error("Particle bi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - if (r2 < hig2) { - IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties, - ti_current); - } - } /* loop over the parts in ci. */ - } /* loop over the bparts in ci. */ - } /* Do we have gas particles in the cell? */ - - /* When doing BH swallowing, we need a quick loop also over the BH - * neighbours */ -#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) - - /* Loop over the bparts in ci. */ - for (int bid = 0; bid < bcount; bid++) { - - /* Get a hold of the ith bpart in ci. */ - struct bpart *restrict bi = &bparts[bid]; - - /* Skip inactive particles */ - if (!bpart_is_active(bi, e)) continue; - - const float hi = bi->h; - const float hig2 = hi * hi * kernel_gamma2; - const float bix[3] = {(float)(bi->x[0] - c->loc[0]), - (float)(bi->x[1] - c->loc[1]), - (float)(bi->x[2] - c->loc[2])}; - - /* Loop over the parts in cj. */ - for (int bjd = 0; bjd < bcount; bjd++) { - - /* Skip self interaction */ - if (bid == bjd) continue; - - /* Get a pointer to the jth particle. */ - struct bpart *restrict bj = &bparts[bjd]; - const float hj = bj->h; - - /* Early abort? */ - if (bpart_is_inhibited(bj, e)) continue; - - /* Compute the pairwise distance. 
*/ - const float bjx[3] = {(float)(bj->x[0] - c->loc[0]), - (float)(bj->x[1] - c->loc[1]), - (float)(bj->x[2] - c->loc[2])}; - float dx[3] = {bix[0] - bjx[0], bix[1] - bjx[1], bix[2] - bjx[2]}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (bi->ti_drift != e->ti_current) - error("Particle bi not drifted to current time"); - if (bj->ti_drift != e->ti_current) - error("Particle bj not drifted to current time"); -#endif - - if (r2 < hig2) { - IACT_BH_BH(r2, dx, hi, hj, bi, bj, cosmo, e->gravity_properties, - ti_current); - } - } /* loop over the bparts in ci. */ - } /* loop over the bparts in ci. */ - -#endif /* (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) */ - - TIMER_TOC(TIMER_DOSELF_BH); -} - -/** - * @brief Calculate the number density of cj #part around the ci #bpart - * - * @param r runner task - * @param ci The first #cell - * @param cj The second #cell - */ -void DO_NONSYM_PAIR1_BH_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj) { - -#ifdef SWIFT_DEBUG_CHECKS -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - if (ci->nodeID != engine_rank) error("Should be run on a different node"); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - if (cj->nodeID != engine_rank) error("Should be run on a different node"); -#endif -#endif - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - /* Anything to do here? */ - if (ci->black_holes.count == 0) return; - if (!cell_is_active_black_holes(ci, e)) return; - - const int bcount_i = ci->black_holes.count; - const int count_j = cj->hydro.count; - struct bpart *restrict bparts_i = ci->black_holes.parts; - struct part *restrict parts_j = cj->hydro.parts; - struct xpart *restrict xparts_j = cj->hydro.xparts; - - /* Get the relative distance between the pairs, wrapping. 
*/ - double shift[3] = {0.0, 0.0, 0.0}; - for (int k = 0; k < 3; k++) { - if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) - shift[k] = e->s->dim[k]; - else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) - shift[k] = -e->s->dim[k]; - } - - /* Do we actually have any gas neighbours? */ - if (cj->hydro.count != 0) { - - /* Loop over the bparts in ci. */ - for (int bid = 0; bid < bcount_i; bid++) { - - /* Get a hold of the ith bpart in ci. */ - struct bpart *restrict bi = &bparts_i[bid]; - - /* Skip inactive particles */ - if (!bpart_is_active(bi, e)) continue; - - const float hi = bi->h; - const float hig2 = hi * hi * kernel_gamma2; - const float bix[3] = {(float)(bi->x[0] - (cj->loc[0] + shift[0])), - (float)(bi->x[1] - (cj->loc[1] + shift[1])), - (float)(bi->x[2] - (cj->loc[2] + shift[2]))}; - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[pjd]; - struct xpart *restrict xpj = &xparts_j[pjd]; - const float hj = pj->h; - - /* Skip inhibited particles. */ - if (part_is_inhibited(pj, e)) continue; - - /* Compute the pairwise distance. */ - const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]), - (float)(pj->x[1] - cj->loc[1]), - (float)(pj->x[2] - cj->loc[2])}; - float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (bi->ti_drift != e->ti_current) - error("Particle bi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - if (r2 < hig2) { - IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties, - ti_current); - } - } /* loop over the parts in cj. */ - } /* loop over the bparts in ci. */ - } /* Do we have gas particles in the cell? 
*/ - - /* When doing BH swallowing, we need a quick loop also over the BH - * neighbours */ -#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) - - const int bcount_j = cj->black_holes.count; - struct bpart *restrict bparts_j = cj->black_holes.parts; - - /* Loop over the bparts in ci. */ - for (int bid = 0; bid < bcount_i; bid++) { - - /* Get a hold of the ith bpart in ci. */ - struct bpart *restrict bi = &bparts_i[bid]; - - /* Skip inactive particles */ - if (!bpart_is_active(bi, e)) continue; - - const float hi = bi->h; - const float hig2 = hi * hi * kernel_gamma2; - const float bix[3] = {(float)(bi->x[0] - (cj->loc[0] + shift[0])), - (float)(bi->x[1] - (cj->loc[1] + shift[1])), - (float)(bi->x[2] - (cj->loc[2] + shift[2]))}; - - /* Loop over the bparts in cj. */ - for (int bjd = 0; bjd < bcount_j; bjd++) { - - /* Get a pointer to the jth particle. */ - struct bpart *restrict bj = &bparts_j[bjd]; - const float hj = bj->h; - - /* Skip inhibited particles. */ - if (bpart_is_inhibited(bj, e)) continue; - - /* Compute the pairwise distance. */ - const float bjx[3] = {(float)(bj->x[0] - cj->loc[0]), - (float)(bj->x[1] - cj->loc[1]), - (float)(bj->x[2] - cj->loc[2])}; - float dx[3] = {bix[0] - bjx[0], bix[1] - bjx[1], bix[2] - bjx[2]}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (bi->ti_drift != e->ti_current) - error("Particle bi not drifted to current time"); - if (bj->ti_drift != e->ti_current) - error("Particle bj not drifted to current time"); -#endif - - if (r2 < hig2) { - IACT_BH_BH(r2, dx, hi, hj, bi, bj, cosmo, e->gravity_properties, - ti_current); - } - } /* loop over the bparts in cj. */ - } /* loop over the bparts in ci. 
*/ - -#endif /* (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) */ -} - -void DOPAIR1_BH_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj, int timer) { - - TIMER_TIC; - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - const int do_ci_bh = ci->nodeID == r->e->nodeID; - const int do_cj_bh = cj->nodeID == r->e->nodeID; -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - /* here we are updating the hydro -> switch ci, cj */ - const int do_ci_bh = cj->nodeID == r->e->nodeID; - const int do_cj_bh = ci->nodeID == r->e->nodeID; -#else - /* The swallow task is executed on both sides */ - const int do_ci_bh = 1; - const int do_cj_bh = 1; -#endif - - if (do_ci_bh) DO_NONSYM_PAIR1_BH_NAIVE(r, ci, cj); - if (do_cj_bh) DO_NONSYM_PAIR1_BH_NAIVE(r, cj, ci); - - TIMER_TOC(TIMER_DOPAIR_BH); -} - -/** - * @brief Compute the interactions between a cell pair, but only for the - * given indices in ci. - * - * Version using a brute-force algorithm. - * - * @param r The #runner. - * @param ci The first #cell. - * @param bparts_i The #bpart to interact with @c cj. - * @param ind The list of indices of particles in @c ci to interact with. - * @param bcount The number of particles in @c ind. - * @param cj The second #cell. - * @param shift The shift vector to apply to the particles in ci. - */ -void DOPAIR1_SUBSET_BH_NAIVE(struct runner *r, struct cell *restrict ci, - struct bpart *restrict bparts_i, int *restrict ind, - const int bcount, struct cell *restrict cj, - const double *shift) { - -#ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != engine_rank) error("Should be run on a different node"); -#endif - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - const int count_j = cj->hydro.count; - struct part *restrict parts_j = cj->hydro.parts; - struct xpart *restrict xparts_j = cj->hydro.xparts; - - /* Early abort? */ - if (count_j == 0) return; - - /* Loop over the parts_i. 
*/ - for (int bid = 0; bid < bcount; bid++) { - - /* Get a hold of the ith part in ci. */ - struct bpart *restrict bi = &bparts_i[ind[bid]]; - - const double bix = bi->x[0] - (shift[0]); - const double biy = bi->x[1] - (shift[1]); - const double biz = bi->x[2] - (shift[2]); - const float hi = bi->h; - const float hig2 = hi * hi * kernel_gamma2; - -#ifdef SWIFT_DEBUG_CHECKS - if (!bpart_is_active(bi, e)) - error("Trying to correct smoothing length of inactive particle !"); -#endif - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[pjd]; - struct xpart *restrict xpj = &xparts_j[pjd]; - - /* Skip inhibited particles */ - if (part_is_inhibited(pj, e)) continue; - - const double pjx = pj->x[0]; - const double pjy = pj->x[1]; - const double pjz = pj->x[2]; - const float hj = pj->h; - - /* Compute the pairwise distance. */ - float dx[3] = {(float)(bix - pjx), (float)(biy - pjy), - (float)(biz - pjz)}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - /* Hit or miss? */ - if (r2 < hig2) { - IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties, - ti_current); - } - } /* loop over the parts in cj. */ - } /* loop over the parts in ci. */ -} - -/** - * @brief Compute the interactions between a cell pair, but only for the - * given indices in ci. - * - * @param r The #runner. - * @param ci The first #cell. - * @param bparts The #bpart to interact. - * @param ind The list of indices of particles in @c ci to interact with. - * @param bcount The number of particles in @c ind. 
- */ -void DOSELF1_SUBSET_BH(struct runner *r, struct cell *restrict ci, - struct bpart *restrict bparts, int *restrict ind, - const int bcount) { - -#ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != engine_rank) error("Should be run on a different node"); -#endif - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - const int count_i = ci->hydro.count; - struct part *restrict parts_j = ci->hydro.parts; - struct xpart *restrict xparts_j = ci->hydro.xparts; +void DOSELF1_BRANCH_BH(struct runner *r, struct cell *c); +void DOPAIR1_BRANCH_BH(struct runner *r, struct cell *ci, struct cell *cj); - /* Early abort? */ - if (count_i == 0) return; - - /* Loop over the parts in ci. */ - for (int bid = 0; bid < bcount; bid++) { - - /* Get a hold of the ith part in ci. */ - struct bpart *bi = &bparts[ind[bid]]; - const float bix[3] = {(float)(bi->x[0] - ci->loc[0]), - (float)(bi->x[1] - ci->loc[1]), - (float)(bi->x[2] - ci->loc[2])}; - const float hi = bi->h; - const float hig2 = hi * hi * kernel_gamma2; - -#ifdef SWIFT_DEBUG_CHECKS - if (!bpart_is_active(bi, e)) error("Inactive particle in subset function!"); -#endif - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_i; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[pjd]; - struct xpart *restrict xpj = &xparts_j[pjd]; - - /* Early abort? */ - if (part_is_inhibited(pj, e)) continue; - - /* Compute the pairwise distance. */ - const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]), - (float)(pj->x[1] - ci->loc[1]), - (float)(pj->x[2] - ci->loc[2])}; - float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? 
*/ - if (r2 < hig2) { - IACT_BH_GAS(r2, dx, hi, pj->h, bi, pj, xpj, cosmo, - e->gravity_properties, ti_current); - } - } /* loop over the parts in cj. */ - } /* loop over the parts in ci. */ -} +void DOSUB_SELF1_BH(struct runner *r, struct cell *ci, int gettimer); +void DOSUB_PAIR1_BH(struct runner *r, struct cell *ci, struct cell *cj, + int gettimer); -/** - * @brief Determine which version of DOSELF1_SUBSET_BH needs to be called - * depending on the optimisation level. - * - * @param r The #runner. - * @param ci The first #cell. - * @param bparts The #bpart to interact. - * @param ind The list of indices of particles in @c ci to interact with. - * @param bcount The number of particles in @c ind. - */ void DOSELF1_SUBSET_BRANCH_BH(struct runner *r, struct cell *restrict ci, struct bpart *restrict bparts, int *restrict ind, - const int bcount) { - - DOSELF1_SUBSET_BH(r, ci, bparts, ind, bcount); -} - -/** - * @brief Determine which version of DOPAIR1_SUBSET_BH needs to be called - * depending on the orientation of the cells or whether DOPAIR1_SUBSET_BH - * needs to be called at all. - * - * @param r The #runner. - * @param ci The first #cell. - * @param bparts_i The #bpart to interact with @c cj. - * @param ind The list of indices of particles in @c ci to interact with. - * @param bcount The number of particles in @c ind. - * @param cj The second #cell. - */ + const int bcount); void DOPAIR1_SUBSET_BRANCH_BH(struct runner *r, struct cell *restrict ci, struct bpart *restrict bparts_i, int *restrict ind, int const bcount, - struct cell *restrict cj) { - - const struct engine *e = r->e; - - /* Anything to do here? */ - if (cj->hydro.count == 0) return; - - /* Get the relative distance between the pairs, wrapping. 
*/ - double shift[3] = {0.0, 0.0, 0.0}; - for (int k = 0; k < 3; k++) { - if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) - shift[k] = e->s->dim[k]; - else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) - shift[k] = -e->s->dim[k]; - } - - DOPAIR1_SUBSET_BH_NAIVE(r, ci, bparts_i, ind, bcount, cj, shift); -} + struct cell *restrict cj); void DOSUB_SUBSET_BH(struct runner *r, struct cell *ci, struct bpart *bparts, - int *ind, const int bcount, struct cell *cj, - int gettimer) { - - const struct engine *e = r->e; - struct space *s = e->s; - - /* Should we even bother? */ - if (!cell_is_active_black_holes(ci, e) && - (cj == NULL || !cell_is_active_black_holes(cj, e))) - return; - - /* Find out in which sub-cell of ci the parts are. */ - struct cell *sub = NULL; - if (ci->split) { - for (int k = 0; k < 8; k++) { - if (ci->progeny[k] != NULL) { - if (&bparts[ind[0]] >= &ci->progeny[k]->black_holes.parts[0] && - &bparts[ind[0]] < - &ci->progeny[k] - ->black_holes.parts[ci->progeny[k]->black_holes.count]) { - sub = ci->progeny[k]; - break; - } - } - } - } - - /* Is this a single cell? */ - if (cj == NULL) { - - /* Recurse? */ - if (cell_can_recurse_in_self_black_holes_task(ci)) { - - /* Loop over all progeny. */ - DOSUB_SUBSET_BH(r, sub, bparts, ind, bcount, NULL, 0); - for (int j = 0; j < 8; j++) - if (ci->progeny[j] != sub && ci->progeny[j] != NULL) - DOSUB_SUBSET_BH(r, sub, bparts, ind, bcount, ci->progeny[j], 0); - - } - - /* Otherwise, compute self-interaction. */ - else - DOSELF1_SUBSET_BRANCH_BH(r, ci, bparts, ind, bcount); - } /* self-interaction. */ - - /* Otherwise, it's a pair interaction. */ - else { - - /* Recurse? */ - if (cell_can_recurse_in_pair_black_holes_task(ci, cj) && - cell_can_recurse_in_pair_black_holes_task(cj, ci)) { - - /* Get the type of pair and flip ci/cj if needed. 
*/ - double shift[3] = {0.0, 0.0, 0.0}; - const int sid = space_getsid(s, &ci, &cj, shift); - - struct cell_split_pair *csp = &cell_split_pairs[sid]; - for (int k = 0; k < csp->count; k++) { - const int pid = csp->pairs[k].pid; - const int pjd = csp->pairs[k].pjd; - if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL) - DOSUB_SUBSET_BH(r, ci->progeny[pid], bparts, ind, bcount, - cj->progeny[pjd], 0); - if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub) - DOSUB_SUBSET_BH(r, cj->progeny[pjd], bparts, ind, bcount, - ci->progeny[pid], 0); - } - } - - /* Otherwise, compute the pair directly. */ - else if (cell_is_active_black_holes(ci, e) && cj->hydro.count > 0) { - - /* Do any of the cells need to be drifted first? */ - if (cell_is_active_black_holes(ci, e)) { - if (!cell_are_bpart_drifted(ci, e)) error("Cell should be drifted!"); - if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); - } - - DOPAIR1_SUBSET_BRANCH_BH(r, ci, bparts, ind, bcount, cj); - } - - } /* otherwise, pair interaction. */ -} - -/** - * @brief Determine which version of DOSELF1_BH needs to be called depending - * on the optimisation level. - * - * @param r #runner - * @param c #cell c - * - */ -void DOSELF1_BRANCH_BH(struct runner *r, struct cell *c) { - - const struct engine *restrict e = r->e; - - /* Anything to do here? */ - if (c->black_holes.count == 0) return; - - /* Anything to do here? */ - if (!cell_is_active_black_holes(c, e)) return; - - /* Did we mess up the recursion? */ - if (c->black_holes.h_max_old * kernel_gamma > c->dmin) - error("Cell smaller than smoothing length"); - - DOSELF1_BH(r, c, 1); -} - -/** - * @brief Determine which version of DOPAIR1_BH needs to be called depending - * on the orientation of the cells or whether DOPAIR1_BH needs to be called - * at all. 
- * - * @param r #runner - * @param ci #cell ci - * @param cj #cell cj - * - */ -void DOPAIR1_BRANCH_BH(struct runner *r, struct cell *ci, struct cell *cj) { - - const struct engine *restrict e = r->e; - - const int ci_active = cell_is_active_black_holes(ci, e); - const int cj_active = cell_is_active_black_holes(cj, e); -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - const int do_ci_bh = ci->nodeID == e->nodeID; - const int do_cj_bh = cj->nodeID == e->nodeID; -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - /* here we are updating the hydro -> switch ci, cj */ - const int do_ci_bh = cj->nodeID == e->nodeID; - const int do_cj_bh = ci->nodeID == e->nodeID; -#else - /* The swallow task is executed on both sides */ - const int do_ci_bh = 1; - const int do_cj_bh = 1; -#endif - - const int do_ci = (ci->black_holes.count != 0 && cj->hydro.count != 0 && - ci_active && do_ci_bh); - const int do_cj = (cj->black_holes.count != 0 && ci->hydro.count != 0 && - cj_active && do_cj_bh); - - /* Anything to do here? */ - if (!do_ci && !do_cj) return; - - /* Check that cells are drifted. */ - if (do_ci && - (!cell_are_bpart_drifted(ci, e) || !cell_are_part_drifted(cj, e))) - error("Interacting undrifted cells."); - - if (do_cj && - (!cell_are_part_drifted(ci, e) || !cell_are_bpart_drifted(cj, e))) - error("Interacting undrifted cells."); - - /* No sorted intreactions here -> use the naive ones */ - DOPAIR1_BH_NAIVE(r, ci, cj, 1); -} - -/** - * @brief Compute grouped sub-cell interactions for pairs - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The second #cell. - * @param gettimer Do we have a timer ? - * - * @todo Hard-code the sid on the recursive calls to avoid the - * redundant computations to find the sid on-the-fly. - */ -void DOSUB_PAIR1_BH(struct runner *r, struct cell *ci, struct cell *cj, - int gettimer) { - - TIMER_TIC; - - struct space *s = r->e->s; - const struct engine *e = r->e; - - /* Should we even bother? 
- * In the swallow case we care about BH-BH and BH-gas - * interactions. - * In all other cases only BH-gas so we can abort if there is - * is no gas in the cell */ -#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) - const int should_do_ci = - ci->black_holes.count != 0 && cell_is_active_black_holes(ci, e); - const int should_do_cj = - cj->black_holes.count != 0 && cell_is_active_black_holes(cj, e); -#else - const int should_do_ci = ci->black_holes.count != 0 && cj->hydro.count != 0 && - cell_is_active_black_holes(ci, e); - const int should_do_cj = cj->black_holes.count != 0 && ci->hydro.count != 0 && - cell_is_active_black_holes(cj, e); - -#endif - - if (!should_do_ci && !should_do_cj) return; - - /* Get the type of pair and flip ci/cj if needed. */ - double shift[3]; - const int sid = space_getsid(s, &ci, &cj, shift); - - /* Recurse? */ - if (cell_can_recurse_in_pair_black_holes_task(ci, cj) && - cell_can_recurse_in_pair_black_holes_task(cj, ci)) { - struct cell_split_pair *csp = &cell_split_pairs[sid]; - for (int k = 0; k < csp->count; k++) { - const int pid = csp->pairs[k].pid; - const int pjd = csp->pairs[k].pjd; - if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) - DOSUB_PAIR1_BH(r, ci->progeny[pid], cj->progeny[pjd], 0); - } - } - - /* Otherwise, compute the pair directly. 
*/ - else { - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - const int do_ci_bh = ci->nodeID == e->nodeID; - const int do_cj_bh = cj->nodeID == e->nodeID; -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - /* Here we are updating the hydro -> switch ci, cj */ - const int do_ci_bh = cj->nodeID == e->nodeID; - const int do_cj_bh = ci->nodeID == e->nodeID; -#else - /* Here we perform the task on both sides */ - const int do_ci_bh = 1; - const int do_cj_bh = 1; -#endif - - const int do_ci = ci->black_holes.count != 0 && - cell_is_active_black_holes(ci, e) && do_ci_bh; - const int do_cj = cj->black_holes.count != 0 && - cell_is_active_black_holes(cj, e) && do_cj_bh; - - if (do_ci) { - - /* Make sure both cells are drifted to the current timestep. */ - if (!cell_are_bpart_drifted(ci, e)) - error("Interacting undrifted cells (bparts)."); - - if (cj->hydro.count != 0 && !cell_are_part_drifted(cj, e)) - error("Interacting undrifted cells (parts)."); - } - - if (do_cj) { - - /* Make sure both cells are drifted to the current timestep. */ - if (ci->hydro.count != 0 && !cell_are_part_drifted(ci, e)) - error("Interacting undrifted cells (parts)."); - - if (!cell_are_bpart_drifted(cj, e)) - error("Interacting undrifted cells (bparts)."); - } - - if (do_ci || do_cj) DOPAIR1_BRANCH_BH(r, ci, cj); - } - - TIMER_TOC(TIMER_DOSUB_PAIR_BH); -} - -/** - * @brief Compute grouped sub-cell interactions for self tasks - * - * @param r The #runner. - * @param ci The first #cell. - * @param gettimer Do we have a timer ? - */ -void DOSUB_SELF1_BH(struct runner *r, struct cell *ci, int gettimer) { - - TIMER_TIC; - - const struct engine *e = r->e; - -#ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != engine_rank) - error("This function should not be called on foreign cells"); -#endif - - /* Should we even bother? - * In the swallow case we care about BH-BH and BH-gas - * interactions. 
- * In all other cases only BH-gas so we can abort if there is - * is no gas in the cell */ -#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) - const int should_do_ci = - ci->black_holes.count != 0 && cell_is_active_black_holes(ci, e); -#else - const int should_do_ci = ci->black_holes.count != 0 && ci->hydro.count != 0 && - cell_is_active_black_holes(ci, e); -#endif - - if (!should_do_ci) return; - - /* Recurse? */ - if (cell_can_recurse_in_self_black_holes_task(ci)) { - - /* Loop over all progeny. */ - for (int k = 0; k < 8; k++) - if (ci->progeny[k] != NULL) { - DOSUB_SELF1_BH(r, ci->progeny[k], 0); - for (int j = k + 1; j < 8; j++) - if (ci->progeny[j] != NULL) - DOSUB_PAIR1_BH(r, ci->progeny[k], ci->progeny[j], 0); - } - } - - /* Otherwise, compute self-interaction. */ - else { - - /* Check we did drift to the current time */ - if (!cell_are_bpart_drifted(ci, e)) error("Interacting undrifted cell."); - - if (ci->hydro.count != 0 && !cell_are_part_drifted(ci, e)) - error("Interacting undrifted cells (bparts)."); - - DOSELF1_BRANCH_BH(r, ci); - } - - TIMER_TOC(TIMER_DOSUB_SELF_BH); -} + int *ind, const int bcount, struct cell *cj, int gettimer); diff --git a/src/runner_doiact_functions_black_holes.h b/src/runner_doiact_functions_black_holes.h new file mode 100644 index 0000000000000000000000000000000000000000..f8af37c751a9f7a89455ae5c9a7ef72ec55a1c64 --- /dev/null +++ b/src/runner_doiact_functions_black_holes.h @@ -0,0 +1,877 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Before including this file, define FUNCTION, which is the + name of the interaction function. This creates the interaction functions + runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION, + and runner_dosub_FUNCTION calling the pairwise interaction function + runner_iact_FUNCTION. */ + +#include "runner_doiact_black_holes.h" + +/** + * @brief Calculate the number density of #part around the #bpart + * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. + */ +void DOSELF1_BH(struct runner *r, struct cell *c, int timer) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != engine_rank) error("Should be run on a different node"); +#endif + + TIMER_TIC; + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + /* Anything to do here? */ + if (c->black_holes.count == 0) return; + if (!cell_is_active_black_holes(c, e)) return; + + const int bcount = c->black_holes.count; + const int count = c->hydro.count; + struct bpart *restrict bparts = c->black_holes.parts; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + + /* Do we actually have any gas neighbours? */ + if (c->hydro.count != 0) { + + /* Loop over the bparts in ci. */ + for (int bid = 0; bid < bcount; bid++) { + + /* Get a hold of the ith bpart in ci. 
*/ + struct bpart *restrict bi = &bparts[bid]; + + /* Skip inactive particles */ + if (!bpart_is_active(bi, e)) continue; + + const float hi = bi->h; + const float hig2 = hi * hi * kernel_gamma2; + const float bix[3] = {(float)(bi->x[0] - c->loc[0]), + (float)(bi->x[1] - c->loc[1]), + (float)(bi->x[2] - c->loc[2])}; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts[pjd]; + struct xpart *restrict xpj = &xparts[pjd]; + const float hj = pj->h; + + /* Early abort? */ + if (part_is_inhibited(pj, e)) continue; + + /* Compute the pairwise distance. */ + const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), + (float)(pj->x[1] - c->loc[1]), + (float)(pj->x[2] - c->loc[2])}; + float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (bi->ti_drift != e->ti_current) + error("Particle bi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + if (r2 < hig2) { + IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties, + ti_current); + } + } /* loop over the parts in ci. */ + } /* loop over the bparts in ci. */ + } /* Do we have gas particles in the cell? */ + + /* When doing BH swallowing, we need a quick loop also over the BH + * neighbours */ +#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) + + /* Loop over the bparts in ci. */ + for (int bid = 0; bid < bcount; bid++) { + + /* Get a hold of the ith bpart in ci. 
*/ + struct bpart *restrict bi = &bparts[bid]; + + /* Skip inactive particles */ + if (!bpart_is_active(bi, e)) continue; + + const float hi = bi->h; + const float hig2 = hi * hi * kernel_gamma2; + const float bix[3] = {(float)(bi->x[0] - c->loc[0]), + (float)(bi->x[1] - c->loc[1]), + (float)(bi->x[2] - c->loc[2])}; + + /* Loop over the parts in cj. */ + for (int bjd = 0; bjd < bcount; bjd++) { + + /* Skip self interaction */ + if (bid == bjd) continue; + + /* Get a pointer to the jth particle. */ + struct bpart *restrict bj = &bparts[bjd]; + const float hj = bj->h; + + /* Early abort? */ + if (bpart_is_inhibited(bj, e)) continue; + + /* Compute the pairwise distance. */ + const float bjx[3] = {(float)(bj->x[0] - c->loc[0]), + (float)(bj->x[1] - c->loc[1]), + (float)(bj->x[2] - c->loc[2])}; + float dx[3] = {bix[0] - bjx[0], bix[1] - bjx[1], bix[2] - bjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (bi->ti_drift != e->ti_current) + error("Particle bi not drifted to current time"); + if (bj->ti_drift != e->ti_current) + error("Particle bj not drifted to current time"); +#endif + + if (r2 < hig2) { + IACT_BH_BH(r2, dx, hi, hj, bi, bj, cosmo, e->gravity_properties, + ti_current); + } + } /* loop over the bparts in ci. */ + } /* loop over the bparts in ci. 
*/ + +#endif /* (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) */ + + TIMER_TOC(TIMER_DOSELF_BH); +} + +/** + * @brief Calculate the number density of cj #part around the ci #bpart + * + * @param r runner task + * @param ci The first #cell + * @param cj The second #cell + */ +void DO_NONSYM_PAIR1_BH_NAIVE(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj) { + +#ifdef SWIFT_DEBUG_CHECKS +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + if (ci->nodeID != engine_rank) error("Should be run on a different node"); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + if (cj->nodeID != engine_rank) error("Should be run on a different node"); +#endif +#endif + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + /* Anything to do here? */ + if (ci->black_holes.count == 0) return; + if (!cell_is_active_black_holes(ci, e)) return; + + const int bcount_i = ci->black_holes.count; + const int count_j = cj->hydro.count; + struct bpart *restrict bparts_i = ci->black_holes.parts; + struct part *restrict parts_j = cj->hydro.parts; + struct xpart *restrict xparts_j = cj->hydro.xparts; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + /* Do we actually have any gas neighbours? */ + if (cj->hydro.count != 0) { + + /* Loop over the bparts in ci. */ + for (int bid = 0; bid < bcount_i; bid++) { + + /* Get a hold of the ith bpart in ci. 
*/ + struct bpart *restrict bi = &bparts_i[bid]; + + /* Skip inactive particles */ + if (!bpart_is_active(bi, e)) continue; + + const float hi = bi->h; + const float hig2 = hi * hi * kernel_gamma2; + const float bix[3] = {(float)(bi->x[0] - (cj->loc[0] + shift[0])), + (float)(bi->x[1] - (cj->loc[1] + shift[1])), + (float)(bi->x[2] - (cj->loc[2] + shift[2]))}; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + struct xpart *restrict xpj = &xparts_j[pjd]; + const float hj = pj->h; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + + /* Compute the pairwise distance. */ + const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]), + (float)(pj->x[1] - cj->loc[1]), + (float)(pj->x[2] - cj->loc[2])}; + float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (bi->ti_drift != e->ti_current) + error("Particle bi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + if (r2 < hig2) { + IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties, + ti_current); + } + } /* loop over the parts in cj. */ + } /* loop over the bparts in ci. */ + } /* Do we have gas particles in the cell? */ + + /* When doing BH swallowing, we need a quick loop also over the BH + * neighbours */ +#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) + + const int bcount_j = cj->black_holes.count; + struct bpart *restrict bparts_j = cj->black_holes.parts; + + /* Loop over the bparts in ci. */ + for (int bid = 0; bid < bcount_i; bid++) { + + /* Get a hold of the ith bpart in ci. 
*/ + struct bpart *restrict bi = &bparts_i[bid]; + + /* Skip inactive particles */ + if (!bpart_is_active(bi, e)) continue; + + const float hi = bi->h; + const float hig2 = hi * hi * kernel_gamma2; + const float bix[3] = {(float)(bi->x[0] - (cj->loc[0] + shift[0])), + (float)(bi->x[1] - (cj->loc[1] + shift[1])), + (float)(bi->x[2] - (cj->loc[2] + shift[2]))}; + + /* Loop over the bparts in cj. */ + for (int bjd = 0; bjd < bcount_j; bjd++) { + + /* Get a pointer to the jth particle. */ + struct bpart *restrict bj = &bparts_j[bjd]; + const float hj = bj->h; + + /* Skip inhibited particles. */ + if (bpart_is_inhibited(bj, e)) continue; + + /* Compute the pairwise distance. */ + const float bjx[3] = {(float)(bj->x[0] - cj->loc[0]), + (float)(bj->x[1] - cj->loc[1]), + (float)(bj->x[2] - cj->loc[2])}; + float dx[3] = {bix[0] - bjx[0], bix[1] - bjx[1], bix[2] - bjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (bi->ti_drift != e->ti_current) + error("Particle bi not drifted to current time"); + if (bj->ti_drift != e->ti_current) + error("Particle bj not drifted to current time"); +#endif + + if (r2 < hig2) { + IACT_BH_BH(r2, dx, hi, hj, bi, bj, cosmo, e->gravity_properties, + ti_current); + } + } /* loop over the bparts in cj. */ + } /* loop over the bparts in ci. 
*/ + +#endif /* (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) */ +} + +void DOPAIR1_BH_NAIVE(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj, int timer) { + + TIMER_TIC; + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + const int do_ci_bh = ci->nodeID == r->e->nodeID; + const int do_cj_bh = cj->nodeID == r->e->nodeID; +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + /* here we are updating the hydro -> switch ci, cj */ + const int do_ci_bh = cj->nodeID == r->e->nodeID; + const int do_cj_bh = ci->nodeID == r->e->nodeID; +#else + /* The swallow task is executed on both sides */ + const int do_ci_bh = 1; + const int do_cj_bh = 1; +#endif + + if (do_ci_bh) DO_NONSYM_PAIR1_BH_NAIVE(r, ci, cj); + if (do_cj_bh) DO_NONSYM_PAIR1_BH_NAIVE(r, cj, ci); + + TIMER_TOC(TIMER_DOPAIR_BH); +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * Version using a brute-force algorithm. + * + * @param r The #runner. + * @param ci The first #cell. + * @param bparts_i The #bpart to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param bcount The number of particles in @c ind. + * @param cj The second #cell. + * @param shift The shift vector to apply to the particles in ci. + */ +void DOPAIR1_SUBSET_BH_NAIVE(struct runner *r, struct cell *restrict ci, + struct bpart *restrict bparts_i, int *restrict ind, + const int bcount, struct cell *restrict cj, + const double *shift) { + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != engine_rank) error("Should be run on a different node"); +#endif + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + const int count_j = cj->hydro.count; + struct part *restrict parts_j = cj->hydro.parts; + struct xpart *restrict xparts_j = cj->hydro.xparts; + + /* Early abort? */ + if (count_j == 0) return; + + /* Loop over the parts_i. 
*/ + for (int bid = 0; bid < bcount; bid++) { + + /* Get a hold of the ith part in ci. */ + struct bpart *restrict bi = &bparts_i[ind[bid]]; + + const double bix = bi->x[0] - (shift[0]); + const double biy = bi->x[1] - (shift[1]); + const double biz = bi->x[2] - (shift[2]); + const float hi = bi->h; + const float hig2 = hi * hi * kernel_gamma2; + +#ifdef SWIFT_DEBUG_CHECKS + if (!bpart_is_active(bi, e)) + error("Trying to correct smoothing length of inactive particle !"); +#endif + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + struct xpart *restrict xpj = &xparts_j[pjd]; + + /* Skip inhibited particles */ + if (part_is_inhibited(pj, e)) continue; + + const double pjx = pj->x[0]; + const double pjy = pj->x[1]; + const double pjz = pj->x[2]; + const float hj = pj->h; + + /* Compute the pairwise distance. */ + float dx[3] = {(float)(bix - pjx), (float)(biy - pjy), + (float)(biz - pjz)}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? */ + if (r2 < hig2) { + IACT_BH_GAS(r2, dx, hi, hj, bi, pj, xpj, cosmo, e->gravity_properties, + ti_current); + } + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * @param r The #runner. + * @param ci The first #cell. + * @param bparts The #bpart to interact. + * @param ind The list of indices of particles in @c ci to interact with. + * @param bcount The number of particles in @c ind. 
+ */ +void DOSELF1_SUBSET_BH(struct runner *r, struct cell *restrict ci, + struct bpart *restrict bparts, int *restrict ind, + const int bcount) { + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != engine_rank) error("Should be run on a different node"); +#endif + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + const int count_i = ci->hydro.count; + struct part *restrict parts_j = ci->hydro.parts; + struct xpart *restrict xparts_j = ci->hydro.xparts; + + /* Early abort? */ + if (count_i == 0) return; + + /* Loop over the parts in ci. */ + for (int bid = 0; bid < bcount; bid++) { + + /* Get a hold of the ith part in ci. */ + struct bpart *bi = &bparts[ind[bid]]; + const float bix[3] = {(float)(bi->x[0] - ci->loc[0]), + (float)(bi->x[1] - ci->loc[1]), + (float)(bi->x[2] - ci->loc[2])}; + const float hi = bi->h; + const float hig2 = hi * hi * kernel_gamma2; + +#ifdef SWIFT_DEBUG_CHECKS + if (!bpart_is_active(bi, e)) error("Inactive particle in subset function!"); +#endif + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_i; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + struct xpart *restrict xpj = &xparts_j[pjd]; + + /* Early abort? */ + if (part_is_inhibited(pj, e)) continue; + + /* Compute the pairwise distance. */ + const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]), + (float)(pj->x[1] - ci->loc[1]), + (float)(pj->x[2] - ci->loc[2])}; + float dx[3] = {bix[0] - pjx[0], bix[1] - pjx[1], bix[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? 
*/ + if (r2 < hig2) { + IACT_BH_GAS(r2, dx, hi, pj->h, bi, pj, xpj, cosmo, + e->gravity_properties, ti_current); + } + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ +} + +/** + * @brief Determine which version of DOSELF1_SUBSET_BH needs to be called + * depending on the optimisation level. + * + * @param r The #runner. + * @param ci The first #cell. + * @param bparts The #bpart to interact. + * @param ind The list of indices of particles in @c ci to interact with. + * @param bcount The number of particles in @c ind. + */ +void DOSELF1_SUBSET_BRANCH_BH(struct runner *r, struct cell *restrict ci, + struct bpart *restrict bparts, int *restrict ind, + const int bcount) { + + DOSELF1_SUBSET_BH(r, ci, bparts, ind, bcount); +} + +/** + * @brief Determine which version of DOPAIR1_SUBSET_BH needs to be called + * depending on the orientation of the cells or whether DOPAIR1_SUBSET_BH + * needs to be called at all. + * + * @param r The #runner. + * @param ci The first #cell. + * @param bparts_i The #bpart to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param bcount The number of particles in @c ind. + * @param cj The second #cell. + */ +void DOPAIR1_SUBSET_BRANCH_BH(struct runner *r, struct cell *restrict ci, + struct bpart *restrict bparts_i, + int *restrict ind, int const bcount, + struct cell *restrict cj) { + + const struct engine *e = r->e; + + /* Anything to do here? */ + if (cj->hydro.count == 0) return; + + /* Get the relative distance between the pairs, wrapping. 
*/ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + DOPAIR1_SUBSET_BH_NAIVE(r, ci, bparts_i, ind, bcount, cj, shift); +} +/** @brief Interact a subset of bparts (self if cj is NULL), recursing. */ +void DOSUB_SUBSET_BH(struct runner *r, struct cell *ci, struct bpart *bparts, + int *ind, const int bcount, struct cell *cj, + int gettimer) { + + const struct engine *e = r->e; + struct space *s = e->s; + + /* Should we even bother? */ + if (!cell_is_active_black_holes(ci, e) && + (cj == NULL || !cell_is_active_black_holes(cj, e))) + return; + + /* Find out in which sub-cell of ci the bparts are. */ + struct cell *sub = NULL; + if (ci->split) { + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) { + if (&bparts[ind[0]] >= &ci->progeny[k]->black_holes.parts[0] && + &bparts[ind[0]] < + &ci->progeny[k] + ->black_holes.parts[ci->progeny[k]->black_holes.count]) { + sub = ci->progeny[k]; + break; + } + } + } + } + + /* Is this a single cell? */ + if (cj == NULL) { + + /* Recurse? */ + if (cell_can_recurse_in_self_black_holes_task(ci)) { + + /* Self on sub, then sub against every other progeny. */ + DOSUB_SUBSET_BH(r, sub, bparts, ind, bcount, NULL, 0); + for (int j = 0; j < 8; j++) + if (ci->progeny[j] != sub && ci->progeny[j] != NULL) + DOSUB_SUBSET_BH(r, sub, bparts, ind, bcount, ci->progeny[j], 0); + + } + + /* Otherwise, compute self-interaction. */ + else + DOSELF1_SUBSET_BRANCH_BH(r, ci, bparts, ind, bcount); + } /* self-interaction. */ + + /* Otherwise, it's a pair interaction. */ + else { + + /* Recurse? */ + if (cell_can_recurse_in_pair_black_holes_task(ci, cj) && + cell_can_recurse_in_pair_black_holes_task(cj, ci)) { + + /* Get the type of pair and flip ci/cj if needed. 
*/ + double shift[3] = {0.0, 0.0, 0.0}; + const int sid = space_getsid(s, &ci, &cj, shift); + + struct cell_split_pair *csp = &cell_split_pairs[sid]; + for (int k = 0; k < csp->count; k++) { + const int pid = csp->pairs[k].pid; + const int pjd = csp->pairs[k].pjd; + if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL) + DOSUB_SUBSET_BH(r, ci->progeny[pid], bparts, ind, bcount, + cj->progeny[pjd], 0); + if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub) + DOSUB_SUBSET_BH(r, cj->progeny[pjd], bparts, ind, bcount, + ci->progeny[pid], 0); + } + } + + /* Otherwise, compute the pair directly. */ + else if (cell_is_active_black_holes(ci, e) && cj->hydro.count > 0) { + + /* Both cells must already have been drifted. */ + if (cell_is_active_black_holes(ci, e)) { + if (!cell_are_bpart_drifted(ci, e)) error("Cell should be drifted!"); + if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); + } + + DOPAIR1_SUBSET_BRANCH_BH(r, ci, bparts, ind, bcount, cj); + } + + } /* otherwise, pair interaction. */ +} + +/** + * @brief Determine which version of DOSELF1_BH needs to be called depending + * on the optimisation level. + * + * @param r #runner + * @param c #cell c + * + */ +void DOSELF1_BRANCH_BH(struct runner *r, struct cell *c) { + + const struct engine *restrict e = r->e; + + /* Anything to do here? */ + if (c->black_holes.count == 0) return; + + /* Nothing to do if the cell is inactive. */ + if (!cell_is_active_black_holes(c, e)) return; + + /* Did we mess up the recursion? */ + if (c->black_holes.h_max_old * kernel_gamma > c->dmin) + error("Cell smaller than smoothing length"); + + DOSELF1_BH(r, c, 1); +} + +/** + * @brief Determine which version of DOPAIR1_BH needs to be called depending + * on the orientation of the cells or whether DOPAIR1_BH needs to be called + * at all. 
+ * + * @param r #runner + * @param ci #cell ci + * @param cj #cell cj + * + */ +void DOPAIR1_BRANCH_BH(struct runner *r, struct cell *ci, struct cell *cj) { + + const struct engine *restrict e = r->e; + + const int ci_active = cell_is_active_black_holes(ci, e); + const int cj_active = cell_is_active_black_holes(cj, e); +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + const int do_ci_bh = ci->nodeID == e->nodeID; + const int do_cj_bh = cj->nodeID == e->nodeID; +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + /* Here we are updating the hydro -> switch ci, cj */ + const int do_ci_bh = cj->nodeID == e->nodeID; + const int do_cj_bh = ci->nodeID == e->nodeID; +#else + /* The swallow task is executed on both sides */ + const int do_ci_bh = 1; + const int do_cj_bh = 1; +#endif + + const int do_ci = (ci->black_holes.count != 0 && cj->hydro.count != 0 && + ci_active && do_ci_bh); + const int do_cj = (cj->black_holes.count != 0 && ci->hydro.count != 0 && + cj_active && do_cj_bh); + + /* Anything to do here? */ + if (!do_ci && !do_cj) return; + + /* Check that cells are drifted. */ + if (do_ci && + (!cell_are_bpart_drifted(ci, e) || !cell_are_part_drifted(cj, e))) + error("Interacting undrifted cells."); + + if (do_cj && + (!cell_are_part_drifted(ci, e) || !cell_are_bpart_drifted(cj, e))) + error("Interacting undrifted cells."); + + /* No sorted interactions here -> use the naive ones */ + DOPAIR1_BH_NAIVE(r, ci, cj, 1); +} + +/** + * @brief Compute grouped sub-cell interactions for pairs + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + * @param gettimer Do we have a timer ? + * + * @todo Hard-code the sid on the recursive calls to avoid the + * redundant computations to find the sid on-the-fly. + */ +void DOSUB_PAIR1_BH(struct runner *r, struct cell *ci, struct cell *cj, + int gettimer) { + + TIMER_TIC; + + struct space *s = r->e->s; + const struct engine *e = r->e; + + /* Should we even bother? 
+ * In the swallow case we care about BH-BH and BH-gas + * interactions. + * In all other cases only BH-gas, so we can abort if there + * is no gas in the cell */ +#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) + const int should_do_ci = + ci->black_holes.count != 0 && cell_is_active_black_holes(ci, e); + const int should_do_cj = + cj->black_holes.count != 0 && cell_is_active_black_holes(cj, e); +#else + const int should_do_ci = ci->black_holes.count != 0 && cj->hydro.count != 0 && + cell_is_active_black_holes(ci, e); + const int should_do_cj = cj->black_holes.count != 0 && ci->hydro.count != 0 && + cell_is_active_black_holes(cj, e); + +#endif + + if (!should_do_ci && !should_do_cj) return; + + /* Get the type of pair and flip ci/cj if needed. */ + double shift[3]; + const int sid = space_getsid(s, &ci, &cj, shift); + + /* Recurse? */ + if (cell_can_recurse_in_pair_black_holes_task(ci, cj) && + cell_can_recurse_in_pair_black_holes_task(cj, ci)) { + struct cell_split_pair *csp = &cell_split_pairs[sid]; + for (int k = 0; k < csp->count; k++) { + const int pid = csp->pairs[k].pid; + const int pjd = csp->pairs[k].pjd; + if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) + DOSUB_PAIR1_BH(r, ci->progeny[pid], cj->progeny[pjd], 0); + } + } + + /* Otherwise, compute the pair directly. 
*/ + else { + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + const int do_ci_bh = ci->nodeID == e->nodeID; + const int do_cj_bh = cj->nodeID == e->nodeID; +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + /* Here we are updating the hydro -> switch ci, cj */ + const int do_ci_bh = cj->nodeID == e->nodeID; + const int do_cj_bh = ci->nodeID == e->nodeID; +#else + /* Here we perform the task on both sides */ + const int do_ci_bh = 1; + const int do_cj_bh = 1; +#endif + + const int do_ci = ci->black_holes.count != 0 && + cell_is_active_black_holes(ci, e) && do_ci_bh; + const int do_cj = cj->black_holes.count != 0 && + cell_is_active_black_holes(cj, e) && do_cj_bh; + + if (do_ci) { + + /* Make sure both cells are drifted to the current timestep. */ + if (!cell_are_bpart_drifted(ci, e)) + error("Interacting undrifted cells (bparts)."); + + if (cj->hydro.count != 0 && !cell_are_part_drifted(cj, e)) + error("Interacting undrifted cells (parts)."); + } + + if (do_cj) { + + /* Make sure both cells are drifted to the current timestep. */ + if (ci->hydro.count != 0 && !cell_are_part_drifted(ci, e)) + error("Interacting undrifted cells (parts)."); + + if (!cell_are_bpart_drifted(cj, e)) + error("Interacting undrifted cells (bparts)."); + } + + if (do_ci || do_cj) DOPAIR1_BRANCH_BH(r, ci, cj); + } + + TIMER_TOC(TIMER_DOSUB_PAIR_BH); +} + +/** + * @brief Compute grouped sub-cell interactions for self tasks + * + * @param r The #runner. + * @param ci The first #cell. + * @param gettimer Do we have a timer ? + */ +void DOSUB_SELF1_BH(struct runner *r, struct cell *ci, int gettimer) { + + TIMER_TIC; + + const struct engine *e = r->e; + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != engine_rank) + error("This function should not be called on foreign cells"); +#endif + + /* Should we even bother? + * In the swallow case we care about BH-BH and BH-gas + * interactions. 
+ * In all other cases only BH-gas, so we can abort if there + * is no gas in the cell */ +#if (FUNCTION_TASK_LOOP == TASK_LOOP_SWALLOW) + const int should_do_ci = + ci->black_holes.count != 0 && cell_is_active_black_holes(ci, e); +#else + const int should_do_ci = ci->black_holes.count != 0 && ci->hydro.count != 0 && + cell_is_active_black_holes(ci, e); +#endif + + if (!should_do_ci) return; + + /* Recurse? */ + if (cell_can_recurse_in_self_black_holes_task(ci)) { + + /* Loop over all progeny. */ + for (int k = 0; k < 8; k++) + if (ci->progeny[k] != NULL) { + DOSUB_SELF1_BH(r, ci->progeny[k], 0); + for (int j = k + 1; j < 8; j++) + if (ci->progeny[j] != NULL) + DOSUB_PAIR1_BH(r, ci->progeny[k], ci->progeny[j], 0); + } + } + + /* Otherwise, compute self-interaction. */ + else { + + /* Check we did drift to the current time */ + if (!cell_are_bpart_drifted(ci, e)) error("Interacting undrifted cell."); + + if (ci->hydro.count != 0 && !cell_are_part_drifted(ci, e)) + error("Interacting undrifted cells (parts)."); + + DOSELF1_BRANCH_BH(r, ci); + } + + TIMER_TOC(TIMER_DOSUB_SELF_BH); +} diff --git a/src/runner_doiact.h b/src/runner_doiact_functions_hydro.h similarity index 96% rename from src/runner_doiact.h rename to src/runner_doiact_functions_hydro.h index 8aabb05d177385c6bbee1a91eb2ea231ccbca3e4..c324c759b5acc9db75cf0849d0e417b2141978f4 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact_functions_hydro.h @@ -24,106 +24,7 @@ and runner_dosub_FUNCTION calling the pairwise interaction function runner_iact_FUNCTION. 
*/ -#define PASTE(x, y) x##_##y - -#define _DOPAIR1_BRANCH(f) PASTE(runner_dopair1_branch, f) -#define DOPAIR1_BRANCH _DOPAIR1_BRANCH(FUNCTION) - -#define _DOPAIR1(f) PASTE(runner_dopair1, f) -#define DOPAIR1 _DOPAIR1(FUNCTION) - -#define _DOPAIR2_BRANCH(f) PASTE(runner_dopair2_branch, f) -#define DOPAIR2_BRANCH _DOPAIR2_BRANCH(FUNCTION) - -#define _DOPAIR2(f) PASTE(runner_dopair2, f) -#define DOPAIR2 _DOPAIR2(FUNCTION) - -#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f) -#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION) - -#define _DOPAIR_SUBSET_BRANCH(f) PASTE(runner_dopair_subset_branch, f) -#define DOPAIR_SUBSET_BRANCH _DOPAIR_SUBSET_BRANCH(FUNCTION) - -#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f) -#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION) - -#define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f) -#define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION) - -#define _DOPAIR1_NAIVE(f) PASTE(runner_dopair1_naive, f) -#define DOPAIR1_NAIVE _DOPAIR1_NAIVE(FUNCTION) - -#define _DOPAIR2_NAIVE(f) PASTE(runner_dopair2_naive, f) -#define DOPAIR2_NAIVE _DOPAIR2_NAIVE(FUNCTION) - -#define _DOSELF1_NAIVE(f) PASTE(runner_doself1_naive, f) -#define DOSELF1_NAIVE _DOSELF1_NAIVE(FUNCTION) - -#define _DOSELF2_NAIVE(f) PASTE(runner_doself2_naive, f) -#define DOSELF2_NAIVE _DOSELF2_NAIVE(FUNCTION) - -#define _DOSELF1_BRANCH(f) PASTE(runner_doself1_branch, f) -#define DOSELF1_BRANCH _DOSELF1_BRANCH(FUNCTION) - -#define _DOSELF1(f) PASTE(runner_doself1, f) -#define DOSELF1 _DOSELF1(FUNCTION) - -#define _DOSELF2_BRANCH(f) PASTE(runner_doself2_branch, f) -#define DOSELF2_BRANCH _DOSELF2_BRANCH(FUNCTION) - -#define _DOSELF2(f) PASTE(runner_doself2, f) -#define DOSELF2 _DOSELF2(FUNCTION) - -#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f) -#define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION) - -#define _DOSELF_SUBSET_BRANCH(f) PASTE(runner_doself_subset_branch, f) -#define DOSELF_SUBSET_BRANCH 
_DOSELF_SUBSET_BRANCH(FUNCTION) - -#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f) -#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION) - -#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f) -#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION) - -#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f) -#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION) - -#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f) -#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION) - -#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f) -#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION) - -#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym, f) -#define IACT_NONSYM _IACT_NONSYM(FUNCTION) - -#define _IACT(f) PASTE(runner_iact, f) -#define IACT _IACT(FUNCTION) - -#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f) -#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION) - -#define _IACT_VEC(f) PASTE(runner_iact_vec, f) -#define IACT_VEC _IACT_VEC(FUNCTION) - -#define _TIMER_DOSELF(f) PASTE(timer_doself, f) -#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION) - -#define _TIMER_DOPAIR(f) PASTE(timer_dopair, f) -#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION) - -#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f) -#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION) - -#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f) -#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION) - -#define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f) -#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION) - -#define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f) -#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION) +#include "runner_doiact_hydro.h" /** * @brief Compute the interactions between a cell pair (non-symmetric case). 
diff --git a/src/runner_doiact_functions_stars.h b/src/runner_doiact_functions_stars.h new file mode 100644 index 0000000000000000000000000000000000000000..b0d731857e9b4b0474e47c3ac3fca540eecb1cbb --- /dev/null +++ b/src/runner_doiact_functions_stars.h @@ -0,0 +1,1332 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Before including this file, define FUNCTION, which is the + name of the interaction function. This creates the interaction functions + runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION, + and runner_dosub_FUNCTION calling the pairwise interaction function + runner_iact_FUNCTION. */ + +#include "runner_doiact_stars.h" + +/** + * @brief Calculate the number density of #part around the #spart + * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. 
+ */ +void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != engine_rank) error("Should be run on a different node"); +#endif + + TIMER_TIC; + + const struct engine *e = r->e; + const int with_cosmology = e->policy & engine_policy_cosmology; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + /* Anything to do here? */ + if (c->hydro.count == 0 || c->stars.count == 0) return; + if (!cell_is_active_stars(c, e)) return; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + const int scount = c->stars.count; + const int count = c->hydro.count; + struct spart *restrict sparts = c->stars.parts; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + + /* Loop over the sparts in c. */ + for (int sid = 0; sid < scount; sid++) { + + /* Get a hold of the ith spart in c. */ + struct spart *restrict si = &sparts[sid]; + + /* Skip inactive particles */ + if (!spart_is_active(si, e)) continue; + + /* Skip sparts with inactive feedback */ + if (!feedback_is_active(si, e->time, cosmo, with_cosmology)) continue; + + const float hi = si->h; + const float hig2 = hi * hi * kernel_gamma2; + const float six[3] = {(float)(si->x[0] - c->loc[0]), + (float)(si->x[1] - c->loc[1]), + (float)(si->x[2] - c->loc[2])}; + + /* Loop over the parts in c. */ + for (int pjd = 0; pjd < count; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts[pjd]; + struct xpart *restrict xpj = &xparts[pjd]; + const float hj = pj->h; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + + /* Compute the pairwise distance. 
*/ + const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), + (float)(pj->x[1] - c->loc[1]), + (float)(pj->x[2] - c->loc[2])}; + float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + if (r2 < hig2) { + IACT_STARS(r2, dx, hi, hj, si, pj, a, H); +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo, + ti_current); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo, + ti_current); +#endif + } + } /* loop over the parts in c. */ + } /* loop over the sparts in c. */ + + TIMER_TOC(TIMER_DOSELF_STARS); +} + +/** + * @brief Calculate the number density of cj #part around the ci #spart + * + * @param r runner task + * @param ci The first #cell + * @param cj The second #cell + */ +void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj) { + +#ifdef SWIFT_DEBUG_CHECKS +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + if (ci->nodeID != engine_rank) error("Should be run on a different node"); +#else + if (cj->nodeID != engine_rank) error("Should be run on a different node"); +#endif +#endif + + const struct engine *e = r->e; + const int with_cosmology = e->policy & engine_policy_cosmology; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + /* Anything to do here? 
*/ + if (cj->hydro.count == 0 || ci->stars.count == 0) return; + if (!cell_is_active_stars(ci, e)) return; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + const int scount_i = ci->stars.count; + const int count_j = cj->hydro.count; + struct spart *restrict sparts_i = ci->stars.parts; + struct part *restrict parts_j = cj->hydro.parts; + struct xpart *restrict xparts_j = cj->hydro.xparts; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + /* Loop over the sparts in ci. */ + for (int sid = 0; sid < scount_i; sid++) { + + /* Get a hold of the ith spart in ci. */ + struct spart *restrict si = &sparts_i[sid]; + + /* Skip inactive particles */ + if (!spart_is_active(si, e)) continue; + + /* Skip sparts with inactive feedback */ + if (!feedback_is_active(si, e->time, cosmo, with_cosmology)) continue; + + const float hi = si->h; + const float hig2 = hi * hi * kernel_gamma2; + const float six[3] = {(float)(si->x[0] - (cj->loc[0] + shift[0])), + (float)(si->x[1] - (cj->loc[1] + shift[1])), + (float)(si->x[2] - (cj->loc[2] + shift[2]))}; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + struct xpart *restrict xpj = &xparts_j[pjd]; + const float hj = pj->h; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + + /* Compute the pairwise distance. 
*/ + const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]), + (float)(pj->x[1] - cj->loc[1]), + (float)(pj->x[2] - cj->loc[2])}; + float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + if (r2 < hig2) { + IACT_STARS(r2, dx, hi, hj, si, pj, a, H); + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo, + ti_current); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo, + ti_current); +#endif + } + } /* loop over the parts in cj. */ + } /* loop over the sparts in ci. */ +} + +/** + * @brief Compute the interactions between a cell pair. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + * @param sid The direction of the pair. + * @param shift The shift vector to apply to the particles in ci. + */ +void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, + const int sid, const double *shift) { + + TIMER_TIC; + + const struct engine *e = r->e; + const int with_cosmology = e->policy & engine_policy_cosmology; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + /* Get the cutoff shift. 
*/ + double rshift = 0.0; + for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + const int do_ci_stars = (ci->nodeID == e->nodeID) && (ci->stars.count != 0) && + (cj->hydro.count != 0) && cell_is_active_stars(ci, e); + const int do_cj_stars = (cj->nodeID == e->nodeID) && (cj->stars.count != 0) && + (ci->hydro.count != 0) && cell_is_active_stars(cj, e); +#else + /* here we are updating the hydro -> switch ci, cj for local */ + const int do_ci_stars = (cj->nodeID == e->nodeID) && (ci->stars.count != 0) && + (cj->hydro.count != 0) && cell_is_active_stars(ci, e); + const int do_cj_stars = (ci->nodeID == e->nodeID) && (cj->stars.count != 0) && + (ci->hydro.count != 0) && cell_is_active_stars(cj, e); +#endif + + if (do_ci_stars) { + + /* Pick-out the sorted lists. */ + const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; + const struct sort_entry *restrict sort_i = ci->stars.sort[sid]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Some constants used to checks that the parts are in the right frame */ + const float shift_threshold_x = + 2. * ci->width[0] + + 2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part); + const float shift_threshold_y = + 2. * ci->width[1] + + 2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part); + const float shift_threshold_z = + 2. * ci->width[2] + + 2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part); +#endif /* SWIFT_DEBUG_CHECKS */ + + /* Get some other useful values. 
*/ + const double hi_max = ci->stars.h_max * kernel_gamma - rshift; + const int count_i = ci->stars.count; + const int count_j = cj->hydro.count; + struct spart *restrict sparts_i = ci->stars.parts; + struct part *restrict parts_j = cj->hydro.parts; + struct xpart *restrict xparts_j = cj->hydro.xparts; + const double dj_min = sort_j[0].d; + const float dx_max_rshift = + (ci->stars.dx_max_sort + cj->hydro.dx_max_sort) - rshift; + const float dx_max = (ci->stars.dx_max_sort + cj->hydro.dx_max_sort); + + /* Loop over the sparts in ci. */ + for (int pid = count_i - 1; + pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { + + /* Get a hold of the ith part in ci. */ + struct spart *restrict spi = &sparts_i[sort_i[pid].i]; + const float hi = spi->h; + + /* Skip inactive particles */ + if (!spart_is_active(spi, e)) continue; + + /* Skip inactive particles */ + if (!feedback_is_active(spi, e->time, cosmo, with_cosmology)) continue; + + /* Compute distance from the other cell. */ + const double px[3] = {spi->x[0], spi->x[1], spi->x[2]}; + float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] + + px[2] * runner_shift[sid][2]; + + /* Is there anything we need to interact with ? */ + const double di = dist + hi * kernel_gamma + dx_max_rshift; + if (di < dj_min) continue; + + /* Get some additional information about pi */ + const float hig2 = hi * hi * kernel_gamma2; + const float pix = spi->x[0] - (cj->loc[0] + shift[0]); + const float piy = spi->x[1] - (cj->loc[1] + shift[1]); + const float piz = spi->x[2] - (cj->loc[2] + shift[2]); + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { + + /* Recover pj */ + struct part *pj = &parts_j[sort_j[pjd].i]; + struct xpart *xpj = &xparts_j[sort_j[pjd].i]; + + /* Skip inhibited particles. 
*/ + if (part_is_inhibited(pj, e)) continue; + + const float hj = pj->h; + const float pjx = pj->x[0] - cj->loc[0]; + const float pjy = pj->x[1] - cj->loc[1]; + const float pjz = pj->x[2] - cj->loc[2]; + + /* Compute the pairwise distance. */ + float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles are in the correct frame after the shifts */ + if (pix > shift_threshold_x || pix < -shift_threshold_x) + error( + "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)", + pix, ci->width[0]); + if (piy > shift_threshold_y || piy < -shift_threshold_y) + error( + "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)", + piy, ci->width[1]); + if (piz > shift_threshold_z || piz < -shift_threshold_z) + error( + "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)", + piz, ci->width[2]); + if (pjx > shift_threshold_x || pjx < -shift_threshold_x) + error( + "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)", + pjx, ci->width[0]); + if (pjy > shift_threshold_y || pjy < -shift_threshold_y) + error( + "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)", + pjy, ci->width[1]); + if (pjz > shift_threshold_z || pjz < -shift_threshold_z) + error( + "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)", + pjz, ci->width[2]); + + /* Check that particles have been drifted to the current time */ + if (spi->ti_drift != e->ti_current) + error("Particle spi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? 
*/ + if (r2 < hig2) { + IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, + cosmo, ti_current); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, + ti_current); +#endif + } + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ + } /* do_ci_stars */ + + if (do_cj_stars) { + /* Pick-out the sorted lists. */ + const struct sort_entry *restrict sort_i = ci->hydro.sort[sid]; + const struct sort_entry *restrict sort_j = cj->stars.sort[sid]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Some constants used to checks that the parts are in the right frame */ + const float shift_threshold_x = + 2. * ci->width[0] + + 2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part); + const float shift_threshold_y = + 2. * ci->width[1] + + 2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part); + const float shift_threshold_z = + 2. * ci->width[2] + + 2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part); +#endif /* SWIFT_DEBUG_CHECKS */ + + /* Get some other useful values. */ + const double hj_max = cj->hydro.h_max * kernel_gamma; + const int count_i = ci->hydro.count; + const int count_j = cj->stars.count; + struct part *restrict parts_i = ci->hydro.parts; + struct xpart *restrict xparts_i = ci->hydro.xparts; + struct spart *restrict sparts_j = cj->stars.parts; + const double di_max = sort_i[count_i - 1].d - rshift; + const float dx_max_rshift = + (ci->hydro.dx_max_sort + cj->stars.dx_max_sort) + rshift; + const float dx_max = (ci->hydro.dx_max_sort + cj->stars.dx_max_sort); + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max; + pjd++) { + + /* Get a hold of the jth part in cj. 
*/ + struct spart *spj = &sparts_j[sort_j[pjd].i]; + const float hj = spj->h; + + /* Skip inactive particles */ + if (!spart_is_active(spj, e)) continue; + + /* Skip inactive particles */ + if (!feedback_is_active(spj, e->time, cosmo, with_cosmology)) continue; + + /* Compute distance from the other cell. */ + const double px[3] = {spj->x[0], spj->x[1], spj->x[2]}; + float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] + + px[2] * runner_shift[sid][2]; + + /* Is there anything we need to interact with ? */ + const double dj = dist - hj * kernel_gamma - dx_max_rshift; + if (dj - rshift > di_max) continue; + + /* Get some additional information about pj */ + const float hjg2 = hj * hj * kernel_gamma2; + const float pjx = spj->x[0] - cj->loc[0]; + const float pjy = spj->x[1] - cj->loc[1]; + const float pjz = spj->x[2] - cj->loc[2]; + + /* Loop over the parts in ci. */ + for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) { + + /* Recover pi */ + struct part *pi = &parts_i[sort_i[pid].i]; + struct xpart *xpi = &xparts_i[sort_i[pid].i]; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pi, e)) continue; + + const float hi = pi->h; + const float pix = pi->x[0] - (cj->loc[0] + shift[0]); + const float piy = pi->x[1] - (cj->loc[1] + shift[1]); + const float piz = pi->x[2] - (cj->loc[2] + shift[2]); + + /* Compute the pairwise distance. 
*/ + float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles are in the correct frame after the shifts */ + if (pix > shift_threshold_x || pix < -shift_threshold_x) + error( + "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)", + pix, ci->width[0]); + if (piy > shift_threshold_y || piy < -shift_threshold_y) + error( + "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)", + piy, ci->width[1]); + if (piz > shift_threshold_z || piz < -shift_threshold_z) + error( + "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)", + piz, ci->width[2]); + if (pjx > shift_threshold_x || pjx < -shift_threshold_x) + error( + "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)", + pjx, ci->width[0]); + if (pjy > shift_threshold_y || pjy < -shift_threshold_y) + error( + "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)", + pjy, ci->width[1]); + if (pjz > shift_threshold_z || pjz < -shift_threshold_z) + error( + "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)", + pjz, ci->width[2]); + + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (spj->ti_drift != e->ti_current) + error("Particle spj not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hjg2) { + + IACT_STARS(r2, dx, hj, hi, spj, pi, a, H); + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_nonsym_feedback_density(r2, dx, hj, hi, spj, pi, xpi, + cosmo, ti_current); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + runner_iact_nonsym_feedback_apply(r2, dx, hj, hi, spj, pi, xpi, cosmo, + ti_current); +#endif + } + } /* loop over the parts in ci. */ + } /* loop over the parts in cj. 
*/ + } /* Cell cj is active */ + + TIMER_TOC(TIMER_DOPAIR_STARS); +} +/** + * @brief Naive driver for the star-gas interactions of a cell pair. + * + * Calls DO_NONSYM_PAIR1_STARS_NAIVE for (ci, cj) when ci holds sparts and cj + * holds parts, then for (cj, ci) under the mirrored test; each call also + * requires its node-locality flag. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + * @param timer Not referenced directly in this function body. + */ +void DOPAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj, int timer) { + + TIMER_TIC; + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + const int do_ci_stars = ci->nodeID == r->e->nodeID; + const int do_cj_stars = cj->nodeID == r->e->nodeID; +#else + /* Not the density loop: here we are updating the hydro -> switch ci, cj in the node-locality test */ + const int do_ci_stars = cj->nodeID == r->e->nodeID; + const int do_cj_stars = ci->nodeID == r->e->nodeID; +#endif + if (do_ci_stars && ci->stars.count != 0 && cj->hydro.count != 0) + DO_NONSYM_PAIR1_STARS_NAIVE(r, ci, cj); + if (do_cj_stars && cj->stars.count != 0 && ci->hydro.count != 0) + DO_NONSYM_PAIR1_STARS_NAIVE(r, cj, ci); + + TIMER_TOC(TIMER_DOPAIR_STARS); +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * Version using a brute-force algorithm. + * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts_i The #part to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + * @param cj The second #cell. + * @param sid The direction of the pair. + * @param flipped Flag to check whether the cells have been flipped or not. + * @param shift The shift vector to apply to the particles in ci. 
+ */ +void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, + struct spart *restrict sparts_i, int *restrict ind, + int scount, struct cell *restrict cj, const int sid, + const int flipped, const double *shift) { + /* NOTE(review): the header comment of this function calls it brute-force and describes sparts_i as #part; the code below takes #spart entries and walks the sorted list of cj along direction sid, stopping once the sorted distance passes di. */ + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + const int count_j = cj->hydro.count; + struct part *restrict parts_j = cj->hydro.parts; + struct xpart *restrict xparts_j = cj->hydro.xparts; + + /* Early abort? Nothing to do if cj holds no gas particles. */ + if (count_j == 0) return; + + /* Pick out the sorted list of cj for direction sid, plus the dx_max_sort of cj used to widen the search range. */ + const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; + const float dxj = cj->hydro.dx_max_sort; + + /* Sparts are on the left? Then walk the sorted list upwards from its start. */ + if (!flipped) { + + /* Loop over the sparts_i. */ + for (int pid = 0; pid < scount; pid++) { + + /* Get a hold of the ith spart in ci. */ + struct spart *restrict spi = &sparts_i[ind[pid]]; + const double pix = spi->x[0] - (shift[0]); + const double piy = spi->x[1] - (shift[1]); + const double piz = spi->x[2] - (shift[2]); + const float hi = spi->h; + const float hig2 = hi * hi * kernel_gamma2; + const double di = hi * kernel_gamma + dxj + pix * runner_shift[sid][0] + + piy * runner_shift[sid][1] + piz * runner_shift[sid][2]; + + /* Loop over the parts in cj, stopping at the first sorted entry at or beyond di. */ + for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[sort_j[pjd].i]; + struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i]; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + + const double pjx = pj->x[0]; + const double pjy = pj->x[1]; + const double pjz = pj->x[2]; + const float hj = pj->h; + + /* Compute the pairwise distance. 
*/ + float dx[3] = {(float)(pix - pjx), (float)(piy - pjy), + (float)(piz - pjz)}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (spi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? Only pairs with r2 below hig2 interact. */ + if (r2 < hig2) { + IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, + cosmo, ti_current); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, + ti_current); +#endif + } + } /* loop over the parts in cj. */ + } /* loop over the sparts in ci. */ + } + + /* Sparts are on the right: walk the sorted list downwards from its end. */ + else { + + /* Loop over the sparts_i. */ + for (int pid = 0; pid < scount; pid++) { + + /* Get a hold of the ith spart in ci. */ + struct spart *restrict spi = &sparts_i[ind[pid]]; + const double pix = spi->x[0] - (shift[0]); + const double piy = spi->x[1] - (shift[1]); + const double piz = spi->x[2] - (shift[2]); + const float hi = spi->h; + const float hig2 = hi * hi * kernel_gamma2; + const double di = -hi * kernel_gamma - dxj + pix * runner_shift[sid][0] + + piy * runner_shift[sid][1] + piz * runner_shift[sid][2]; + + /* Loop over the parts in cj, stopping at the first sorted entry at or below di. */ + for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[sort_j[pjd].i]; + struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i]; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + + const double pjx = pj->x[0]; + const double pjy = pj->x[1]; + const double pjz = pj->x[2]; + const float hj = pj->h; + + /* Compute the pairwise distance. 
*/ + float dx[3] = {(float)(pix - pjx), (float)(piy - pjy), + (float)(piz - pjz)}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (spi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? Only pairs with r2 below hig2 interact. */ + if (r2 < hig2) { + IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, + cosmo, ti_current); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, + ti_current); +#endif + } + } /* loop over the parts in cj. */ + } /* loop over the sparts in ci. */ + } +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * Version using a brute-force algorithm. + * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts_i The #part to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + * @param cj The second #cell. + * @param shift The shift vector to apply to the particles in ci. 
+ */ +void DOPAIR1_SUBSET_STARS_NAIVE(struct runner *r, struct cell *restrict ci, + struct spart *restrict sparts_i, + int *restrict ind, int scount, + struct cell *restrict cj, const double *shift) { + /* The sparts_i entries are #spart; every selected spart is tested against all count_j parts of cj and no sorted list is used. */ +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != engine_rank) error("Should be run on a different node"); +#endif + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + const int count_j = cj->hydro.count; + struct part *restrict parts_j = cj->hydro.parts; + struct xpart *restrict xparts_j = cj->hydro.xparts; + + /* Early abort? Nothing to do if cj holds no gas particles. */ + if (count_j == 0) return; + + /* Loop over the selected sparts of ci. */ + for (int pid = 0; pid < scount; pid++) { + + /* Get a hold of the ith selected spart in ci and shift its position. */ + struct spart *restrict spi = &sparts_i[ind[pid]]; + + const double pix = spi->x[0] - (shift[0]); + const double piy = spi->x[1] - (shift[1]); + const double piz = spi->x[2] - (shift[2]); + const float hi = spi->h; + const float hig2 = hi * hi * kernel_gamma2; + +#ifdef SWIFT_DEBUG_CHECKS + if (!spart_is_active(spi, e)) + error("Trying to correct smoothing length of inactive particle !"); +#endif + + /* Loop over all the parts in cj, in storage order. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + struct xpart *restrict xpj = &xparts_j[pjd]; + + /* Skip inhibited particles */ + if (part_is_inhibited(pj, e)) continue; + + const double pjx = pj->x[0]; + const double pjy = pj->x[1]; + const double pjz = pj->x[2]; + const float hj = pj->h; + + /* Compute the pairwise distance. 
*/ + float dx[3] = {(float)(pix - pjx), (float)(piy - pjy), + (float)(piz - pjz)}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? Only pairs with r2 below hig2 interact. */ + if (r2 < hig2) { + IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, cosmo, + ti_current); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, + ti_current); +#endif + } + } /* loop over the parts in cj. */ + } /* loop over the sparts in ci. */ +} + +/** + * @brief Compute the interactions within a single cell, between the sparts at + * the given indices and all the gas parts of that same cell. + * + * @param r The #runner. + * @param ci The #cell. + * @param sparts The #spart to interact. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + */ +void DOSELF1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, + struct spart *restrict sparts, int *restrict ind, + int scount) { + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != engine_rank) error("Should be run on a different node"); +#endif + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const struct cosmology *cosmo = e->cosmology; + + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + + const int count_i = ci->hydro.count; + struct part *restrict parts_j = ci->hydro.parts; + struct xpart *restrict xparts_j = ci->hydro.xparts; + + /* Early abort? Nothing to do if ci holds no gas particles. */ + if (count_i == 0) return; + + /* Loop over the selected sparts in ci. */ + for (int spid = 0; spid < scount; spid++) { + + /* Get a hold of the ith selected spart in ci. 
*/ + struct spart *spi = &sparts[ind[spid]]; + const float spix[3] = {(float)(spi->x[0] - ci->loc[0]), + (float)(spi->x[1] - ci->loc[1]), + (float)(spi->x[2] - ci->loc[2])}; + const float hi = spi->h; + const float hig2 = hi * hi * kernel_gamma2; + +#ifdef SWIFT_DEBUG_CHECKS + if (!spart_is_active(spi, e)) + error("Inactive particle in subset function!"); +#endif + + /* Loop over the gas parts of the same cell ci. */ + for (int pjd = 0; pjd < count_i; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + struct xpart *restrict xpj = &xparts_j[pjd]; + + /* Skip inhibited particles. */ + if (part_is_inhibited(pj, e)) continue; + + /* Compute the pairwise distance, with both positions taken relative to the location of ci. */ + const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]), + (float)(pj->x[1] - ci->loc[1]), + (float)(pj->x[2] - ci->loc[2])}; + float dx[3] = {spix[0] - pjx[0], spix[1] - pjx[1], spix[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? Only pairs with r2 below hig2 interact. */ + if (r2 < hig2) { + IACT_STARS(r2, dx, hi, pj->h, spi, pj, a, H); +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_nonsym_feedback_density(r2, dx, hi, pj->h, spi, pj, xpj, + cosmo, ti_current); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) + runner_iact_nonsym_feedback_apply(r2, dx, hi, pj->h, spi, pj, xpj, + cosmo, ti_current); +#endif + } + } /* loop over the parts in ci. */ + } /* loop over the sparts in ci. */ +} + +/** + * @brief Determine which version of DOSELF1_SUBSET_STARS needs to be called + * depending on the optimisation level. + * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts The #spart to interact. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. 
+ */ +void DOSELF1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, + struct spart *restrict sparts, + int *restrict ind, int scount) { + /* Only one implementation is called here, so simply forward the arguments. */ + DOSELF1_SUBSET_STARS(r, ci, sparts, ind, scount); +} + +/** + * @brief Determine which version of DOPAIR1_SUBSET_STARS needs to be called + * depending on the orientation of the cells or whether DOPAIR1_SUBSET_STARS + * needs to be called at all. + * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts_i The #spart to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + * @param cj The second #cell. + */ +void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, + struct spart *restrict sparts_i, + int *restrict ind, int scount, + struct cell *restrict cj) { + + const struct engine *e = r->e; + + /* Anything to do here? Not if cj holds no gas particles. */ + if (cj->hydro.count == 0) return; + + /* Get the wrapping shift: one full box length along any axis where the two cell locations differ by more than half the box. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + +#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS + DOPAIR1_SUBSET_STARS_NAIVE(r, ci, sparts_i, ind, scount, cj, shift); +#else + /* Get the sorting index: a base-3 code built from the sign of the shifted cell offset along each axis. */ + int sid = 0; + for (int k = 0; k < 3; k++) + sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0) + ? 0 + : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1); + + /* Switch the cells around? The flip flag is looked up before sid is mapped to its sort-list ID. */ + const int flipped = runner_flip[sid]; + sid = sortlistID[sid]; + + /* Has the cell cj been sorted? 
*/ + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin) + error("Interacting unsorted cells."); + + DOPAIR1_SUBSET_STARS(r, ci, sparts_i, ind, scount, cj, sid, flipped, shift); +#endif +} +/** + * @brief Recursive driver for the subset star interactions: walks down the + * cell tree until the self or pair interaction can be computed directly. + * + * @param r The #runner. + * @param ci The first #cell. + * @param sparts The #spart to interact. + * @param ind The list of indices of particles in @c ci to interact with. + * @param scount The number of particles in @c ind. + * @param cj The second #cell, or NULL for a self-interaction. + * @param gettimer Not read in this function. + */ +void DOSUB_SUBSET_STARS(struct runner *r, struct cell *ci, struct spart *sparts, + int *ind, int scount, struct cell *cj, int gettimer) { + + const struct engine *e = r->e; + struct space *s = e->s; + + /* Should we even bother? */ + if (!cell_is_active_stars(ci, e) && + (cj == NULL || !cell_is_active_stars(cj, e))) + return; + + /* Find out in which sub-cell of ci the sparts are, using the address of the first indexed spart only. */ + struct cell *sub = NULL; + if (ci->split) { + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) { + if (&sparts[ind[0]] >= &ci->progeny[k]->stars.parts[0] && + &sparts[ind[0]] < + &ci->progeny[k]->stars.parts[ci->progeny[k]->stars.count]) { + sub = ci->progeny[k]; + break; + } + } + } + } + + /* Is this a single cell? */ + if (cj == NULL) { + + /* Recurse? */ + if (cell_can_recurse_in_self_stars_task(ci)) { + + /* Self call on sub, then pair sub with every other existing progeny. */ + DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, NULL, 0); + for (int j = 0; j < 8; j++) + if (ci->progeny[j] != sub && ci->progeny[j] != NULL) + DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, ci->progeny[j], 0); + + } + + /* Otherwise, compute self-interaction. */ + else + DOSELF1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount); + } /* self-interaction. */ + + /* Otherwise, it's a pair interaction. */ + else { + + /* Recurse? */ + if (cell_can_recurse_in_pair_stars_task(ci, cj) && + cell_can_recurse_in_pair_stars_task(cj, ci)) { + + /* Get the type of pair and flip ci/cj if needed. 
*/ + double shift[3] = {0.0, 0.0, 0.0}; + const int sid = space_getsid(s, &ci, &cj, shift); + + struct cell_split_pair *csp = &cell_split_pairs[sid]; + for (int k = 0; k < csp->count; k++) { + const int pid = csp->pairs[k].pid; + const int pjd = csp->pairs[k].pjd; + if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL) + DOSUB_SUBSET_STARS(r, ci->progeny[pid], sparts, ind, scount, + cj->progeny[pjd], 0); + if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub) + DOSUB_SUBSET_STARS(r, cj->progeny[pjd], sparts, ind, scount, + ci->progeny[pid], 0); + } + } + + /* Otherwise, compute the pair directly. */ + else if (cell_is_active_stars(ci, e) && cj->hydro.count > 0) { + + /* Both cells must already be drifted; nothing is drifted here. NOTE(review): this inner activity test repeats the enclosing condition. */ + if (cell_is_active_stars(ci, e)) { + if (!cell_are_spart_drifted(ci, e)) error("Cell should be drifted!"); + if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); + } + + DOPAIR1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount, cj); + } + + } /* otherwise, pair interaction. */ +} + +/** + * @brief Determine which version of DOSELF1_STARS needs to be called depending + * on the optimisation level. + * + * @param r #runner + * @param c #cell c + * + */ +void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c) { + + const struct engine *restrict e = r->e; + + /* Anything to do here? Not without sparts. */ + if (c->stars.count == 0) return; + + /* Anything to do here? Not if the cell is not active for stars. */ + if (!cell_is_active_stars(c, e)) return; + + /* Did we mess up the recursion? 
*/ + if (c->stars.h_max_old * kernel_gamma > c->dmin) + error("Cell smaller than smoothing length"); + + DOSELF1_STARS(r, c, 1); +} +/** + * @brief Debug check that the sort list of cj in direction sid is still valid. + * + * For every non-inhibited particle of cj, the position projected on + * runner_shift[sid] is compared with the stored sort distance; error() is + * raised when the mismatch exceeds dx_max_sort by more than both the relative + * and the absolute tolerance below. + * Expects a variable named e to be in scope at the expansion site. + * + * NOTE(review): the message has no separator between the super cellID field + * and the depth field, and the super cell is always read from the hydro member + * whatever TYPE is. + */ +#define RUNNER_CHECK_SORT(TYPE, PART, cj, ci, sid) \ + ({ \ + const struct sort_entry *restrict sort_j = cj->TYPE.sort[sid]; \ + \ + for (int pjd = 0; pjd < cj->TYPE.count; pjd++) { \ + const struct PART *p = &cj->TYPE.parts[sort_j[pjd].i]; \ + if (PART##_is_inhibited(p, e)) continue; \ + \ + const float d = p->x[0] * runner_shift[sid][0] + \ + p->x[1] * runner_shift[sid][1] + \ + p->x[2] * runner_shift[sid][2]; \ + if ((fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) > \ + 1.0e-4 * max(fabsf(d), cj->TYPE.dx_max_sort_old) && \ + (fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) > \ + cj->width[0] * 1.0e-10) \ + error( \ + "particle shift diff exceeds dx_max_sort in cell cj. " \ + "cj->nodeID=%d " \ + "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->" #TYPE \ + ".dx_max_sort=%e " \ + "cj->" #TYPE \ + ".dx_max_sort_old=%e, cellID=%i super->cellID=%i" \ + "cj->depth=%d cj->maxdepth=%d", \ + cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->TYPE.dx_max_sort, \ + cj->TYPE.dx_max_sort_old, cj->cellID, cj->hydro.super->cellID, \ + cj->depth, cj->maxdepth); \ + } \ + }) + +/** + * @brief Determine which version of DOPAIR1_STARS needs to be called depending + * on the orientation of the cells or whether DOPAIR1_STARS needs to be called + * at all. + * + * @param r #runner + * @param ci #cell ci + * @param cj #cell cj + * + */ +void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj) { + + const struct engine *restrict e = r->e; + + /* Get the sort ID. 
*/ + double shift[3] = {0.0, 0.0, 0.0}; + const int sid = space_getsid(e->s, &ci, &cj, shift); + + const int ci_active = cell_is_active_stars(ci, e); + const int cj_active = cell_is_active_stars(cj, e); +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + const int do_ci_stars = ci->nodeID == e->nodeID; + const int do_cj_stars = cj->nodeID == e->nodeID; +#else + /* here we are updating the hydro -> switch ci, cj */ + const int do_ci_stars = cj->nodeID == e->nodeID; + const int do_cj_stars = ci->nodeID == e->nodeID; +#endif + const int do_ci = (ci->stars.count != 0 && cj->hydro.count != 0 && + ci_active && do_ci_stars); + const int do_cj = (cj->stars.count != 0 && ci->hydro.count != 0 && + cj_active && do_cj_stars); + + /* Anything to do here? */ + if (!do_ci && !do_cj) return; + + /* Check that cells are drifted (sparts of ci, parts of cj). */ + if (do_ci && + (!cell_are_spart_drifted(ci, e) || !cell_are_part_drifted(cj, e))) + error("Interacting undrifted cells."); + + /* Have the cells been sorted? The do_ci direction needs the stars list of ci and the hydro list of cj. */ + if (do_ci && (!(ci->stars.sorted & (1 << sid)) || + ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin)) + error("Interacting unsorted cells."); + + if (do_ci && (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin)) + error("Interacting unsorted cells."); + /* Same drift check for the other direction (parts of ci, sparts of cj). */ + if (do_cj && + (!cell_are_part_drifted(ci, e) || !cell_are_spart_drifted(cj, e))) + error("Interacting undrifted cells."); + + /* Have the cells been sorted? The do_cj direction needs the hydro list of ci and the stars list of cj. */ + if (do_cj && (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin)) + error("Interacting unsorted cells."); + + if (do_cj && (!(cj->stars.sorted & (1 << sid)) || + cj->stars.dx_max_sort_old > space_maxreldx * cj->dmin)) + error("Interacting unsorted cells."); + +#ifdef SWIFT_DEBUG_CHECKS + if (do_ci) { + // MATTHIEU: This test is faulty. To be fixed... 
+ // RUNNER_CHECK_SORT(hydro, part, cj, ci, sid); + RUNNER_CHECK_SORT(stars, spart, ci, cj, sid); + } + + if (do_cj) { + // MATTHIEU: This test is faulty. To be fixed... + // RUNNER_CHECK_SORT(hydro, part, ci, cj, sid); + RUNNER_CHECK_SORT(stars, spart, cj, ci, sid); + } +#endif /* SWIFT_DEBUG_CHECKS */ + +#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS + DOPAIR1_STARS_NAIVE(r, ci, cj, 1); +#else + DO_SYM_PAIR1_STARS(r, ci, cj, sid, shift); +#endif +} + +/** + * @brief Compute grouped sub-cell interactions for pairs + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + * @param gettimer Do we have a timer ? (not referenced directly in this body) + * + * @todo Hard-code the sid on the recursive calls to avoid the + * redundant computations to find the sid on-the-fly. + */ +void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, + int gettimer) { + + TIMER_TIC; + + struct space *s = r->e->s; + const struct engine *e = r->e; + + /* Should we even bother? Each direction needs sparts on one side, parts on the other and a star-active cell. */ + const int should_do_ci = ci->stars.count != 0 && cj->hydro.count != 0 && + cell_is_active_stars(ci, e); + const int should_do_cj = cj->stars.count != 0 && ci->hydro.count != 0 && + cell_is_active_stars(cj, e); + if (!should_do_ci && !should_do_cj) return; + + /* Get the type of pair and flip ci/cj if needed. */ + double shift[3]; + const int sid = space_getsid(s, &ci, &cj, shift); + + /* Recurse? */ + if (cell_can_recurse_in_pair_stars_task(ci, cj) && + cell_can_recurse_in_pair_stars_task(cj, ci)) { + struct cell_split_pair *csp = &cell_split_pairs[sid]; + for (int k = 0; k < csp->count; k++) { + const int pid = csp->pairs[k].pid; + const int pjd = csp->pairs[k].pjd; + if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[pid], cj->progeny[pjd], 0); + } + } + + /* Otherwise, compute the pair directly. 
*/ + else { + +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + const int do_ci_stars = ci->nodeID == e->nodeID; + const int do_cj_stars = cj->nodeID == e->nodeID; +#else + /* here we are updating the hydro -> switch ci, cj */ + const int do_ci_stars = cj->nodeID == e->nodeID; + const int do_cj_stars = ci->nodeID == e->nodeID; +#endif + const int do_ci = ci->stars.count != 0 && cj->hydro.count != 0 && + cell_is_active_stars(ci, e) && do_ci_stars; + const int do_cj = cj->stars.count != 0 && ci->hydro.count != 0 && + cell_is_active_stars(cj, e) && do_cj_stars; + + if (do_ci) { + + /* Make sure both cells are drifted to the current timestep. */ + if (!cell_are_spart_drifted(ci, e)) + error("Interacting undrifted cells (sparts)."); + + if (!cell_are_part_drifted(cj, e)) + error("Interacting undrifted cells (parts)."); + + /* Both sort lists must already exist and be recent enough; nothing is sorted here. */ + if (!(ci->stars.sorted & (1 << sid)) || + ci->stars.dx_max_sort_old > ci->dmin * space_maxreldx) { + error("Interacting unsorted cell (sparts)."); + } + + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) + error("Interacting unsorted cell (parts). %i", cj->nodeID); + } + + if (do_cj) { + + /* Make sure both cells are drifted to the current timestep. */ + if (!cell_are_part_drifted(ci, e)) + error("Interacting undrifted cells (parts)."); + + if (!cell_are_spart_drifted(cj, e)) + error("Interacting undrifted cells (sparts)."); + + /* Do any of the cells need to be sorted first? 
*/ + if (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) { + error("Interacting unsorted cell (parts)."); + } + + if (!(cj->stars.sorted & (1 << sid)) || + cj->stars.dx_max_sort_old > cj->dmin * space_maxreldx) { + error("Interacting unsorted cell (sparts)."); + } + } + + if (do_ci || do_cj) DOPAIR1_BRANCH_STARS(r, ci, cj); + } + + TIMER_TOC(TIMER_DOSUB_PAIR_STARS); +} + +/** + * @brief Compute grouped sub-cell interactions for self tasks + * + * @param r The #runner. + * @param ci The #cell. + * @param gettimer Do we have a timer ? (not referenced directly in this body) + */ +void DOSUB_SELF1_STARS(struct runner *r, struct cell *ci, int gettimer) { + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->nodeID != engine_rank) + error("This function should not be called on foreign cells"); +#endif + + /* Should we even bother? Not without both parts and sparts, nor if the cell is not active for stars. */ + if (ci->hydro.count == 0 || ci->stars.count == 0 || + !cell_is_active_stars(ci, r->e)) + return; + + /* Recurse? */ + if (cell_can_recurse_in_self_stars_task(ci)) { + + /* Loop over all progeny: self call on each, then pair it with every later progeny. */ + for (int k = 0; k < 8; k++) + if (ci->progeny[k] != NULL) { + DOSUB_SELF1_STARS(r, ci->progeny[k], 0); + for (int j = k + 1; j < 8; j++) + if (ci->progeny[j] != NULL) + DOSUB_PAIR1_STARS(r, ci->progeny[k], ci->progeny[j], 0); + } + } + + /* Otherwise, compute self-interaction. */ + else { + + /* The sparts must already be drifted to the current timestep; no drift is done here. */ + if (!cell_are_spart_drifted(ci, r->e)) error("Interacting undrifted cell."); + + DOSELF1_BRANCH_STARS(r, ci); + } + + TIMER_TOC(TIMER_DOSUB_SELF_STARS); +} diff --git a/src/runner_doiact_grav.c b/src/runner_doiact_grav.c new file mode 100644 index 0000000000000000000000000000000000000000..372b7524ecc743735b82c146984a1e2f14203c4d --- /dev/null +++ b/src/runner_doiact_grav.c @@ -0,0 +1,1825 @@ +/******************************************************************************* + * This file is part of SWIFT. 
+ * Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#include "../config.h" + +/* This object's header. */ +#include "runner_doiact_grav.h" + +/* Local includes. */ +#include "active.h" +#include "cell.h" +#include "gravity.h" +#include "gravity_cache.h" +#include "gravity_iact.h" +#include "inline.h" +#include "part.h" +#include "space_getsid.h" +#include "timers.h" + +/** + * @brief Recursively propagate the multipoles down the tree by applying the + * L2L and L2P kernels. + * + * @param r The #runner. + * @param c The #cell we are working on. + * @param timer Are we timing this ? 
+ */ +void runner_do_grav_down(struct runner *r, struct cell *c, int timer) { + + /* Some constants */ + const struct engine *e = r->e; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->grav.ti_old_multipole != e->ti_current) + error("c->multipole not drifted."); + if (c->grav.multipole->pot.ti_init != e->ti_current) + error("c->field tensor not initialised"); +#endif + + if (c->split) { + + /* Node case */ + + /* Add the field-tensor to all the 8 progenitors */ + for (int k = 0; k < 8; ++k) { + struct cell *cp = c->progeny[k]; + + /* Do we have a progenitor with any active g-particles ? */ + if (cp != NULL && cell_is_active_gravity(cp, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + if (cp->grav.ti_old_multipole != e->ti_current) + error("cp->multipole not drifted."); + if (cp->grav.multipole->pot.ti_init != e->ti_current) + error("cp->field tensor not initialised"); +#endif + /* If the tensor received any contribution, push it down */ + if (c->grav.multipole->pot.interacted) { + + struct grav_tensor shifted_tensor; + + /* Shift the field tensor */ + gravity_L2L(&shifted_tensor, &c->grav.multipole->pot, + cp->grav.multipole->CoM, c->grav.multipole->CoM); + + /* Add it to this level's tensor */ + gravity_field_tensors_add(&cp->grav.multipole->pot, &shifted_tensor); + } + + /* Recurse */ + runner_do_grav_down(r, cp, 0); + } + } + + } else { + + /* Leaf case */ + + /* We can abort early if no interactions via multipole happened */ + if (!c->grav.multipole->pot.interacted) return; + + if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts"); + + /* Cell properties */ + struct gpart *gparts = c->grav.parts; + const int gcount = c->grav.count; + const struct grav_tensor *pot = &c->grav.multipole->pot; + const double CoM[3] = {c->grav.multipole->CoM[0], c->grav.multipole->CoM[1], + c->grav.multipole->CoM[2]}; + + /* Apply accelerations to the particles */ + for (int i = 0; i < gcount; ++i) { + + /* Get a handle on the gpart */ + struct gpart *gp = &gparts[i]; + + /* Update 
if active */ + if (gpart_is_active(gp, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (gp->ti_drift != e->ti_current) + error("gpart not drifted to current time"); + if (c->grav.multipole->pot.ti_init != e->ti_current) + error("c->field tensor not initialised"); + + /* Check that we are not updated an inhibited particle */ + if (gpart_is_inhibited(gp, e)) error("Updating an inhibited particle!"); + + /* Check that the particle was initialised */ + if (gp->initialised == 0) + error("Adding forces to an un-initialised gpart."); +#endif + /* Apply the kernel */ + gravity_L2P(pot, CoM, gp); + } + } + } + + if (timer) TIMER_TOC(timer_dograv_down); +} + +/** + * @brief Compute the non-truncated gravity interactions between all particles + * of a cell and the particles of the other cell. + * + * The calculation is performed non-symmetrically using the pre-filled + * #gravity_cache structures. The loop over the j cache should auto-vectorize. + * + * @param ci_cache #gravity_cache contaning the particles to be updated. + * @param cj_cache #gravity_cache contaning the source particles. + * @param gcount_i The number of particles in the cell i. + * @param gcount_padded_j The number of particles in the cell j padded to the + * vector length. + * @param periodic Is the calculation using periodic BCs ? + * @param dim The size of the simulation volume. + * + * @param e The #engine (for debugging checks only). + * @param gparts_i The #gpart in cell i (for debugging checks only). + * @param gparts_j The #gpart in cell j (for debugging checks only). + * @param gcount_j The number of particles in the cell j (for debugging checks + * only). 
+ */
+static INLINE void runner_dopair_grav_pp_full(
+    struct gravity_cache *restrict ci_cache,
+    struct gravity_cache *restrict cj_cache, const int gcount_i,
+    const int gcount_j, const int gcount_padded_j, const int periodic,
+    const float dim[3], const struct engine *restrict e,
+    struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) {
+
+  /* Outer loop: the (un-padded) sink particles of ci */
+  for (int i = 0; i < gcount_i; i++) {
+
+    /* Nothing to do for inactive sinks, nor for sinks flagged to use the
+     * multipole instead */
+    if (!ci_cache->active[i] || ci_cache->use_mpole[i]) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!gpart_is_active(&gparts_i[i], e))
+      error("Inactive particle went through the cache");
+#endif
+
+    /* Position and softening of the sink */
+    const float xi = ci_cache->x[i];
+    const float yi = ci_cache->y[i];
+    const float zi = ci_cache->z[i];
+    const float eps_i = ci_cache->epsilon[i];
+
+    /* Running sums of the acceleration and potential felt by the sink */
+    float acc_x = 0.f, acc_y = 0.f, acc_z = 0.f, phi = 0.f;
+
+    /* Alignment and loop-count hints for the j cache arrays */
+    swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
+    swift_assume_size(gcount_padded_j, VEC_SIZE);
+
+    /* Inner loop: the padded source particles of cj */
+    for (int j = 0; j < gcount_padded_j; j++) {
+
+      /* Position, mass and softening of the source */
+      const float xj = cj_cache->x[j];
+      const float yj = cj_cache->y[j];
+      const float zj = cj_cache->z[j];
+      const float m_j = cj_cache->m[j];
+      const float eps_j = cj_cache->epsilon[j];
+
+      /* Separation from i to j, wrapped with nearestf() when periodic */
+      const float dx = periodic ? nearestf(xj - xi, dim[0]) : (xj - xi);
+      const float dy = periodic ? nearestf(yj - yi, dim[1]) : (yj - yi);
+      const float dz = periodic ? nearestf(zj - zi, dim[2]) : (zj - zi);
+
+      const float r2 = dx * dx + dy * dy + dz * dz;
+
+      /* The larger of the two softening lengths, and its powers */
+      const float eps = max(eps_i, eps_j);
+      const float eps2 = eps * eps;
+      const float eps_inv = 1.f / eps;
+      const float eps_inv3 = eps_inv * eps_inv * eps_inv;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (r2 == 0.f && eps2 == 0.)
+        error("Interacting particles with 0 distance and 0 softening.");
+
+      /* Both particles must have been drifted to the current time */
+      if (gparts_i[i].ti_drift != e->ti_current)
+        error("gpi not drifted to current time");
+      if (j < gcount_j && gparts_j[j].ti_drift != e->ti_current &&
+          !gpart_is_inhibited(&gparts_j[j], e))
+        error("gpj not drifted to current time");
+
+      /* The sink must not be inhibited */
+      if (gpart_is_inhibited(&gparts_i[i], e))
+        error("Updating an inhibited particle!");
+
+      /* An inhibited source is only tolerated if it carries no mass */
+      if (j < gcount_j && gpart_is_inhibited(&gparts_j[j], e) && m_j != 0.f)
+        error("Inhibited particle used as gravity source.");
+
+      /* The sink must have been initialised */
+      if (gparts_i[i].initialised == 0)
+        error("Adding forces to an un-initialised gpart.");
+#endif
+
+      /* Evaluate the pair kernel */
+      float fac, phi_ij;
+      runner_iact_grav_pp_full(r2, eps2, eps_inv, eps_inv3, m_j, &fac,
+                               &phi_ij);
+
+      /* Accumulate the contribution of j */
+      acc_x += fac * dx;
+      acc_y += fac * dy;
+      acc_z += fac * dz;
+      phi += phi_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Count the interaction unless j is padding or inhibited */
+      if (j < gcount_j && !gpart_is_inhibited(&gparts_j[j], e))
+        gparts_i[i].num_interacted++;
+#endif
+    }
+
+    /* Add the sums to the sink's entries in the cache */
+    ci_cache->a_x[i] += acc_x;
+    ci_cache->a_y[i] += acc_y;
+    ci_cache->a_z[i] += acc_z;
+    ci_cache->pot[i] += phi;
+  }
+}
+
+/**
+ * @brief Compute the truncated gravity interactions between all particles
+ * of a cell and the particles of the other cell.
+ *
+ * The calculation is performed non-symmetrically using the pre-filled
+ * #gravity_cache structures. The loop over the j cache should auto-vectorize.
+ *
+ * This function only makes sense in periodic BCs.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param cj_cache #gravity_cache containing the source particles.
+ * @param gcount_i The number of particles in the cell i.
+ * @param gcount_padded_j The number of particles in the cell j padded to the
+ * vector length.
+ * @param dim The size of the simulation volume.
+ * @param r_s_inv The inverse of the gravity-mesh smoothing-scale.
+ *
+ * @param e The #engine (for debugging checks only).
+ * @param gparts_i The #gpart in cell i (for debugging checks only).
+ * @param gparts_j The #gpart in cell j (for debugging checks only).
+ * @param gcount_j The number of particles in the cell j (for debugging checks
+ * only).
+ */
+static INLINE void runner_dopair_grav_pp_truncated(
+    struct gravity_cache *restrict ci_cache,
+    struct gravity_cache *restrict cj_cache, const int gcount_i,
+    const int gcount_j, const int gcount_padded_j, const float dim[3],
+    const float r_s_inv, const struct engine *restrict e,
+    struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (!e->s->periodic)
+    error("Calling truncated PP function in non-periodic setup.");
+#endif
+
+  /* Outer loop: the (un-padded) sink particles of ci */
+  for (int i = 0; i < gcount_i; i++) {
+
+    /* Nothing to do for inactive sinks, nor for sinks flagged to use the
+     * multipole instead */
+    if (!ci_cache->active[i] || ci_cache->use_mpole[i]) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    if (!gpart_is_active(&gparts_i[i], e))
+      error("Inactive particle went through the cache");
+#endif
+
+    /* Position and softening of the sink */
+    const float xi = ci_cache->x[i];
+    const float yi = ci_cache->y[i];
+    const float zi = ci_cache->z[i];
+    const float eps_i = ci_cache->epsilon[i];
+
+    /* Running sums of the acceleration and potential felt by the sink */
+    float acc_x = 0.f, acc_y = 0.f, acc_z = 0.f, phi = 0.f;
+
+    /* Alignment and loop-count hints for the j cache arrays */
+    swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
+    swift_assume_size(gcount_padded_j, VEC_SIZE);
+
+    /* Inner loop: the padded source particles of cj */
+    for (int j = 0; j < gcount_padded_j; j++) {
+
+      /* Position, mass and softening of the source */
+      const float xj = cj_cache->x[j];
+      const float yj = cj_cache->y[j];
+      const float zj = cj_cache->z[j];
+      const float m_j = cj_cache->m[j];
+      const float eps_j = cj_cache->epsilon[j];
+
+      /* Separation from i to j, always wrapped with nearestf() here */
+      const float dx = nearestf(xj - xi, dim[0]);
+      const float dy = nearestf(yj - yi, dim[1]);
+      const float dz = nearestf(zj - zi, dim[2]);
+
+      const float r2 = dx * dx + dy * dy + dz * dz;
+
+      /* The larger of the two softening lengths, and its powers */
+      const float eps = max(eps_i, eps_j);
+      const float eps2 = eps * eps;
+      const float eps_inv = 1.f / eps;
+      const float eps_inv3 = eps_inv * eps_inv * eps_inv;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (r2 == 0.f && eps2 == 0.)
+        error("Interacting particles with 0 distance and 0 softening.");
+
+      /* Both particles must have been drifted to the current time */
+      if (gparts_i[i].ti_drift != e->ti_current)
+        error("gpi not drifted to current time");
+      if (j < gcount_j && gparts_j[j].ti_drift != e->ti_current &&
+          !gpart_is_inhibited(&gparts_j[j], e))
+        error("gpj not drifted to current time");
+
+      /* The sink must not be inhibited */
+      if (gpart_is_inhibited(&gparts_i[i], e))
+        error("Updating an inhibited particle!");
+
+      /* An inhibited source is only tolerated if it carries no mass */
+      if (j < gcount_j && gpart_is_inhibited(&gparts_j[j], e) && m_j != 0.f)
+        error("Inhibited particle used as gravity source.");
+
+      /* The sink must have been initialised */
+      if (gparts_i[i].initialised == 0)
+        error("Adding forces to an un-initialised gpart.");
+#endif
+
+      /* Evaluate the truncated pair kernel */
+      float fac, phi_ij;
+      runner_iact_grav_pp_truncated(r2, eps2, eps_inv, eps_inv3, m_j, r_s_inv,
+                                    &fac, &phi_ij);
+
+      /* Accumulate the contribution of j */
+      acc_x += fac * dx;
+      acc_y += fac * dy;
+      acc_z += fac * dz;
+      phi += phi_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Count the interaction unless j is padding or inhibited */
+      if (j < gcount_j && !gpart_is_inhibited(&gparts_j[j], e))
+        gparts_i[i].num_interacted++;
+#endif
+    }
+
+    /* Add the sums to the sink's entries in the cache */
+    ci_cache->a_x[i] += acc_x;
+    ci_cache->a_y[i] += acc_y;
+    ci_cache->a_z[i] += acc_z;
+    ci_cache->pot[i] += phi;
+  }
+}
+
+/**
+ * @brief Compute the gravity interactions between all particles
+ * of a cell and the multipole of the other cell.
+ *
+ * The calculation is performed using the pre-filled
+ * #gravity_cache structure. The loop over the i cache should auto-vectorize.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param gcount_padded_i The number of particles in the cell i padded to the
+ * vector length.
+ * @param CoM_j Position of the #multipole in #cell j.
+ * @param multi_j The #multipole in #cell j.
+ * @param periodic Is the calculation using periodic BCs ?
+ * @param dim The size of the simulation volume.
+ *
+ * @param e The #engine (for debugging checks only).
+ * @param gparts_i The #gpart in cell i (for debugging checks only).
+ * @param gcount_i The number of particles in the cell i (for debugging checks
+ * only).
+ * @param cj The #cell j (for debugging checks only).
+ */
+static INLINE void runner_dopair_grav_pm_full(
+    struct gravity_cache *ci_cache, const int gcount_padded_i,
+    const float CoM_j[3], const struct multipole *restrict multi_j,
+    const int periodic, const float dim[3], const struct engine *restrict e,
+    struct gpart *restrict gparts_i, const int gcount_i,
+    const struct cell *restrict cj) {
+
+  /* Make the compiler understand we are in happy vectorization land */
+  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, active, ci_cache->active,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_assume_size(gcount_padded_i, VEC_SIZE);
+
+  /* Loop over all particles in ci (including the padding) */
+  for (int pid = 0; pid < gcount_padded_i; pid++) {
+
+    /* Skip inactive particles */
+    if (!active[pid]) continue;
+
+    /* Skip particle that cannot use the multipole */
+    if (!use_mpole[pid]) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* A padded entry has no matching #gpart. Catch it first, so that the
+     * checks below never read gparts_i[] beyond gcount_i. */
+    if (pid >= gcount_i) error("Adding forces to padded particle");
+
+    /* The cache flagged this entry as active: the particle must agree */
+    if (!gpart_is_active(&gparts_i[pid], e))
+      error("Inactive particle went through the cache");
+
+    /* Check that particles have been drifted to the current time */
+    if (gparts_i[pid].ti_drift != e->ti_current)
+      error("gpi not drifted to current time");
+
+    /* Check that we are not updating an inhibited particle */
+    if (gpart_is_inhibited(&gparts_i[pid], e))
+      error("Updating an inhibited particle!");
+
+    /* Check that the particle was initialised */
+    if (gparts_i[pid].initialised == 0)
+      error("Adding forces to an un-initialised gpart.");
+#endif
+
+    const float x_i = x[pid];
+    const float y_i = y[pid];
+    const float z_i = z[pid];
+
+    /* Some powers of the softening length */
+    const float h_i = epsilon[pid];
+    const float h_inv_i = 1.f / h_i;
+
+    /* Distance to the Multipole */
+    float dx = CoM_j[0] - x_i;
+    float dy = CoM_j[1] - y_i;
+    float dz = CoM_j[2] - z_i;
+
+    /* Apply periodic BCs? */
+    if (periodic) {
+      dx = nearestf(dx, dim[0]);
+      dy = nearestf(dy, dim[1]);
+      dz = nearestf(dz, dim[2]);
+    }
+
+    const float r2 = dx * dx + dy * dy + dz * dz;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    const float r_max_j = cj->grav.multipole->r_max;
+    const float r_max2 = r_max_j * r_max_j;
+    const float theta_crit2 = e->gravity_properties->theta_crit2;
+
+    /* Re-test the acceptance criterion for this particle.
+     * Note: 0.99 and 1.1 to avoid FP rounding false-positives */
+    if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i))
+      error(
+          "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e "
+          "%e], rmax=%e r=%e epsilon=%e",
+          CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j, sqrtf(r2), h_i);
+#endif
+
+    /* Interact! */
+    float f_x, f_y, f_z, pot_ij;
+    runner_iact_grav_pm_full(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y,
+                             &f_z, &pot_ij);
+
+    /* Store it back */
+    a_x[pid] += f_x;
+    a_y[pid] += f_y;
+    a_z[pid] += f_z;
+    pot[pid] += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Update the interaction counter */
+    if (pid < gcount_i)
+      gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart;
+#endif
+  }
+}
+
+/**
+ * @brief Compute the gravity interactions between all particles
+ * of a cell and the multipole of the other cell.
+ *
+ * The calculation is performed using the pre-filled
+ * #gravity_cache structure. The loop over the i cache should auto-vectorize.
+ *
+ * This function only makes sense in periodic BCs.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param gcount_padded_i The number of particles in the cell i padded to the
+ * vector length.
+ * @param CoM_j Position of the #multipole in #cell j.
+ * @param multi_j The #multipole in #cell j.
+ * @param dim The size of the simulation volume.
+ * @param r_s_inv The inverse of the gravity-mesh smoothing-scale.
+ *
+ * @param e The #engine (for debugging checks only).
+ * @param gparts_i The #gpart in cell i (for debugging checks only).
+ * @param gcount_i The number of particles in the cell i (for debugging checks
+ * only).
+ * @param cj The #cell j (for debugging checks only).
+ */
+static INLINE void runner_dopair_grav_pm_truncated(
+    struct gravity_cache *ci_cache, const int gcount_padded_i,
+    const float CoM_j[3], const struct multipole *restrict multi_j,
+    const float dim[3], const float r_s_inv, const struct engine *restrict e,
+    struct gpart *restrict gparts_i, const int gcount_i,
+    const struct cell *restrict cj) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (!e->s->periodic)
+    error("Calling truncated PM function in non-periodic setup.");
+#endif
+
+  /* Make the compiler understand we are in happy vectorization land */
+  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, active, ci_cache->active,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole,
+                            SWIFT_CACHE_ALIGNMENT);
+  swift_assume_size(gcount_padded_i, VEC_SIZE);
+
+  /* Loop over all particles in ci (including the padding) */
+  for (int pid = 0; pid < gcount_padded_i; pid++) {
+
+    /* Skip inactive particles */
+    if (!active[pid]) continue;
+
+    /* Skip particle that cannot use the multipole */
+    if (!use_mpole[pid]) continue;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* A padded entry has no matching #gpart. Catch it first, so that the
+     * checks below never read gparts_i[] beyond gcount_i. */
+    if (pid >= gcount_i) error("Adding forces to padded particle");
+
+    /* The cache flagged this entry as active: the particle must agree */
+    if (!gpart_is_active(&gparts_i[pid], e))
+      error("Inactive particle went through the cache");
+
+    /* Check that particles have been drifted to the current time */
+    if (gparts_i[pid].ti_drift != e->ti_current)
+      error("gpi not drifted to current time");
+
+    /* Check that we are not updating an inhibited particle */
+    if (gpart_is_inhibited(&gparts_i[pid], e))
+      error("Updating an inhibited particle!");
+
+    /* Check that the particle was initialised */
+    if (gparts_i[pid].initialised == 0)
+      error("Adding forces to an un-initialised gpart.");
+#endif
+
+    const float x_i = x[pid];
+    const float y_i = y[pid];
+    const float z_i = z[pid];
+
+    /* Some powers of the softening length */
+    const float h_i = epsilon[pid];
+    const float h_inv_i = 1.f / h_i;
+
+    /* Distance to the Multipole */
+    float dx = CoM_j[0] - x_i;
+    float dy = CoM_j[1] - y_i;
+    float dz = CoM_j[2] - z_i;
+
+    /* Apply periodic BCs */
+    dx = nearestf(dx, dim[0]);
+    dy = nearestf(dy, dim[1]);
+    dz = nearestf(dz, dim[2]);
+
+    const float r2 = dx * dx + dy * dy + dz * dz;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    const float r_max_j = cj->grav.multipole->r_max;
+    const float r_max2 = r_max_j * r_max_j;
+    const float theta_crit2 = e->gravity_properties->theta_crit2;
+
+    /* Re-test the acceptance criterion for this particle.
+     * Note: 0.99 and 1.1 to avoid FP rounding false-positives */
+    if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i))
+      error(
+          "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e "
+          "%e], rmax=%e",
+          CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j);
+#endif
+
+    /* Interact! */
+    float f_x, f_y, f_z, pot_ij;
+    runner_iact_grav_pm_truncated(dx, dy, dz, r2, h_i, h_inv_i, r_s_inv,
+                                  multi_j, &f_x, &f_y, &f_z, &pot_ij);
+
+    /* Store it back */
+    a_x[pid] += f_x;
+    a_y[pid] += f_y;
+    a_z[pid] += f_z;
+    pot[pid] += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+    /* Update the interaction counter */
+    if (pid < gcount_i)
+      gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart;
+#endif
+  }
+}
+
+/**
+ * @brief Computes the interaction of all the particles in a cell with all the
+ * particles of another cell.
+ *
+ * This function switches between the full potential and the truncated one
+ * depending on needs. It will also use the M2P (multipole) interaction
+ * for the subset of particles in either cell for which the distance criterion
+ * is valid.
+ *
+ * This function starts by constructing the required #gravity_cache for both
+ * cells and then calls the specialised functions doing the actual work on
+ * the caches. It then writes the data back to the particles.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The other #cell.
+ * @param symmetric Are we updating both cells (1) or just ci (0) ?
+ * @param allow_mpole Are we allowing the use of M2P interactions ?
+ */
+INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci,
+                                  struct cell *cj, const int symmetric,
+                                  const int allow_mpole) {
+
+  /* Recover some useful constants */
+  const struct engine *e = r->e;
+  const int periodic = e->mesh->periodic;
+  const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1],
+                        (float)e->mesh->dim[2]};
+  const float r_s_inv = e->mesh->r_s_inv;
+  const double min_trunc = e->mesh->r_cut_min;
+
+  TIMER_TIC;
+
+  /* Record activity status (a cell only counts if it is local to this node) */
+  const int ci_active =
+      cell_is_active_gravity(ci, e) && (ci->nodeID == e->nodeID);
+  const int cj_active =
+      cell_is_active_gravity(cj, e) && (cj->nodeID == e->nodeID);
+
+  /* Anything to do here? */
+  if (!ci_active && !cj_active) return;
+  if (!ci_active && !symmetric) return;
+
+  /* Check that we are not doing something stupid */
+  if (ci->split || cj->split) error("Running P-P on splitable cells");
+
+  /* Let's start by checking things are drifted */
+  if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts");
+  if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts");
+  if (cj_active && ci->grav.ti_old_multipole != e->ti_current)
+    error("Un-drifted multipole");
+  if (ci_active && cj->grav.ti_old_multipole != e->ti_current)
+    error("Un-drifted multipole");
+
+  /* Caches to play with */
+  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
+  struct gravity_cache *const cj_cache = &r->cj_gravity_cache;
+
+  /* Shift to apply to the particles in each cell */
+  const double shift_i[3] = {0., 0., 0.};
+  const double shift_j[3] = {0., 0., 0.};
+
+  /* Recover the multipole info and shift the CoM locations */
+  const float rmax_i = ci->grav.multipole->r_max;
+  const float rmax_j = cj->grav.multipole->r_max;
+  const float rmax2_i = rmax_i * rmax_i;
+  const float rmax2_j = rmax_j * rmax_j;
+  const struct multipole *multi_i = &ci->grav.multipole->m_pole;
+  const struct multipole *multi_j = &cj->grav.multipole->m_pole;
+  const float CoM_i[3] = {(float)(ci->grav.multipole->CoM[0] - shift_i[0]),
+                          (float)(ci->grav.multipole->CoM[1] - shift_i[1]),
+                          (float)(ci->grav.multipole->CoM[2] - shift_i[2])};
+  const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0] - shift_j[0]),
+                          (float)(cj->grav.multipole->CoM[1] - shift_j[1]),
+                          (float)(cj->grav.multipole->CoM[2] - shift_j[2])};
+
+  /* Start by constructing particle caches */
+
+  /* Compute the padded counts */
+  const int gcount_i = ci->grav.count;
+  const int gcount_j = cj->grav.count;
+  const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
+  const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE;
+
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Check that we fit in cache */
+  if (gcount_i > ci_cache->count || gcount_j > cj_cache->count)
+    error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i,
+          gcount_j);
+#endif
+
+  /* Fill the caches */
+  gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim,
+                         ci_cache, ci->grav.parts, gcount_i, gcount_padded_i,
+                         shift_i, CoM_j, rmax2_j, ci, e->gravity_properties);
+  gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim,
+                         cj_cache, cj->grav.parts, gcount_j, gcount_padded_j,
+                         shift_j, CoM_i, rmax2_i, cj, e->gravity_properties);
+
+  /* Can we use the Newtonian version or do we need the truncated one ?
+   * Not periodic -> can always use the Newtonian potential. */
+  int use_truncated = 0;
+  if (periodic) {
+
+    /* Get the relative distance between the CoMs */
+    const double dx[3] = {CoM_j[0] - CoM_i[0], CoM_j[1] - CoM_i[1],
+                          CoM_j[2] - CoM_i[2]};
+    const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+    /* Get the maximal distance between any two particles */
+    const double max_r = sqrt(r2) + rmax_i + rmax_j;
+
+    /* Periodic but far-away cells must use the truncated potential;
+     * close-by cells can use the full Newtonian potential */
+    use_truncated = (max_r > min_trunc);
+  }
+
+  if (use_truncated) {
+
+    /* Let's update the active cell(s) only */
+    if (ci_active) {
+
+      /* First the (truncated) P2P */
+      runner_dopair_grav_pp_truncated(ci_cache, cj_cache, gcount_i, gcount_j,
+                                      gcount_padded_j, dim, r_s_inv, e,
+                                      ci->grav.parts, cj->grav.parts);
+
+      /* Then the M2P */
+      if (allow_mpole)
+        runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j,
+                                        multi_j, dim, r_s_inv, e,
+                                        ci->grav.parts, gcount_i, cj);
+    }
+    if (cj_active && symmetric) {
+
+      /* First the (truncated) P2P */
+      runner_dopair_grav_pp_truncated(cj_cache, ci_cache, gcount_j, gcount_i,
+                                      gcount_padded_i, dim, r_s_inv, e,
+                                      cj->grav.parts, ci->grav.parts);
+
+      /* Then the M2P */
+      if (allow_mpole)
+        runner_dopair_grav_pm_truncated(cj_cache, gcount_padded_j, CoM_i,
+                                        multi_i, dim, r_s_inv, e,
+                                        cj->grav.parts, gcount_j, ci);
+    }
+
+  } else {
+
+    /* Let's update the active cell(s) only */
+    if (ci_active) {
+
+      /* First the (Newtonian) P2P */
+      runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j,
+                                 gcount_padded_j, periodic, dim, e,
+                                 ci->grav.parts, cj->grav.parts);
+
+      /* Then the M2P */
+      if (allow_mpole)
+        runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j,
+                                   periodic, dim, e, ci->grav.parts, gcount_i,
+                                   cj);
+    }
+    if (cj_active && symmetric) {
+
+      /* First the (Newtonian) P2P */
+      runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i,
+                                 gcount_padded_i, periodic, dim, e,
+                                 cj->grav.parts, ci->grav.parts);
+
+      /* Then the M2P */
+      if (allow_mpole)
+        runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i,
+                                   periodic, dim, e, cj->grav.parts, gcount_j,
+                                   ci);
+    }
+  }
+
+  /* Write back to the particles */
+  if (ci_active) gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i);
+  if (cj_active && symmetric)
+    gravity_cache_write_back(cj_cache, cj->grav.parts, gcount_j);
+
+  TIMER_TOC(timer_dopair_grav_pp);
+}
+
+/**
+ * @brief Compute the non-truncated gravity interactions between all the
+ * particles of a single cell.
+ *
+ * The calculation is performed non-symmetrically using the pre-filled
+ * #gravity_cache structure. The loop over the j index should auto-vectorize.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param gcount The number of particles in the cell.
+ * @param gcount_padded The number of particles in the cell padded to the
+ * vector length.
+ *
+ * @param e The #engine (for debugging checks only).
+ * @param gparts The #gpart in the cell (for debugging checks only).
+ */
+static INLINE void runner_doself_grav_pp_full(
+    struct gravity_cache *restrict ci_cache, const int gcount,
+    const int gcount_padded, const struct engine *e, struct gpart *gparts) {
+
+  /* Loop over all particles in ci... */
+  for (int pid = 0; pid < gcount; pid++) {
+
+    /* Skip inactive particles */
+    if (!ci_cache->active[pid]) continue;
+
+    const float x_i = ci_cache->x[pid];
+    const float y_i = ci_cache->y[pid];
+    const float z_i = ci_cache->z[pid];
+    const float h_i = ci_cache->epsilon[pid];
+
+    /* Local accumulators for the acceleration and potential */
+    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
+
+    /* Make the compiler understand we are in happy vectorization land */
+    swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
+    swift_assume_size(gcount_padded, VEC_SIZE);
+
+    /* Loop over every other particle in the cell. */
+    for (int pjd = 0; pjd < gcount_padded; pjd++) {
+
+      /* No self interaction */
+      if (pid == pjd) continue;
+
+      /* Get info about j */
+      const float x_j = ci_cache->x[pjd];
+      const float y_j = ci_cache->y[pjd];
+      const float z_j = ci_cache->z[pjd];
+      const float mass_j = ci_cache->m[pjd];
+      const float h_j = ci_cache->epsilon[pjd];
+
+      /* Compute the pairwise (square) distance. */
+      /* Note: no need for periodic wrapping inside a cell */
+      const float dx = x_j - x_i;
+      const float dy = y_j - y_i;
+      const float dz = z_j - z_i;
+      const float r2 = dx * dx + dy * dy + dz * dz;
+
+      /* Pick the maximal softening length of i and j */
+      const float h = max(h_i, h_j);
+      const float h2 = h * h;
+      const float h_inv = 1.f / h;
+      const float h_inv_3 = h_inv * h_inv * h_inv;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (r2 == 0.f && h2 == 0.)
+        error("Interacting particles with 0 distance and 0 softening.");
+
+      /* Check that particles have been drifted to the current time */
+      if (gparts[pid].ti_drift != e->ti_current)
+        error("gpi not drifted to current time");
+      if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current &&
+          !gpart_is_inhibited(&gparts[pjd], e))
+        error("gpj not drifted to current time");
+
+      /* Check that we are not updating an inhibited particle */
+      if (gpart_is_inhibited(&gparts[pid], e))
+        error("Updating an inhibited particle!");
+
+      /* Check that the particle we interact with was not inhibited */
+      if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f)
+        error("Inhibited particle used as gravity source.");
+
+      /* Check that the particle was initialised */
+      if (gparts[pid].initialised == 0)
+        error("Adding forces to an un-initialised gpart.");
+#endif
+
+      /* Interact! */
+      float f_ij, pot_ij;
+      runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij);
+
+      /* Store it back */
+      a_x += f_ij * dx;
+      a_y += f_ij * dy;
+      a_z += f_ij * dz;
+      pot += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Update the interaction counter if it's not a padded gpart */
+      if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e))
+        gparts[pid].num_interacted++;
+#endif
+    }
+
+    /* Store everything back in cache */
+    ci_cache->a_x[pid] += a_x;
+    ci_cache->a_y[pid] += a_y;
+    ci_cache->a_z[pid] += a_z;
+    ci_cache->pot[pid] += pot;
+  }
+}
+
+/**
+ * @brief Compute the truncated gravity interactions between all the
+ * particles of a single cell.
+ *
+ * The calculation is performed non-symmetrically using the pre-filled
+ * #gravity_cache structure. The loop over the j index should auto-vectorize.
+ *
+ * This function only makes sense in periodic BCs.
+ *
+ * @param ci_cache #gravity_cache containing the particles to be updated.
+ * @param gcount The number of particles in the cell.
+ * @param gcount_padded The number of particles in the cell padded to the
+ * vector length.
+ * @param r_s_inv The inverse of the gravity-mesh smoothing-scale.
+ *
+ * @param e The #engine (for debugging checks only).
+ * @param gparts The #gpart in the cell (for debugging checks only).
+ */
+static INLINE void runner_doself_grav_pp_truncated(
+    struct gravity_cache *restrict ci_cache, const int gcount,
+    const int gcount_padded, const float r_s_inv, const struct engine *e,
+    struct gpart *gparts) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (!e->s->periodic)
+    error("Calling truncated PP function in non-periodic setup.");
+#endif
+
+  /* Loop over all particles in ci... */
+  for (int pid = 0; pid < gcount; pid++) {
+
+    /* Skip inactive particles */
+    if (!ci_cache->active[pid]) continue;
+
+    const float x_i = ci_cache->x[pid];
+    const float y_i = ci_cache->y[pid];
+    const float z_i = ci_cache->z[pid];
+    const float h_i = ci_cache->epsilon[pid];
+
+    /* Local accumulators for the acceleration and potential */
+    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
+
+    /* Make the compiler understand we are in happy vectorization land */
+    swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
+    swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
+    swift_assume_size(gcount_padded, VEC_SIZE);
+
+    /* Loop over every other particle in the cell. */
+    for (int pjd = 0; pjd < gcount_padded; pjd++) {
+
+      /* No self interaction */
+      if (pid == pjd) continue;
+
+      /* Get info about j */
+      const float x_j = ci_cache->x[pjd];
+      const float y_j = ci_cache->y[pjd];
+      const float z_j = ci_cache->z[pjd];
+      const float mass_j = ci_cache->m[pjd];
+      const float h_j = ci_cache->epsilon[pjd];
+
+      /* Compute the pairwise (square) distance. */
+      /* Note: no need for periodic wrapping inside a cell */
+      const float dx = x_j - x_i;
+      const float dy = y_j - y_i;
+      const float dz = z_j - z_i;
+
+      const float r2 = dx * dx + dy * dy + dz * dz;
+
+      /* Pick the maximal softening length of i and j */
+      const float h = max(h_i, h_j);
+      const float h2 = h * h;
+      const float h_inv = 1.f / h;
+      const float h_inv_3 = h_inv * h_inv * h_inv;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      if (r2 == 0.f && h2 == 0.)
+        error("Interacting particles with 0 distance and 0 softening.");
+
+      /* Check that particles have been drifted to the current time */
+      if (gparts[pid].ti_drift != e->ti_current)
+        error("gpi not drifted to current time");
+      if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current &&
+          !gpart_is_inhibited(&gparts[pjd], e))
+        error("gpj not drifted to current time");
+
+      /* Check that we are not updating an inhibited particle */
+      if (gpart_is_inhibited(&gparts[pid], e))
+        error("Updating an inhibited particle!");
+
+      /* Check that the particle we interact with was not inhibited */
+      if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f)
+        error("Inhibited particle used as gravity source.");
+
+      /* Check that the particle was initialised */
+      if (gparts[pid].initialised == 0)
+        error("Adding forces to an un-initialised gpart.");
+#endif
+
+      /* Interact! */
+      float f_ij, pot_ij;
+      runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv,
+                                    &f_ij, &pot_ij);
+
+      /* Store it back */
+      a_x += f_ij * dx;
+      a_y += f_ij * dy;
+      a_z += f_ij * dz;
+      pot += pot_ij;
+
+#ifdef SWIFT_DEBUG_CHECKS
+      /* Update the interaction counter if it's not a padded gpart */
+      if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e))
+        gparts[pid].num_interacted++;
+#endif
+    }
+
+    /* Store everything back in cache */
+    ci_cache->a_x[pid] += a_x;
+    ci_cache->a_y[pid] += a_y;
+    ci_cache->a_z[pid] += a_z;
+    ci_cache->pot[pid] += pot;
+  }
+}
+
+/**
+ * @brief Computes the interaction of all the particles in a cell with all the
+ * other ones.
+ *
+ * This function switches between the full potential and the truncated one
+ * depending on needs.
+ *
+ * This function starts by constructing the required #gravity_cache for the
+ * cell and then calls the specialised functions doing the actual work on
+ * the cache. It then writes the data back to the particles.
+ *
+ * @param r The #runner.
+ * @param c The #cell.
+ */ +INLINE void runner_doself_grav_pp(struct runner *r, struct cell *c) { + + /* Recover some useful constants */ + const struct engine *e = r->e; + const int periodic = e->mesh->periodic; + const float r_s_inv = e->mesh->r_s_inv; + const double min_trunc = e->mesh->r_cut_min; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->grav.count == 0) error("Doing self gravity on an empty cell !"); +#endif + + /* Anything to do here? */ + if (!cell_is_active_gravity(c, e)) return; + + /* Check that we are not doing something stupid */ + if (c->split) error("Running P-P on a splitable cell"); + + /* Do we need to start by drifting things ? */ + if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts"); + + /* Start by constructing a cache for the particles */ + struct gravity_cache *const ci_cache = &r->ci_gravity_cache; + + /* Shift to apply to the particles in the cell */ + const double loc[3] = {c->loc[0] + 0.5 * c->width[0], + c->loc[1] + 0.5 * c->width[1], + c->loc[2] + 0.5 * c->width[2]}; + + /* Computed the padded counts */ + const int gcount = c->grav.count; + const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that we fit in cache */ + if (gcount > ci_cache->count) + error("Not enough space in the cache! gcount=%d", gcount); +#endif + + /* Fill the cache */ + gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, c->grav.parts, + gcount, gcount_padded, loc, c, + e->gravity_properties); + + /* Can we use the Newtonian version or do we need the truncated one ? */ + if (!periodic) { + + /* Not periodic -> Can always use Newtonian potential */ + runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e, + c->grav.parts); + + } else { + + /* Get the maximal distance between any two particles */ + const double max_r = 2. * c->grav.multipole->r_max; + + /* Do we need to use the truncated interactions ? 
*/ + if (max_r > min_trunc) { + + /* Periodic but far-away cells must use the truncated potential */ + runner_doself_grav_pp_truncated(ci_cache, gcount, gcount_padded, r_s_inv, + e, c->grav.parts); + + } else { + + /* Periodic but close-by cells can use the full Newtonian potential */ + runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e, + c->grav.parts); + } + } + + /* Write back to the particles */ + gravity_cache_write_back(ci_cache, c->grav.parts, gcount); + + TIMER_TOC(timer_doself_grav_pp); +} + +/** + * @brief Computes the interaction of the field tensor and multipole + * of two cells symmetrically. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + */ +static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r, + struct cell *restrict ci, + struct cell *restrict cj) { + + /* Some constants */ + const struct engine *e = r->e; + const struct gravity_props *props = e->gravity_properties; + const int periodic = e->mesh->periodic; + const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; + const float r_s_inv = e->mesh->r_s_inv; + + TIMER_TIC; + + /* Anything to do here? 
*/ + if ((!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) || + (!cell_is_active_gravity_mm(cj, e) || cj->nodeID != engine_rank)) + error("Invalid state in symmetric M-M calculation!"); + + /* Short-cut to the multipole */ + const struct multipole *multi_i = &ci->grav.multipole->m_pole; + const struct multipole *multi_j = &cj->grav.multipole->m_pole; + +#ifdef SWIFT_DEBUG_CHECKS + if (ci == cj) error("Interacting a cell with itself using M2L"); + + if (multi_i->num_gpart == 0) + error("Multipole i does not seem to have been set."); + + if (multi_j->num_gpart == 0) + error("Multipole j does not seem to have been set."); + + if (ci->grav.multipole->pot.ti_init != e->ti_current) + error("ci->grav tensor not initialised."); + + if (ci->grav.multipole->pot.ti_init != e->ti_current) + error("cj->grav tensor not initialised."); + + if (ci->grav.ti_old_multipole != e->ti_current) + error( + "Undrifted multipole ci->grav.ti_old_multipole=%lld ci->nodeID=%d " + "cj->nodeID=%d e->ti_current=%lld", + ci->grav.ti_old_multipole, ci->nodeID, cj->nodeID, e->ti_current); + + if (cj->grav.ti_old_multipole != e->ti_current) + error( + "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d " + "ci->nodeID=%d e->ti_current=%lld", + cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); +#endif + + /* Let's interact at this level */ + gravity_M2L_symmetric(&ci->grav.multipole->pot, &cj->grav.multipole->pot, + multi_i, multi_j, ci->grav.multipole->CoM, + cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); + + TIMER_TOC(timer_dopair_grav_mm); +} + +/** + * @brief Computes the interaction of the field tensor in a cell with the + * multipole of another cell. + * + * @param r The #runner. + * @param ci The #cell with field tensor to interact. + * @param cj The #cell with the multipole. 
+ */ +static INLINE void runner_dopair_grav_mm_nonsym( + struct runner *r, struct cell *restrict ci, + const struct cell *restrict cj) { + + /* Some constants */ + const struct engine *e = r->e; + const struct gravity_props *props = e->gravity_properties; + const int periodic = e->mesh->periodic; + const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; + const float r_s_inv = e->mesh->r_s_inv; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) return; + + /* Short-cut to the multipole */ + const struct multipole *multi_j = &cj->grav.multipole->m_pole; + +#ifdef SWIFT_DEBUG_CHECKS + if (ci == cj) error("Interacting a cell with itself using M2L"); + + if (multi_j->num_gpart == 0) + error("Multipole does not seem to have been set."); + + if (ci->grav.multipole->pot.ti_init != e->ti_current) + error("ci->grav tensor not initialised."); + + if (cj->grav.ti_old_multipole != e->ti_current) + error( + "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d " + "ci->nodeID=%d e->ti_current=%lld", + cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); +#endif + + /* Let's interact at this level */ + gravity_M2L_nonsym(&ci->grav.multipole->pot, multi_j, ci->grav.multipole->CoM, + cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); + + TIMER_TOC(timer_dopair_grav_mm); +} + +/** + * @brief Call the M-M calculation on two cells if active. + * + * @param r The #runner object. + * @param ci The first #cell. + * @param cj The second #cell. + */ +static INLINE void runner_dopair_grav_mm(struct runner *r, + struct cell *restrict ci, + struct cell *restrict cj) { + + const struct engine *e = r->e; + + /* What do we need to do? */ + const int do_i = + cell_is_active_gravity_mm(ci, e) && (ci->nodeID == e->nodeID); + const int do_j = + cell_is_active_gravity_mm(cj, e) && (cj->nodeID == e->nodeID); + + /* Do we need drifting first? 
*/ + if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e); + if (cj->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(cj, e); + + /* Interact! */ + if (do_i && do_j) + runner_dopair_grav_mm_symmetric(r, ci, cj); + else if (do_i) + runner_dopair_grav_mm_nonsym(r, ci, cj); + else if (do_j) + runner_dopair_grav_mm_nonsym(r, cj, ci); +} + +/** + * @brief Computes all the M-M interactions between all the well-separated (at + * rebuild) pairs of progenies of the two cells. + * + * @param r The #runner thread. + * @param flags The task flag containing the list of well-separated pairs as a + * bit-field. + * @param ci The first #cell. + * @param cj The second #cell. + */ +void runner_dopair_grav_mm_progenies(struct runner *r, const long long flags, + struct cell *restrict ci, + struct cell *restrict cj) { + + /* Loop over all pairs of progenies */ + for (int i = 0; i < 8; i++) { + if (ci->progeny[i] != NULL) { + for (int j = 0; j < 8; j++) { + if (cj->progeny[j] != NULL) { + + struct cell *cpi = ci->progeny[i]; + struct cell *cpj = cj->progeny[j]; + + const int flag = i * 8 + j; + + /* Did we agree to use an M-M interaction here at the last rebuild? */ + if (flags & (1ULL << flag)) runner_dopair_grav_mm(r, cpi, cpj); + } + } + } + } +} + +static INLINE void runner_dopair_recursive_grav_pm(struct runner *r, + struct cell *ci, + const struct cell *cj) { + /* Some constants */ + const struct engine *e = r->e; + const int periodic = e->mesh->periodic; + const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1], + (float)e->mesh->dim[2]}; + const float r_s_inv = e->mesh->r_s_inv; + + /* Anything to do here? */ + if (!(cell_is_active_gravity(ci, e) && ci->nodeID == e->nodeID)) return; + +#ifdef SWIFT_DEBUG_CHECKS + /* Early abort? 
*/ + if (ci->grav.count == 0 || cj->grav.count == 0) + error("Doing pair gravity on an empty cell !"); + + /* Sanity check */ + if (ci == cj) error("Pair interaction between a cell and itself."); + + if (cj->grav.ti_old_multipole != e->ti_current) + error("cj->grav.multipole not drifted."); +#endif + + /* Can we recurse further? */ + if (ci->split) { + + /* Loop over ci's children */ + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) + runner_dopair_recursive_grav_pm(r, ci->progeny[k], cj); + } + + /* Ok, let's do the interaction here */ + } else { + + /* Start by constructing particle caches */ + + /* Cache to play with */ + struct gravity_cache *const ci_cache = &r->ci_gravity_cache; + + /* Computed the padded counts */ + const int gcount_i = ci->grav.count; + const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that we fit in cache */ + if (gcount_i > ci_cache->count) + error("Not enough space in the cache! gcount_i=%d", gcount_i); +#endif + + /* Recover the multipole info and the CoM locations */ + const struct multipole *multi_j = &cj->grav.multipole->m_pole; + const float r_max = cj->grav.multipole->r_max; + const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0]), + (float)(cj->grav.multipole->CoM[1]), + (float)(cj->grav.multipole->CoM[2])}; + + /* Fill the cache */ + gravity_cache_populate_all_mpole( + e->max_active_bin, periodic, dim, ci_cache, ci->grav.parts, gcount_i, + gcount_padded_i, ci, CoM_j, r_max * r_max, e->gravity_properties); + + /* Can we use the Newtonian version or do we need the truncated one ? 
*/ + if (!periodic) { + + runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, + periodic, dim, e, ci->grav.parts, gcount_i, + cj); + + } else { + + runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, multi_j, + dim, r_s_inv, e, ci->grav.parts, gcount_i, + cj); + } + + /* Write back to the particles */ + gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i); + } +} + +/** + * @brief Computes the interaction of all the particles in a cell with all the + * particles of another cell. + * + * This function will try to recurse as far down the tree as possible and only + * default to direct summation if there is no better option. + * + * If using periodic BCs, we will abort the recursion if the distance between the + * cells is larger than the set threshold. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The other #cell. + * @param gettimer Are we timing this ? + */ +void runner_dopair_recursive_grav(struct runner *r, struct cell *ci, + struct cell *cj, int gettimer) { + + /* Some constants */ + const struct engine *e = r->e; + const int nodeID = e->nodeID; + const int periodic = e->mesh->periodic; + const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; + const double theta_crit2 = e->gravity_properties->theta_crit2; + const double max_distance = e->mesh->r_cut_max; + + /* Anything to do here? */ + if (!((cell_is_active_gravity(ci, e) && ci->nodeID == nodeID) || + (cell_is_active_gravity(cj, e) && cj->nodeID == nodeID))) + return; + +#ifdef SWIFT_DEBUG_CHECKS + + const int gcount_i = ci->grav.count; + const int gcount_j = cj->grav.count; + + /* Early abort?
*/ + if (gcount_i == 0 || gcount_j == 0) + error("Doing pair gravity on an empty cell !"); + + /* Sanity check */ + if (ci == cj) error("Pair interaction between a cell and itself."); + + if (cell_is_active_gravity(ci, e) && + ci->grav.ti_old_multipole != e->ti_current) + error("ci->grav.multipole not drifted."); + if (cell_is_active_gravity(cj, e) && + cj->grav.ti_old_multipole != e->ti_current) + error("cj->grav.multipole not drifted."); +#endif + + TIMER_TIC; + + /* Recover the multipole information */ + struct gravity_tensors *const multi_i = ci->grav.multipole; + struct gravity_tensors *const multi_j = cj->grav.multipole; + + /* Get the distance between the CoMs */ + double dx = multi_i->CoM[0] - multi_j->CoM[0]; + double dy = multi_i->CoM[1] - multi_j->CoM[1]; + double dz = multi_i->CoM[2] - multi_j->CoM[2]; + + /* Apply BC */ + if (periodic) { + dx = nearest(dx, dim[0]); + dy = nearest(dy, dim[1]); + dz = nearest(dz, dim[2]); + } + const double r2 = dx * dx + dy * dy + dz * dz; + + /* Minimal distance between any 2 particles in the two cells */ + const double r_lr_check = sqrt(r2) - (multi_i->r_max + multi_j->r_max); + + /* Are we beyond the distance where the truncated forces are 0? */ + if (periodic && r_lr_check > max_distance) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Need to account for the interactions we missed */ + if (cell_is_active_gravity(ci, e)) + multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; + if (cell_is_active_gravity(cj, e)) + multi_j->pot.num_interacted += multi_i->m_pole.num_gpart; +#endif + return; + } + + /* OK, we actually need to compute this pair. Let's find the cheapest + * option... */ + + /* Can we use M-M interactions ? */ + if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2, + multi_i->m_pole.max_softening, + multi_j->m_pole.max_softening)) { + + /* Go M-M */ + runner_dopair_grav_mm(r, ci, cj); + + } else if (!ci->split && !cj->split) { + + /* We have two leaves. Go P-P. 
*/ + runner_dopair_grav_pp(r, ci, cj, /*symmetric*/ 1, /*allow_mpoles*/ 1); + + } else { + + /* Alright, we'll have to split and recurse. */ + /* We know at least one of ci and cj is splittable */ + + const double ri_max = multi_i->r_max; + const double rj_max = multi_j->r_max; + + /* Split the larger of the two cells and start over again */ + if (ri_max > rj_max) { + + /* Can we actually split that interaction ? */ + if (ci->split) { + + /* Loop over ci's children */ + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) + runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0); + } + + } else { + /* cj is split */ + + /* MATTHIEU: This could maybe be replaced by P-M interactions ? */ + + /* Loop over cj's children */ + for (int k = 0; k < 8; k++) { + if (cj->progeny[k] != NULL) + runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0); + } + } + } else { + + /* Can we actually split that interaction ? */ + if (cj->split) { + + /* Loop over cj's children */ + for (int k = 0; k < 8; k++) { + if (cj->progeny[k] != NULL) + runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0); + } + + } else { + /* ci is split */ + + /* MATTHIEU: This could maybe be replaced by P-M interactions ? */ + + /* Loop over ci's children */ + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) + runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0); + } + } + } + } + + if (gettimer) TIMER_TOC(timer_dosub_pair_grav); +} + +/** + * @brief Computes the interaction of all the particles in a cell. + * + * This function will try to recurse as far down the tree as possible and only + * default to direct summation if there is no better option. + * + * @param r The #runner. + * @param c The first #cell. + * @param gettimer Are we timing this ? + */ +void runner_doself_recursive_grav(struct runner *r, struct cell *c, + int gettimer) { + + /* Some constants */ + const struct engine *e = r->e; + +#ifdef SWIFT_DEBUG_CHECKS + /* Early abort? 
*/ + if (c->grav.count == 0) error("Doing self gravity on an empty cell !"); +#endif + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_gravity(c, e)) return; + + /* If the cell is split, interact each progeny with itself, and with + each of its siblings. */ + if (c->split) { + + for (int j = 0; j < 8; j++) { + if (c->progeny[j] != NULL) { + + runner_doself_recursive_grav(r, c->progeny[j], 0); + + for (int k = j + 1; k < 8; k++) { + if (c->progeny[k] != NULL) { + + runner_dopair_recursive_grav(r, c->progeny[j], c->progeny[k], 0); + } + } + } + } + } + + /* If the cell is not split, then just go for it... */ + else { + + runner_doself_grav_pp(r, c); + } + + if (gettimer) TIMER_TOC(timer_dosub_self_grav); +} + +/** + * @brief Performs all M-M interactions between a given top-level cell and all + * the other top-levels that are far enough. + * + * @param r The thread #runner. + * @param ci The #cell of interest. + * @param timer Are we timing this ? + */ +void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { + + /* Some constants */ + const struct engine *e = r->e; + const int periodic = e->mesh->periodic; + const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; + const double theta_crit2 = e->gravity_properties->theta_crit2; + const double max_distance2 = e->mesh->r_cut_max * e->mesh->r_cut_max; + + TIMER_TIC; + + /* Recover the list of top-level cells */ + struct cell *cells = e->s->cells_top; + int *cells_with_particles = e->s->cells_with_particles_top; + const int nr_cells_with_particles = e->s->nr_cells_with_particles; + + /* Anything to do here? 
*/ + if (!cell_is_active_gravity(ci, e)) return; + + if (ci->nodeID != engine_rank) + error("Non-local cell in long-range gravity task!"); + + /* Drift the multipole to the current time if it is lagging behind */ + if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e); + + /* Get this cell's multipole information */ + struct gravity_tensors *const multi_i = ci->grav.multipole; + + /* Find this cell's top-level (great-)parent */ + struct cell *top = ci; + while (top->parent != NULL) top = top->parent; + + /* Recover the top-level multipole (for distance checks) */ + struct gravity_tensors *const multi_top = top->grav.multipole; + const double CoM_rebuild_top[3] = {multi_top->CoM_rebuild[0], + multi_top->CoM_rebuild[1], + multi_top->CoM_rebuild[2]}; + + /* Loop over all the top-level cells and go for a M-M interaction if + * well-separated */ + for (int n = 0; n < nr_cells_with_particles; ++n) { + + /* Handle on the top-level cell and its gravity business */ + const struct cell *cj = &cells[cells_with_particles[n]]; + const struct gravity_tensors *const multi_j = cj->grav.multipole; + + /* Avoid self contributions */ + if (top == cj) continue; + + /* Skip empty cells */ + if (multi_j->m_pole.M_000 == 0.f) continue; + + /* Can we escape early in the periodic BC case? */ + if (periodic) { + + /* Minimal distance between any pair of particles */ + const double min_radius2 = + cell_min_dist2_same_size(top, cj, periodic, dim); + + /* Are we beyond the distance where the truncated forces are 0 ? */ + if (min_radius2 > max_distance2) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Need to account for the interactions we missed */ + multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; +#endif + + /* Record that this multipole received a contribution */ + multi_i->pot.interacted = 1; + + /* We are done here.
*/ + continue; + } + } + + /* Get the distance between the CoMs at the last rebuild */ + double dx_r = CoM_rebuild_top[0] - multi_j->CoM_rebuild[0]; + double dy_r = CoM_rebuild_top[1] - multi_j->CoM_rebuild[1]; + double dz_r = CoM_rebuild_top[2] - multi_j->CoM_rebuild[2]; + + /* Apply BC */ + if (periodic) { + dx_r = nearest(dx_r, dim[0]); + dy_r = nearest(dy_r, dim[1]); + dz_r = nearest(dz_r, dim[2]); + } + const double r2_rebuild = dx_r * dx_r + dy_r * dy_r + dz_r * dz_r; + + /* Are we in charge of this cell pair? */ + if (gravity_M2L_accept(multi_top->r_max_rebuild, multi_j->r_max_rebuild, + theta_crit2, r2_rebuild, + multi_top->m_pole.max_softening, + multi_j->m_pole.max_softening)) { + + /* Call the non-symmetric M-M interaction function for ci (the P-M variant below is disabled) */ + runner_dopair_grav_mm_nonsym(r, ci, cj); + // runner_dopair_recursive_grav_pm(r, ci, cj); + + /* Record that this multipole received a contribution */ + multi_i->pot.interacted = 1; + + } /* We are in charge of this pair */ + } /* Loop over top-level cells */ + + if (timer) TIMER_TOC(timer_dograv_long_range); +} diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index b4ee8225a7aada8cf595ae7bca251d61b5226f64..34f3e9ec147574357620cc8f485889b87880f06e 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -20,1810 +20,30 @@ #ifndef SWIFT_RUNNER_DOIACT_GRAV_H #define SWIFT_RUNNER_DOIACT_GRAV_H -/* Includes. */ -#include "active.h" -#include "cell.h" -#include "gravity.h" -#include "gravity_cache.h" -#include "gravity_iact.h" -#include "inline.h" -#include "part.h" -#include "space_getsid.h" -#include "timers.h" +#include "../config.h" -/** - * @brief Recursively propagate the multipoles down the tree by applying the - * L2L and L2P kernels. - * - * @param r The #runner. - * @param c The #cell we are working on. - * @param timer Are we timing this ?
- */ -static INLINE void runner_do_grav_down(struct runner *r, struct cell *c, - int timer) { - - /* Some constants */ - const struct engine *e = r->e; - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->grav.ti_old_multipole != e->ti_current) - error("c->multipole not drifted."); - if (c->grav.multipole->pot.ti_init != e->ti_current) - error("c->field tensor not initialised"); -#endif - - if (c->split) { - - /* Node case */ - - /* Add the field-tensor to all the 8 progenitors */ - for (int k = 0; k < 8; ++k) { - struct cell *cp = c->progeny[k]; - - /* Do we have a progenitor with any active g-particles ? */ - if (cp != NULL && cell_is_active_gravity(cp, e)) { - -#ifdef SWIFT_DEBUG_CHECKS - if (cp->grav.ti_old_multipole != e->ti_current) - error("cp->multipole not drifted."); - if (cp->grav.multipole->pot.ti_init != e->ti_current) - error("cp->field tensor not initialised"); -#endif - /* If the tensor received any contribution, push it down */ - if (c->grav.multipole->pot.interacted) { - - struct grav_tensor shifted_tensor; - - /* Shift the field tensor */ - gravity_L2L(&shifted_tensor, &c->grav.multipole->pot, - cp->grav.multipole->CoM, c->grav.multipole->CoM); - - /* Add it to this level's tensor */ - gravity_field_tensors_add(&cp->grav.multipole->pot, &shifted_tensor); - } - - /* Recurse */ - runner_do_grav_down(r, cp, 0); - } - } - - } else { - - /* Leaf case */ - - /* We can abort early if no interactions via multipole happened */ - if (!c->grav.multipole->pot.interacted) return; - - if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts"); - - /* Cell properties */ - struct gpart *gparts = c->grav.parts; - const int gcount = c->grav.count; - const struct grav_tensor *pot = &c->grav.multipole->pot; - const double CoM[3] = {c->grav.multipole->CoM[0], c->grav.multipole->CoM[1], - c->grav.multipole->CoM[2]}; - - /* Apply accelerations to the particles */ - for (int i = 0; i < gcount; ++i) { - - /* Get a handle on the gpart */ - struct gpart *gp = 
&gparts[i]; - - /* Update if active */ - if (gpart_is_active(gp, e)) { - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (gp->ti_drift != e->ti_current) - error("gpart not drifted to current time"); - if (c->grav.multipole->pot.ti_init != e->ti_current) - error("c->field tensor not initialised"); - - /* Check that we are not updated an inhibited particle */ - if (gpart_is_inhibited(gp, e)) error("Updating an inhibited particle!"); - - /* Check that the particle was initialised */ - if (gp->initialised == 0) - error("Adding forces to an un-initialised gpart."); -#endif - /* Apply the kernel */ - gravity_L2P(pot, CoM, gp); - } - } - } - - if (timer) TIMER_TOC(timer_dograv_down); -} - -/** - * @brief Compute the non-truncated gravity interactions between all particles - * of a cell and the particles of the other cell. - * - * The calculation is performed non-symmetrically using the pre-filled - * #gravity_cache structures. The loop over the j cache should auto-vectorize. - * - * @param ci_cache #gravity_cache contaning the particles to be updated. - * @param cj_cache #gravity_cache contaning the source particles. - * @param gcount_i The number of particles in the cell i. - * @param gcount_padded_j The number of particles in the cell j padded to the - * vector length. - * @param periodic Is the calculation using periodic BCs ? - * @param dim The size of the simulation volume. - * - * @param e The #engine (for debugging checks only). - * @param gparts_i The #gpart in cell i (for debugging checks only). - * @param gparts_j The #gpart in cell j (for debugging checks only). - * @param gcount_j The number of particles in the cell j (for debugging checks - * only). 
- */ -static INLINE void runner_dopair_grav_pp_full( - struct gravity_cache *restrict ci_cache, - struct gravity_cache *restrict cj_cache, const int gcount_i, - const int gcount_j, const int gcount_padded_j, const int periodic, - const float dim[3], const struct engine *restrict e, - struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) { - - /* Loop over all particles in ci... */ - for (int pid = 0; pid < gcount_i; pid++) { - - /* Skip inactive particles */ - if (!ci_cache->active[pid]) continue; - - /* Skip particle that can use the multipole */ - if (ci_cache->use_mpole[pid]) continue; - -#ifdef SWIFT_DEBUG_CHECKS - if (!gpart_is_active(&gparts_i[pid], e)) - error("Inactive particle went through the cache"); -#endif - - const float x_i = ci_cache->x[pid]; - const float y_i = ci_cache->y[pid]; - const float z_i = ci_cache->z[pid]; - const float h_i = ci_cache->epsilon[pid]; - - /* Local accumulators for the acceleration and potential */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; - - /* Make the compiler understand we are in happy vectorization land */ - swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded_j, VEC_SIZE); - - /* Loop over every particle in the other cell. */ - for (int pjd = 0; pjd < gcount_padded_j; pjd++) { - - /* Get info about j */ - const float x_j = cj_cache->x[pjd]; - const float y_j = cj_cache->y[pjd]; - const float z_j = cj_cache->z[pjd]; - const float mass_j = cj_cache->m[pjd]; - const float h_j = cj_cache->epsilon[pjd]; - - /* Compute the pairwise distance. 
*/ - float dx = x_j - x_i; - float dy = y_j - y_i; - float dz = z_j - z_i; - - /* Correct for periodic BCs */ - if (periodic) { - dx = nearestf(dx, dim[0]); - dy = nearestf(dy, dim[1]); - dz = nearestf(dz, dim[2]); - } - - const float r2 = dx * dx + dy * dy + dz * dz; - - /* Pick the maximal softening length of i and j */ - const float h = max(h_i, h_j); - const float h2 = h * h; - const float h_inv = 1.f / h; - const float h_inv_3 = h_inv * h_inv * h_inv; - -#ifdef SWIFT_DEBUG_CHECKS - if (r2 == 0.f && h2 == 0.) - error("Interacting particles with 0 distance and 0 softening."); - - /* Check that particles have been drifted to the current time */ - if (gparts_i[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current && - !gpart_is_inhibited(&gparts_j[pjd], e)) - error("gpj not drifted to current time"); - - /* Check that we are not updated an inhibited particle */ - if (gpart_is_inhibited(&gparts_i[pid], e)) - error("Updating an inhibited particle!"); - - /* Check that the particle we interact with was not inhibited */ - if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) && - mass_j != 0.f) - error("Inhibited particle used as gravity source."); - - /* Check that the particle was initialised */ - if (gparts_i[pid].initialised == 0) - error("Adding forces to an un-initialised gpart."); -#endif - - /* Interact! 
*/ - float f_ij, pot_ij; - runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij); - - /* Store it back */ - a_x += f_ij * dx; - a_y += f_ij * dy; - a_z += f_ij * dz; - pot += pot_ij; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter if it's not a padded gpart */ - if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) - gparts_i[pid].num_interacted++; -#endif - } - - /* Store everything back in cache */ - ci_cache->a_x[pid] += a_x; - ci_cache->a_y[pid] += a_y; - ci_cache->a_z[pid] += a_z; - ci_cache->pot[pid] += pot; - } -} - -/** - * @brief Compute the truncated gravity interactions between all particles - * of a cell and the particles of the other cell. - * - * The calculation is performed non-symmetrically using the pre-filled - * #gravity_cache structures. The loop over the j cache should auto-vectorize. - * - * This function only makes sense in periodic BCs. - * - * @param ci_cache #gravity_cache contaning the particles to be updated. - * @param cj_cache #gravity_cache contaning the source particles. - * @param gcount_i The number of particles in the cell i. - * @param gcount_padded_j The number of particles in the cell j padded to the - * vector length. - * @param dim The size of the simulation volume. - * @param r_s_inv The inverse of the gravity-mesh smoothing-scale. - * - * @param e The #engine (for debugging checks only). - * @param gparts_i The #gpart in cell i (for debugging checks only). - * @param gparts_j The #gpart in cell j (for debugging checks only). - * @param gcount_j The number of particles in the cell j (for debugging checks - * only). 
- */ -static INLINE void runner_dopair_grav_pp_truncated( - struct gravity_cache *restrict ci_cache, - struct gravity_cache *restrict cj_cache, const int gcount_i, - const int gcount_j, const int gcount_padded_j, const float dim[3], - const float r_s_inv, const struct engine *restrict e, - struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) { - -#ifdef SWIFT_DEBUG_CHECKS - if (!e->s->periodic) - error("Calling truncated PP function in non-periodic setup."); -#endif - - /* Loop over all particles in ci... */ - for (int pid = 0; pid < gcount_i; pid++) { - - /* Skip inactive particles */ - if (!ci_cache->active[pid]) continue; - - /* Skip particle that can use the multipole */ - if (ci_cache->use_mpole[pid]) continue; - -#ifdef SWIFT_DEBUG_CHECKS - if (!gpart_is_active(&gparts_i[pid], e)) - error("Inactive particle went through the cache"); -#endif - - const float x_i = ci_cache->x[pid]; - const float y_i = ci_cache->y[pid]; - const float z_i = ci_cache->z[pid]; - const float h_i = ci_cache->epsilon[pid]; - - /* Local accumulators for the acceleration and potential */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; - - /* Make the compiler understand we are in happy vectorization land */ - swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded_j, VEC_SIZE); - - /* Loop over every particle in the other cell. 
*/ - for (int pjd = 0; pjd < gcount_padded_j; pjd++) { - - /* Get info about j */ - const float x_j = cj_cache->x[pjd]; - const float y_j = cj_cache->y[pjd]; - const float z_j = cj_cache->z[pjd]; - const float mass_j = cj_cache->m[pjd]; - const float h_j = cj_cache->epsilon[pjd]; - - /* Compute the pairwise distance. */ - float dx = x_j - x_i; - float dy = y_j - y_i; - float dz = z_j - z_i; - - /* Correct for periodic BCs */ - dx = nearestf(dx, dim[0]); - dy = nearestf(dy, dim[1]); - dz = nearestf(dz, dim[2]); - - const float r2 = dx * dx + dy * dy + dz * dz; - - /* Pick the maximal softening length of i and j */ - const float h = max(h_i, h_j); - const float h2 = h * h; - const float h_inv = 1.f / h; - const float h_inv_3 = h_inv * h_inv * h_inv; - -#ifdef SWIFT_DEBUG_CHECKS - if (r2 == 0.f && h2 == 0.) - error("Interacting particles with 0 distance and 0 softening."); - - /* Check that particles have been drifted to the current time */ - if (gparts_i[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current && - !gpart_is_inhibited(&gparts_j[pjd], e)) - error("gpj not drifted to current time"); - - /* Check that we are not updated an inhibited particle */ - if (gpart_is_inhibited(&gparts_i[pid], e)) - error("Updating an inhibited particle!"); - - /* Check that the particle we interact with was not inhibited */ - if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) && - mass_j != 0.f) - error("Inhibited particle used as gravity source."); - - /* Check that the particle was initialised */ - if (gparts_i[pid].initialised == 0) - error("Adding forces to an un-initialised gpart."); -#endif - - /* Interact! 
*/ - float f_ij, pot_ij; - runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv, - &f_ij, &pot_ij); - - /* Store it back */ - a_x += f_ij * dx; - a_y += f_ij * dy; - a_z += f_ij * dz; - pot += pot_ij; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter if it's not a padded gpart */ - if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) - gparts_i[pid].num_interacted++; -#endif - } - - /* Store everything back in cache */ - ci_cache->a_x[pid] += a_x; - ci_cache->a_y[pid] += a_y; - ci_cache->a_z[pid] += a_z; - ci_cache->pot[pid] += pot; - } -} - -/** - * @brief Compute the gravity interactions between all particles - * of a cell and the multipole of the other cell. - * - * The calculation is performedusing the pre-filled - * #gravity_cache structure. The loop over the i cache should auto-vectorize. - * - * @param ci_cache #gravity_cache contaning the particles to be updated. - * @param gcount_padded_i The number of particles in the cell i padded to the - * vector length. - * @param CoM_j Position of the #multipole in #cell j. - * @param multi_j The #multipole in #cell j. - * @param periodic Is the calculation using periodic BCs ? - * @param dim The size of the simulation volume. - * - * @param e The #engine (for debugging checks only). - * @param gparts_i The #gpart in cell i (for debugging checks only). - * @param gcount_i The number of particles in the cell i (for debugging checks - * only). - * @param cj The #cell j (for debugging checks only). 
- */ -static INLINE void runner_dopair_grav_pm_full( - struct gravity_cache *ci_cache, const int gcount_padded_i, - const float CoM_j[3], const struct multipole *restrict multi_j, - const int periodic, const float dim[3], const struct engine *restrict e, - struct gpart *restrict gparts_i, const int gcount_i, - const struct cell *restrict cj) { - - /* Make the compiler understand we are in happy vectorization land */ - swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon, - SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(int, active, ci_cache->active, - SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole, - SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded_i, VEC_SIZE); - - /* Loop over all particles in ci... 
*/ - for (int pid = 0; pid < gcount_padded_i; pid++) { - - /* Skip inactive particles */ - if (!active[pid]) continue; - - /* Skip particle that cannot use the multipole */ - if (!use_mpole[pid]) continue; - -#ifdef SWIFT_DEBUG_CHECKS - if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e)) - error("Active particle went through the cache"); - - /* Check that particles have been drifted to the current time */ - if (gparts_i[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); - - /* Check that we are not updated an inhibited particle */ - if (gpart_is_inhibited(&gparts_i[pid], e)) - error("Updating an inhibited particle!"); - - /* Check that the particle was initialised */ - if (gparts_i[pid].initialised == 0) - error("Adding forces to an un-initialised gpart."); - - if (pid >= gcount_i) error("Adding forces to padded particle"); -#endif - - const float x_i = x[pid]; - const float y_i = y[pid]; - const float z_i = z[pid]; - - /* Some powers of the softening length */ - const float h_i = epsilon[pid]; - const float h_inv_i = 1.f / h_i; - - /* Distance to the Multipole */ - float dx = CoM_j[0] - x_i; - float dy = CoM_j[1] - y_i; - float dz = CoM_j[2] - z_i; - - /* Apply periodic BCs? */ - if (periodic) { - dx = nearestf(dx, dim[0]); - dy = nearestf(dy, dim[1]); - dz = nearestf(dz, dim[2]); - } - - const float r2 = dx * dx + dy * dy + dz * dz; - -#ifdef SWIFT_DEBUG_CHECKS - const float r_max_j = cj->grav.multipole->r_max; - const float r_max2 = r_max_j * r_max_j; - const float theta_crit2 = e->gravity_properties->theta_crit2; - - /* Note: 0.99 and 1.1 to avoid FP rounding false-positives */ - if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i)) - error( - "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e " - "%e], rmax=%e r=%e epsilon=%e", - CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j, sqrtf(r2), h_i); -#endif - - /* Interact! 
*/ - float f_x, f_y, f_z, pot_ij; - runner_iact_grav_pm_full(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y, - &f_z, &pot_ij); - - /* Store it back */ - a_x[pid] += f_x; - a_y[pid] += f_y; - a_z[pid] += f_z; - pot[pid] += pot_ij; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter */ - if (pid < gcount_i) - gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart; -#endif - } -} - -/** - * @brief Compute the gravity interactions between all particles - * of a cell and the multipole of the other cell. - * - * The calculation is performedusing the pre-filled - * #gravity_cache structure. The loop over the i cache should auto-vectorize. - * - * This function only makes sense in periodic BCs. - * - * @param ci_cache #gravity_cache contaning the particles to be updated. - * @param gcount_padded_i The number of particles in the cell i padded to the - * vector length. - * @param CoM_j Position of the #multipole in #cell j. - * @param multi_j The #multipole in #cell j. - * @param dim The size of the simulation volume. - * @param r_s_inv The inverse of the gravity-mesh smoothing-scale. - * - * @param e The #engine (for debugging checks only). - * @param gparts_i The #gpart in cell i (for debugging checks only). - * @param gcount_i The number of particles in the cell i (for debugging checks - * only). - * @param cj The #cell j (for debugging checks only). 
- */ -static INLINE void runner_dopair_grav_pm_truncated( - struct gravity_cache *ci_cache, const int gcount_padded_i, - const float CoM_j[3], const struct multipole *restrict multi_j, - const float dim[3], const float r_s_inv, const struct engine *restrict e, - struct gpart *restrict gparts_i, const int gcount_i, - const struct cell *restrict cj) { - -#ifdef SWIFT_DEBUG_CHECKS - if (!e->s->periodic) - error("Calling truncated PP function in non-periodic setup."); -#endif - - /* Make the compiler understand we are in happy vectorization land */ - swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon, - SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(int, active, ci_cache->active, - SWIFT_CACHE_ALIGNMENT); - swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole, - SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded_i, VEC_SIZE); - - /* Loop over all particles in ci... 
*/ - for (int pid = 0; pid < gcount_padded_i; pid++) { - - /* Skip inactive particles */ - if (!active[pid]) continue; - - /* Skip particle that cannot use the multipole */ - if (!use_mpole[pid]) continue; - -#ifdef SWIFT_DEBUG_CHECKS - if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e)) - error("Active particle went through the cache"); - - /* Check that particles have been drifted to the current time */ - if (gparts_i[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); - - /* Check that we are not updated an inhibited particle */ - if (gpart_is_inhibited(&gparts_i[pid], e)) - error("Updating an inhibited particle!"); - - /* Check that the particle was initialised */ - if (gparts_i[pid].initialised == 0) - error("Adding forces to an un-initialised gpart."); - - if (pid >= gcount_i) error("Adding forces to padded particle"); -#endif - - const float x_i = x[pid]; - const float y_i = y[pid]; - const float z_i = z[pid]; - - /* Some powers of the softening length */ - const float h_i = epsilon[pid]; - const float h_inv_i = 1.f / h_i; - - /* Distance to the Multipole */ - float dx = CoM_j[0] - x_i; - float dy = CoM_j[1] - y_i; - float dz = CoM_j[2] - z_i; - - /* Apply periodic BCs */ - dx = nearestf(dx, dim[0]); - dy = nearestf(dy, dim[1]); - dz = nearestf(dz, dim[2]); - - const float r2 = dx * dx + dy * dy + dz * dz; - -#ifdef SWIFT_DEBUG_CHECKS - const float r_max_j = cj->grav.multipole->r_max; - const float r_max2 = r_max_j * r_max_j; - const float theta_crit2 = e->gravity_properties->theta_crit2; - - /* 0.99 and 1.1 to avoid FP rounding false-positives */ - if (!gravity_M2P_accept(r_max2, theta_crit2 * 1.1, r2, 0.99 * h_i)) - error( - "use_mpole[i] set when M2P accept fails CoM=[%e %e %e] pos=[%e %e " - "%e], rmax=%e", - CoM_j[0], CoM_j[1], CoM_j[2], x_i, y_i, z_i, r_max_j); -#endif - - /* Interact! 
*/ - float f_x, f_y, f_z, pot_ij; - runner_iact_grav_pm_truncated(dx, dy, dz, r2, h_i, h_inv_i, r_s_inv, - multi_j, &f_x, &f_y, &f_z, &pot_ij); - - /* Store it back */ - a_x[pid] += f_x; - a_y[pid] += f_y; - a_z[pid] += f_z; - pot[pid] += pot_ij; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter */ - if (pid < gcount_i) - gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart; -#endif - } -} - -/** - * @brief Computes the interaction of all the particles in a cell with all the - * particles of another cell. - * - * This function switches between the full potential and the truncated one - * depending on needs. It will also use the M2P (multipole) interaction - * for the subset of particles in either cell for which the distance criterion - * is valid. - * - * This function starts by constructing the require #gravity_cache for both - * cells and then call the specialised functions doing the actual work on - * the caches. It then write the data back to the particles. - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The other #cell. - * @param symmetric Are we updating both cells (1) or just ci (0) ? - * @param allow_mpole Are we allowing the use of P2M interactions ? - */ -static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, - struct cell *cj, const int symmetric, - const int allow_mpole) { - - /* Recover some useful constants */ - const struct engine *e = r->e; - const int periodic = e->mesh->periodic; - const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1], - (float)e->mesh->dim[2]}; - const float r_s_inv = e->mesh->r_s_inv; - const double min_trunc = e->mesh->r_cut_min; - - TIMER_TIC; - - /* Record activity status */ - const int ci_active = - cell_is_active_gravity(ci, e) && (ci->nodeID == e->nodeID); - const int cj_active = - cell_is_active_gravity(cj, e) && (cj->nodeID == e->nodeID); - - /* Anything to do here? 
*/ - if (!ci_active && !cj_active) return; - if (!ci_active && !symmetric) return; - - /* Check that we are not doing something stupid */ - if (ci->split || cj->split) error("Running P-P on splitable cells"); - - /* Let's start by checking things are drifted */ - if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts"); - if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts"); - if (cj_active && ci->grav.ti_old_multipole != e->ti_current) - error("Un-drifted multipole"); - if (ci_active && cj->grav.ti_old_multipole != e->ti_current) - error("Un-drifted multipole"); - - /* Caches to play with */ - struct gravity_cache *const ci_cache = &r->ci_gravity_cache; - struct gravity_cache *const cj_cache = &r->cj_gravity_cache; - - /* Shift to apply to the particles in each cell */ - const double shift_i[3] = {0., 0., 0.}; - const double shift_j[3] = {0., 0., 0.}; - - /* Recover the multipole info and shift the CoM locations */ - const float rmax_i = ci->grav.multipole->r_max; - const float rmax_j = cj->grav.multipole->r_max; - const float rmax2_i = rmax_i * rmax_i; - const float rmax2_j = rmax_j * rmax_j; - const struct multipole *multi_i = &ci->grav.multipole->m_pole; - const struct multipole *multi_j = &cj->grav.multipole->m_pole; - const float CoM_i[3] = {(float)(ci->grav.multipole->CoM[0] - shift_i[0]), - (float)(ci->grav.multipole->CoM[1] - shift_i[1]), - (float)(ci->grav.multipole->CoM[2] - shift_i[2])}; - const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0] - shift_j[0]), - (float)(cj->grav.multipole->CoM[1] - shift_j[1]), - (float)(cj->grav.multipole->CoM[2] - shift_j[2])}; - - /* Start by constructing particle caches */ - - /* Computed the padded counts */ - const int gcount_i = ci->grav.count; - const int gcount_j = cj->grav.count; - const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; - const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that we fit in cache */ - 
if (gcount_i > ci_cache->count || gcount_j > cj_cache->count) - error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i, - gcount_j); -#endif - - /* Fill the caches */ - gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim, - ci_cache, ci->grav.parts, gcount_i, gcount_padded_i, - shift_i, CoM_j, rmax2_j, ci, e->gravity_properties); - gravity_cache_populate(e->max_active_bin, allow_mpole, periodic, dim, - cj_cache, cj->grav.parts, gcount_j, gcount_padded_j, - shift_j, CoM_i, rmax2_i, cj, e->gravity_properties); - - /* Can we use the Newtonian version or do we need the truncated one ? */ - if (!periodic) { - - /* Not periodic -> Can always use Newtonian potential */ - - /* Let's updated the active cell(s) only */ - if (ci_active) { - - /* First the P2P */ - runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j, - gcount_padded_j, periodic, dim, e, - ci->grav.parts, cj->grav.parts); - - /* Then the M2P */ - if (allow_mpole) - runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, - periodic, dim, e, ci->grav.parts, gcount_i, - cj); - } - if (cj_active && symmetric) { - - /* First the P2P */ - runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i, - gcount_padded_i, periodic, dim, e, - cj->grav.parts, ci->grav.parts); - - /* Then the M2P */ - if (allow_mpole) - runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i, - periodic, dim, e, cj->grav.parts, gcount_j, - ci); - } - - } else { /* Periodic BC */ - - /* Get the relative distance between the CoMs */ - const double dx[3] = {CoM_j[0] - CoM_i[0], CoM_j[1] - CoM_i[1], - CoM_j[2] - CoM_i[2]}; - const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - - /* Get the maximal distance between any two particles */ - const double max_r = sqrt(r2) + rmax_i + rmax_j; - - /* Do we need to use the truncated interactions ? 
*/ - if (max_r > min_trunc) { - - /* Periodic but far-away cells must use the truncated potential */ - - /* Let's updated the active cell(s) only */ - if (ci_active) { - - /* First the (truncated) P2P */ - runner_dopair_grav_pp_truncated(ci_cache, cj_cache, gcount_i, gcount_j, - gcount_padded_j, dim, r_s_inv, e, - ci->grav.parts, cj->grav.parts); - - /* Then the M2P */ - if (allow_mpole) - runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, - multi_j, dim, r_s_inv, e, - ci->grav.parts, gcount_i, cj); - } - if (cj_active && symmetric) { - - /* First the (truncated) P2P */ - runner_dopair_grav_pp_truncated(cj_cache, ci_cache, gcount_j, gcount_i, - gcount_padded_i, dim, r_s_inv, e, - cj->grav.parts, ci->grav.parts); - - /* Then the M2P */ - if (allow_mpole) - runner_dopair_grav_pm_truncated(cj_cache, gcount_padded_j, CoM_i, - multi_i, dim, r_s_inv, e, - cj->grav.parts, gcount_j, ci); - } - - } else { - - /* Periodic but close-by cells can use the full Newtonian potential */ - - /* Let's updated the active cell(s) only */ - if (ci_active) { - - /* First the (Newtonian) P2P */ - runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j, - gcount_padded_j, periodic, dim, e, - ci->grav.parts, cj->grav.parts); - - /* Then the M2P */ - if (allow_mpole) - runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, - periodic, dim, e, ci->grav.parts, gcount_i, - cj); - } - if (cj_active && symmetric) { - - /* First the (Newtonian) P2P */ - runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i, - gcount_padded_i, periodic, dim, e, - cj->grav.parts, ci->grav.parts); - - /* Then the M2P */ - if (allow_mpole) - runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i, - periodic, dim, e, cj->grav.parts, gcount_j, - ci); - } - } - } - - /* Write back to the particles */ - if (ci_active) gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i); - if (cj_active && symmetric) - gravity_cache_write_back(cj_cache, 
cj->grav.parts, gcount_j); - - TIMER_TOC(timer_dopair_grav_pp); -} - -/** - * @brief Compute the non-truncated gravity interactions between all particles - * of a cell and the particles of the other cell. - * - * The calculation is performed non-symmetrically using the pre-filled - * #gravity_cache structures. The loop over the j cache should auto-vectorize. - * - * @param ci_cache #gravity_cache contaning the particles to be updated. - * @param gcount The number of particles in the cell. - * @param gcount_padded The number of particles in the cell padded to the - * vector length. - * - * @param e The #engine (for debugging checks only). - * @param gparts The #gpart in the cell (for debugging checks only). - */ -static INLINE void runner_doself_grav_pp_full( - struct gravity_cache *restrict ci_cache, const int gcount, - const int gcount_padded, const struct engine *e, struct gpart *gparts) { - - /* Loop over all particles in ci... */ - for (int pid = 0; pid < gcount; pid++) { - - /* Skip inactive particles */ - if (!ci_cache->active[pid]) continue; - - const float x_i = ci_cache->x[pid]; - const float y_i = ci_cache->y[pid]; - const float z_i = ci_cache->z[pid]; - const float h_i = ci_cache->epsilon[pid]; - - /* Local accumulators for the acceleration */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; - - /* Make the compiler understand we are in happy vectorization land */ - swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded, VEC_SIZE); - - /* Loop over every other particle in the cell. 
*/ - for (int pjd = 0; pjd < gcount_padded; pjd++) { - - /* No self interaction */ - if (pid == pjd) continue; - - /* Get info about j */ - const float x_j = ci_cache->x[pjd]; - const float y_j = ci_cache->y[pjd]; - const float z_j = ci_cache->z[pjd]; - const float mass_j = ci_cache->m[pjd]; - const float h_j = ci_cache->epsilon[pjd]; - - /* Compute the pairwise (square) distance. */ - /* Note: no need for periodic wrapping inside a cell */ - const float dx = x_j - x_i; - const float dy = y_j - y_i; - const float dz = z_j - z_i; - const float r2 = dx * dx + dy * dy + dz * dz; - - /* Pick the maximal softening length of i and j */ - const float h = max(h_i, h_j); - const float h2 = h * h; - const float h_inv = 1.f / h; - const float h_inv_3 = h_inv * h_inv * h_inv; - -#ifdef SWIFT_DEBUG_CHECKS - if (r2 == 0.f && h2 == 0.) - error("Interacting particles with 0 distance and 0 softening."); - - /* Check that particles have been drifted to the current time */ - if (gparts[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current && - !gpart_is_inhibited(&gparts[pjd], e)) - error("gpj not drifted to current time"); - - /* Check that we are not updated an inhibited particle */ - if (gpart_is_inhibited(&gparts[pid], e)) - error("Updating an inhibited particle!"); - - /* Check that the particle we interact with was not inhibited */ - if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f) - error("Inhibited particle used as gravity source."); - - /* Check that the particle was initialised */ - if (gparts[pid].initialised == 0) - error("Adding forces to an un-initialised gpart."); -#endif - - /* Interact! 
*/ - float f_ij, pot_ij; - runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij); - - /* Store it back */ - a_x += f_ij * dx; - a_y += f_ij * dy; - a_z += f_ij * dz; - pot += pot_ij; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter if it's not a padded gpart */ - if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) - gparts[pid].num_interacted++; -#endif - } - - /* Store everything back in cache */ - ci_cache->a_x[pid] += a_x; - ci_cache->a_y[pid] += a_y; - ci_cache->a_z[pid] += a_z; - ci_cache->pot[pid] += pot; - } -} - -/** - * @brief Compute the truncated gravity interactions between all particles - * of a cell and the particles of the other cell. - * - * The calculation is performed non-symmetrically using the pre-filled - * #gravity_cache structures. The loop over the j cache should auto-vectorize. - * - * This function only makes sense in periodic BCs. - * - * @param ci_cache #gravity_cache contaning the particles to be updated. - * @param gcount The number of particles in the cell. - * @param gcount_padded The number of particles in the cell padded to the - * vector length. - * @param r_s_inv The inverse of the gravity-mesh smoothing-scale. - * - * @param e The #engine (for debugging checks only). - * @param gparts The #gpart in the cell (for debugging checks only). - */ -static INLINE void runner_doself_grav_pp_truncated( - struct gravity_cache *restrict ci_cache, const int gcount, - const int gcount_padded, const float r_s_inv, const struct engine *e, - struct gpart *gparts) { - -#ifdef SWIFT_DEBUG_CHECKS - if (!e->s->periodic) - error("Calling truncated PP function in non-periodic setup."); -#endif - - /* Loop over all particles in ci... 
*/ - for (int pid = 0; pid < gcount; pid++) { - - /* Skip inactive particles */ - if (!ci_cache->active[pid]) continue; - - const float x_i = ci_cache->x[pid]; - const float y_i = ci_cache->y[pid]; - const float z_i = ci_cache->z[pid]; - const float h_i = ci_cache->epsilon[pid]; - - /* Local accumulators for the acceleration and potential */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; - - /* Make the compiler understand we are in happy vectorization land */ - swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT); - swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded, VEC_SIZE); - - /* Loop over every other particle in the cell. */ - for (int pjd = 0; pjd < gcount_padded; pjd++) { - - /* No self interaction */ - if (pid == pjd) continue; - - /* Get info about j */ - const float x_j = ci_cache->x[pjd]; - const float y_j = ci_cache->y[pjd]; - const float z_j = ci_cache->z[pjd]; - const float mass_j = ci_cache->m[pjd]; - const float h_j = ci_cache->epsilon[pjd]; - - /* Compute the pairwise (square) distance. */ - /* Note: no need for periodic wrapping inside a cell */ - const float dx = x_j - x_i; - const float dy = y_j - y_i; - const float dz = z_j - z_i; - - const float r2 = dx * dx + dy * dy + dz * dz; - - /* Pick the maximal softening length of i and j */ - const float h = max(h_i, h_j); - const float h2 = h * h; - const float h_inv = 1.f / h; - const float h_inv_3 = h_inv * h_inv * h_inv; - -#ifdef SWIFT_DEBUG_CHECKS - if (r2 == 0.f && h2 == 0.) 
- error("Interacting particles with 0 distance and 0 softening."); - - /* Check that particles have been drifted to the current time */ - if (gparts[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current && - !gpart_is_inhibited(&gparts[pjd], e)) - error("gpj not drifted to current time"); - - /* Check that we are not updated an inhibited particle */ - if (gpart_is_inhibited(&gparts[pid], e)) - error("Updating an inhibited particle!"); - - /* Check that the particle we interact with was not inhibited */ - if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f) - error("Inhibited particle used as gravity source."); - - /* Check that the particle was initialised */ - if (gparts[pid].initialised == 0) - error("Adding forces to an un-initialised gpart."); -#endif - - /* Interact! */ - float f_ij, pot_ij; - runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv, - &f_ij, &pot_ij); - - /* Store it back */ - a_x += f_ij * dx; - a_y += f_ij * dy; - a_z += f_ij * dz; - pot += pot_ij; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter if it's not a padded gpart */ - if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) - gparts[pid].num_interacted++; -#endif - } - - /* Store everything back in cache */ - ci_cache->a_x[pid] += a_x; - ci_cache->a_y[pid] += a_y; - ci_cache->a_z[pid] += a_z; - ci_cache->pot[pid] += pot; - } -} - -/** - * @brief Computes the interaction of all the particles in a cell with all the - * other ones. - * - * This function switches between the full potential and the truncated one - * depending on needs. - * - * This function starts by constructing the require #gravity_cache for the - * cell and then call the specialised functions doing the actual work on - * the cache. It then write the data back to the particles. - * - * @param r The #runner. - * @param c The #cell. 
- */ -static INLINE void runner_doself_grav_pp(struct runner *r, struct cell *c) { - - /* Recover some useful constants */ - const struct engine *e = r->e; - const int periodic = e->mesh->periodic; - const float r_s_inv = e->mesh->r_s_inv; - const double min_trunc = e->mesh->r_cut_min; - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - if (c->grav.count == 0) error("Doing self gravity on an empty cell !"); -#endif - - /* Anything to do here? */ - if (!cell_is_active_gravity(c, e)) return; - - /* Check that we are not doing something stupid */ - if (c->split) error("Running P-P on a splitable cell"); - - /* Do we need to start by drifting things ? */ - if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts"); - - /* Start by constructing a cache for the particles */ - struct gravity_cache *const ci_cache = &r->ci_gravity_cache; - - /* Shift to apply to the particles in the cell */ - const double loc[3] = {c->loc[0] + 0.5 * c->width[0], - c->loc[1] + 0.5 * c->width[1], - c->loc[2] + 0.5 * c->width[2]}; - - /* Computed the padded counts */ - const int gcount = c->grav.count; - const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that we fit in cache */ - if (gcount > ci_cache->count) - error("Not enough space in the cache! gcount=%d", gcount); -#endif - - /* Fill the cache */ - gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, c->grav.parts, - gcount, gcount_padded, loc, c, - e->gravity_properties); - - /* Can we use the Newtonian version or do we need the truncated one ? */ - if (!periodic) { - - /* Not periodic -> Can always use Newtonian potential */ - runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e, - c->grav.parts); - - } else { - - /* Get the maximal distance between any two particles */ - const double max_r = 2. * c->grav.multipole->r_max; - - /* Do we need to use the truncated interactions ? 
*/ - if (max_r > min_trunc) { - - /* Periodic but far-away cells must use the truncated potential */ - runner_doself_grav_pp_truncated(ci_cache, gcount, gcount_padded, r_s_inv, - e, c->grav.parts); - - } else { - - /* Periodic but close-by cells can use the full Newtonian potential */ - runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e, - c->grav.parts); - } - } - - /* Write back to the particles */ - gravity_cache_write_back(ci_cache, c->grav.parts, gcount); - - TIMER_TOC(timer_doself_grav_pp); -} - -/** - * @brief Computes the interaction of the field tensor and multipole - * of two cells symmetrically. - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The second #cell. - */ -static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r, - struct cell *restrict ci, - struct cell *restrict cj) { - - /* Some constants */ - const struct engine *e = r->e; - const struct gravity_props *props = e->gravity_properties; - const int periodic = e->mesh->periodic; - const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; - const float r_s_inv = e->mesh->r_s_inv; - - TIMER_TIC; - - /* Anything to do here? 
*/ - if ((!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) || - (!cell_is_active_gravity_mm(cj, e) || cj->nodeID != engine_rank)) - error("Invalid state in symmetric M-M calculation!"); - - /* Short-cut to the multipole */ - const struct multipole *multi_i = &ci->grav.multipole->m_pole; - const struct multipole *multi_j = &cj->grav.multipole->m_pole; - -#ifdef SWIFT_DEBUG_CHECKS - if (ci == cj) error("Interacting a cell with itself using M2L"); - - if (multi_i->num_gpart == 0) - error("Multipole i does not seem to have been set."); - - if (multi_j->num_gpart == 0) - error("Multipole j does not seem to have been set."); - - if (ci->grav.multipole->pot.ti_init != e->ti_current) - error("ci->grav tensor not initialised."); - - if (ci->grav.multipole->pot.ti_init != e->ti_current) - error("cj->grav tensor not initialised."); - - if (ci->grav.ti_old_multipole != e->ti_current) - error( - "Undrifted multipole ci->grav.ti_old_multipole=%lld ci->nodeID=%d " - "cj->nodeID=%d e->ti_current=%lld", - ci->grav.ti_old_multipole, ci->nodeID, cj->nodeID, e->ti_current); - - if (cj->grav.ti_old_multipole != e->ti_current) - error( - "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d " - "ci->nodeID=%d e->ti_current=%lld", - cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); -#endif - - /* Let's interact at this level */ - gravity_M2L_symmetric(&ci->grav.multipole->pot, &cj->grav.multipole->pot, - multi_i, multi_j, ci->grav.multipole->CoM, - cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); - - TIMER_TOC(timer_dopair_grav_mm); -} - -/** - * @brief Computes the interaction of the field tensor in a cell with the - * multipole of another cell. - * - * @param r The #runner. - * @param ci The #cell with field tensor to interact. - * @param cj The #cell with the multipole. 
- */ -static INLINE void runner_dopair_grav_mm_nonsym( - struct runner *r, struct cell *restrict ci, - const struct cell *restrict cj) { - - /* Some constants */ - const struct engine *e = r->e; - const struct gravity_props *props = e->gravity_properties; - const int periodic = e->mesh->periodic; - const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; - const float r_s_inv = e->mesh->r_s_inv; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) return; - - /* Short-cut to the multipole */ - const struct multipole *multi_j = &cj->grav.multipole->m_pole; - -#ifdef SWIFT_DEBUG_CHECKS - if (ci == cj) error("Interacting a cell with itself using M2L"); - - if (multi_j->num_gpart == 0) - error("Multipole does not seem to have been set."); - - if (ci->grav.multipole->pot.ti_init != e->ti_current) - error("ci->grav tensor not initialised."); - - if (cj->grav.ti_old_multipole != e->ti_current) - error( - "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d " - "ci->nodeID=%d e->ti_current=%lld", - cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); -#endif - - /* Let's interact at this level */ - gravity_M2L_nonsym(&ci->grav.multipole->pot, multi_j, ci->grav.multipole->CoM, - cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); - - TIMER_TOC(timer_dopair_grav_mm); -} - -/** - * @brief Call the M-M calculation on two cells if active. - * - * @param r The #runner object. - * @param ci The first #cell. - * @param cj The second #cell. - */ -static INLINE void runner_dopair_grav_mm(struct runner *r, - struct cell *restrict ci, - struct cell *restrict cj) { - - const struct engine *e = r->e; - - /* What do we need to do? */ - const int do_i = - cell_is_active_gravity_mm(ci, e) && (ci->nodeID == e->nodeID); - const int do_j = - cell_is_active_gravity_mm(cj, e) && (cj->nodeID == e->nodeID); - - /* Do we need drifting first? 
*/ - if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e); - if (cj->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(cj, e); - - /* Interact! */ - if (do_i && do_j) - runner_dopair_grav_mm_symmetric(r, ci, cj); - else if (do_i) - runner_dopair_grav_mm_nonsym(r, ci, cj); - else if (do_j) - runner_dopair_grav_mm_nonsym(r, cj, ci); -} - -/** - * @brief Computes all the M-M interactions between all the well-separated (at - * rebuild) pairs of progenies of the two cells. - * - * @param r The #runner thread. - * @param flags The task flag containing the list of well-separated pairs as a - * bit-field. - * @param ci The first #cell. - * @param cj The second #cell. - */ -static INLINE void runner_dopair_grav_mm_progenies(struct runner *r, - const long long flags, - struct cell *restrict ci, - struct cell *restrict cj) { - - /* Loop over all pairs of progenies */ - for (int i = 0; i < 8; i++) { - if (ci->progeny[i] != NULL) { - for (int j = 0; j < 8; j++) { - if (cj->progeny[j] != NULL) { - - struct cell *cpi = ci->progeny[i]; - struct cell *cpj = cj->progeny[j]; - - const int flag = i * 8 + j; - - /* Did we agree to use an M-M interaction here at the last rebuild? */ - if (flags & (1ULL << flag)) runner_dopair_grav_mm(r, cpi, cpj); - } - } - } - } -} - -static INLINE void runner_dopair_recursive_grav_pm(struct runner *r, - struct cell *ci, - const struct cell *cj) { - /* Some constants */ - const struct engine *e = r->e; - const int periodic = e->mesh->periodic; - const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1], - (float)e->mesh->dim[2]}; - const float r_s_inv = e->mesh->r_s_inv; - - /* Anything to do here? */ - if (!(cell_is_active_gravity(ci, e) && ci->nodeID == e->nodeID)) return; - -#ifdef SWIFT_DEBUG_CHECKS - /* Early abort? 
*/ - if (ci->grav.count == 0 || cj->grav.count == 0) - error("Doing pair gravity on an empty cell !"); - - /* Sanity check */ - if (ci == cj) error("Pair interaction between a cell and itself."); - - if (cj->grav.ti_old_multipole != e->ti_current) - error("cj->grav.multipole not drifted."); -#endif - - /* Can we recurse further? */ - if (ci->split) { - - /* Loop over ci's children */ - for (int k = 0; k < 8; k++) { - if (ci->progeny[k] != NULL) - runner_dopair_recursive_grav_pm(r, ci->progeny[k], cj); - } - - /* Ok, let's do the interaction here */ - } else { - - /* Start by constructing particle caches */ - - /* Cache to play with */ - struct gravity_cache *const ci_cache = &r->ci_gravity_cache; - - /* Computed the padded counts */ - const int gcount_i = ci->grav.count; - const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that we fit in cache */ - if (gcount_i > ci_cache->count) - error("Not enough space in the cache! gcount_i=%d", gcount_i); -#endif - - /* Recover the multipole info and the CoM locations */ - const struct multipole *multi_j = &cj->grav.multipole->m_pole; - const float r_max = cj->grav.multipole->r_max; - const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0]), - (float)(cj->grav.multipole->CoM[1]), - (float)(cj->grav.multipole->CoM[2])}; - - /* Fill the cache */ - gravity_cache_populate_all_mpole( - e->max_active_bin, periodic, dim, ci_cache, ci->grav.parts, gcount_i, - gcount_padded_i, ci, CoM_j, r_max * r_max, e->gravity_properties); - - /* Can we use the Newtonian version or do we need the truncated one ? 
*/ - if (!periodic) { - - runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, - periodic, dim, e, ci->grav.parts, gcount_i, - cj); - - } else { - - runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, multi_j, - dim, r_s_inv, e, ci->grav.parts, gcount_i, - cj); - } - - /* Write back to the particles */ - gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i); - } -} - -/** - * @brief Computes the interaction of all the particles in a cell with all the - * particles of another cell. - * - * This function will try to recurse as far down the tree as possible and only - * default to direct summation if there is no better option. - * - * If using periodic BCs, we will abort the recursion if th distance between the - * cells is larger than the set threshold. - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The other #cell. - * @param gettimer Are we timing this ? - */ -static INLINE void runner_dopair_recursive_grav(struct runner *r, - struct cell *ci, - struct cell *cj, int gettimer) { - - /* Some constants */ - const struct engine *e = r->e; - const int nodeID = e->nodeID; - const int periodic = e->mesh->periodic; - const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; - const double theta_crit2 = e->gravity_properties->theta_crit2; - const double max_distance = e->mesh->r_cut_max; - - /* Anything to do here? */ - if (!((cell_is_active_gravity(ci, e) && ci->nodeID == nodeID) || - (cell_is_active_gravity(cj, e) && cj->nodeID == nodeID))) - return; - -#ifdef SWIFT_DEBUG_CHECKS - - const int gcount_i = ci->grav.count; - const int gcount_j = cj->grav.count; - - /* Early abort? 
*/ - if (gcount_i == 0 || gcount_j == 0) - error("Doing pair gravity on an empty cell !"); - - /* Sanity check */ - if (ci == cj) error("Pair interaction between a cell and itself."); - - if (cell_is_active_gravity(ci, e) && - ci->grav.ti_old_multipole != e->ti_current) - error("ci->grav.multipole not drifted."); - if (cell_is_active_gravity(cj, e) && - cj->grav.ti_old_multipole != e->ti_current) - error("cj->grav.multipole not drifted."); -#endif - - TIMER_TIC; - - /* Recover the multipole information */ - struct gravity_tensors *const multi_i = ci->grav.multipole; - struct gravity_tensors *const multi_j = cj->grav.multipole; - - /* Get the distance between the CoMs */ - double dx = multi_i->CoM[0] - multi_j->CoM[0]; - double dy = multi_i->CoM[1] - multi_j->CoM[1]; - double dz = multi_i->CoM[2] - multi_j->CoM[2]; - - /* Apply BC */ - if (periodic) { - dx = nearest(dx, dim[0]); - dy = nearest(dy, dim[1]); - dz = nearest(dz, dim[2]); - } - const double r2 = dx * dx + dy * dy + dz * dz; - - /* Minimal distance between any 2 particles in the two cells */ - const double r_lr_check = sqrt(r2) - (multi_i->r_max + multi_j->r_max); - - /* Are we beyond the distance where the truncated forces are 0? */ - if (periodic && r_lr_check > max_distance) { - -#ifdef SWIFT_DEBUG_CHECKS - /* Need to account for the interactions we missed */ - if (cell_is_active_gravity(ci, e)) - multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; - if (cell_is_active_gravity(cj, e)) - multi_j->pot.num_interacted += multi_i->m_pole.num_gpart; -#endif - return; - } - - /* OK, we actually need to compute this pair. Let's find the cheapest - * option... */ - - /* Can we use M-M interactions ? */ - if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2, - multi_i->m_pole.max_softening, - multi_j->m_pole.max_softening)) { - - /* Go M-M */ - runner_dopair_grav_mm(r, ci, cj); - - } else if (!ci->split && !cj->split) { - - /* We have two leaves. Go P-P. 
*/ - runner_dopair_grav_pp(r, ci, cj, /*symmetric*/ 1, /*allow_mpoles*/ 1); - - } else { - - /* Alright, we'll have to split and recurse. */ - /* We know at least one of ci and cj is splittable */ - - const double ri_max = multi_i->r_max; - const double rj_max = multi_j->r_max; - - /* Split the larger of the two cells and start over again */ - if (ri_max > rj_max) { - - /* Can we actually split that interaction ? */ - if (ci->split) { - - /* Loop over ci's children */ - for (int k = 0; k < 8; k++) { - if (ci->progeny[k] != NULL) - runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0); - } - - } else { - /* cj is split */ - - /* MATTHIEU: This could maybe be replaced by P-M interactions ? */ - - /* Loop over cj's children */ - for (int k = 0; k < 8; k++) { - if (cj->progeny[k] != NULL) - runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0); - } - } - } else { - - /* Can we actually split that interaction ? */ - if (cj->split) { - - /* Loop over cj's children */ - for (int k = 0; k < 8; k++) { - if (cj->progeny[k] != NULL) - runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0); - } - - } else { - /* ci is split */ - - /* MATTHIEU: This could maybe be replaced by P-M interactions ? */ - - /* Loop over ci's children */ - for (int k = 0; k < 8; k++) { - if (ci->progeny[k] != NULL) - runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0); - } - } - } - } - - if (gettimer) TIMER_TOC(timer_dosub_pair_grav); -} - -/** - * @brief Computes the interaction of all the particles in a cell. - * - * This function will try to recurse as far down the tree as possible and only - * default to direct summation if there is no better option. - * - * @param r The #runner. - * @param c The first #cell. - * @param gettimer Are we timing this ? - */ -static INLINE void runner_doself_recursive_grav(struct runner *r, - struct cell *c, int gettimer) { - - /* Some constants */ - const struct engine *e = r->e; - -#ifdef SWIFT_DEBUG_CHECKS - /* Early abort? 
*/ - if (c->grav.count == 0) error("Doing self gravity on an empty cell !"); -#endif - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active_gravity(c, e)) return; - - /* If the cell is split, interact each progeny with itself, and with - each of its siblings. */ - if (c->split) { - - for (int j = 0; j < 8; j++) { - if (c->progeny[j] != NULL) { - - runner_doself_recursive_grav(r, c->progeny[j], 0); - - for (int k = j + 1; k < 8; k++) { - if (c->progeny[k] != NULL) { - - runner_dopair_recursive_grav(r, c->progeny[j], c->progeny[k], 0); - } - } - } - } - } - - /* If the cell is not split, then just go for it... */ - else { - - runner_doself_grav_pp(r, c); - } - - if (gettimer) TIMER_TOC(timer_dosub_self_grav); -} - -/** - * @brief Performs all M-M interactions between a given top-level cell and all - * the other top-levels that are far enough. - * - * @param r The thread #runner. - * @param ci The #cell of interest. - * @param timer Are we timing this ? - */ -static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, - int timer) { - - /* Some constants */ - const struct engine *e = r->e; - const int periodic = e->mesh->periodic; - const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; - const double theta_crit2 = e->gravity_properties->theta_crit2; - const double max_distance2 = e->mesh->r_cut_max * e->mesh->r_cut_max; - - TIMER_TIC; - - /* Recover the list of top-level cells */ - struct cell *cells = e->s->cells_top; - int *cells_with_particles = e->s->cells_with_particles_top; - const int nr_cells_with_particles = e->s->nr_cells_with_particles; - - /* Anything to do here? 
*/ - if (!cell_is_active_gravity(ci, e)) return; - - if (ci->nodeID != engine_rank) - error("Non-local cell in long-range gravity task!"); - - /* Check multipole has been drifted */ - if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e); - - /* Get this cell's multipole information */ - struct gravity_tensors *const multi_i = ci->grav.multipole; - - /* Find this cell's top-level (great-)parent */ - struct cell *top = ci; - while (top->parent != NULL) top = top->parent; - - /* Recover the top-level multipole (for distance checks) */ - struct gravity_tensors *const multi_top = top->grav.multipole; - const double CoM_rebuild_top[3] = {multi_top->CoM_rebuild[0], - multi_top->CoM_rebuild[1], - multi_top->CoM_rebuild[2]}; - - /* Loop over all the top-level cells and go for a M-M interaction if - * well-separated */ - for (int n = 0; n < nr_cells_with_particles; ++n) { - - /* Handle on the top-level cell and it's gravity business*/ - const struct cell *cj = &cells[cells_with_particles[n]]; - const struct gravity_tensors *const multi_j = cj->grav.multipole; - - /* Avoid self contributions */ - if (top == cj) continue; - - /* Skip empty cells */ - if (multi_j->m_pole.M_000 == 0.f) continue; - - /* Can we escape early in the periodic BC case? */ - if (periodic) { - - /* Minimal distance between any pair of particles */ - const double min_radius2 = - cell_min_dist2_same_size(top, cj, periodic, dim); - - /* Are we beyond the distance where the truncated forces are 0 ?*/ - if (min_radius2 > max_distance2) { - -#ifdef SWIFT_DEBUG_CHECKS - /* Need to account for the interactions we missed */ - multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; -#endif - - /* Record that this multipole received a contribution */ - multi_i->pot.interacted = 1; +struct runner; +struct cell; - /* We are done here. 
*/ - continue; - } - } +void runner_do_grav_down(struct runner *r, struct cell *c, int timer); - /* Get the distance between the CoMs at the last rebuild*/ - double dx_r = CoM_rebuild_top[0] - multi_j->CoM_rebuild[0]; - double dy_r = CoM_rebuild_top[1] - multi_j->CoM_rebuild[1]; - double dz_r = CoM_rebuild_top[2] - multi_j->CoM_rebuild[2]; +void runner_doself_recursive_grav(struct runner *r, struct cell *c, + int gettimer); - /* Apply BC */ - if (periodic) { - dx_r = nearest(dx_r, dim[0]); - dy_r = nearest(dy_r, dim[1]); - dz_r = nearest(dz_r, dim[2]); - } - const double r2_rebuild = dx_r * dx_r + dy_r * dy_r + dz_r * dz_r; +void runner_dopair_recursive_grav(struct runner *r, struct cell *ci, + struct cell *cj, int gettimer); - /* Are we in charge of this cell pair? */ - if (gravity_M2L_accept(multi_top->r_max_rebuild, multi_j->r_max_rebuild, - theta_crit2, r2_rebuild, - multi_top->m_pole.max_softening, - multi_j->m_pole.max_softening)) { +void runner_dopair_grav_mm_progenies(struct runner *r, const long long flags, + struct cell *restrict ci, + struct cell *restrict cj); - /* Call the PM interaction fucntion on the active sub-cells of ci */ - runner_dopair_grav_mm_nonsym(r, ci, cj); - // runner_dopair_recursive_grav_pm(r, ci, cj); +void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer); - /* Record that this multipole received a contribution */ - multi_i->pot.interacted = 1; +/* Internal functions (for unit tests and debugging) */ - } /* We are in charge of this pair */ - } /* Loop over top-level cells */ +void runner_doself_grav_pp(struct runner *r, struct cell *c); - if (timer) TIMER_TOC(timer_dograv_long_range); -} +void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, + const int symmetric, const int allow_mpole); #endif /* SWIFT_RUNNER_DOIACT_GRAV_H */ diff --git a/src/runner_doiact_hydro.c b/src/runner_doiact_hydro.c new file mode 100644 index 
0000000000000000000000000000000000000000..480ea59f0a536aa340b7e4d8f838bef3a0cca072 --- /dev/null +++ b/src/runner_doiact_hydro.c @@ -0,0 +1,63 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Local headers. */ +#include "active.h" +#include "cell.h" +#include "engine.h" +#include "pressure_floor_iact.h" +#include "runner.h" +#include "runner_doiact_hydro_vec.h" +#include "space_getsid.h" +#include "timers.h" + +/* Import the density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_functions_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP + +/* Import the gradient loop functions (if required). */ +#ifdef EXTRA_HYDRO_LOOP +#define FUNCTION gradient +#define FUNCTION_TASK_LOOP TASK_LOOP_GRADIENT +#include "runner_doiact_functions_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP +#endif + +/* Import the force loop functions. 
*/ +#define FUNCTION force +#define FUNCTION_TASK_LOOP TASK_LOOP_FORCE +#include "runner_doiact_functions_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP + +/* Import the limiter loop functions. */ +#define FUNCTION limiter +#define FUNCTION_TASK_LOOP TASK_LOOP_LIMITER +#include "runner_doiact_functions_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP diff --git a/src/runner_doiact_hydro.h b/src/runner_doiact_hydro.h new file mode 100644 index 0000000000000000000000000000000000000000..1fd54c1037e2d0b9c7a671311cfee4720ebe8d84 --- /dev/null +++ b/src/runner_doiact_hydro.h @@ -0,0 +1,151 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Before including this file, define FUNCTION, which is the + name of the interaction function. This creates the interaction functions + runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION, + and runner_dosub_FUNCTION calling the pairwise interaction function + runner_iact_FUNCTION. 
*/ + +#define PASTE(x, y) x##_##y + +#define _DOPAIR1_BRANCH(f) PASTE(runner_dopair1_branch, f) +#define DOPAIR1_BRANCH _DOPAIR1_BRANCH(FUNCTION) + +#define _DOPAIR1(f) PASTE(runner_dopair1, f) +#define DOPAIR1 _DOPAIR1(FUNCTION) + +#define _DOPAIR2_BRANCH(f) PASTE(runner_dopair2_branch, f) +#define DOPAIR2_BRANCH _DOPAIR2_BRANCH(FUNCTION) + +#define _DOPAIR2(f) PASTE(runner_dopair2, f) +#define DOPAIR2 _DOPAIR2(FUNCTION) + +#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f) +#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION) + +#define _DOPAIR_SUBSET_BRANCH(f) PASTE(runner_dopair_subset_branch, f) +#define DOPAIR_SUBSET_BRANCH _DOPAIR_SUBSET_BRANCH(FUNCTION) + +#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f) +#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION) + +#define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f) +#define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION) + +#define _DOPAIR1_NAIVE(f) PASTE(runner_dopair1_naive, f) +#define DOPAIR1_NAIVE _DOPAIR1_NAIVE(FUNCTION) + +#define _DOPAIR2_NAIVE(f) PASTE(runner_dopair2_naive, f) +#define DOPAIR2_NAIVE _DOPAIR2_NAIVE(FUNCTION) + +#define _DOSELF1_NAIVE(f) PASTE(runner_doself1_naive, f) +#define DOSELF1_NAIVE _DOSELF1_NAIVE(FUNCTION) + +#define _DOSELF2_NAIVE(f) PASTE(runner_doself2_naive, f) +#define DOSELF2_NAIVE _DOSELF2_NAIVE(FUNCTION) + +#define _DOSELF1_BRANCH(f) PASTE(runner_doself1_branch, f) +#define DOSELF1_BRANCH _DOSELF1_BRANCH(FUNCTION) + +#define _DOSELF1(f) PASTE(runner_doself1, f) +#define DOSELF1 _DOSELF1(FUNCTION) + +#define _DOSELF2_BRANCH(f) PASTE(runner_doself2_branch, f) +#define DOSELF2_BRANCH _DOSELF2_BRANCH(FUNCTION) + +#define _DOSELF2(f) PASTE(runner_doself2, f) +#define DOSELF2 _DOSELF2(FUNCTION) + +#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f) +#define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION) + +#define _DOSELF_SUBSET_BRANCH(f) PASTE(runner_doself_subset_branch, f) +#define DOSELF_SUBSET_BRANCH 
_DOSELF_SUBSET_BRANCH(FUNCTION) + +#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f) +#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION) + +#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f) +#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION) + +#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f) +#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION) + +#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f) +#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION) + +#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f) +#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION) + +#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym, f) +#define IACT_NONSYM _IACT_NONSYM(FUNCTION) + +#define _IACT(f) PASTE(runner_iact, f) +#define IACT _IACT(FUNCTION) + +#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f) +#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION) + +#define _IACT_VEC(f) PASTE(runner_iact_vec, f) +#define IACT_VEC _IACT_VEC(FUNCTION) + +#define _TIMER_DOSELF(f) PASTE(timer_doself, f) +#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION) + +#define _TIMER_DOPAIR(f) PASTE(timer_dopair, f) +#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION) + +#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f) +#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION) + +#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f) +#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION) + +#define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f) +#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION) + +#define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f) +#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION) + +void DOSELF1_BRANCH(struct runner *r, struct cell *c); +void DOSELF2_BRANCH(struct runner *r, struct cell *c); + +void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj); +void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj); + +void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer); +void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer); + +void 
DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, + int gettimer); +void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, + int gettimer); + +void DOSELF_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci, + struct part *restrict parts, int *restrict ind, + int count); + +void DOPAIR_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci, + struct part *restrict parts_i, int *restrict ind, + int count, struct cell *restrict cj); + +void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, + int *ind, int count, struct cell *cj, int gettimer); diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_hydro_vec.c similarity index 99% rename from src/runner_doiact_vec.c rename to src/runner_doiact_hydro_vec.c index 68f34b0d3b8fc9c79097522f8a1618f86957612e..59401e4050dcb4481d1c56aa8857106558a06880 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_hydro_vec.c @@ -21,7 +21,7 @@ #include "../config.h" /* This object's header. */ -#include "runner_doiact_vec.h" +#include "runner_doiact_hydro_vec.h" #if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) diff --git a/src/runner_doiact_vec.h b/src/runner_doiact_hydro_vec.h similarity index 100% rename from src/runner_doiact_vec.h rename to src/runner_doiact_hydro_vec.h diff --git a/src/runner_doiact_stars.c b/src/runner_doiact_stars.c new file mode 100644 index 0000000000000000000000000000000000000000..1e1267df5195f727a19252b6ee654629e23149b6 --- /dev/null +++ b/src/runner_doiact_stars.c @@ -0,0 +1,47 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. 
Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Local headers. */ +#include "active.h" +#include "cell.h" +#include "engine.h" +#include "feedback.h" +#include "runner.h" +#include "space_getsid.h" +#include "stars.h" +#include "timers.h" + +/* Import the stars density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_functions_stars.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/* Import the stars feedback loop functions. */ +#define FUNCTION feedback +#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK +#include "runner_doiact_functions_stars.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION diff --git a/src/runner_doiact_stars.h b/src/runner_doiact_stars.h index 7e9780def83bbdbab83a431a757a52f3ba51d2e4..2d41d5a0bd1b1003039e1795eec205889b46baf6 100644 --- a/src/runner_doiact_stars.h +++ b/src/runner_doiact_stars.h @@ -86,1307 +86,21 @@ #define _IACT_STARS(f) PASTE(runner_iact_nonsym_stars, f) #define IACT_STARS _IACT_STARS(FUNCTION) -/** - * @brief Calculate the number density of #part around the #spart - * - * @param r runner task - * @param c cell - * @param timer 1 if the time is to be recorded. 
- */ -void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) { - -#ifdef SWIFT_DEBUG_CHECKS - if (c->nodeID != engine_rank) error("Should be run on a different node"); -#endif - - TIMER_TIC; - - const struct engine *e = r->e; - const int with_cosmology = e->policy & engine_policy_cosmology; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - /* Anything to do here? */ - if (c->hydro.count == 0 || c->stars.count == 0) return; - if (!cell_is_active_stars(c, e)) return; - - /* Cosmological terms */ - const float a = cosmo->a; - const float H = cosmo->H; - - const int scount = c->stars.count; - const int count = c->hydro.count; - struct spart *restrict sparts = c->stars.parts; - struct part *restrict parts = c->hydro.parts; - struct xpart *restrict xparts = c->hydro.xparts; - - /* Loop over the sparts in ci. */ - for (int sid = 0; sid < scount; sid++) { - - /* Get a hold of the ith spart in ci. */ - struct spart *restrict si = &sparts[sid]; - - /* Skip inactive particles */ - if (!spart_is_active(si, e)) continue; - - /* Skip inactive particles */ - if (!feedback_is_active(si, e->time, cosmo, with_cosmology)) continue; - - const float hi = si->h; - const float hig2 = hi * hi * kernel_gamma2; - const float six[3] = {(float)(si->x[0] - c->loc[0]), - (float)(si->x[1] - c->loc[1]), - (float)(si->x[2] - c->loc[2])}; - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts[pjd]; - struct xpart *restrict xpj = &xparts[pjd]; - const float hj = pj->h; - - /* Early abort? */ - if (part_is_inhibited(pj, e)) continue; - - /* Compute the pairwise distance. 
*/ - const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), - (float)(pj->x[1] - c->loc[1]), - (float)(pj->x[2] - c->loc[2])}; - float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - if (r2 < hig2) { - IACT_STARS(r2, dx, hi, hj, si, pj, a, H); -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo, - ti_current); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo, - ti_current); -#endif - } - } /* loop over the parts in ci. */ - } /* loop over the sparts in ci. */ - - TIMER_TOC(TIMER_DOSELF_STARS); -} - -/** - * @brief Calculate the number density of cj #part around the ci #spart - * - * @param r runner task - * @param ci The first #cell - * @param cj The second #cell - */ -void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj) { - -#ifdef SWIFT_DEBUG_CHECKS -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - if (ci->nodeID != engine_rank) error("Should be run on a different node"); -#else - if (cj->nodeID != engine_rank) error("Should be run on a different node"); -#endif -#endif - - const struct engine *e = r->e; - const int with_cosmology = e->policy & engine_policy_cosmology; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - /* Anything to do here? 
*/ - if (cj->hydro.count == 0 || ci->stars.count == 0) return; - if (!cell_is_active_stars(ci, e)) return; - - /* Cosmological terms */ - const float a = cosmo->a; - const float H = cosmo->H; - - const int scount_i = ci->stars.count; - const int count_j = cj->hydro.count; - struct spart *restrict sparts_i = ci->stars.parts; - struct part *restrict parts_j = cj->hydro.parts; - struct xpart *restrict xparts_j = cj->hydro.xparts; - - /* Get the relative distance between the pairs, wrapping. */ - double shift[3] = {0.0, 0.0, 0.0}; - for (int k = 0; k < 3; k++) { - if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) - shift[k] = e->s->dim[k]; - else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) - shift[k] = -e->s->dim[k]; - } - - /* Loop over the sparts in ci. */ - for (int sid = 0; sid < scount_i; sid++) { - - /* Get a hold of the ith spart in ci. */ - struct spart *restrict si = &sparts_i[sid]; - - /* Skip inactive particles */ - if (!spart_is_active(si, e)) continue; - - /* Skip inactive particles */ - if (!feedback_is_active(si, e->time, cosmo, with_cosmology)) continue; - - const float hi = si->h; - const float hig2 = hi * hi * kernel_gamma2; - const float six[3] = {(float)(si->x[0] - (cj->loc[0] + shift[0])), - (float)(si->x[1] - (cj->loc[1] + shift[1])), - (float)(si->x[2] - (cj->loc[2] + shift[2]))}; - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[pjd]; - struct xpart *restrict xpj = &xparts_j[pjd]; - const float hj = pj->h; - - /* Skip inhibited particles. */ - if (part_is_inhibited(pj, e)) continue; - - /* Compute the pairwise distance. 
*/ - const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]), - (float)(pj->x[1] - cj->loc[1]), - (float)(pj->x[2] - cj->loc[2])}; - float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - if (r2 < hig2) { - IACT_STARS(r2, dx, hi, hj, si, pj, a, H); - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo, - ti_current); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo, - ti_current); -#endif - } - } /* loop over the parts in cj. */ - } /* loop over the parts in ci. */ -} - -/** - * @brief Compute the interactions between a cell pair. - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The second #cell. - * @param sid The direction of the pair. - * @param shift The shift vector to apply to the particles in ci. - */ -void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, - const int sid, const double *shift) { - - TIMER_TIC; - - const struct engine *e = r->e; - const int with_cosmology = e->policy & engine_policy_cosmology; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - /* Cosmological terms */ - const float a = cosmo->a; - const float H = cosmo->H; - - /* Get the cutoff shift. 
*/ - double rshift = 0.0; - for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - const int do_ci_stars = (ci->nodeID == e->nodeID) && (ci->stars.count != 0) && - (cj->hydro.count != 0) && cell_is_active_stars(ci, e); - const int do_cj_stars = (cj->nodeID == e->nodeID) && (cj->stars.count != 0) && - (ci->hydro.count != 0) && cell_is_active_stars(cj, e); -#else - /* here we are updating the hydro -> switch ci, cj for local */ - const int do_ci_stars = (cj->nodeID == e->nodeID) && (ci->stars.count != 0) && - (cj->hydro.count != 0) && cell_is_active_stars(ci, e); - const int do_cj_stars = (ci->nodeID == e->nodeID) && (cj->stars.count != 0) && - (ci->hydro.count != 0) && cell_is_active_stars(cj, e); -#endif - - if (do_ci_stars) { - - /* Pick-out the sorted lists. */ - const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; - const struct sort_entry *restrict sort_i = ci->stars.sort[sid]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Some constants used to checks that the parts are in the right frame */ - const float shift_threshold_x = - 2. * ci->width[0] + - 2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part); - const float shift_threshold_y = - 2. * ci->width[1] + - 2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part); - const float shift_threshold_z = - 2. * ci->width[2] + - 2. * max(ci->stars.dx_max_part, cj->hydro.dx_max_part); -#endif /* SWIFT_DEBUG_CHECKS */ - - /* Get some other useful values. 
*/ - const double hi_max = ci->stars.h_max * kernel_gamma - rshift; - const int count_i = ci->stars.count; - const int count_j = cj->hydro.count; - struct spart *restrict sparts_i = ci->stars.parts; - struct part *restrict parts_j = cj->hydro.parts; - struct xpart *restrict xparts_j = cj->hydro.xparts; - const double dj_min = sort_j[0].d; - const float dx_max_rshift = - (ci->stars.dx_max_sort + cj->hydro.dx_max_sort) - rshift; - const float dx_max = (ci->stars.dx_max_sort + cj->hydro.dx_max_sort); - - /* Loop over the sparts in ci. */ - for (int pid = count_i - 1; - pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { - - /* Get a hold of the ith part in ci. */ - struct spart *restrict spi = &sparts_i[sort_i[pid].i]; - const float hi = spi->h; - - /* Skip inactive particles */ - if (!spart_is_active(spi, e)) continue; - - /* Skip inactive particles */ - if (!feedback_is_active(spi, e->time, cosmo, with_cosmology)) continue; - - /* Compute distance from the other cell. */ - const double px[3] = {spi->x[0], spi->x[1], spi->x[2]}; - float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] + - px[2] * runner_shift[sid][2]; - - /* Is there anything we need to interact with ? */ - const double di = dist + hi * kernel_gamma + dx_max_rshift; - if (di < dj_min) continue; - - /* Get some additional information about pi */ - const float hig2 = hi * hi * kernel_gamma2; - const float pix = spi->x[0] - (cj->loc[0] + shift[0]); - const float piy = spi->x[1] - (cj->loc[1] + shift[1]); - const float piz = spi->x[2] - (cj->loc[2] + shift[2]); - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { - - /* Recover pj */ - struct part *pj = &parts_j[sort_j[pjd].i]; - struct xpart *xpj = &xparts_j[sort_j[pjd].i]; - - /* Skip inhibited particles. 
*/ - if (part_is_inhibited(pj, e)) continue; - - const float hj = pj->h; - const float pjx = pj->x[0] - cj->loc[0]; - const float pjy = pj->x[1] - cj->loc[1]; - const float pjz = pj->x[2] - cj->loc[2]; - - /* Compute the pairwise distance. */ - float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles are in the correct frame after the shifts */ - if (pix > shift_threshold_x || pix < -shift_threshold_x) - error( - "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)", - pix, ci->width[0]); - if (piy > shift_threshold_y || piy < -shift_threshold_y) - error( - "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)", - piy, ci->width[1]); - if (piz > shift_threshold_z || piz < -shift_threshold_z) - error( - "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)", - piz, ci->width[2]); - if (pjx > shift_threshold_x || pjx < -shift_threshold_x) - error( - "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)", - pjx, ci->width[0]); - if (pjy > shift_threshold_y || pjy < -shift_threshold_y) - error( - "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)", - pjy, ci->width[1]); - if (pjz > shift_threshold_z || pjz < -shift_threshold_z) - error( - "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)", - pjz, ci->width[2]); - - /* Check that particles have been drifted to the current time */ - if (spi->ti_drift != e->ti_current) - error("Particle spi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? 
*/ - if (r2 < hig2) { - IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, - cosmo, ti_current); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, - ti_current); -#endif - } - } /* loop over the parts in cj. */ - } /* loop over the parts in ci. */ - } /* do_ci_stars */ - - if (do_cj_stars) { - /* Pick-out the sorted lists. */ - const struct sort_entry *restrict sort_i = ci->hydro.sort[sid]; - const struct sort_entry *restrict sort_j = cj->stars.sort[sid]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Some constants used to checks that the parts are in the right frame */ - const float shift_threshold_x = - 2. * ci->width[0] + - 2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part); - const float shift_threshold_y = - 2. * ci->width[1] + - 2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part); - const float shift_threshold_z = - 2. * ci->width[2] + - 2. * max(ci->hydro.dx_max_part, cj->stars.dx_max_part); -#endif /* SWIFT_DEBUG_CHECKS */ - - /* Get some other useful values. */ - const double hj_max = cj->hydro.h_max * kernel_gamma; - const int count_i = ci->hydro.count; - const int count_j = cj->stars.count; - struct part *restrict parts_i = ci->hydro.parts; - struct xpart *restrict xparts_i = ci->hydro.xparts; - struct spart *restrict sparts_j = cj->stars.parts; - const double di_max = sort_i[count_i - 1].d - rshift; - const float dx_max_rshift = - (ci->hydro.dx_max_sort + cj->stars.dx_max_sort) + rshift; - const float dx_max = (ci->hydro.dx_max_sort + cj->stars.dx_max_sort); - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max; - pjd++) { - - /* Get a hold of the jth part in cj. 
*/ - struct spart *spj = &sparts_j[sort_j[pjd].i]; - const float hj = spj->h; - - /* Skip inactive particles */ - if (!spart_is_active(spj, e)) continue; - - /* Skip inactive particles */ - if (!feedback_is_active(spj, e->time, cosmo, with_cosmology)) continue; - - /* Compute distance from the other cell. */ - const double px[3] = {spj->x[0], spj->x[1], spj->x[2]}; - float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] + - px[2] * runner_shift[sid][2]; - - /* Is there anything we need to interact with ? */ - const double dj = dist - hj * kernel_gamma - dx_max_rshift; - if (dj - rshift > di_max) continue; - - /* Get some additional information about pj */ - const float hjg2 = hj * hj * kernel_gamma2; - const float pjx = spj->x[0] - cj->loc[0]; - const float pjy = spj->x[1] - cj->loc[1]; - const float pjz = spj->x[2] - cj->loc[2]; - - /* Loop over the parts in ci. */ - for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) { - - /* Recover pi */ - struct part *pi = &parts_i[sort_i[pid].i]; - struct xpart *xpi = &xparts_i[sort_i[pid].i]; - - /* Skip inhibited particles. */ - if (part_is_inhibited(pi, e)) continue; - - const float hi = pi->h; - const float pix = pi->x[0] - (cj->loc[0] + shift[0]); - const float piy = pi->x[1] - (cj->loc[1] + shift[1]); - const float piz = pi->x[2] - (cj->loc[2] + shift[2]); - - /* Compute the pairwise distance. 
*/ - float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles are in the correct frame after the shifts */ - if (pix > shift_threshold_x || pix < -shift_threshold_x) - error( - "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)", - pix, ci->width[0]); - if (piy > shift_threshold_y || piy < -shift_threshold_y) - error( - "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)", - piy, ci->width[1]); - if (piz > shift_threshold_z || piz < -shift_threshold_z) - error( - "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)", - piz, ci->width[2]); - if (pjx > shift_threshold_x || pjx < -shift_threshold_x) - error( - "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)", - pjx, ci->width[0]); - if (pjy > shift_threshold_y || pjy < -shift_threshold_y) - error( - "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)", - pjy, ci->width[1]); - if (pjz > shift_threshold_z || pjz < -shift_threshold_z) - error( - "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)", - pjz, ci->width[2]); - - /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) - error("Particle pi not drifted to current time"); - if (spj->ti_drift != e->ti_current) - error("Particle spj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hjg2) { - - IACT_STARS(r2, dx, hj, hi, spj, pi, a, H); - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hj, hi, spj, pi, xpi, - cosmo, ti_current); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - runner_iact_nonsym_feedback_apply(r2, dx, hj, hi, spj, pi, xpi, cosmo, - ti_current); -#endif - } - } /* loop over the parts in ci. */ - } /* loop over the parts in cj. 
*/ - } /* Cell cj is active */ - - TIMER_TOC(TIMER_DOPAIR_STARS); -} - -void DOPAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj, int timer) { - - TIMER_TIC; - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - const int do_ci_stars = ci->nodeID == r->e->nodeID; - const int do_cj_stars = cj->nodeID == r->e->nodeID; -#else - /* here we are updating the hydro -> switch ci, cj */ - const int do_ci_stars = cj->nodeID == r->e->nodeID; - const int do_cj_stars = ci->nodeID == r->e->nodeID; -#endif - if (do_ci_stars && ci->stars.count != 0 && cj->hydro.count != 0) - DO_NONSYM_PAIR1_STARS_NAIVE(r, ci, cj); - if (do_cj_stars && cj->stars.count != 0 && ci->hydro.count != 0) - DO_NONSYM_PAIR1_STARS_NAIVE(r, cj, ci); - - TIMER_TOC(TIMER_DOPAIR_STARS); -} - -/** - * @brief Compute the interactions between a cell pair, but only for the - * given indices in ci. - * - * Version using a brute-force algorithm. - * - * @param r The #runner. - * @param ci The first #cell. - * @param sparts_i The #part to interact with @c cj. - * @param ind The list of indices of particles in @c ci to interact with. - * @param scount The number of particles in @c ind. - * @param cj The second #cell. - * @param sid The direction of the pair. - * @param flipped Flag to check whether the cells have been flipped or not. - * @param shift The shift vector to apply to the particles in ci. 
- */ -void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, - struct spart *restrict sparts_i, int *restrict ind, - int scount, struct cell *restrict cj, const int sid, - const int flipped, const double *shift) { - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - /* Cosmological terms */ - const float a = cosmo->a; - const float H = cosmo->H; - - const int count_j = cj->hydro.count; - struct part *restrict parts_j = cj->hydro.parts; - struct xpart *restrict xparts_j = cj->hydro.xparts; - - /* Early abort? */ - if (count_j == 0) return; - - /* Pick-out the sorted lists. */ - const struct sort_entry *restrict sort_j = cj->hydro.sort[sid]; - const float dxj = cj->hydro.dx_max_sort; - - /* Sparts are on the left? */ - if (!flipped) { - - /* Loop over the sparts_i. */ - for (int pid = 0; pid < scount; pid++) { - - /* Get a hold of the ith spart in ci. */ - struct spart *restrict spi = &sparts_i[ind[pid]]; - const double pix = spi->x[0] - (shift[0]); - const double piy = spi->x[1] - (shift[1]); - const double piz = spi->x[2] - (shift[2]); - const float hi = spi->h; - const float hig2 = hi * hi * kernel_gamma2; - const double di = hi * kernel_gamma + dxj + pix * runner_shift[sid][0] + - piy * runner_shift[sid][1] + piz * runner_shift[sid][2]; - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[sort_j[pjd].i]; - struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i]; - - /* Skip inhibited particles. */ - if (part_is_inhibited(pj, e)) continue; - - const double pjx = pj->x[0]; - const double pjy = pj->x[1]; - const double pjz = pj->x[2]; - const float hj = pj->h; - - /* Compute the pairwise distance. 
*/ - float dx[3] = {(float)(pix - pjx), (float)(piy - pjy), - (float)(piz - pjz)}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (spi->ti_drift != e->ti_current) - error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hig2) { - IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, - cosmo, ti_current); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, - ti_current); -#endif - } - } /* loop over the parts in cj. */ - } /* loop over the sparts in ci. */ - } - - /* Sparts are on the right. */ - else { - - /* Loop over the sparts_i. */ - for (int pid = 0; pid < scount; pid++) { - - /* Get a hold of the ith spart in ci. */ - struct spart *restrict spi = &sparts_i[ind[pid]]; - const double pix = spi->x[0] - (shift[0]); - const double piy = spi->x[1] - (shift[1]); - const double piz = spi->x[2] - (shift[2]); - const float hi = spi->h; - const float hig2 = hi * hi * kernel_gamma2; - const double di = -hi * kernel_gamma - dxj + pix * runner_shift[sid][0] + - piy * runner_shift[sid][1] + piz * runner_shift[sid][2]; - - /* Loop over the parts in cj. */ - for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[sort_j[pjd].i]; - struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i]; - - /* Skip inhibited particles. */ - if (part_is_inhibited(pj, e)) continue; - - const double pjx = pj->x[0]; - const double pjy = pj->x[1]; - const double pjz = pj->x[2]; - const float hj = pj->h; - - /* Compute the pairwise distance. 
*/ - float dx[3] = {(float)(pix - pjx), (float)(piy - pjy), - (float)(piz - pjz)}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (spi->ti_drift != e->ti_current) - error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hig2) { - IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, - cosmo, ti_current); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, - ti_current); -#endif - } - } /* loop over the parts in cj. */ - } /* loop over the sparts in ci. */ - } -} +void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c); +void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj); -/** - * @brief Compute the interactions between a cell pair, but only for the - * given indices in ci. - * - * Version using a brute-force algorithm. - * - * @param r The #runner. - * @param ci The first #cell. - * @param sparts_i The #part to interact with @c cj. - * @param ind The list of indices of particles in @c ci to interact with. - * @param scount The number of particles in @c ind. - * @param cj The second #cell. - * @param shift The shift vector to apply to the particles in ci. 
- */ -void DOPAIR1_SUBSET_STARS_NAIVE(struct runner *r, struct cell *restrict ci, - struct spart *restrict sparts_i, - int *restrict ind, int scount, - struct cell *restrict cj, const double *shift) { - -#ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != engine_rank) error("Should be run on a different node"); -#endif - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - /* Cosmological terms */ - const float a = cosmo->a; - const float H = cosmo->H; - - const int count_j = cj->hydro.count; - struct part *restrict parts_j = cj->hydro.parts; - struct xpart *restrict xparts_j = cj->hydro.xparts; - - /* Early abort? */ - if (count_j == 0) return; - - /* Loop over the parts_i. */ - for (int pid = 0; pid < scount; pid++) { - - /* Get a hold of the ith part in ci. */ - struct spart *restrict spi = &sparts_i[ind[pid]]; - - const double pix = spi->x[0] - (shift[0]); - const double piy = spi->x[1] - (shift[1]); - const double piz = spi->x[2] - (shift[2]); - const float hi = spi->h; - const float hig2 = hi * hi * kernel_gamma2; - -#ifdef SWIFT_DEBUG_CHECKS - if (!spart_is_active(spi, e)) - error("Trying to correct smoothing length of inactive particle !"); -#endif - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[pjd]; - struct xpart *restrict xpj = &xparts_j[pjd]; - - /* Skip inhibited particles */ - if (part_is_inhibited(pj, e)) continue; - - const double pjx = pj->x[0]; - const double pjy = pj->x[1]; - const double pjz = pj->x[2]; - const float hj = pj->h; - - /* Compute the pairwise distance. 
*/ - float dx[3] = {(float)(pix - pjx), (float)(piy - pjy), - (float)(piz - pjz)}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - /* Hit or miss? */ - if (r2 < hig2) { - IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, cosmo, - ti_current); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, - ti_current); -#endif - } - } /* loop over the parts in cj. */ - } /* loop over the parts in ci. */ -} - -/** - * @brief Compute the interactions between a cell pair, but only for the - * given indices in ci. - * - * @param r The #runner. - * @param ci The first #cell. - * @param sparts The #spart to interact. - * @param ind The list of indices of particles in @c ci to interact with. - * @param scount The number of particles in @c ind. - */ -void DOSELF1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, - struct spart *restrict sparts, int *restrict ind, - int scount) { - -#ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != engine_rank) error("Should be run on a different node"); -#endif - - const struct engine *e = r->e; - const integertime_t ti_current = e->ti_current; - const struct cosmology *cosmo = e->cosmology; - - /* Cosmological terms */ - const float a = cosmo->a; - const float H = cosmo->H; - - const int count_i = ci->hydro.count; - struct part *restrict parts_j = ci->hydro.parts; - struct xpart *restrict xparts_j = ci->hydro.xparts; - - /* Early abort? */ - if (count_i == 0) return; - - /* Loop over the parts in ci. */ - for (int spid = 0; spid < scount; spid++) { - - /* Get a hold of the ith part in ci. 
*/ - struct spart *spi = &sparts[ind[spid]]; - const float spix[3] = {(float)(spi->x[0] - ci->loc[0]), - (float)(spi->x[1] - ci->loc[1]), - (float)(spi->x[2] - ci->loc[2])}; - const float hi = spi->h; - const float hig2 = hi * hi * kernel_gamma2; - -#ifdef SWIFT_DEBUG_CHECKS - if (!spart_is_active(spi, e)) - error("Inactive particle in subset function!"); -#endif - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_i; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[pjd]; - struct xpart *restrict xpj = &xparts_j[pjd]; - - /* Early abort? */ - if (part_is_inhibited(pj, e)) continue; - - /* Compute the pairwise distance. */ - const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]), - (float)(pj->x[1] - ci->loc[1]), - (float)(pj->x[2] - ci->loc[2])}; - float dx[3] = {spix[0] - pjx[0], spix[1] - pjx[1], spix[2] - pjx[2]}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hig2) { - IACT_STARS(r2, dx, hi, pj->h, spi, pj, a, H); -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, pj->h, spi, pj, xpj, - cosmo, ti_current); -#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - runner_iact_nonsym_feedback_apply(r2, dx, hi, pj->h, spi, pj, xpj, - cosmo, ti_current); -#endif - } - } /* loop over the parts in cj. */ - } /* loop over the parts in ci. */ -} +void DOSUB_SELF1_STARS(struct runner *r, struct cell *ci, int gettimer); +void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, + int gettimer); -/** - * @brief Determine which version of DOSELF1_SUBSET_STARS needs to be called - * depending on the optimisation level. - * - * @param r The #runner. - * @param ci The first #cell. - * @param sparts The #spart to interact. 
- * @param ind The list of indices of particles in @c ci to interact with. - * @param scount The number of particles in @c ind. - */ void DOSELF1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, struct spart *restrict sparts, - int *restrict ind, int scount) { + int *restrict ind, int scount); - DOSELF1_SUBSET_STARS(r, ci, sparts, ind, scount); -} - -/** - * @brief Determine which version of DOPAIR1_SUBSET_STARS needs to be called - * depending on the orientation of the cells or whether DOPAIR1_SUBSET_STARS - * needs to be called at all. - * - * @param r The #runner. - * @param ci The first #cell. - * @param sparts_i The #spart to interact with @c cj. - * @param ind The list of indices of particles in @c ci to interact with. - * @param scount The number of particles in @c ind. - * @param cj The second #cell. - */ void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, struct spart *restrict sparts_i, int *restrict ind, int scount, - struct cell *restrict cj) { - - const struct engine *e = r->e; - - /* Anything to do here? */ - if (cj->hydro.count == 0) return; - - /* Get the relative distance between the pairs, wrapping. */ - double shift[3] = {0.0, 0.0, 0.0}; - for (int k = 0; k < 3; k++) { - if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) - shift[k] = e->s->dim[k]; - else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) - shift[k] = -e->s->dim[k]; - } - -#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS - DOPAIR1_SUBSET_STARS_NAIVE(r, ci, sparts_i, ind, scount, cj, shift); -#else - /* Get the sorting index. */ - int sid = 0; - for (int k = 0; k < 3; k++) - sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0) - ? 0 - : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1); - - /* Switch the cells around? */ - const int flipped = runner_flip[sid]; - sid = sortlistID[sid]; - - /* Has the cell cj been sorted? 
*/ - if (!(cj->hydro.sorted & (1 << sid)) || - cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin) - error("Interacting unsorted cells."); - - DOPAIR1_SUBSET_STARS(r, ci, sparts_i, ind, scount, cj, sid, flipped, shift); -#endif -} + struct cell *restrict cj); void DOSUB_SUBSET_STARS(struct runner *r, struct cell *ci, struct spart *sparts, - int *ind, int scount, struct cell *cj, int gettimer) { - - const struct engine *e = r->e; - struct space *s = e->s; - - /* Should we even bother? */ - if (!cell_is_active_stars(ci, e) && - (cj == NULL || !cell_is_active_stars(cj, e))) - return; - - /* Find out in which sub-cell of ci the parts are. */ - struct cell *sub = NULL; - if (ci->split) { - for (int k = 0; k < 8; k++) { - if (ci->progeny[k] != NULL) { - if (&sparts[ind[0]] >= &ci->progeny[k]->stars.parts[0] && - &sparts[ind[0]] < - &ci->progeny[k]->stars.parts[ci->progeny[k]->stars.count]) { - sub = ci->progeny[k]; - break; - } - } - } - } - - /* Is this a single cell? */ - if (cj == NULL) { - - /* Recurse? */ - if (cell_can_recurse_in_self_stars_task(ci)) { - - /* Loop over all progeny. */ - DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, NULL, 0); - for (int j = 0; j < 8; j++) - if (ci->progeny[j] != sub && ci->progeny[j] != NULL) - DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, ci->progeny[j], 0); - - } - - /* Otherwise, compute self-interaction. */ - else - DOSELF1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount); - } /* self-interaction. */ - - /* Otherwise, it's a pair interaction. */ - else { - - /* Recurse? */ - if (cell_can_recurse_in_pair_stars_task(ci, cj) && - cell_can_recurse_in_pair_stars_task(cj, ci)) { - - /* Get the type of pair and flip ci/cj if needed. 
*/ - double shift[3] = {0.0, 0.0, 0.0}; - const int sid = space_getsid(s, &ci, &cj, shift); - - struct cell_split_pair *csp = &cell_split_pairs[sid]; - for (int k = 0; k < csp->count; k++) { - const int pid = csp->pairs[k].pid; - const int pjd = csp->pairs[k].pjd; - if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL) - DOSUB_SUBSET_STARS(r, ci->progeny[pid], sparts, ind, scount, - cj->progeny[pjd], 0); - if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub) - DOSUB_SUBSET_STARS(r, cj->progeny[pjd], sparts, ind, scount, - ci->progeny[pid], 0); - } - } - - /* Otherwise, compute the pair directly. */ - else if (cell_is_active_stars(ci, e) && cj->hydro.count > 0) { - - /* Do any of the cells need to be drifted first? */ - if (cell_is_active_stars(ci, e)) { - if (!cell_are_spart_drifted(ci, e)) error("Cell should be drifted!"); - if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); - } - - DOPAIR1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount, cj); - } - - } /* otherwise, pair interaction. */ -} - -/** - * @brief Determine which version of DOSELF1_STARS needs to be called depending - * on the optimisation level. - * - * @param r #runner - * @param c #cell c - * - */ -void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c) { - - const struct engine *restrict e = r->e; - - /* Anything to do here? */ - if (c->stars.count == 0) return; - - /* Anything to do here? */ - if (!cell_is_active_stars(c, e)) return; - - /* Did we mess up the recursion? 
*/ - if (c->stars.h_max_old * kernel_gamma > c->dmin) - error("Cell smaller than smoothing length"); - - DOSELF1_STARS(r, c, 1); -} - -#define RUNNER_CHECK_SORT(TYPE, PART, cj, ci, sid) \ - ({ \ - const struct sort_entry *restrict sort_j = cj->TYPE.sort[sid]; \ - \ - for (int pjd = 0; pjd < cj->TYPE.count; pjd++) { \ - const struct PART *p = &cj->TYPE.parts[sort_j[pjd].i]; \ - if (PART##_is_inhibited(p, e)) continue; \ - \ - const float d = p->x[0] * runner_shift[sid][0] + \ - p->x[1] * runner_shift[sid][1] + \ - p->x[2] * runner_shift[sid][2]; \ - if ((fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) > \ - 1.0e-4 * max(fabsf(d), cj->TYPE.dx_max_sort_old) && \ - (fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) > \ - cj->width[0] * 1.0e-10) \ - error( \ - "particle shift diff exceeds dx_max_sort in cell cj. " \ - "cj->nodeID=%d " \ - "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->" #TYPE \ - ".dx_max_sort=%e " \ - "cj->" #TYPE \ - ".dx_max_sort_old=%e, cellID=%i super->cellID=%i" \ - "cj->depth=%d cj->maxdepth=%d", \ - cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->TYPE.dx_max_sort, \ - cj->TYPE.dx_max_sort_old, cj->cellID, cj->hydro.super->cellID, \ - cj->depth, cj->maxdepth); \ - } \ - }) - -/** - * @brief Determine which version of DOPAIR1_STARS needs to be called depending - * on the orientation of the cells or whether DOPAIR1_STARS needs to be called - * at all. - * - * @param r #runner - * @param ci #cell ci - * @param cj #cell cj - * - */ -void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj) { - - const struct engine *restrict e = r->e; - - /* Get the sort ID. 
*/ - double shift[3] = {0.0, 0.0, 0.0}; - const int sid = space_getsid(e->s, &ci, &cj, shift); - - const int ci_active = cell_is_active_stars(ci, e); - const int cj_active = cell_is_active_stars(cj, e); -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - const int do_ci_stars = ci->nodeID == e->nodeID; - const int do_cj_stars = cj->nodeID == e->nodeID; -#else - /* here we are updating the hydro -> switch ci, cj */ - const int do_ci_stars = cj->nodeID == e->nodeID; - const int do_cj_stars = ci->nodeID == e->nodeID; -#endif - const int do_ci = (ci->stars.count != 0 && cj->hydro.count != 0 && - ci_active && do_ci_stars); - const int do_cj = (cj->stars.count != 0 && ci->hydro.count != 0 && - cj_active && do_cj_stars); - - /* Anything to do here? */ - if (!do_ci && !do_cj) return; - - /* Check that cells are drifted. */ - if (do_ci && - (!cell_are_spart_drifted(ci, e) || !cell_are_part_drifted(cj, e))) - error("Interacting undrifted cells."); - - /* Have the cells been sorted? */ - if (do_ci && (!(ci->stars.sorted & (1 << sid)) || - ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin)) - error("Interacting unsorted cells."); - - if (do_ci && (!(cj->hydro.sorted & (1 << sid)) || - cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin)) - error("Interacting unsorted cells."); - - if (do_cj && - (!cell_are_part_drifted(ci, e) || !cell_are_spart_drifted(cj, e))) - error("Interacting undrifted cells."); - - /* Have the cells been sorted? */ - if (do_cj && (!(ci->hydro.sorted & (1 << sid)) || - ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin)) - error("Interacting unsorted cells."); - - if (do_cj && (!(cj->stars.sorted & (1 << sid)) || - cj->stars.dx_max_sort_old > space_maxreldx * cj->dmin)) - error("Interacting unsorted cells."); - -#ifdef SWIFT_DEBUG_CHECKS - if (do_ci) { - // MATTHIEU: This test is faulty. To be fixed... 
- // RUNNER_CHECK_SORT(hydro, part, cj, ci, sid); - RUNNER_CHECK_SORT(stars, spart, ci, cj, sid); - } - - if (do_cj) { - // MATTHIEU: This test is faulty. To be fixed... - // RUNNER_CHECK_SORT(hydro, part, ci, cj, sid); - RUNNER_CHECK_SORT(stars, spart, cj, ci, sid); - } -#endif /* SWIFT_DEBUG_CHECKS */ - -#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS - DOPAIR1_STARS_NAIVE(r, ci, cj, 1); -#else - DO_SYM_PAIR1_STARS(r, ci, cj, sid, shift); -#endif -} - -/** - * @brief Compute grouped sub-cell interactions for pairs - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The second #cell. - * @param gettimer Do we have a timer ? - * - * @todo Hard-code the sid on the recursive calls to avoid the - * redundant computations to find the sid on-the-fly. - */ -void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, - int gettimer) { - - TIMER_TIC; - - struct space *s = r->e->s; - const struct engine *e = r->e; - - /* Should we even bother? */ - const int should_do_ci = ci->stars.count != 0 && cj->hydro.count != 0 && - cell_is_active_stars(ci, e); - const int should_do_cj = cj->stars.count != 0 && ci->hydro.count != 0 && - cell_is_active_stars(cj, e); - if (!should_do_ci && !should_do_cj) return; - - /* Get the type of pair and flip ci/cj if needed. */ - double shift[3]; - const int sid = space_getsid(s, &ci, &cj, shift); - - /* Recurse? */ - if (cell_can_recurse_in_pair_stars_task(ci, cj) && - cell_can_recurse_in_pair_stars_task(cj, ci)) { - struct cell_split_pair *csp = &cell_split_pairs[sid]; - for (int k = 0; k < csp->count; k++) { - const int pid = csp->pairs[k].pid; - const int pjd = csp->pairs[k].pjd; - if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) - DOSUB_PAIR1_STARS(r, ci->progeny[pid], cj->progeny[pjd], 0); - } - } - - /* Otherwise, compute the pair directly. 
*/ - else { - -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - const int do_ci_stars = ci->nodeID == e->nodeID; - const int do_cj_stars = cj->nodeID == e->nodeID; -#else - /* here we are updating the hydro -> switch ci, cj */ - const int do_ci_stars = cj->nodeID == e->nodeID; - const int do_cj_stars = ci->nodeID == e->nodeID; -#endif - const int do_ci = ci->stars.count != 0 && cj->hydro.count != 0 && - cell_is_active_stars(ci, e) && do_ci_stars; - const int do_cj = cj->stars.count != 0 && ci->hydro.count != 0 && - cell_is_active_stars(cj, e) && do_cj_stars; - - if (do_ci) { - - /* Make sure both cells are drifted to the current timestep. */ - if (!cell_are_spart_drifted(ci, e)) - error("Interacting undrifted cells (sparts)."); - - if (!cell_are_part_drifted(cj, e)) - error("Interacting undrifted cells (parts)."); - - /* Do any of the cells need to be sorted first? */ - if (!(ci->stars.sorted & (1 << sid)) || - ci->stars.dx_max_sort_old > ci->dmin * space_maxreldx) { - error("Interacting unsorted cell (sparts)."); - } - - if (!(cj->hydro.sorted & (1 << sid)) || - cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) - error("Interacting unsorted cell (parts). %i", cj->nodeID); - } - - if (do_cj) { - - /* Make sure both cells are drifted to the current timestep. */ - if (!cell_are_part_drifted(ci, e)) - error("Interacting undrifted cells (parts)."); - - if (!cell_are_spart_drifted(cj, e)) - error("Interacting undrifted cells (sparts)."); - - /* Do any of the cells need to be sorted first? 
*/ - if (!(ci->hydro.sorted & (1 << sid)) || - ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) { - error("Interacting unsorted cell (parts)."); - } - - if (!(cj->stars.sorted & (1 << sid)) || - cj->stars.dx_max_sort_old > cj->dmin * space_maxreldx) { - error("Interacting unsorted cell (sparts)."); - } - } - - if (do_ci || do_cj) DOPAIR1_BRANCH_STARS(r, ci, cj); - } - - TIMER_TOC(TIMER_DOSUB_PAIR_STARS); -} - -/** - * @brief Compute grouped sub-cell interactions for self tasks - * - * @param r The #runner. - * @param ci The first #cell. - * @param gettimer Do we have a timer ? - */ -void DOSUB_SELF1_STARS(struct runner *r, struct cell *ci, int gettimer) { - - TIMER_TIC; - -#ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != engine_rank) - error("This function should not be called on foreign cells"); -#endif - - /* Should we even bother? */ - if (ci->hydro.count == 0 || ci->stars.count == 0 || - !cell_is_active_stars(ci, r->e)) - return; - - /* Recurse? */ - if (cell_can_recurse_in_self_stars_task(ci)) { - - /* Loop over all progeny. */ - for (int k = 0; k < 8; k++) - if (ci->progeny[k] != NULL) { - DOSUB_SELF1_STARS(r, ci->progeny[k], 0); - for (int j = k + 1; j < 8; j++) - if (ci->progeny[j] != NULL) - DOSUB_PAIR1_STARS(r, ci->progeny[k], ci->progeny[j], 0); - } - } - - /* Otherwise, compute self-interaction. */ - else { - - /* Drift the cell to the current timestep if needed. */ - if (!cell_are_spart_drifted(ci, r->e)) error("Interacting undrifted cell."); - - DOSELF1_BRANCH_STARS(r, ci); - } - - TIMER_TOC(TIMER_DOSUB_SELF_STARS); -} + int *ind, int scount, struct cell *cj, int gettimer); diff --git a/src/runner_drift.c b/src/runner_drift.c new file mode 100644 index 0000000000000000000000000000000000000000..8c4376743cd50ffea4709cb471959864cedcc4b7 --- /dev/null +++ b/src/runner_drift.c @@ -0,0 +1,96 @@ +/******************************************************************************* + * This file is part of SWIFT. 
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "runner.h" + +/* Local headers. */ +#include "active.h" +#include "cell.h" +#include "engine.h" +#include "timers.h" + +/** + * @brief Drift all part in a cell. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_drift_part(struct runner *r, struct cell *c, int timer) { + + TIMER_TIC; + + cell_drift_part(c, r->e, 0); + + if (timer) TIMER_TOC(timer_drift_part); +} + +/** + * @brief Drift all gpart in a cell. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer) { + + TIMER_TIC; + + cell_drift_gpart(c, r->e, 0); + + if (timer) TIMER_TOC(timer_drift_gpart); +} + +/** + * @brief Drift all spart in a cell. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? 
+ */ +void runner_do_drift_spart(struct runner *r, struct cell *c, int timer) { + + TIMER_TIC; + + cell_drift_spart(c, r->e, 0); + + if (timer) TIMER_TOC(timer_drift_spart); +} + +/** + * @brief Drift all bpart in a cell. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_drift_bpart(struct runner *r, struct cell *c, int timer) { + + TIMER_TIC; + + cell_drift_bpart(c, r->e, 0); + + if (timer) TIMER_TOC(timer_drift_bpart); +} diff --git a/src/runner_ghost.c b/src/runner_ghost.c new file mode 100644 index 0000000000000000000000000000000000000000..2c1e8cd7190858014f7914e293b5ffdadbdc2707 --- /dev/null +++ b/src/runner_ghost.c @@ -0,0 +1,1355 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "runner.h" + +/* Local headers. 
*/ +#include "active.h" +#include "black_holes.h" +#include "cell.h" +#include "engine.h" +#include "feedback.h" +#include "pressure_floor.h" +#include "pressure_floor_iact.h" +#include "space_getsid.h" +#include "stars.h" +#include "timers.h" +#include "tracers.h" + +/* Import the density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP + +/* Import the stars density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_stars.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/* Import the black hole density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_black_holes.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/** + * @brief Intermediate task after the density to check that the smoothing + * lengths are correct. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? 
+ */ +void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) { + + struct spart *restrict sparts = c->stars.parts; + const struct engine *e = r->e; + const struct unit_system *us = e->internal_units; + const int with_cosmology = (e->policy & engine_policy_cosmology); + const struct cosmology *cosmo = e->cosmology; + const struct feedback_props *feedback_props = e->feedback_props; + const float stars_h_max = e->hydro_properties->h_max; + const float stars_h_min = e->hydro_properties->h_min; + const float eps = e->stars_properties->h_tolerance; + const float stars_eta_dim = + pow_dimension(e->stars_properties->eta_neighbours); + const int max_smoothing_iter = e->stars_properties->max_smoothing_iterations; + int redo = 0, scount = 0; + + /* Running value of the maximal smoothing length */ + double h_max = c->stars.h_max; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != e->nodeID) + error("Running the star ghost on a foreign node!"); +#endif + + /* Anything to do here? */ + if (c->stars.count == 0) return; + if (!cell_is_active_stars(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + runner_do_stars_ghost(r, c->progeny[k], 0); + + /* Update h_max */ + h_max = max(h_max, c->progeny[k]->stars.h_max); + } + } + } else { + + /* Init the list of active particles that have to be updated. 
*/ + int *sid = NULL; + float *h_0 = NULL; + float *left = NULL; + float *right = NULL; + if ((sid = (int *)malloc(sizeof(int) * c->stars.count)) == NULL) + error("Can't allocate memory for sid."); + if ((h_0 = (float *)malloc(sizeof(float) * c->stars.count)) == NULL) + error("Can't allocate memory for h_0."); + if ((left = (float *)malloc(sizeof(float) * c->stars.count)) == NULL) + error("Can't allocate memory for left."); + if ((right = (float *)malloc(sizeof(float) * c->stars.count)) == NULL) + error("Can't allocate memory for right."); + for (int k = 0; k < c->stars.count; k++) + if (spart_is_active(&sparts[k], e) && + feedback_is_active(&sparts[k], e->time, cosmo, with_cosmology)) { + sid[scount] = k; + h_0[scount] = sparts[k].h; + left[scount] = 0.f; + right[scount] = stars_h_max; + ++scount; + } + + /* While there are particles that need to be updated... */ + for (int num_reruns = 0; scount > 0 && num_reruns < max_smoothing_iter; + num_reruns++) { + + /* Reset the redo-count. */ + redo = 0; + + /* Loop over the remaining active parts in this cell. */ + for (int i = 0; i < scount; i++) { + + /* Get a direct pointer on the part. */ + struct spart *sp = &sparts[sid[i]]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Is this part within the timestep? 
*/ + if (!spart_is_active(sp, e)) + error("Ghost applied to inactive particle"); +#endif + + /* Get some useful values */ + const float h_init = h_0[i]; + const float h_old = sp->h; + const float h_old_dim = pow_dimension(h_old); + const float h_old_dim_minus_one = pow_dimension_minus_one(h_old); + + float h_new; + int has_no_neighbours = 0; + + if (sp->density.wcount == 0.f) { /* No neighbours case */ + + /* Flag that there were no neighbours */ + has_no_neighbours = 1; + + /* Double h and try again */ + h_new = 2.f * h_old; + + } else { + + /* Finish the density calculation */ + stars_end_density(sp, cosmo); + + /* Compute one step of the Newton-Raphson scheme */ + const float n_sum = sp->density.wcount * h_old_dim; + const float n_target = stars_eta_dim; + const float f = n_sum - n_target; + const float f_prime = + sp->density.wcount_dh * h_old_dim + + hydro_dimension * sp->density.wcount * h_old_dim_minus_one; + + /* Improve the bisection bounds */ + if (n_sum < n_target) + left[i] = max(left[i], h_old); + else if (n_sum > n_target) + right[i] = min(right[i], h_old); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check the validity of the left and right bounds */ + if (left[i] > right[i]) + error("Invalid left (%e) and right (%e)", left[i], right[i]); +#endif + + /* Skip if h is already h_max and we don't have enough neighbours + */ + /* Same if we are below h_min */ + if (((sp->h >= stars_h_max) && (f < 0.f)) || + ((sp->h <= stars_h_min) && (f > 0.f))) { + + stars_reset_feedback(sp); + + /* Only do feedback if stars have a reasonable birth time */ + if (feedback_do_feedback(sp)) { + + const integertime_t ti_step = get_integer_timestep(sp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(e->ti_current - 1, sp->time_bin); + + /* Get particle time-step */ + double dt; + if (with_cosmology) { + dt = cosmology_get_delta_time(e->cosmology, ti_begin, + ti_begin + ti_step); + } else { + dt = get_timestep(sp->time_bin, e->time_base); + } + + /* Calculate age of 
the star at current time */ + double star_age_end_of_step; + if (with_cosmology) { + star_age_end_of_step = + cosmology_get_delta_time_from_scale_factors( + cosmo, (double)sp->birth_scale_factor, cosmo->a); + } else { + star_age_end_of_step = (float)e->time - sp->birth_time; + } + + /* Has this star been around for a while ? */ + if (star_age_end_of_step > 0.) { + + /* Age of the star at the start of the step */ + const double star_age_beg_of_step = + max(star_age_end_of_step - dt, 0.); + + /* Compute the stellar evolution */ + feedback_evolve_spart(sp, feedback_props, cosmo, us, + star_age_beg_of_step, dt); + } else { + + /* Reset the feedback fields of the star particle */ + feedback_reset_feedback(sp, feedback_props); + } + } else { + + feedback_reset_feedback(sp, feedback_props); + } + + /* Ok, we are done with this particle */ + continue; + } + + /* Normal case: Use Newton-Raphson to get a better value of h */ + + /* Avoid floating point exception from f_prime = 0 */ + h_new = h_old - f / (f_prime + FLT_MIN); + + /* Be verbose about the particles that struggle to converge */ + if (num_reruns > max_smoothing_iter - 10) { + + message( + "Smoothing length convergence problem: iter=%d p->id=%lld " + "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f " + "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e", + num_reruns, sp->id, h_init, h_old, h_new, f, f_prime, n_sum, + n_target, left[i], right[i]); + } + + /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. 
*/ + h_new = min(h_new, 2.f * h_old); + h_new = max(h_new, 0.5f * h_old); + + /* Verify that we are actually progressing towards the answer */ + h_new = max(h_new, left[i]); + h_new = min(h_new, right[i]); + } + + /* Check whether the particle has an inappropriate smoothing length + */ + if (fabsf(h_new - h_old) > eps * h_old) { + + /* Ok, correct then */ + + /* Case where we have been oscillating around the solution */ + if ((h_new == left[i] && h_old == right[i]) || + (h_old == left[i] && h_new == right[i])) { + + /* Bisect the remaining interval */ + sp->h = pow_inv_dimension( + 0.5f * (pow_dimension(left[i]) + pow_dimension(right[i]))); + + } else { + + /* Normal case */ + sp->h = h_new; + } + + /* If below the absolute maximum, try again */ + if (sp->h < stars_h_max && sp->h > stars_h_min) { + + /* Flag for another round of fun */ + sid[redo] = sid[i]; + h_0[redo] = h_0[i]; + left[redo] = left[i]; + right[redo] = right[i]; + redo += 1; + + /* Re-initialise everything */ + stars_init_spart(sp); + feedback_init_spart(sp); + + /* Off we go ! */ + continue; + + } else if (sp->h <= stars_h_min) { + + /* Ok, this particle is a lost cause... */ + sp->h = stars_h_min; + + } else if (sp->h >= stars_h_max) { + + /* Ok, this particle is a lost cause... 
*/ + sp->h = stars_h_max; + + /* Do some damage control if no neighbours at all were found */ + if (has_no_neighbours) { + stars_spart_has_no_neighbours(sp, cosmo); + } + + } else { + error( + "Fundamental problem with the smoothing length iteration " + "logic."); + } + } + + /* We now have a particle whose smoothing length has converged */ + + /* Check if h_max has increased */ + h_max = max(h_max, sp->h); + + stars_reset_feedback(sp); + + /* Only do feedback if stars have a reasonable birth time */ + if (feedback_do_feedback(sp)) { + + const integertime_t ti_step = get_integer_timestep(sp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(e->ti_current - 1, sp->time_bin); + + /* Get particle time-step */ + double dt; + if (with_cosmology) { + dt = cosmology_get_delta_time(e->cosmology, ti_begin, + ti_begin + ti_step); + } else { + dt = get_timestep(sp->time_bin, e->time_base); + } + + /* Calculate age of the star at current time */ + double star_age_end_of_step; + if (with_cosmology) { + star_age_end_of_step = cosmology_get_delta_time_from_scale_factors( + cosmo, sp->birth_scale_factor, (float)cosmo->a); + } else { + star_age_end_of_step = (float)e->time - sp->birth_time; + } + + /* Has this star been around for a while ? */ + if (star_age_end_of_step > 0.) { + + /* Age of the star at the start of the step */ + const double star_age_beg_of_step = + max(star_age_end_of_step - dt, 0.); + + /* Compute the stellar evolution */ + feedback_evolve_spart(sp, feedback_props, cosmo, us, + star_age_beg_of_step, dt); + } else { + + /* Reset the feedback fields of the star particle */ + feedback_reset_feedback(sp, feedback_props); + } + } else { + + /* Reset the feedback fields of the star particle */ + feedback_reset_feedback(sp, feedback_props); + } + } + + /* We now need to treat the particles whose smoothing length had not + * converged again */ + + /* Re-set the counter for the next loop (potentially). 
*/ + scount = redo; + if (scount > 0) { + + /* Climb up the cell hierarchy. */ + for (struct cell *finger = c; finger != NULL; finger = finger->parent) { + + /* Run through this cell's density interactions. */ + for (struct link *l = finger->stars.density; l != NULL; l = l->next) { + +#ifdef SWIFT_DEBUG_CHECKS + if (l->t->ti_run < r->e->ti_current) + error("Density task should have been run."); +#endif + + /* Self-interaction? */ + if (l->t->type == task_type_self) + runner_doself_subset_branch_stars_density(r, finger, sparts, sid, + scount); + + /* Otherwise, pair interaction? */ + else if (l->t->type == task_type_pair) { + + /* Left or right? */ + if (l->t->ci == finger) + runner_dopair_subset_branch_stars_density( + r, finger, sparts, sid, scount, l->t->cj); + else + runner_dopair_subset_branch_stars_density( + r, finger, sparts, sid, scount, l->t->ci); + } + + /* Otherwise, sub-self interaction? */ + else if (l->t->type == task_type_sub_self) + runner_dosub_subset_stars_density(r, finger, sparts, sid, scount, + NULL, 1); + + /* Otherwise, sub-pair interaction? */ + else if (l->t->type == task_type_sub_pair) { + + /* Left or right? */ + if (l->t->ci == finger) + runner_dosub_subset_stars_density(r, finger, sparts, sid, + scount, l->t->cj, 1); + else + runner_dosub_subset_stars_density(r, finger, sparts, sid, + scount, l->t->ci, 1); + } + } + } + } + } + + if (scount) { + error("Smoothing length failed to converge on %i particles.", scount); + } + + /* Be clean */ + free(left); + free(right); + free(sid); + free(h_0); + } + + /* Update h_max */ + c->stars.h_max = h_max; + + /* The ghost may not always be at the top level. 
+ * Therefore we need to update h_max between the super- and top-levels */ + if (c->stars.ghost) { + for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { + atomic_max_d(&tmp->stars.h_max, h_max); + } + } + + if (timer) TIMER_TOC(timer_do_stars_ghost); +} + +/** + * @brief Intermediate task after the density to check that the smoothing + * lengths are correct. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_black_holes_density_ghost(struct runner *r, struct cell *c, + int timer) { + + struct bpart *restrict bparts = c->black_holes.parts; + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const float black_holes_h_max = e->hydro_properties->h_max; + const float black_holes_h_min = e->hydro_properties->h_min; + const float eps = e->black_holes_properties->h_tolerance; + const float black_holes_eta_dim = + pow_dimension(e->black_holes_properties->eta_neighbours); + const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations; + int redo = 0, bcount = 0; + + /* Running value of the maximal smoothing length */ + double h_max = c->black_holes.h_max; + + TIMER_TIC; + + /* Anything to do here? */ + if (c->black_holes.count == 0) return; + if (!cell_is_active_black_holes(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + runner_do_black_holes_density_ghost(r, c->progeny[k], 0); + + /* Update h_max */ + h_max = max(h_max, c->progeny[k]->black_holes.h_max); + } + } + } else { + + /* Init the list of active particles that have to be updated. 
*/ + int *sid = NULL; + float *h_0 = NULL; + float *left = NULL; + float *right = NULL; + if ((sid = (int *)malloc(sizeof(int) * c->black_holes.count)) == NULL) + error("Can't allocate memory for sid."); + if ((h_0 = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL) + error("Can't allocate memory for h_0."); + if ((left = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL) + error("Can't allocate memory for left."); + if ((right = (float *)malloc(sizeof(float) * c->black_holes.count)) == NULL) + error("Can't allocate memory for right."); + for (int k = 0; k < c->black_holes.count; k++) + if (bpart_is_active(&bparts[k], e)) { + sid[bcount] = k; + h_0[bcount] = bparts[k].h; + left[bcount] = 0.f; + right[bcount] = black_holes_h_max; + ++bcount; + } + + /* While there are particles that need to be updated... */ + for (int num_reruns = 0; bcount > 0 && num_reruns < max_smoothing_iter; + num_reruns++) { + + /* Reset the redo-count. */ + redo = 0; + + /* Loop over the remaining active parts in this cell. */ + for (int i = 0; i < bcount; i++) { + + /* Get a direct pointer on the part. */ + struct bpart *bp = &bparts[sid[i]]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Is this part within the timestep? 
*/ + if (!bpart_is_active(bp, e)) + error("Ghost applied to inactive particle"); +#endif + + /* Get some useful values */ + const float h_init = h_0[i]; + const float h_old = bp->h; + const float h_old_dim = pow_dimension(h_old); + const float h_old_dim_minus_one = pow_dimension_minus_one(h_old); + + float h_new; + int has_no_neighbours = 0; + + if (bp->density.wcount == 0.f) { /* No neighbours case */ + + /* Flag that there were no neighbours */ + has_no_neighbours = 1; + + /* Double h and try again */ + h_new = 2.f * h_old; + + } else { + + /* Finish the density calculation */ + black_holes_end_density(bp, cosmo); + + /* Compute one step of the Newton-Raphson scheme */ + const float n_sum = bp->density.wcount * h_old_dim; + const float n_target = black_holes_eta_dim; + const float f = n_sum - n_target; + const float f_prime = + bp->density.wcount_dh * h_old_dim + + hydro_dimension * bp->density.wcount * h_old_dim_minus_one; + + /* Improve the bisection bounds */ + if (n_sum < n_target) + left[i] = max(left[i], h_old); + else if (n_sum > n_target) + right[i] = min(right[i], h_old); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check the validity of the left and right bounds */ + if (left[i] > right[i]) + error("Invalid left (%e) and right (%e)", left[i], right[i]); +#endif + + /* Skip if h is already h_max and we don't have enough neighbours + */ + /* Same if we are below h_min */ + if (((bp->h >= black_holes_h_max) && (f < 0.f)) || + ((bp->h <= black_holes_h_min) && (f > 0.f))) { + + black_holes_reset_feedback(bp); + + /* Ok, we are done with this particle */ + continue; + } + + /* Normal case: Use Newton-Raphson to get a better value of h */ + + /* Avoid floating point exception from f_prime = 0 */ + h_new = h_old - f / (f_prime + FLT_MIN); + + /* Be verbose about the particles that struggle to converge */ + if (num_reruns > max_smoothing_iter - 10) { + + message( + "Smoothing length convergence problem: iter=%d p->id=%lld " + "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f 
f_prime=%f " + "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e", + num_reruns, bp->id, h_init, h_old, h_new, f, f_prime, n_sum, + n_target, left[i], right[i]); + } + + /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. */ + h_new = min(h_new, 2.f * h_old); + h_new = max(h_new, 0.5f * h_old); + + /* Verify that we are actually progressing towards the answer */ + h_new = max(h_new, left[i]); + h_new = min(h_new, right[i]); + } + + /* Check whether the particle has an inappropriate smoothing length + */ + if (fabsf(h_new - h_old) > eps * h_old) { + + /* Ok, correct then */ + + /* Case where we have been oscillating around the solution */ + if ((h_new == left[i] && h_old == right[i]) || + (h_old == left[i] && h_new == right[i])) { + + /* Bisect the remaining interval */ + bp->h = pow_inv_dimension( + 0.5f * (pow_dimension(left[i]) + pow_dimension(right[i]))); + + } else { + + /* Normal case */ + bp->h = h_new; + } + + /* If below the absolute maximum, try again */ + if (bp->h < black_holes_h_max && bp->h > black_holes_h_min) { + + /* Flag for another round of fun */ + sid[redo] = sid[i]; + h_0[redo] = h_0[i]; + left[redo] = left[i]; + right[redo] = right[i]; + redo += 1; + + /* Re-initialise everything */ + black_holes_init_bpart(bp); + + /* Off we go ! */ + continue; + + } else if (bp->h <= black_holes_h_min) { + + /* Ok, this particle is a lost cause... */ + bp->h = black_holes_h_min; + + } else if (bp->h >= black_holes_h_max) { + + /* Ok, this particle is a lost cause... 
*/ + bp->h = black_holes_h_max; + + /* Do some damage control if no neighbours at all were found */ + if (has_no_neighbours) { + black_holes_bpart_has_no_neighbours(bp, cosmo); + } + + } else { + error( + "Fundamental problem with the smoothing length iteration " + "logic."); + } + } + + /* We now have a particle whose smoothing length has converged */ + + black_holes_reset_feedback(bp); + + /* Check if h_max has increased */ + h_max = max(h_max, bp->h); + } + + /* We now need to treat the particles whose smoothing length had not + * converged again */ + + /* Re-set the counter for the next loop (potentially). */ + bcount = redo; + if (bcount > 0) { + + /* Climb up the cell hierarchy. */ + for (struct cell *finger = c; finger != NULL; finger = finger->parent) { + + /* Run through this cell's density interactions. */ + for (struct link *l = finger->black_holes.density; l != NULL; + l = l->next) { + +#ifdef SWIFT_DEBUG_CHECKS + if (l->t->ti_run < r->e->ti_current) + error("Density task should have been run."); +#endif + + /* Self-interaction? */ + if (l->t->type == task_type_self) + runner_doself_subset_branch_bh_density(r, finger, bparts, sid, + bcount); + + /* Otherwise, pair interaction? */ + else if (l->t->type == task_type_pair) { + + /* Left or right? */ + if (l->t->ci == finger) + runner_dopair_subset_branch_bh_density(r, finger, bparts, sid, + bcount, l->t->cj); + else + runner_dopair_subset_branch_bh_density(r, finger, bparts, sid, + bcount, l->t->ci); + } + + /* Otherwise, sub-self interaction? */ + else if (l->t->type == task_type_sub_self) + runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount, + NULL, 1); + + /* Otherwise, sub-pair interaction? */ + else if (l->t->type == task_type_sub_pair) { + + /* Left or right? 
*/ + if (l->t->ci == finger) + runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount, + l->t->cj, 1); + else + runner_dosub_subset_bh_density(r, finger, bparts, sid, bcount, + l->t->ci, 1); + } + } + } + } + } + + if (bcount) { + error("Smoothing length failed to converge on %i particles.", bcount); + } + + /* Be clean */ + free(left); + free(right); + free(sid); + free(h_0); + } + + /* Update h_max */ + c->black_holes.h_max = h_max; + + /* The ghost may not always be at the top level. + * Therefore we need to update h_max between the super- and top-levels */ + if (c->black_holes.density_ghost) { + for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { + atomic_max_d(&tmp->black_holes.h_max, h_max); + } + } + + if (timer) TIMER_TOC(timer_do_black_holes_ghost); +} + +/** + * @brief Intermediate task after the BHs have done their swallowing step. + * This is used to update the BH quantities if necessary. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_black_holes_swallow_ghost(struct runner *r, struct cell *c, + int timer) { + + struct bpart *restrict bparts = c->black_holes.parts; + const int count = c->black_holes.count; + const struct engine *e = r->e; + const int with_cosmology = e->policy & engine_policy_cosmology; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_hydro(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + runner_do_black_holes_swallow_ghost(r, c->progeny[k], 0); + } else { + + /* Loop over the parts in this cell. */ + for (int i = 0; i < count; i++) { + + /* Get a direct pointer on the part. 
*/ + struct bpart *bp = &bparts[i]; + + if (bpart_is_active(bp, e)) { + + /* Compute the final operations for repositioning of this BH */ + black_holes_end_reposition(bp, e->black_holes_properties, + e->physical_constants, e->cosmology); + + /* Get particle time-step */ + double dt; + if (with_cosmology) { + const integertime_t ti_step = get_integer_timestep(bp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(e->ti_current - 1, bp->time_bin); + + dt = cosmology_get_delta_time(e->cosmology, ti_begin, + ti_begin + ti_step); + } else { + dt = get_timestep(bp->time_bin, e->time_base); + } + + /* Compute variables required for the feedback loop */ + black_holes_prepare_feedback(bp, e->black_holes_properties, + e->physical_constants, e->cosmology, dt); + } + } + } + + if (timer) TIMER_TOC(timer_do_black_holes_ghost); +} + +/** + * @brief Intermediate task after the gradient loop that does final operations + * on the gradient quantities and optionally slope limits the gradients + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer) { + +#ifdef EXTRA_HYDRO_LOOP + + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + const int count = c->hydro.count; + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const int with_cosmology = (e->policy & engine_policy_cosmology); + const double time_base = e->time_base; + const struct cosmology *cosmo = e->cosmology; + const struct hydro_props *hydro_props = e->hydro_properties; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_hydro(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_extra_ghost(r, c->progeny[k], 0); + } else { + + /* Loop over the parts in this cell. 
*/ + for (int i = 0; i < count; i++) { + + /* Get a direct pointer on the part. */ + struct part *restrict p = &parts[i]; + struct xpart *restrict xp = &xparts[i]; + + if (part_is_active(p, e)) { + + /* Finish the gradient calculation */ + hydro_end_gradient(p); + + /* As of here, particle force variables will be set. */ + + /* Calculate the time-step for passing to hydro_prepare_force. + * This is the physical time between the start and end of the time-step + * without any scale-factor powers. */ + double dt_alpha; + + if (with_cosmology) { + const integertime_t ti_step = get_integer_timestep(p->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current - 1, p->time_bin); + + dt_alpha = + cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); + } else { + dt_alpha = get_timestep(p->time_bin, time_base); + } + + /* Compute variables required for the force loop */ + hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha); + + /* The particle force values are now set. Do _NOT_ + try to read any particle density variables! */ + + /* Prepare the particle for the force loop over neighbours */ + hydro_reset_acceleration(p); + } + } + } + + if (timer) TIMER_TOC(timer_do_extra_ghost); + +#else + error("SWIFT was not compiled with the extra hydro loop activated."); +#endif +} + +/** + * @brief Intermediate task after the density to check that the smoothing + * lengths are correct. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? 
+ */ +void runner_do_ghost(struct runner *r, struct cell *c, int timer) { + + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + const struct engine *e = r->e; + const struct space *s = e->s; + const struct hydro_space *hs = &s->hs; + const struct cosmology *cosmo = e->cosmology; + const struct chemistry_global_data *chemistry = e->chemistry; + + const int with_cosmology = (e->policy & engine_policy_cosmology); + + const float hydro_h_max = e->hydro_properties->h_max; + const float hydro_h_min = e->hydro_properties->h_min; + const float eps = e->hydro_properties->h_tolerance; + const float hydro_eta_dim = + pow_dimension(e->hydro_properties->eta_neighbours); + const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations; + int redo = 0, count = 0; /* count: particles in the current pass, redo: particles needing another pass */ + + /* Running value of the maximal smoothing length */ + double h_max = c->hydro.h_max; + + TIMER_TIC; + + /* Anything to do here? */ + if (c->hydro.count == 0) return; + if (!cell_is_active_hydro(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + runner_do_ghost(r, c->progeny[k], 0); + + /* Update h_max with the value the progeny has just stored */ + h_max = max(h_max, c->progeny[k]->hydro.h_max); + } + } + } else { + + /* Init the list of active particles that have to be updated and their + * current smoothing lengths. left and right hold the lower and upper + * bounds used to bracket each smoothing length. 
*/ + int *pid = NULL; + float *h_0 = NULL; + float *left = NULL; + float *right = NULL; + if ((pid = (int *)malloc(sizeof(int) * c->hydro.count)) == NULL) + error("Can't allocate memory for pid."); + if ((h_0 = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL) + error("Can't allocate memory for h_0."); + if ((left = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL) + error("Can't allocate memory for left."); + if ((right = (float *)malloc(sizeof(float) * c->hydro.count)) == NULL) + error("Can't allocate memory for right."); + for (int k = 0; k < c->hydro.count; k++) + if (part_is_active(&parts[k], e)) { + pid[count] = k; + h_0[count] = parts[k].h; + left[count] = 0.f; + right[count] = hydro_h_max; + ++count; + } + + /* While there are particles that need to be updated (and at most + * max_smoothing_iter passes)... */ + for (int num_reruns = 0; count > 0 && num_reruns < max_smoothing_iter; + num_reruns++) { + + /* Reset the redo-count. */ + redo = 0; + + /* Loop over the remaining active parts in this cell. */ + for (int i = 0; i < count; i++) { + + /* Get a direct pointer on the part. */ + struct part *p = &parts[pid[i]]; + struct xpart *xp = &xparts[pid[i]]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Is this part within the timestep? 
*/ + if (!part_is_active(p, e)) error("Ghost applied to inactive particle"); +#endif + + /* Get some useful values */ + const float h_init = h_0[i]; + const float h_old = p->h; + const float h_old_dim = pow_dimension(h_old); + const float h_old_dim_minus_one = pow_dimension_minus_one(h_old); + + float h_new; + int has_no_neighbours = 0; + + if (p->density.wcount == 0.f) { /* No neighbours case */ + + /* Flag that there were no neighbours */ + has_no_neighbours = 1; + + /* Double h and try again */ + h_new = 2.f * h_old; + + } else { + + /* Finish the density calculation */ + hydro_end_density(p, cosmo); + chemistry_end_density(p, chemistry, cosmo); + pressure_floor_end_density(p, cosmo); + + /* Compute one step of the Newton-Raphson scheme */ + const float n_sum = p->density.wcount * h_old_dim; + const float n_target = hydro_eta_dim; + const float f = n_sum - n_target; + const float f_prime = + p->density.wcount_dh * h_old_dim + + hydro_dimension * p->density.wcount * h_old_dim_minus_one; + + /* Improve the bisection bounds */ + if (n_sum < n_target) + left[i] = max(left[i], h_old); + else if (n_sum > n_target) + right[i] = min(right[i], h_old); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check the validity of the left and right bounds */ + if (left[i] > right[i]) + error("Invalid left (%e) and right (%e)", left[i], right[i]); +#endif + + /* Skip if h is already h_max and we don't have enough neighbours */ + /* Same if we are below h_min */ + if (((p->h >= hydro_h_max) && (f < 0.f)) || + ((p->h <= hydro_h_min) && (f > 0.f))) { + + /* We have a particle whose smoothing length is already set (wants + * to be larger but has already hit the maximum OR wants to be + * smaller but has already reached the minimum). So, just tidy up + * as if the smoothing length had converged correctly */ + +#ifdef EXTRA_HYDRO_LOOP + + /* As of here, particle gradient variables will be set. */ + /* The force variables are set in the extra ghost. 
*/ + + /* Compute variables required for the gradient loop */ + hydro_prepare_gradient(p, xp, cosmo); + + /* The particle gradient values are now set. Do _NOT_ + try to read any particle density variables! */ + + /* Prepare the particle for the gradient loop over neighbours + */ + hydro_reset_gradient(p); + +#else + const struct hydro_props *hydro_props = e->hydro_properties; + + /* Calculate the time-step for passing to hydro_prepare_force, used + * for the evolution of alpha factors (i.e. those involved in the + * artificial viscosity and thermal conduction terms) */ + const double time_base = e->time_base; + const integertime_t ti_current = e->ti_current; + double dt_alpha; + + if (with_cosmology) { + const integertime_t ti_step = get_integer_timestep(p->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current - 1, p->time_bin); + + dt_alpha = + cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); + } else { + dt_alpha = get_timestep(p->time_bin, time_base); + } + + /* As of here, particle force variables will be set. */ + + /* Compute variables required for the force loop */ + hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha); + + /* The particle force values are now set. Do _NOT_ + try to read any particle density variables! 
*/ + + /* Prepare the particle for the force loop over neighbours */ + hydro_reset_acceleration(p); + +#endif /* EXTRA_HYDRO_LOOP */ + + /* Ok, we are done with this particle (it is not put on the redo + * list) */ + continue; + } + + /* Normal case: Use Newton-Raphson to get a better value of h */ + + /* Avoid floating point exception from f_prime = 0 */ + h_new = h_old - f / (f_prime + FLT_MIN); + + /* Be verbose about the particles that struggle to converge, i.e. + * those still iterating in the last few allowed passes */ + if (num_reruns > max_smoothing_iter - 10) { + + message( + "Smoothing length convergence problem: iter=%d p->id=%lld " + "h_init=%12.8e h_old=%12.8e h_new=%12.8e f=%f f_prime=%f " + "n_sum=%12.8e n_target=%12.8e left=%12.8e right=%12.8e", + num_reruns, p->id, h_init, h_old, h_new, f, f_prime, n_sum, + n_target, left[i], right[i]); + } + +#ifdef SWIFT_DEBUG_CHECKS + if ((f > 0.f && h_new > h_old) || (f < 0.f && h_new < h_old)) + error( + "Smoothing length correction not going in the right direction"); +#endif + + /* Safety check: truncate to the range [ h_old/2 , 2h_old ]. 
*/ + h_new = min(h_new, 2.f * h_old); + h_new = max(h_new, 0.5f * h_old); + + /* Verify that we are actually progressing towards the answer by + * clamping h_new to the current bisection bounds */ + h_new = max(h_new, left[i]); + h_new = min(h_new, right[i]); + } + + /* Check whether the particle has an inappropriate smoothing length, + * i.e. whether h changed by more than the relative tolerance eps + */ + if (fabsf(h_new - h_old) > eps * h_old) { + + /* Ok, correct then */ + + /* Case where we have been oscillating around the solution */ + if ((h_new == left[i] && h_old == right[i]) || + (h_old == left[i] && h_new == right[i])) { + + /* Bisect the remaining interval, averaging pow_dimension() of the + * two bounds rather than the bounds themselves */ + p->h = pow_inv_dimension( + 0.5f * (pow_dimension(left[i]) + pow_dimension(right[i]))); + + } else { + + /* Normal case */ + p->h = h_new; + } + + /* If within the allowed range, try again */ + if (p->h < hydro_h_max && p->h > hydro_h_min) { + + /* Flag for another round of fun: compact the data of this particle + * towards the front of the lists */ + pid[redo] = pid[i]; + h_0[redo] = h_0[i]; + left[redo] = left[i]; + right[redo] = right[i]; + redo += 1; + + /* Re-initialise everything */ + hydro_init_part(p, hs); + chemistry_init_part(p, chemistry); + pressure_floor_init_part(p, xp); + tracers_after_init(p, xp, e->internal_units, e->physical_constants, + with_cosmology, e->cosmology, + e->hydro_properties, e->cooling_func, e->time); + + /* Off we go ! */ + continue; + + } else if (p->h <= hydro_h_min) { + + /* Ok, this particle is a lost cause... */ + p->h = hydro_h_min; + + } else if (p->h >= hydro_h_max) { + + /* Ok, this particle is a lost cause... 
*/ + p->h = hydro_h_max; + + /* Do some damage control if no neighbours at all were found */ + if (has_no_neighbours) { + hydro_part_has_no_neighbours(p, xp, cosmo); + chemistry_part_has_no_neighbours(p, xp, chemistry, cosmo); + pressure_floor_part_has_no_neighbours(p, xp, cosmo); + } + + } else { + error( + "Fundamental problem with the smoothing length iteration " + "logic."); + } + } + + /* We now have a particle whose smoothing length has converged (or has + * been clamped to hydro_h_min / hydro_h_max) */ + + /* Check if h_max is increased */ + h_max = max(h_max, p->h); + +#ifdef EXTRA_HYDRO_LOOP + + /* As of here, particle gradient variables will be set. */ + /* The force variables are set in the extra ghost. */ + + /* Compute variables required for the gradient loop */ + hydro_prepare_gradient(p, xp, cosmo); + + /* The particle gradient values are now set. Do _NOT_ + try to read any particle density variables! */ + + /* Prepare the particle for the gradient loop over neighbours */ + hydro_reset_gradient(p); + +#else + const struct hydro_props *hydro_props = e->hydro_properties; + + /* Calculate the time-step for passing to hydro_prepare_force, used + * for the evolution of alpha factors (i.e. those involved in the + * artificial viscosity and thermal conduction terms) */ + const double time_base = e->time_base; + const integertime_t ti_current = e->ti_current; + double dt_alpha; + + if (with_cosmology) { + const integertime_t ti_step = get_integer_timestep(p->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current - 1, p->time_bin); + + dt_alpha = + cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); + } else { + dt_alpha = get_timestep(p->time_bin, time_base); + } + + /* As of here, particle force variables will be set. */ + + /* Compute variables required for the force loop */ + hydro_prepare_force(p, xp, cosmo, hydro_props, dt_alpha); + + /* The particle force values are now set. 
Do _NOT_ + try to read any particle density variables! 
*/ + + /* Prepare the particle for the force loop over neighbours */ + hydro_reset_acceleration(p); + +#endif /* EXTRA_HYDRO_LOOP */ + } + + /* We now need to treat the particles whose smoothing length had not + * converged again: the density interactions are re-run on that subset + * (pid, count) before the next pass */ + + /* Re-set the counter for the next loop (potentially). */ + count = redo; + if (count > 0) { + + /* Climb up the cell hierarchy. */ + for (struct cell *finger = c; finger != NULL; finger = finger->parent) { + + /* Run through this cell's density interactions. */ + for (struct link *l = finger->hydro.density; l != NULL; l = l->next) { + +#ifdef SWIFT_DEBUG_CHECKS + if (l->t->ti_run < r->e->ti_current) + error("Density task should have been run."); +#endif + + /* Self-interaction? */ + if (l->t->type == task_type_self) + runner_doself_subset_branch_density(r, finger, parts, pid, count); + + /* Otherwise, pair interaction? */ + else if (l->t->type == task_type_pair) { + + /* Left or right? Pass the other cell of the pair. */ + if (l->t->ci == finger) + runner_dopair_subset_branch_density(r, finger, parts, pid, + count, l->t->cj); + else + runner_dopair_subset_branch_density(r, finger, parts, pid, + count, l->t->ci); + } + + /* Otherwise, sub-self interaction? */ + else if (l->t->type == task_type_sub_self) + runner_dosub_subset_density(r, finger, parts, pid, count, NULL, + 1); + + /* Otherwise, sub-pair interaction? */ + else if (l->t->type == task_type_sub_pair) { + + /* Left or right? Pass the other cell of the pair. 
*/ + if (l->t->ci == finger) + runner_dosub_subset_density(r, finger, parts, pid, count, + l->t->cj, 1); + else + runner_dosub_subset_density(r, finger, parts, pid, count, + l->t->ci, 1); + } + } + } + } + } + + /* Particles still listed here used up all max_smoothing_iter passes */ if (count) { + error("Smoothing length failed to converge on %i particles.", count); + } + + /* Be clean */ + free(left); + free(right); + free(pid); + free(h_0); + } + + /* Update h_max */ + c->hydro.h_max = h_max; + + /* The ghost may not always be at the top level. 
+ * Therefore we need to update h_max between the super- and top-levels */ + if (c->hydro.ghost) { + for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { + atomic_max_d(&tmp->hydro.h_max, h_max); + } + } + + if (timer) TIMER_TOC(timer_do_ghost); +} diff --git a/src/runner_main.c b/src/runner_main.c new file mode 100644 index 0000000000000000000000000000000000000000..a674b64ae671bf33df0b5ba9eaa951097d738ba9 --- /dev/null +++ b/src/runner_main.c @@ -0,0 +1,495 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + +/* This object's header. */ +#include "runner.h" + +/* Local headers. */ +#include "engine.h" +#include "scheduler.h" +#include "space_getsid.h" +#include "timers.h" + +/* Import the gravity loop functions. */ +#include "runner_doiact_grav.h" + +/* Import the density loop functions. 
*/ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP + +/* Import the gradient loop functions (if required). */ +#ifdef EXTRA_HYDRO_LOOP +#define FUNCTION gradient +#define FUNCTION_TASK_LOOP TASK_LOOP_GRADIENT +#include "runner_doiact_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP +#endif + +/* Import the force loop functions. */ +#define FUNCTION force +#define FUNCTION_TASK_LOOP TASK_LOOP_FORCE +#include "runner_doiact_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP + +/* Import the limiter loop functions. */ +#define FUNCTION limiter +#define FUNCTION_TASK_LOOP TASK_LOOP_LIMITER +#include "runner_doiact_hydro.h" +#undef FUNCTION +#undef FUNCTION_TASK_LOOP + +/* Import the stars density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_stars.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/* Import the stars feedback loop functions. */ +#define FUNCTION feedback +#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK +#include "runner_doiact_stars.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/* Import the black hole density loop functions. */ +#define FUNCTION density +#define FUNCTION_TASK_LOOP TASK_LOOP_DENSITY +#include "runner_doiact_black_holes.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/* Import the black hole swallow loop functions. */ +#define FUNCTION swallow +#define FUNCTION_TASK_LOOP TASK_LOOP_SWALLOW +#include "runner_doiact_black_holes.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/* Import the black hole feedback loop functions. */ +#define FUNCTION feedback +#define FUNCTION_TASK_LOOP TASK_LOOP_FEEDBACK +#include "runner_doiact_black_holes.h" +#undef FUNCTION_TASK_LOOP +#undef FUNCTION + +/** + * @brief The #runner main thread routine. 
+ * + * Repeatedly waits at the engine barrier and leaves once the engine flags + * engine_step_prop_done. Otherwise it fetches tasks from the scheduler, + * dispatches each one on its type and subtype, and hands it back through + * scheduler_done(), which may directly return the next task to run. + * + * @param data A pointer to this thread's data. + * + * @return Always NULL. 
+ */ +void *runner_main(void *data) { + + struct runner *r = (struct runner *)data; + struct engine *e = r->e; + struct scheduler *sched = &e->sched; + /* Per-thread seed: the runner id, registered under the scheduler key. */ unsigned int seed = r->id; + pthread_setspecific(sched->local_seed_pointer, &seed); + /* Main loop: each iteration waits at the barrier, then drains the tasks. */ + while (1) { + + /* Wait at the barrier. */ + engine_barrier(e); + + /* Can we go home yet? */ + if (e->step_props & engine_step_prop_done) break; + + /* Re-set the pointer to the previous task, as there is none. */ + struct task *t = NULL; + struct task *prev = NULL; + + /* Loop while there are tasks... */ + while (1) { + + /* If there's no old task, try to get a new one. */ + if (t == NULL) { + + /* Get the task. */ + TIMER_TIC + t = scheduler_gettask(sched, r->qid, prev); + TIMER_TOC(timer_gettask); + + /* Did I get anything? */ + if (t == NULL) break; + } + + /* Get the cells. */ + struct cell *ci = t->ci; + struct cell *cj = t->cj; + +#ifdef SWIFT_DEBUG_TASKS + /* Mark the thread we run on */ + t->rid = r->cpuid; + + /* And recover the pair direction (space_getsid is given copies of the + * cell pointers, so ci and cj themselves are left untouched) */ + if (t->type == task_type_pair || t->type == task_type_sub_pair) { + struct cell *ci_temp = ci; + struct cell *cj_temp = cj; + double shift[3]; + t->sid = space_getsid(e->s, &ci_temp, &cj_temp, shift); + } else { + t->sid = -1; + } +#endif + +#ifdef SWIFT_DEBUG_CHECKS + /* Record the time at which this task is run (for debugging checks) */ + t->ti_run = e->ti_current; + /* Store the task that will be running (for debugging only) */ + r->t = t; +#endif + + /* Different types of tasks... 
*/ + switch (t->type) { + case task_type_self: + if (t->subtype == task_subtype_density) + runner_doself1_branch_density(r, ci); +#ifdef EXTRA_HYDRO_LOOP + else if (t->subtype == task_subtype_gradient) + runner_doself1_branch_gradient(r, ci); +#endif + else if (t->subtype == task_subtype_force) + runner_doself2_branch_force(r, ci); + else if (t->subtype == task_subtype_limiter) + runner_doself2_branch_limiter(r, ci); + else if (t->subtype == task_subtype_grav) + runner_doself_recursive_grav(r, ci, 1); + else if (t->subtype == task_subtype_external_grav) + runner_do_grav_external(r, ci, 1); + else if (t->subtype == task_subtype_stars_density) + runner_doself_branch_stars_density(r, ci); + else if (t->subtype == task_subtype_stars_feedback) + runner_doself_branch_stars_feedback(r, ci); + else if (t->subtype == task_subtype_bh_density) + runner_doself_branch_bh_density(r, ci); + else if (t->subtype == task_subtype_bh_swallow) + runner_doself_branch_bh_swallow(r, ci); + else if (t->subtype == task_subtype_do_gas_swallow) + runner_do_gas_swallow_self(r, ci, 1); + else if (t->subtype == task_subtype_do_bh_swallow) + runner_do_bh_swallow_self(r, ci, 1); + else if (t->subtype == task_subtype_bh_feedback) + runner_doself_branch_bh_feedback(r, ci); + else + error("Unknown/invalid task subtype (%s).", + subtaskID_names[t->subtype]); + break; + + case task_type_pair: + if (t->subtype == task_subtype_density) + runner_dopair1_branch_density(r, ci, cj); +#ifdef EXTRA_HYDRO_LOOP + else if (t->subtype == task_subtype_gradient) + runner_dopair1_branch_gradient(r, ci, cj); +#endif + else if (t->subtype == task_subtype_force) + runner_dopair2_branch_force(r, ci, cj); + else if (t->subtype == task_subtype_limiter) + runner_dopair2_branch_limiter(r, ci, cj); + else if (t->subtype == task_subtype_grav) + runner_dopair_recursive_grav(r, ci, cj, 1); + else if (t->subtype == task_subtype_stars_density) + runner_dopair_branch_stars_density(r, ci, cj); + else if (t->subtype == 
task_subtype_stars_feedback) + runner_dopair_branch_stars_feedback(r, ci, cj); + else if (t->subtype == task_subtype_bh_density) + runner_dopair_branch_bh_density(r, ci, cj); + else if (t->subtype == task_subtype_bh_swallow) + runner_dopair_branch_bh_swallow(r, ci, cj); + else if (t->subtype == task_subtype_do_gas_swallow) + runner_do_gas_swallow_pair(r, ci, cj, 1); + else if (t->subtype == task_subtype_do_bh_swallow) + runner_do_bh_swallow_pair(r, ci, cj, 1); + else if (t->subtype == task_subtype_bh_feedback) + runner_dopair_branch_bh_feedback(r, ci, cj); + else + error("Unknown/invalid task subtype (%s/%s).", + taskID_names[t->type], subtaskID_names[t->subtype]); + break; + + case task_type_sub_self: + if (t->subtype == task_subtype_density) + runner_dosub_self1_density(r, ci, 1); +#ifdef EXTRA_HYDRO_LOOP + else if (t->subtype == task_subtype_gradient) + runner_dosub_self1_gradient(r, ci, 1); +#endif + else if (t->subtype == task_subtype_force) + runner_dosub_self2_force(r, ci, 1); + else if (t->subtype == task_subtype_limiter) + runner_dosub_self2_limiter(r, ci, 1); + else if (t->subtype == task_subtype_stars_density) + runner_dosub_self_stars_density(r, ci, 1); + else if (t->subtype == task_subtype_stars_feedback) + runner_dosub_self_stars_feedback(r, ci, 1); + else if (t->subtype == task_subtype_bh_density) + runner_dosub_self_bh_density(r, ci, 1); + else if (t->subtype == task_subtype_bh_swallow) + runner_dosub_self_bh_swallow(r, ci, 1); + else if (t->subtype == task_subtype_do_gas_swallow) + runner_do_gas_swallow_self(r, ci, 1); + else if (t->subtype == task_subtype_do_bh_swallow) + runner_do_bh_swallow_self(r, ci, 1); + else if (t->subtype == task_subtype_bh_feedback) + runner_dosub_self_bh_feedback(r, ci, 1); + else + error("Unknown/invalid task subtype (%s/%s).", + taskID_names[t->type], subtaskID_names[t->subtype]); + break; + + case task_type_sub_pair: + if (t->subtype == task_subtype_density) + runner_dosub_pair1_density(r, ci, cj, 1); +#ifdef 
EXTRA_HYDRO_LOOP + else if (t->subtype == task_subtype_gradient) + runner_dosub_pair1_gradient(r, ci, cj, 1); +#endif + else if (t->subtype == task_subtype_force) + runner_dosub_pair2_force(r, ci, cj, 1); + else if (t->subtype == task_subtype_limiter) + runner_dosub_pair2_limiter(r, ci, cj, 1); + else if (t->subtype == task_subtype_stars_density) + runner_dosub_pair_stars_density(r, ci, cj, 1); + else if (t->subtype == task_subtype_stars_feedback) + runner_dosub_pair_stars_feedback(r, ci, cj, 1); + else if (t->subtype == task_subtype_bh_density) + runner_dosub_pair_bh_density(r, ci, cj, 1); + else if (t->subtype == task_subtype_bh_swallow) + runner_dosub_pair_bh_swallow(r, ci, cj, 1); + else if (t->subtype == task_subtype_do_gas_swallow) + runner_do_gas_swallow_pair(r, ci, cj, 1); + else if (t->subtype == task_subtype_do_bh_swallow) + runner_do_bh_swallow_pair(r, ci, cj, 1); + else if (t->subtype == task_subtype_bh_feedback) + runner_dosub_pair_bh_feedback(r, ci, cj, 1); + else + error("Unknown/invalid task subtype (%s/%s).", + taskID_names[t->type], subtaskID_names[t->subtype]); + break; + + case task_type_sort: + /* Cleanup only if any of the indices went stale. */ + runner_do_hydro_sort( + r, ci, t->flags, + ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin, 1); + /* Reset the sort flags as our work here is done. */ + t->flags = 0; + break; + case task_type_stars_sort: + /* Cleanup only if any of the indices went stale. */ + runner_do_stars_sort( + r, ci, t->flags, + ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin, 1); + /* Reset the sort flags as our work here is done. 
*/ + t->flags = 0; + break; + case task_type_init_grav: + runner_do_init_grav(r, ci, 1); + break; + case task_type_ghost: + runner_do_ghost(r, ci, 1); + break; +#ifdef EXTRA_HYDRO_LOOP + case task_type_extra_ghost: + runner_do_extra_ghost(r, ci, 1); + break; +#endif + case task_type_stars_ghost: + runner_do_stars_ghost(r, ci, 1); + break; + case task_type_bh_density_ghost: + runner_do_black_holes_density_ghost(r, ci, 1); + break; + case task_type_bh_swallow_ghost3: + runner_do_black_holes_swallow_ghost(r, ci, 1); + break; + case task_type_drift_part: + runner_do_drift_part(r, ci, 1); + break; + case task_type_drift_spart: + runner_do_drift_spart(r, ci, 1); + break; + case task_type_drift_bpart: + runner_do_drift_bpart(r, ci, 1); + break; + case task_type_drift_gpart: + runner_do_drift_gpart(r, ci, 1); + break; + case task_type_kick1: + runner_do_kick1(r, ci, 1); + break; + case task_type_kick2: + runner_do_kick2(r, ci, 1); + break; + case task_type_end_hydro_force: + runner_do_end_hydro_force(r, ci, 1); + break; + case task_type_end_grav_force: + runner_do_end_grav_force(r, ci, 1); + break; + case task_type_logger: + runner_do_logger(r, ci, 1); + break; + case task_type_timestep: + runner_do_timestep(r, ci, 1); + break; + case task_type_timestep_limiter: + runner_do_limiter(r, ci, 0, 1); + break; +#ifdef WITH_MPI + case task_type_send: /* Only the subtypes listed below get their buffer released; nothing else is done for a send. */ + if (t->subtype == task_subtype_tend_part) { + free(t->buff); + } else if (t->subtype == task_subtype_tend_gpart) { + free(t->buff); + } else if (t->subtype == task_subtype_tend_spart) { + free(t->buff); + } else if (t->subtype == task_subtype_tend_bpart) { + free(t->buff); + } else if (t->subtype == task_subtype_sf_counts) { + free(t->buff); + } else if (t->subtype == task_subtype_part_swallow) { + free(t->buff); + } else if (t->subtype == task_subtype_bpart_merger) { + free(t->buff); + } + break; + case task_type_recv: + if (t->subtype == task_subtype_tend_part) { + cell_unpack_end_step_hydro(ci, (struct pcell_step_hydro 
*)t->buff); + free(t->buff); + } else if (t->subtype == task_subtype_tend_gpart) { + cell_unpack_end_step_grav(ci, (struct pcell_step_grav *)t->buff); + free(t->buff); + } else if (t->subtype == task_subtype_tend_spart) { + cell_unpack_end_step_stars(ci, (struct pcell_step_stars *)t->buff); + free(t->buff); + } else if (t->subtype == task_subtype_tend_bpart) { + cell_unpack_end_step_black_holes( + ci, (struct pcell_step_black_holes *)t->buff); + free(t->buff); + } else if (t->subtype == task_subtype_sf_counts) { + cell_unpack_sf_counts(ci, (struct pcell_sf *)t->buff); + cell_clear_stars_sort_flags(ci, /*clear_unused_flags=*/0); + free(t->buff); + } else if (t->subtype == task_subtype_xv) { + runner_do_recv_part(r, ci, 1, 1); + } else if (t->subtype == task_subtype_rho) { + runner_do_recv_part(r, ci, 0, 1); + } else if (t->subtype == task_subtype_gradient) { + runner_do_recv_part(r, ci, 0, 1); + } else if (t->subtype == task_subtype_part_swallow) { + cell_unpack_part_swallow(ci, + (struct black_holes_part_data *)t->buff); + free(t->buff); + } else if (t->subtype == task_subtype_bpart_merger) { + cell_unpack_bpart_swallow(ci, + (struct black_holes_bpart_data *)t->buff); + free(t->buff); + } else if (t->subtype == task_subtype_limiter) { + runner_do_recv_part(r, ci, 0, 1); + } else if (t->subtype == task_subtype_gpart) { + runner_do_recv_gpart(r, ci, 1); + } else if (t->subtype == task_subtype_spart) { + runner_do_recv_spart(r, ci, 1, 1); + } else if (t->subtype == task_subtype_bpart_rho) { + runner_do_recv_bpart(r, ci, 1, 1); + } else if (t->subtype == task_subtype_bpart_swallow) { + runner_do_recv_bpart(r, ci, 0, 1); + } else if (t->subtype == task_subtype_bpart_feedback) { + runner_do_recv_bpart(r, ci, 0, 1); + } else if (t->subtype == task_subtype_multipole) { + cell_unpack_multipoles(ci, (struct gravity_tensors *)t->buff); + free(t->buff); + } else { + error("Unknown/invalid task subtype (%d).", t->subtype); + } + break; +#endif + case task_type_grav_down: + 
runner_do_grav_down(r, t->ci, 1); + break; + case task_type_grav_mesh: + runner_do_grav_mesh(r, t->ci, 1); + break; + case task_type_grav_long_range: + runner_do_grav_long_range(r, t->ci, 1); + break; + case task_type_grav_mm: + runner_dopair_grav_mm_progenies(r, t->flags, t->ci, t->cj); + break; + case task_type_cooling: + runner_do_cooling(r, t->ci, 1); + break; + case task_type_star_formation: + runner_do_star_formation(r, t->ci, 1); + break; + case task_type_stars_resort: + runner_do_stars_resort(r, t->ci, 1); + break; + case task_type_fof_self: + runner_do_fof_self(r, t->ci, 1); + break; + case task_type_fof_pair: + runner_do_fof_pair(r, t->ci, t->cj, 1); + break; + default: + error("Unknown/invalid task type (%d).", t->type); + } + +/* Mark that we have run this task on these cells */ +#ifdef SWIFT_DEBUG_CHECKS + if (ci != NULL) { + ci->tasks_executed[t->type]++; + ci->subtasks_executed[t->subtype]++; + } + if (cj != NULL) { + cj->tasks_executed[t->type]++; + cj->subtasks_executed[t->subtype]++; + } + + /* This runner is not doing a task anymore */ + r->t = NULL; +#endif + + /* We're done with this task, see if we get a next one. */ + prev = t; + t = scheduler_done(sched, t); + + } /* Loop over the tasks. */ + } + + /* Be kind, rewind. */ + return NULL; +} diff --git a/src/runner_others.c b/src/runner_others.c new file mode 100644 index 0000000000000000000000000000000000000000..5ffaf7aa321f658b6e0e7e10a9cb8ad2f4a5a541 --- /dev/null +++ b/src/runner_others.c @@ -0,0 +1,660 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * 2016 John A. 
Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <float.h> +#include <limits.h> +#include <stdlib.h> + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + +/* This object's header. */ +#include "runner.h" + +/* Local headers. */ +#include "active.h" +#include "cell.h" +#include "chemistry.h" +#include "cooling.h" +#include "engine.h" +#include "error.h" +#include "gravity.h" +#include "hydro.h" +#include "logger.h" +#include "pressure_floor.h" +#include "space.h" +#include "star_formation.h" +#include "star_formation_logger.h" +#include "stars.h" +#include "timers.h" +#include "tracers.h" + +/** + * @brief Calculate gravity acceleration from external potential + * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. 
+ */ +void runner_do_grav_external(struct runner *r, struct cell *c, int timer) { + + struct gpart *restrict gparts = c->grav.parts; + const int gcount = c->grav.count; + const struct engine *e = r->e; + const struct external_potential *potential = e->external_potential; + const struct phys_const *constants = e->physical_constants; + const double time = r->e->time; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_gravity(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_grav_external(r, c->progeny[k], 0); + } else { + + /* Loop over the gparts in this cell. */ + for (int i = 0; i < gcount; i++) { + + /* Get a direct pointer on the part. */ + struct gpart *restrict gp = &gparts[i]; + + /* Is this part within the time step? */ + if (gpart_is_active(gp, e)) { + external_gravity_acceleration(time, potential, constants, gp); + } + } + } + + if (timer) TIMER_TOC(timer_dograv_external); +} + +/** + * @brief Calculate gravity accelerations from the periodic mesh + * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. + */ +void runner_do_grav_mesh(struct runner *r, struct cell *c, int timer) { + + struct gpart *restrict gparts = c->grav.parts; + const int gcount = c->grav.count; + const struct engine *e = r->e; + +#ifdef SWIFT_DEBUG_CHECKS + if (!e->s->periodic) error("Calling mesh forces in non-periodic mode."); +#endif + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active_gravity(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_grav_mesh(r, c->progeny[k], 0); + } else { + + /* Get the forces from the gravity mesh */ + pm_mesh_interpolate_forces(e->mesh, e, gparts, gcount); + } + + if (timer) TIMER_TOC(timer_dograv_mesh); +} + +/** + * @brief Calculate change in thermal state of particles induced + * by radiative cooling and heating. 
+ * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. + */ +void runner_do_cooling(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const int with_cosmology = (e->policy & engine_policy_cosmology); + const struct cooling_function_data *cooling_func = e->cooling_func; + const struct phys_const *constants = e->physical_constants; + const struct unit_system *us = e->internal_units; + const struct hydro_props *hydro_props = e->hydro_properties; + const struct entropy_floor_properties *entropy_floor_props = e->entropy_floor; + const double time_base = e->time_base; + const integertime_t ti_current = e->ti_current; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + const int count = c->hydro.count; + + TIMER_TIC; + + /* Nothing to do if the cell is not active for hydro. */ + if (!cell_is_active_hydro(c, e)) return; + + /* Split cell: recurse into the existing progeny, un-timed. */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_cooling(r, c->progeny[k], 0); + } else { + + /* Leaf cell: loop over the parts in this cell. */ + for (int i = 0; i < count; i++) { + + /* Get direct pointers to the part and its xpart. */ + struct part *restrict p = &parts[i]; + struct xpart *restrict xp = &xparts[i]; + + if (part_is_active(p, e)) { + + double dt_cool, dt_therm; + if (with_cosmology) { + const integertime_t ti_step = get_integer_timestep(p->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current - 1, p->time_bin); + + dt_cool = + cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); + dt_therm = cosmology_get_therm_kick_factor(e->cosmology, ti_begin, + ti_begin + ti_step); + + } else { + dt_cool = get_timestep(p->time_bin, time_base); + dt_therm = get_timestep(p->time_bin, time_base); + } + + /* Let's cool !
+ */ + cooling_cool_part(constants, us, cosmo, hydro_props, + entropy_floor_props, cooling_func, p, xp, dt_cool, + dt_therm); + } + } + } + + if (timer) TIMER_TOC(timer_do_cooling); +} + +/** + * @brief Run star formation on the active gas particles of a cell, recursing into split cells (r: the #runner thread, c: the #cell, timer: are we timing this ?). + */ +void runner_do_star_formation(struct runner *r, struct cell *c, int timer) { + + struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const struct star_formation *sf_props = e->star_formation; + const struct phys_const *phys_const = e->physical_constants; + const int count = c->hydro.count; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + const int with_cosmology = (e->policy & engine_policy_cosmology); + const int with_feedback = (e->policy & engine_policy_feedback); + const struct hydro_props *restrict hydro_props = e->hydro_properties; + const struct unit_system *restrict us = e->internal_units; + struct cooling_function_data *restrict cooling = e->cooling_func; + const struct entropy_floor_properties *entropy_floor = e->entropy_floor; + const double time_base = e->time_base; + const integertime_t ti_current = e->ti_current; + const int current_stars_count = c->stars.count; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != e->nodeID) + error("Running star formation task on a foreign node!"); +#endif + + /* Empty or hydro-inactive cell: only log it as inactive in the SFH. */ + if (c->hydro.count == 0 || !cell_is_active_hydro(c, e)) { + star_formation_logger_log_inactive_cell(&c->stars.sfh); + return; + } + + /* Reset the SFH logger of this cell */ + star_formation_logger_init(&c->stars.sfh); + + /* Split cell: recurse and accumulate the SFH of the progeny. */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) { + /* Load the child cell */ + struct cell *restrict cp = c->progeny[k]; + + /* Do the recursion */ + runner_do_star_formation(r, cp, 0); + + /* Update current cell using child cells */ + star_formation_logger_add(&c->stars.sfh, &cp->stars.sfh); + } + } else { + + /* Loop over the gas particles in this cell.
+ */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part and its xpart. */ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; + + /* Only work on active particles */ + if (part_is_active(p, e)) { + + /* Is this particle star forming? */ + if (star_formation_is_star_forming(p, xp, sf_props, phys_const, cosmo, + hydro_props, us, cooling, + entropy_floor)) { + + /* Time-step size for this particle */ + double dt_star; + if (with_cosmology) { + const integertime_t ti_step = get_integer_timestep(p->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current - 1, p->time_bin); + + dt_star = + cosmology_get_delta_time(cosmo, ti_begin, ti_begin + ti_step); + + } else { + dt_star = get_timestep(p->time_bin, time_base); + } + + /* Compute the SF rate of the particle */ + star_formation_compute_SFR(p, xp, sf_props, phys_const, cosmo, + dt_star); + + /* Add the SFR and SFR*dt to the SFH struct of this cell */ + star_formation_logger_log_active_part(p, xp, &c->stars.sfh, dt_star); + + /* Are we forming a star particle from this SF rate? */ + if (star_formation_should_convert_to_star(p, xp, sf_props, e, + dt_star)) { + + /* Convert the gas particle to a star particle */ + struct spart *sp = cell_convert_part_to_spart(e, c, p, xp); + + /* Did we get a star? (Or did we run out of spare ones?) */ + if (sp != NULL) { + + /* message("We formed a star id=%lld cellID=%d", sp->id, + * c->cellID); */ + + /* Copy the properties of the gas particle to the star particle */ + star_formation_copy_properties(p, xp, sp, e, sf_props, cosmo, + with_cosmology, phys_const, + hydro_props, us, cooling); + + /* Update the star formation history */ + star_formation_logger_log_new_spart(sp, &c->stars.sfh); + } + } + + } else { /* Are we not star-forming? */ + + /* Update the particle to flag it as not star-forming */ + star_formation_update_part_not_SFR(p, xp, e, sf_props, + with_cosmology); + + } /* Not Star-forming? */ + + } else { /* is active?
+ */ + + /* Inactive particle: log it in the SFH unless it is inhibited */ + if (!part_is_inhibited(p, e)) { + star_formation_logger_log_inactive_part(p, xp, &c->stars.sfh); + } + } + } /* Loop over particles */ + } + + /* If we formed any stars, the star sorts are now invalid. Flag the + * top-level cell for a re-sort (only done when feedback is switched on). */ + if (with_feedback && (c == c->top) && + (current_stars_count != c->stars.count)) { + cell_set_star_resort_flag(c); + } + + if (timer) TIMER_TOC(timer_do_star_formation); +} + +/** + * @brief End the hydro force calculation of all active particles in a cell + * by multiplying the accelerations by the relevant constants + * + * @param r The #runner thread. + * @param c The #cell. + * @param timer Are we timing this ? + */ +void runner_do_end_hydro_force(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + + TIMER_TIC; + + /* Nothing to do if the cell is not active for hydro. */ + if (!cell_is_active_hydro(c, e)) return; + + /* Split cell: recurse into the existing progeny, un-timed. */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_end_hydro_force(r, c->progeny[k], 0); + } else { + + const struct cosmology *cosmo = e->cosmology; + const int count = c->hydro.count; + struct part *restrict parts = c->hydro.parts; + + /* Loop over the gas particles in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part. */ + struct part *restrict p = &parts[k]; + + if (part_is_active(p, e)) { + + /* Finish the force loop */ + hydro_end_force(p, cosmo); + chemistry_end_force(p, cosmo); + +#ifdef SWIFT_BOUNDARY_PARTICLES + + /* Get the ID of the part */ + const long long id = p->id; + + /* Cancel hydro forces of these particles */ + if (id < SWIFT_BOUNDARY_PARTICLES) { + + /* Don't move ! */ + hydro_reset_acceleration(p); + +#if defined(GIZMO_MFV_SPH) || defined(GIZMO_MFM_SPH) + + /* Some values need to be reset in the Gizmo case.
+ */ + hydro_prepare_force(p, &c->hydro.xparts[k], cosmo, + e->hydro_properties, 0); +#endif + } +#endif + } + } + } + + if (timer) TIMER_TOC(timer_end_hydro_force); +} + +/** + * @brief End the gravity force calculation of all active particles in a cell + * by multiplying the accelerations by the relevant constants + * + * @param r The #runner thread. + * @param c The #cell. + * @param timer Are we timing this ? + */ +void runner_do_end_grav_force(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + + TIMER_TIC; + + /* Nothing to do if the cell is not active for gravity. */ + if (!cell_is_active_gravity(c, e)) return; + + /* Split cell: recurse into the existing progeny, un-timed. */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_end_grav_force(r, c->progeny[k], 0); + } else { + + const struct space *s = e->s; + const int periodic = s->periodic; + const float G_newton = e->physical_constants->const_newton_G; + + /* Potential normalisation, non-zero only for periodic self-gravity */ + float potential_normalisation = 0.; + if (periodic && (e->policy & engine_policy_self_gravity)) { + const double volume = s->dim[0] * s->dim[1] * s->dim[2]; + const double r_s = e->mesh->r_s; + potential_normalisation = 4. * M_PI * e->total_mass * r_s * r_s / volume; + } + + const int gcount = c->grav.count; + struct gpart *restrict gparts = c->grav.parts; + + /* Loop over the g-particles in this cell. */ + for (int k = 0; k < gcount; k++) { + + /* Get a handle on the gpart.
+ */ + struct gpart *restrict gp = &gparts[k]; + + if (gpart_is_active(gp, e)) { + + /* Finish the force calculation */ + gravity_end_force(gp, G_newton, potential_normalisation, periodic); + +#ifdef SWIFT_MAKE_GRAVITY_GLASS + + /* Negate the gravity forces */ + gp->a_grav[0] *= -1.f; + gp->a_grav[1] *= -1.f; + gp->a_grav[2] *= -1.f; +#endif + +#ifdef SWIFT_NO_GRAVITY_BELOW_ID + + /* Get the ID of the gpart; gas and stars keep it in their linked part/spart */ + long long id = 0; + if (gp->type == swift_type_gas) + id = e->s->parts[-gp->id_or_neg_offset].id; + else if (gp->type == swift_type_stars) + id = e->s->sparts[-gp->id_or_neg_offset].id; + else if (gp->type == swift_type_black_hole) + error("Unexisting type"); + else + id = gp->id_or_neg_offset; + + /* Cancel the gravity forces of particles with an ID below the threshold */ + if (id < SWIFT_NO_GRAVITY_BELOW_ID) { + + /* Don't move ! */ + gp->a_grav[0] = 0.f; + gp->a_grav[1] = 0.f; + gp->a_grav[2] = 0.f; + } +#endif + +#ifdef SWIFT_DEBUG_CHECKS + if ((e->policy & engine_policy_self_gravity) && + !(e->policy & engine_policy_black_holes)) { + + /* Let's add a self interaction to simplify the count */ + gp->num_interacted++; + + /* Check that this gpart has interacted with all the other + * particles (via direct or multipoles) in the box */ + if (gp->num_interacted != + e->total_nr_gparts - e->count_inhibited_gparts) { + + /* Get the ID of the gpart */ + long long my_id = 0; + if (gp->type == swift_type_gas) + my_id = e->s->parts[-gp->id_or_neg_offset].id; + else if (gp->type == swift_type_stars) + my_id = e->s->sparts[-gp->id_or_neg_offset].id; + else if (gp->type == swift_type_black_hole) + error("Unexisting type"); + else + my_id = gp->id_or_neg_offset; + + error( + "g-particle (id=%lld, type=%s) did not interact " + "gravitationally with all other gparts " + "gp->num_interacted=%lld, total_gparts=%lld (local " + "num_gparts=%zd inhibited_gparts=%lld)", + my_id, part_type_names[gp->type], gp->num_interacted, + e->total_nr_gparts, e->s->nr_gparts, e->count_inhibited_gparts); + } + } +#endif
+ } + } + } + if (timer) TIMER_TOC(timer_end_grav_force); +} + +/** + * @brief Write the required particles through the logger. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_logger(struct runner *r, struct cell *c, int timer) { + +#ifdef WITH_LOGGER + TIMER_TIC; + + const struct engine *e = r->e; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + const int count = c->hydro.count; + + /* Nothing to do if the cell is active for neither hydro nor gravity. */ + if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e)) return; + + /* Recurse? Avoid spending too much time in useless cells. */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_logger(r, c->progeny[k], 0); + } else { + + /* Loop over the parts in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part and its xpart. */ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; + + /* Does this particle need to be logged? */ + /* This is the same function as part_is_active, except for + * debugging checks */ + if (part_is_active(p, e)) { + + if (logger_should_write(&xp->logger_data, e->logger)) { + /* Write particle */ + /* Currently writing every field; this should be adapted over time */ + logger_log_part(e->logger, p, + logger_mask_data[logger_x].mask | + logger_mask_data[logger_v].mask | + logger_mask_data[logger_a].mask | + logger_mask_data[logger_u].mask | + logger_mask_data[logger_h].mask | + logger_mask_data[logger_rho].mask | + logger_mask_data[logger_consts].mask, + &xp->logger_data.last_offset); + + /* Set counter back to zero */ + xp->logger_data.steps_since_last_output = 0; + } else + /* Update counter */ + xp->logger_data.steps_since_last_output += 1; + } + } + } + + if (c->grav.count > 0) error("gparts not implemented"); + + if (c->stars.count > 0) error("sparts not implemented"); + + if (timer) TIMER_TOC(timer_logger); + +#else + error("Logger disabled,
please enable it during configuration"); +#endif +} + +/** + * @brief Recursively search for FOF groups in a single cell. + * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. + */ +void runner_do_fof_self(struct runner *r, struct cell *c, int timer) { + +#ifdef WITH_FOF + + TIMER_TIC; + + const struct engine *e = r->e; + struct space *s = e->s; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + const int periodic = s->periodic; + const struct gpart *const gparts = s->gparts; + const double search_r2 = e->fof_properties->l_x2; + + rec_fof_search_self(e->fof_properties, dim, search_r2, periodic, gparts, c); + + if (timer) TIMER_TOC(timer_fof_self); + +#else + error("SWIFT was not compiled with FOF enabled!"); +#endif +} + +/** + * @brief Recursively search for FOF groups between a pair of cells. + * + * @param r runner task + * @param ci cell i + * @param cj cell j + * @param timer 1 if the time is to be recorded. + */ +void runner_do_fof_pair(struct runner *r, struct cell *ci, struct cell *cj, + int timer) { + +#ifdef WITH_FOF + + TIMER_TIC; + + const struct engine *e = r->e; + struct space *s = e->s; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + const int periodic = s->periodic; + const struct gpart *const gparts = s->gparts; + const double search_r2 = e->fof_properties->l_x2; + + rec_fof_search_pair(e->fof_properties, dim, search_r2, periodic, gparts, ci, + cj); + + if (timer) TIMER_TOC(timer_fof_pair); +#else + error("SWIFT was not compiled with FOF enabled!"); +#endif +} diff --git a/src/runner_recv.c b/src/runner_recv.c new file mode 100644 index 0000000000000000000000000000000000000000..803e68c2106933684109e798e24952a0dbdfea6e --- /dev/null +++ b/src/runner_recv.c @@ -0,0 +1,368 @@ +/******************************************************************************* + * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + +/* This object's header. */ +#include "runner.h" + +/* Local headers. */ +#include "engine.h" +#include "timers.h" + +/** + * @brief Construct the cell properties from the received #part. + * + * @param r The runner thread. + * @param c The cell. + * @param clear_sorts Should we clear the sort flag and hence trigger a sort ? + * @param timer Are we timing this ? 
+ */ +void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts, + int timer) { +#ifdef WITH_MPI + + const struct part *restrict parts = c->hydro.parts; + const size_t nr_parts = c->hydro.count; + const integertime_t ti_current = r->e->ti_current; + + TIMER_TIC; + + integertime_t ti_hydro_end_min = max_nr_timesteps; + integertime_t ti_hydro_end_max = 0; + timebin_t time_bin_min = num_time_bins; + timebin_t time_bin_max = 0; + float h_max = 0.f; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) error("Updating a local cell!"); +#endif + + /* Clear this cell's sorted mask when requested by clear_sorts. */ + if (clear_sorts) c->hydro.sorted = 0; + + /* If this cell is a leaf, collect the particle data. */ + if (!c->split) { + + /* Collect the time-bin range and h_max of the non-inhibited parts... */ + for (size_t k = 0; k < nr_parts; k++) { + if (parts[k].time_bin == time_bin_inhibited) continue; + time_bin_min = min(time_bin_min, parts[k].time_bin); + time_bin_max = max(time_bin_max, parts[k].time_bin); + h_max = max(h_max, parts[k].h); + } + + /* Convert the time-bins into end times */ + ti_hydro_end_min = get_integer_time_end(ti_current, time_bin_min); + ti_hydro_end_max = get_integer_time_end(ti_current, time_bin_max); + } + + /* Otherwise, recurse into the non-empty progeny and collect. */ + else { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) { + runner_do_recv_part(r, c->progeny[k], clear_sorts, 0); + ti_hydro_end_min = + min(ti_hydro_end_min, c->progeny[k]->hydro.ti_end_min); + ti_hydro_end_max = + max(ti_hydro_end_max, c->progeny[k]->hydro.ti_end_max); + h_max = max(h_max, c->progeny[k]->hydro.h_max); + } + } + } + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_hydro_end_min < ti_current) + error( + "Received a cell at an incorrect time c->ti_end_min=%lld, " + "e->ti_current=%lld.", + ti_hydro_end_min, ti_current); +#endif + + /* ... and store.
+ */ + // c->hydro.ti_end_min = ti_hydro_end_min; + // c->hydro.ti_end_max = ti_hydro_end_max; + c->hydro.ti_old_part = ti_current; + c->hydro.h_max = h_max; + + if (timer) TIMER_TOC(timer_dorecv_part); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Construct the cell properties from the received #gpart. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { + +#ifdef WITH_MPI + + const struct gpart *restrict gparts = c->grav.parts; + const size_t nr_gparts = c->grav.count; + const integertime_t ti_current = r->e->ti_current; + + TIMER_TIC; + + integertime_t ti_gravity_end_min = max_nr_timesteps; + integertime_t ti_gravity_end_max = 0; + timebin_t time_bin_min = num_time_bins; + timebin_t time_bin_max = 0; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) error("Updating a local cell!"); +#endif + + /* If this cell is a leaf, collect the particle data. */ + if (!c->split) { + + /* Collect the time-bin range of the non-inhibited gparts... */ + for (size_t k = 0; k < nr_gparts; k++) { + if (gparts[k].time_bin == time_bin_inhibited) continue; + time_bin_min = min(time_bin_min, gparts[k].time_bin); + time_bin_max = max(time_bin_max, gparts[k].time_bin); + } + + /* Convert the time-bins into end times */ + ti_gravity_end_min = get_integer_time_end(ti_current, time_bin_min); + ti_gravity_end_max = get_integer_time_end(ti_current, time_bin_max); + } + + /* Otherwise, recurse and collect.
+ */ + else { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL && c->progeny[k]->grav.count > 0) { + runner_do_recv_gpart(r, c->progeny[k], 0); + ti_gravity_end_min = + min(ti_gravity_end_min, c->progeny[k]->grav.ti_end_min); + ti_gravity_end_max = + max(ti_gravity_end_max, c->progeny[k]->grav.ti_end_max); + } + } + } + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_gravity_end_min < ti_current) + error( + "Received a cell at an incorrect time c->ti_end_min=%lld, " + "e->ti_current=%lld.", + ti_gravity_end_min, ti_current); +#endif + + /* ... and store. */ + // c->grav.ti_end_min = ti_gravity_end_min; + // c->grav.ti_end_max = ti_gravity_end_max; + c->grav.ti_old_part = ti_current; + + if (timer) TIMER_TOC(timer_dorecv_gpart); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Construct the cell properties from the received #spart. + * + * @param r The runner thread. + * @param c The cell. + * @param clear_sorts Should we clear the sort flag and hence trigger a sort ? + * @param timer Are we timing this ? + */ +void runner_do_recv_spart(struct runner *r, struct cell *c, int clear_sorts, + int timer) { + +#ifdef WITH_MPI + + struct spart *restrict sparts = c->stars.parts; + const size_t nr_sparts = c->stars.count; + const integertime_t ti_current = r->e->ti_current; + + TIMER_TIC; + + integertime_t ti_stars_end_min = max_nr_timesteps; + integertime_t ti_stars_end_max = 0; + timebin_t time_bin_min = num_time_bins; + timebin_t time_bin_max = 0; + float h_max = 0.f; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) error("Updating a local cell!"); +#endif + + /* Clear the stars sorted mask of this cell when requested by clear_sorts. */ + if (clear_sorts) c->stars.sorted = 0; + + /* If this cell is a leaf, collect the particle data. */ + if (!c->split) { + + /* Collect everything...
+ */ + for (size_t k = 0; k < nr_sparts; k++) { +#ifdef DEBUG_INTERACTIONS_STARS + sparts[k].num_ngb_force = 0; +#endif + if (sparts[k].time_bin == time_bin_inhibited) continue; + time_bin_min = min(time_bin_min, sparts[k].time_bin); + time_bin_max = max(time_bin_max, sparts[k].time_bin); + h_max = max(h_max, sparts[k].h); + } + + /* Convert the time-bins into end times */ + ti_stars_end_min = get_integer_time_end(ti_current, time_bin_min); + ti_stars_end_max = get_integer_time_end(ti_current, time_bin_max); + } + + /* Otherwise, recurse into the non-empty progeny and collect. */ + else { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) { + runner_do_recv_spart(r, c->progeny[k], clear_sorts, 0); + ti_stars_end_min = + min(ti_stars_end_min, c->progeny[k]->stars.ti_end_min); + ti_stars_end_max = + max(ti_stars_end_max, c->progeny[k]->stars.ti_end_max); + h_max = max(h_max, c->progeny[k]->stars.h_max); + } + } + } + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_stars_end_min < ti_current && + !(r->e->policy & engine_policy_star_formation)) + error( + "Received a cell at an incorrect time c->ti_end_min=%lld, " + "e->ti_current=%lld.", + ti_stars_end_min, ti_current); +#endif + + /* ... and store. */ + // c->grav.ti_end_min = ti_gravity_end_min; + // c->grav.ti_end_max = ti_gravity_end_max; + c->stars.ti_old_part = ti_current; + c->stars.h_max = h_max; + + if (timer) TIMER_TOC(timer_dorecv_spart); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Construct the cell properties from the received #bpart. + * + * Note that we do not need to clear the sorts since we do not sort + * the black holes. + * + * @param r The runner thread. + * @param c The cell. + * @param clear_sorts Only forwarded to the recursive calls; no sort flag is cleared here. + * @param timer Are we timing this ?
+ */ +void runner_do_recv_bpart(struct runner *r, struct cell *c, int clear_sorts, + int timer) { + +#ifdef WITH_MPI + + struct bpart *restrict bparts = c->black_holes.parts; + const size_t nr_bparts = c->black_holes.count; + const integertime_t ti_current = r->e->ti_current; + + TIMER_TIC; + + integertime_t ti_black_holes_end_min = max_nr_timesteps; + integertime_t ti_black_holes_end_max = 0; + timebin_t time_bin_min = num_time_bins; + timebin_t time_bin_max = 0; + float h_max = 0.f; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) error("Updating a local cell!"); +#endif + + /* If this cell is a leaf, collect the particle data. */ + if (!c->split) { + + /* Collect the time-bin range and h_max of the non-inhibited bparts... */ + for (size_t k = 0; k < nr_bparts; k++) { +#ifdef DEBUG_INTERACTIONS_BLACK_HOLES + bparts[k].num_ngb_force = 0; +#endif + + /* message("Receiving bparts id=%lld time_bin=%d", */ + /* bparts[k].id, bparts[k].time_bin); */ + + if (bparts[k].time_bin == time_bin_inhibited) continue; + time_bin_min = min(time_bin_min, bparts[k].time_bin); + time_bin_max = max(time_bin_max, bparts[k].time_bin); + h_max = max(h_max, bparts[k].h); + } + + /* Convert the time-bins into end times */ + ti_black_holes_end_min = get_integer_time_end(ti_current, time_bin_min); + ti_black_holes_end_max = get_integer_time_end(ti_current, time_bin_max); + } + + /* Otherwise, recurse and collect.
+ */ + else { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL && c->progeny[k]->black_holes.count > 0) { + runner_do_recv_bpart(r, c->progeny[k], clear_sorts, 0); + ti_black_holes_end_min = + min(ti_black_holes_end_min, c->progeny[k]->black_holes.ti_end_min); + ti_black_holes_end_max = + max(ti_black_holes_end_max, c->progeny[k]->black_holes.ti_end_max); + h_max = max(h_max, c->progeny[k]->black_holes.h_max); + } + } + } + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_black_holes_end_min < ti_current) + error( + "Received a cell at an incorrect time c->ti_end_min=%lld, " + "e->ti_current=%lld.", + ti_black_holes_end_min, ti_current); +#endif + + /* ... and store. */ + // c->grav.ti_end_min = ti_gravity_end_min; + // c->grav.ti_end_max = ti_gravity_end_max; + c->black_holes.ti_old_part = ti_current; + c->black_holes.h_max = h_max; + + if (timer) TIMER_TOC(timer_dorecv_bpart); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} diff --git a/src/runner_sort.c b/src/runner_sort.c new file mode 100644 index 0000000000000000000000000000000000000000..914b64f93b970000885b1b578d762d3f15455332 --- /dev/null +++ b/src/runner_sort.c @@ -0,0 +1,708 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "runner.h" + +/* Local headers. */ +#include "active.h" +#include "cell.h" +#include "engine.h" +#include "timers.h" + +/** + * @brief Sorts again all the stars in a given cell hierarchy. + * + * This is intended to be used after the star formation task has been run + * to get the cells back into a state where self/pair star tasks can be run. + * + * @param r The thread #runner. + * @param c The top-level cell to run on. + * @param timer Are we timing this? + */ +void runner_do_stars_resort(struct runner *r, struct cell *c, const int timer) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != r->e->nodeID) error("Task must be run locally!"); +#endif + + TIMER_TIC; + + /* Was a recalculation of the star sorts requested? If so redo them and clear the flag. */ + if (cell_get_flag(c, cell_flag_do_stars_resort)) { + runner_do_all_stars_sort(r, c); + cell_clear_flag(c, cell_flag_do_stars_resort); + } + + if (timer) TIMER_TOC(timer_do_stars_resort); +} + +/** + * @brief Sort the entries in ascending order using QuickSort. + * + * @param sort The entries, sorted in place by their d member. + * @param N The number of entries.
+ */ +void runner_do_sort_ascending(struct sort_entry *sort, int N) { + + struct { + int lo, hi; + } qstack[32]; /* explicit stack of pending [lo, hi] ranges */ + int qpos, i, j, lo, hi, imin; + struct sort_entry temp; + float pivot; + + /* Iterative quicksort into ascending order of d. Ranges of fewer than 16 entries are finished by selection sort, and the smaller partition is always pushed last (hence popped first), which keeps the stack depth logarithmic in N: 32 slots are enough for any int N. */ + qstack[0].lo = 0; + qstack[0].hi = N - 1; + qpos = 0; + while (qpos >= 0) { + lo = qstack[qpos].lo; + hi = qstack[qpos].hi; + qpos -= 1; + if (hi - lo < 15) { + for (i = lo; i < hi; i++) { + imin = i; + for (j = i + 1; j <= hi; j++) + if (sort[j].d < sort[imin].d) imin = j; + if (imin != i) { + temp = sort[imin]; + sort[imin] = sort[i]; + sort[i] = temp; + } + } + } else { + pivot = sort[(lo + hi) / 2].d; + i = lo; + j = hi; + while (i <= j) { + while (sort[i].d < pivot) i++; + while (sort[j].d > pivot) j--; + if (i <= j) { + if (i < j) { + temp = sort[i]; + sort[i] = sort[j]; + sort[j] = temp; + } + i += 1; + j -= 1; + } + } + if (j > (lo + hi) / 2) { + if (lo < j) { + qpos += 1; + qstack[qpos].lo = lo; + qstack[qpos].hi = j; + } + if (i < hi) { + qpos += 1; + qstack[qpos].lo = i; + qstack[qpos].hi = hi; + } + } else { + if (i < hi) { + qpos += 1; + qstack[qpos].lo = i; + qstack[qpos].hi = hi; + } + if (lo < j) { + qpos += 1; + qstack[qpos].lo = lo; + qstack[qpos].hi = j; + } + } + } + } +} + +#ifdef SWIFT_DEBUG_CHECKS +/** + * @brief Recursively checks that the flags are consistent in a cell hierarchy. + * + * Debugging function. Exists in two flavours: hydro & stars.
+ */ +#define RUNNER_CHECK_SORTS(TYPE) \ + void runner_check_sorts_##TYPE(struct cell *c, int flags) { \ + \ + if (flags & ~c->TYPE.sorted) error("Inconsistent sort flags (downward)!"); \ + if (c->split) \ + for (int k = 0; k < 8; k++) \ + if (c->progeny[k] != NULL && c->progeny[k]->TYPE.count > 0) \ + runner_check_sorts_##TYPE(c->progeny[k], c->TYPE.sorted); \ + } +#else +#define RUNNER_CHECK_SORTS(TYPE) \ + void runner_check_sorts_##TYPE(struct cell *c, int flags) { \ + error("Calling debugging code without debugging flag activated."); \ + } +#endif + +RUNNER_CHECK_SORTS(hydro) +RUNNER_CHECK_SORTS(stars) + +/** + * @brief Sort the particles in the given cell along the sort directions selected by flags. + * + * @param r The #runner. + * @param c The #cell. + * @param flags Bit-mask of the sort arrays to build (bit j selects sort array j). + * @param cleanup If true, re-build the sorts for the selected flags instead + * of just adding them. + * @param clock Flag indicating whether to record the timing or not, needed + * for recursive calls. + */ +void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags, + int cleanup, int clock) { + + struct sort_entry *fingers[8]; + const int count = c->hydro.count; + const struct part *parts = c->hydro.parts; + struct xpart *xparts = c->hydro.xparts; + float buff[8]; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->hydro.super == NULL) error("Task called above the super level!!!"); +#endif + + /* We need to do the local sorts plus whatever was requested further up. */ + flags |= c->hydro.do_sort; + if (cleanup) { + c->hydro.sorted = 0; + } else { + flags &= ~c->hydro.sorted; + } + if (flags == 0 && !cell_get_flag(c, cell_flag_do_hydro_sub_sort)) return; + + /* Check that the particles have been moved to the current time */ + if (flags && !cell_are_part_drifted(c, r->e)) + error("Sorting un-drifted cell c->nodeID=%d", c->nodeID); + +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure the sort flags are consistent (downward).
+ */ + runner_check_sorts_hydro(c, c->hydro.sorted); + + /* Make sure the sort flags are consistent (upward). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->hydro.sorted & ~c->hydro.sorted) + error("Inconsistent sort flags (upward)."); + } + + /* Update the sort timer which represents the last time the sorts + were re-set. */ + if (c->hydro.sorted == 0) c->hydro.ti_sort = r->e->ti_current; +#endif + + /* Allocate memory for sorting. */ + cell_malloc_hydro_sorts(c, flags); + + /* Does this cell have any progeny? */ + if (c->split) { + + /* Fill in the gaps within the progeny. */ + float dx_max_sort = 0.0f; + float dx_max_sort_old = 0.0f; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + + if (c->progeny[k]->hydro.count > 0) { + + /* Only propagate cleanup if the progeny is stale. */ + runner_do_hydro_sort( + r, c->progeny[k], flags, + cleanup && (c->progeny[k]->hydro.dx_max_sort_old > + space_maxreldx * c->progeny[k]->dmin), + 0); + dx_max_sort = max(dx_max_sort, c->progeny[k]->hydro.dx_max_sort); + dx_max_sort_old = + max(dx_max_sort_old, c->progeny[k]->hydro.dx_max_sort_old); + } else { + + /* We need to clean up the unused flags that were set in case the + number of particles in the cell would change */ + cell_clear_hydro_sort_flags(c->progeny[k], /*clear_unused_flags=*/1); + } + } + } + c->hydro.dx_max_sort = dx_max_sort; + c->hydro.dx_max_sort_old = dx_max_sort_old; + + /* Loop over the 13 different sort arrays. */ + for (int j = 0; j < 13; j++) { + + /* Has this sort array been flagged? */ + if (!(flags & (1 << j))) continue; + + /* Init the particle index offsets. */ + int off[8]; + off[0] = 0; + for (int k = 1; k < 8; k++) + if (c->progeny[k - 1] != NULL) + off[k] = off[k - 1] + c->progeny[k - 1]->hydro.count; + else + off[k] = off[k - 1]; + + /* Init the entries and indices.
+ */ + int inds[8]; + for (int k = 0; k < 8; k++) { + inds[k] = k; + if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) { + fingers[k] = c->progeny[k]->hydro.sort[j]; + buff[k] = fingers[k]->d; + off[k] = off[k]; /* NOTE(review): self-assignment, has no effect */ + } else + buff[k] = FLT_MAX; + } + + /* Sort the progeny indices by their current head distance. */ + for (int i = 0; i < 7; i++) + for (int k = i + 1; k < 8; k++) + if (buff[inds[k]] < buff[inds[i]]) { + int temp_i = inds[i]; + inds[i] = inds[k]; + inds[k] = temp_i; + } + + /* For each entry in the new sort list. */ + struct sort_entry *finger = c->hydro.sort[j]; + for (int ind = 0; ind < count; ind++) { + + /* Copy the minimum into the new sort array. */ + finger[ind].d = buff[inds[0]]; + finger[ind].i = fingers[inds[0]]->i + off[inds[0]]; + + /* Advance the finger of that progeny and refresh its head distance. */ + fingers[inds[0]] += 1; + buff[inds[0]] = fingers[inds[0]]->d; + + /* Bubble the updated entry back into place so inds[0] is the smallest. */ + for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) { + int temp_i = inds[k - 1]; + inds[k - 1] = inds[k]; + inds[k] = temp_i; + } + + } /* Merge. */ + + /* Add a sentinel. */ + c->hydro.sort[j][count].d = FLT_MAX; + c->hydro.sort[j][count].i = 0; + + /* Mark as sorted. */ + atomic_or(&c->hydro.sorted, 1 << j); + + } /* loop over sort arrays. */ + + } /* progeny? */ + + /* Otherwise, just sort. */ + else { + + /* Reset the sort distance */ + if (c->hydro.sorted == 0) { +#ifdef SWIFT_DEBUG_CHECKS + if (xparts != NULL && c->nodeID != engine_rank) + error("Have non-NULL xparts in foreign cell"); +#endif + + /* And the individual sort distances if we are a local cell */ + if (xparts != NULL) { + for (int k = 0; k < count; k++) { + xparts[k].x_diff_sort[0] = 0.0f; + xparts[k].x_diff_sort[1] = 0.0f; + xparts[k].x_diff_sort[2] = 0.0f; + } + } + c->hydro.dx_max_sort_old = 0.f; + c->hydro.dx_max_sort = 0.f; + } + + /* Fill the sort array.
+ */ + for (int k = 0; k < count; k++) { + const double px[3] = {parts[k].x[0], parts[k].x[1], parts[k].x[2]}; + for (int j = 0; j < 13; j++) + if (flags & (1 << j)) { + c->hydro.sort[j][k].i = k; + c->hydro.sort[j][k].d = px[0] * runner_shift[j][0] + + px[1] * runner_shift[j][1] + + px[2] * runner_shift[j][2]; + } + } + + /* Add the sentinel, sort each flagged array and mark it as sorted. */ + for (int j = 0; j < 13; j++) + if (flags & (1 << j)) { + c->hydro.sort[j][count].d = FLT_MAX; + c->hydro.sort[j][count].i = 0; + runner_do_sort_ascending(c->hydro.sort[j], count); + atomic_or(&c->hydro.sorted, 1 << j); + } + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify the sorting. */ + for (int j = 0; j < 13; j++) { + if (!(flags & (1 << j))) continue; + struct sort_entry *finger = c->hydro.sort[j]; + for (int k = 1; k < count; k++) { + if (finger[k].d < finger[k - 1].d) + error("Sorting failed, ascending array."); + if (finger[k].i >= count) error("Sorting failed, indices borked."); + } + } + + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts_hydro(c, flags); + + /* Make sure the sort flags are consistent (upward). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->hydro.sorted & ~c->hydro.sorted) + error("Inconsistent sort flags."); + } +#endif + + /* Clear the cell's sort flags. */ + c->hydro.do_sort = 0; + cell_clear_flag(c, cell_flag_do_hydro_sub_sort); + c->hydro.requires_sorts = 0; + + if (clock) TIMER_TOC(timer_dosort); +} + +/** + * @brief Sort the stars particles in the given cell along all cardinal + * directions. + * + * @param r The #runner. + * @param c The #cell. + * @param flags Cell flag. + * @param cleanup If true, re-build the sorts for the selected flags instead + * of just adding them. + * @param clock Flag indicating whether to record the timing or not, needed + * for recursive calls.
+ */ +void runner_do_stars_sort(struct runner *r, struct cell *c, int flags, + int cleanup, int clock) { + + struct sort_entry *fingers[8]; + const int count = c->stars.count; + struct spart *sparts = c->stars.parts; + float buff[8]; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->hydro.super == NULL) error("Task called above the super level!!!"); /* NOTE(review): tests the hydro super pointer in the stars sort - presumably intended; confirm. */ +#endif + + /* We need to do the local sorts plus whatever was requested further up. flags is a bitmask with one bit per sort direction (13 in total). */ + flags |= c->stars.do_sort; + if (cleanup) { + c->stars.sorted = 0; + } else { + flags &= ~c->stars.sorted; + } + if (flags == 0 && !cell_get_flag(c, cell_flag_do_stars_sub_sort)) return; + + /* Check that the particles have been moved to the current time */ + if (flags && !cell_are_spart_drifted(c, r->e)) { + error("Sorting un-drifted cell c->nodeID=%d", c->nodeID); + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts_stars(c, c->stars.sorted); + + /* Make sure the sort flags are consistent (upward). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->stars.sorted & ~c->stars.sorted) + error("Inconsistent sort flags (upward)."); + } + + /* Update the sort timer which represents the last time the sorts + were re-set. */ + if (c->stars.sorted == 0) c->stars.ti_sort = r->e->ti_current; +#endif + + /* Start by allocating the entry arrays in the requested dimensions. */ + cell_malloc_stars_sorts(c, flags); + + /* Does this cell have any progeny? */ + if (c->split) { + + /* Fill in the gaps within the progeny. */ + float dx_max_sort = 0.0f; + float dx_max_sort_old = 0.0f; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + + if (c->progeny[k]->stars.count > 0) { + + /* Only propagate cleanup if the progeny is stale, i.e. its dx_max_sort_old exceeds space_maxreldx times dmin.
*/ + const int cleanup_prog = + cleanup && (c->progeny[k]->stars.dx_max_sort_old > + space_maxreldx * c->progeny[k]->dmin); + runner_do_stars_sort(r, c->progeny[k], flags, cleanup_prog, 0); + dx_max_sort = max(dx_max_sort, c->progeny[k]->stars.dx_max_sort); + dx_max_sort_old = + max(dx_max_sort_old, c->progeny[k]->stars.dx_max_sort_old); + } else { + + /* We need to clean up the unused flags that were set in case the + number of particles in the cell would change */ + cell_clear_stars_sort_flags(c->progeny[k], /*clear_unused_flags=*/1); + } + } + } + c->stars.dx_max_sort = dx_max_sort; + c->stars.dx_max_sort_old = dx_max_sort_old; + + /* Loop over the 13 different sort arrays. */ + for (int j = 0; j < 13; j++) { + + /* Has this sort array been flagged? */ + if (!(flags & (1 << j))) continue; + + /* Init the particle index offsets: off[k] is the total count of the progeny preceding k. */ + int off[8]; + off[0] = 0; + for (int k = 1; k < 8; k++) + if (c->progeny[k - 1] != NULL) + off[k] = off[k - 1] + c->progeny[k - 1]->stars.count; + else + off[k] = off[k - 1]; + + /* Init the entries and indices. */ + int inds[8]; + for (int k = 0; k < 8; k++) { + inds[k] = k; + if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) { + fingers[k] = c->progeny[k]->stars.sort[j]; + buff[k] = fingers[k]->d; + off[k] = off[k]; /* NOTE(review): self-assignment, has no effect. */ + } else + buff[k] = FLT_MAX; + } + + /* Sort the buffer: order inds[] by the current head distance of each progeny. */ + for (int i = 0; i < 7; i++) + for (int k = i + 1; k < 8; k++) + if (buff[inds[k]] < buff[inds[i]]) { + int temp_i = inds[i]; + inds[i] = inds[k]; + inds[k] = temp_i; + } + + /* For each entry in the new sort list. */ + struct sort_entry *finger = c->stars.sort[j]; + for (int ind = 0; ind < count; ind++) { + + /* Copy the minimum into the new sort array. */ + finger[ind].d = buff[inds[0]]; + finger[ind].i = fingers[inds[0]]->i + off[inds[0]]; + + /* Update the buffer. */ + fingers[inds[0]] += 1; + buff[inds[0]] = fingers[inds[0]]->d; + + /* Find the smallest entry: bubble the updated head of inds[0] up until the ordering of inds[] holds again.
*/ + for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) { + int temp_i = inds[k - 1]; + inds[k - 1] = inds[k]; + inds[k] = temp_i; + } + + } /* Merge. */ + + /* Add a sentinel. */ + c->stars.sort[j][count].d = FLT_MAX; + c->stars.sort[j][count].i = 0; + + /* Mark as sorted. */ + atomic_or(&c->stars.sorted, 1 << j); + + } /* loop over sort arrays. */ + + } /* progeny? */ + + /* Otherwise, just sort. */ + else { + + /* Reset the sort distance */ + if (c->stars.sorted == 0) { + + /* And the individual sort distances of all the sparts in this cell */ + for (int k = 0; k < count; k++) { + sparts[k].x_diff_sort[0] = 0.0f; + sparts[k].x_diff_sort[1] = 0.0f; + sparts[k].x_diff_sort[2] = 0.0f; + } + c->stars.dx_max_sort_old = 0.f; + c->stars.dx_max_sort = 0.f; + } + + /* Fill the sort array: the sort key is the projection of the position onto runner_shift[j]. */ + for (int k = 0; k < count; k++) { + const double px[3] = {sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]}; + for (int j = 0; j < 13; j++) + if (flags & (1 << j)) { + c->stars.sort[j][k].i = k; + c->stars.sort[j][k].d = px[0] * runner_shift[j][0] + + px[1] * runner_shift[j][1] + + px[2] * runner_shift[j][2]; + } + } + + /* Add the sentinel and sort. */ + for (int j = 0; j < 13; j++) + if (flags & (1 << j)) { + c->stars.sort[j][count].d = FLT_MAX; + c->stars.sort[j][count].i = 0; + runner_do_sort_ascending(c->stars.sort[j], count); + atomic_or(&c->stars.sorted, 1 << j); + } + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify the sorting (the loop starts at entry 1, so the index of entry 0 is not checked). */ + for (int j = 0; j < 13; j++) { + if (!(flags & (1 << j))) continue; + struct sort_entry *finger = c->stars.sort[j]; + for (int k = 1; k < count; k++) { + if (finger[k].d < finger[k - 1].d) + error("Sorting failed, ascending array."); + if (finger[k].i >= count) error("Sorting failed, indices borked."); + } + } + + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts_stars(c, flags); + + /* Make sure the sort flags are consistent (upward).
*/ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->stars.sorted & ~c->stars.sorted) + error("Inconsistent sort flags."); + } +#endif + + /* Clear the cell's sort flags. */ + c->stars.do_sort = 0; + cell_clear_flag(c, cell_flag_do_stars_sub_sort); + c->stars.requires_sorts = 0; + + if (clock) TIMER_TOC(timer_do_stars_sort); +} + +/** + * @brief Recurse into a cell until reaching the super level and call + * the hydro sorting function there. + * + * This function must be called at or above the super level! + * + * This function will sort the particles in all 13 directions. Cells that are not active for hydro are skipped. + * + * @param r the #runner. + * @param c the #cell. + */ +void runner_do_all_hydro_sort(struct runner *r, struct cell *c) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != engine_rank) error("Function called on a foreign cell!"); +#endif + + if (!cell_is_active_hydro(c, r->e)) return; + + /* Shall we sort at this level? */ + if (c->hydro.super == c) { + + /* Sort everything: 0x1FFF sets the 13 lowest bits, one per sort direction */ + runner_do_hydro_sort(r, c, 0x1FFF, /*cleanup=*/0, /*clock=*/0); + + } else { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->hydro.super != NULL) error("Function called below the super level!"); +#endif + + /* Ok, then, let's try lower */ + if (c->split) { + for (int k = 0; k < 8; ++k) { + if (c->progeny[k] != NULL) runner_do_all_hydro_sort(r, c->progeny[k]); + } + } else { +#ifdef SWIFT_DEBUG_CHECKS + error("Reached a leaf without encountering a hydro super cell!"); +#endif + } + } +} + +/** + * @brief Recurse into a cell until reaching the super level and call + * the star sorting function there. + * + * This function must be called at or above the super level! + * + * This function will sort the particles in all 13 directions. + * + * @param r the #runner. + * @param c the #cell.
+ */ +void runner_do_all_stars_sort(struct runner *r, struct cell *c) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID != engine_rank) error("Function called on a foreign cell!"); +#endif + + if (!cell_is_active_stars(c, r->e) && !cell_is_active_hydro(c, r->e)) return; /* Skip only if the cell is active for neither stars nor hydro. */ + + /* Shall we sort at this level? */ + if (c->hydro.super == c) { + + /* Sort everything: 0x1FFF sets the 13 lowest bits, one per sort direction */ + runner_do_stars_sort(r, c, 0x1FFF, /*cleanup=*/0, /*clock=*/0); + + } else { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->hydro.super != NULL) error("Function called below the super level!"); +#endif + + /* Ok, then, let's try lower */ + if (c->split) { + for (int k = 0; k < 8; ++k) { + if (c->progeny[k] != NULL) runner_do_all_stars_sort(r, c->progeny[k]); + } + } else { +#ifdef SWIFT_DEBUG_CHECKS + error("Reached a leaf without encountering a hydro super cell!"); +#endif + } + } +} diff --git a/src/runner_time_integration.c b/src/runner_time_integration.c new file mode 100644 index 0000000000000000000000000000000000000000..e1f5de709da804330953b47a647d0f0ce13de7bb --- /dev/null +++ b/src/runner_time_integration.c @@ -0,0 +1,987 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "runner.h" + +/* Local headers. */ +#include "active.h" +#include "black_holes.h" +#include "cell.h" +#include "engine.h" +#include "kick.h" +#include "timers.h" +#include "timestep.h" +#include "timestep_limiter.h" +#include "tracers.h" + +/** + * @brief Initialize the multipoles before the gravity calculation, recursing through the progeny of the cell. + * + * @param r The runner thread. + * @param c The cell. + * @param timer 1 if the time is to be recorded. + */ +void runner_do_init_grav(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (!(e->policy & engine_policy_self_gravity)) + error("Grav-init task called outside of self-gravity calculation"); +#endif + + /* Anything to do here? (cells that are not active for gravity are skipped) */ + if (!cell_is_active_gravity(c, e)) return; + + /* Reset the gravity acceleration tensors */ + gravity_field_tensors_init(&c->grav.multipole->pot, e->ti_current); + + /* Recurse? (timer is passed as 0 so only the top-level call is timed) */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) runner_do_init_grav(r, c->progeny[k], 0); + } + } + + if (timer) TIMER_TOC(timer_init_grav); +} + +/** + * @brief Perform the first half-kick on all the active particles in a cell. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ?
+ */ +void runner_do_kick1(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const struct hydro_props *hydro_props = e->hydro_properties; + const struct entropy_floor_properties *entropy_floor = e->entropy_floor; + const int with_cosmology = (e->policy & engine_policy_cosmology); + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + struct gpart *restrict gparts = c->grav.parts; + struct spart *restrict sparts = c->stars.parts; + const int count = c->hydro.count; + const int gcount = c->grav.count; + const int scount = c->stars.count; + const integertime_t ti_current = e->ti_current; + const double time_base = e->time_base; + + TIMER_TIC; + + /* Anything to do here? NOTE(review): black holes are tested for here but this function has no bpart loop - confirm they are kicked elsewhere. */ + if (!cell_is_starting_hydro(c, e) && !cell_is_starting_gravity(c, e) && + !cell_is_starting_stars(c, e) && !cell_is_starting_black_holes(c, e)) + return; + + /* Recurse? (the particle loops below only run on cells that are not split) */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_kick1(r, c->progeny[k], 0); + } else { + + /* Loop over the parts in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part. */ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; + + /* If particle needs to be kicked */ + if (part_is_starting(p, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + if (p->wakeup == time_bin_awake) + error("Woken-up particle that has not been processed in kick1"); +#endif + + /* Skip particles that have been woken up and treated by the limiter.
*/ + if (p->wakeup != time_bin_not_awake) continue; + + const integertime_t ti_step = get_integer_timestep(p->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current + 1, p->time_bin); + +#ifdef SWIFT_DEBUG_CHECKS + const integertime_t ti_end = ti_begin + ti_step; + + if (ti_begin != ti_current) + error( + "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, " + "ti_step=%lld time_bin=%d wakeup=%d ti_current=%lld", + ti_end, ti_begin, ti_step, p->time_bin, p->wakeup, ti_current); +#endif + + /* Time interval for this half-kick: from ti_begin to the mid-point of the step */ + double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr; + if (with_cosmology) { + dt_kick_hydro = cosmology_get_hydro_kick_factor( + cosmo, ti_begin, ti_begin + ti_step / 2); + dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin, + ti_begin + ti_step / 2); + dt_kick_therm = cosmology_get_therm_kick_factor( + cosmo, ti_begin, ti_begin + ti_step / 2); + dt_kick_corr = cosmology_get_corr_kick_factor(cosmo, ti_begin, + ti_begin + ti_step / 2); + } else { + dt_kick_hydro = (ti_step / 2) * time_base; + dt_kick_grav = (ti_step / 2) * time_base; + dt_kick_therm = (ti_step / 2) * time_base; + dt_kick_corr = (ti_step / 2) * time_base; + } + + /* Do the kick */ + kick_part(p, xp, dt_kick_hydro, dt_kick_grav, dt_kick_therm, + dt_kick_corr, cosmo, hydro_props, entropy_floor, ti_begin, + ti_begin + ti_step / 2); + + /* Update the accelerations to be used in the drift for hydro */ + if (p->gpart != NULL) { + + xp->a_grav[0] = p->gpart->a_grav[0]; + xp->a_grav[1] = p->gpart->a_grav[1]; + xp->a_grav[2] = p->gpart->a_grav[2]; + } + } + } + + /* Loop over the gparts in this cell. */ + for (int k = 0; k < gcount; k++) { + + /* Get a handle on the g-part.
*/ + struct gpart *restrict gp = &gparts[k]; + + /* If the g-particle has no counterpart (dark matter or background dark matter type) and needs to be kicked */ + if ((gp->type == swift_type_dark_matter || + gp->type == swift_type_dark_matter_background) && + gpart_is_starting(gp, e)) { + + const integertime_t ti_step = get_integer_timestep(gp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current + 1, gp->time_bin); + +#ifdef SWIFT_DEBUG_CHECKS + const integertime_t ti_end = + get_integer_time_end(ti_current + 1, gp->time_bin); + + if (ti_begin != ti_current) + error( + "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, " + "ti_step=%lld time_bin=%d ti_current=%lld", + ti_end, ti_begin, ti_step, gp->time_bin, ti_current); +#endif + + /* Time interval for this half-kick: from ti_begin to the mid-point of the step */ + double dt_kick_grav; + if (with_cosmology) { + dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin, + ti_begin + ti_step / 2); + } else { + dt_kick_grav = (ti_step / 2) * time_base; + } + + /* Do the kick */ + kick_gpart(gp, dt_kick_grav, ti_begin, ti_begin + ti_step / 2); + } + } + + /* Loop over the stars particles in this cell. */ + for (int k = 0; k < scount; k++) { + + /* Get a handle on the s-part.
*/ + struct spart *restrict sp = &sparts[k]; + + /* If the s-particle needs to be kicked */ + if (spart_is_starting(sp, e)) { + + const integertime_t ti_step = get_integer_timestep(sp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current + 1, sp->time_bin); + +#ifdef SWIFT_DEBUG_CHECKS + const integertime_t ti_end = + get_integer_time_end(ti_current + 1, sp->time_bin); + + if (ti_begin != ti_current) + error( + "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, " + "ti_step=%lld time_bin=%d ti_current=%lld", + ti_end, ti_begin, ti_step, sp->time_bin, ti_current); +#endif + + /* Time interval for this half-kick: from ti_begin to the mid-point of the step */ + double dt_kick_grav; + if (with_cosmology) { + dt_kick_grav = cosmology_get_grav_kick_factor(cosmo, ti_begin, + ti_begin + ti_step / 2); + } else { + dt_kick_grav = (ti_step / 2) * time_base; + } + + /* Do the kick */ + kick_spart(sp, dt_kick_grav, ti_begin, ti_begin + ti_step / 2); + } + } + } + + if (timer) TIMER_TOC(timer_kick1); +} + +/** + * @brief Perform the second half-kick on all the active particles in a cell. + * + * Also prepares particles to be drifted. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ?
+ */ +void runner_do_kick2(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + const struct cosmology *cosmo = e->cosmology; + const struct hydro_props *hydro_props = e->hydro_properties; + const struct entropy_floor_properties *entropy_floor = e->entropy_floor; + const int with_cosmology = (e->policy & engine_policy_cosmology); + const int count = c->hydro.count; + const int gcount = c->grav.count; + const int scount = c->stars.count; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + struct gpart *restrict gparts = c->grav.parts; + struct spart *restrict sparts = c->stars.parts; + const integertime_t ti_current = e->ti_current; + const double time_base = e->time_base; + + TIMER_TIC; + + /* Anything to do here? (is the cell active for any particle type?) */ + if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e) && + !cell_is_active_stars(c, e) && !cell_is_active_black_holes(c, e)) + return; + + /* Recurse? (the particle loops below only run on cells that are not split) */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_kick2(r, c->progeny[k], 0); + } else { + + /* Loop over the particles in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part.
*/ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; + + /* If particle needs to be kicked */ + if (part_is_active(p, e)) { + + integertime_t ti_begin, ti_end, ti_step; + +#ifdef SWIFT_DEBUG_CHECKS + if (p->wakeup == time_bin_awake) + error("Woken-up particle that has not been processed in kick1"); +#endif + + if (p->wakeup == time_bin_not_awake) { + + /* Time-step from a regular kick */ + ti_step = get_integer_timestep(p->time_bin); + ti_begin = get_integer_time_begin(ti_current, p->time_bin); + ti_end = ti_begin + ti_step; + + } else { + + /* Time-step that follows a wake-up call: it begins with the wake-up bin and ends with the current time-bin */ + ti_begin = get_integer_time_begin(ti_current, p->wakeup); + ti_end = get_integer_time_end(ti_current, p->time_bin); + ti_step = ti_end - ti_begin; + + /* Reset the flag. Everything is back to normal from now on. */ + p->wakeup = time_bin_awake; + } + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_begin + ti_step != ti_current) + error( + "Particle in wrong time-bin, ti_begin=%lld, ti_step=%lld " + "time_bin=%d wakeup=%d ti_current=%lld", + ti_begin, ti_step, p->time_bin, p->wakeup, ti_current); +#endif + /* Time interval for this half-kick: from the mid-point of the step to ti_end */ + double dt_kick_grav, dt_kick_hydro, dt_kick_therm, dt_kick_corr; + if (with_cosmology) { + dt_kick_hydro = cosmology_get_hydro_kick_factor( + cosmo, ti_begin + ti_step / 2, ti_end); + dt_kick_grav = cosmology_get_grav_kick_factor( + cosmo, ti_begin + ti_step / 2, ti_end); + dt_kick_therm = cosmology_get_therm_kick_factor( + cosmo, ti_begin + ti_step / 2, ti_end); + dt_kick_corr = cosmology_get_corr_kick_factor( + cosmo, ti_begin + ti_step / 2, ti_end); + } else { + dt_kick_hydro = (ti_end - (ti_begin + ti_step / 2)) * time_base; + dt_kick_grav = (ti_end - (ti_begin + ti_step / 2)) * time_base; + dt_kick_therm = (ti_end - (ti_begin + ti_step / 2)) * time_base; + dt_kick_corr = (ti_end - (ti_begin + ti_step / 2)) * time_base; + } + + /* Finish the time-step with a second half-kick */ + kick_part(p, xp, dt_kick_hydro,
dt_kick_grav, dt_kick_therm, + dt_kick_corr, cosmo, hydro_props, entropy_floor, + ti_begin + ti_step / 2, ti_end); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that kick and the drift are synchronized */ + if (p->ti_drift != p->ti_kick) error("Error integrating part in time."); +#endif + + /* Prepare the values to be drifted */ + hydro_reset_predicted_values(p, xp, cosmo); + } + } + + /* Loop over the g-particles in this cell. */ + for (int k = 0; k < gcount; k++) { + + /* Get a handle on the g-part. */ + struct gpart *restrict gp = &gparts[k]; + + /* If the g-particle has no counterpart (dark matter or background dark matter type) and needs to be kicked */ + if ((gp->type == swift_type_dark_matter || + gp->type == swift_type_dark_matter_background) && + gpart_is_active(gp, e)) { + + const integertime_t ti_step = get_integer_timestep(gp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current, gp->time_bin); + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_begin + ti_step != ti_current) + error("Particle in wrong time-bin"); +#endif + + /* Time interval for this half-kick: from the mid-point of the step to its end */ + double dt_kick_grav; + if (with_cosmology) { + dt_kick_grav = cosmology_get_grav_kick_factor( + cosmo, ti_begin + ti_step / 2, ti_begin + ti_step); + } else { + dt_kick_grav = (ti_step / 2) * time_base; + } + + /* Finish the time-step with a second half-kick */ + kick_gpart(gp, dt_kick_grav, ti_begin + ti_step / 2, + ti_begin + ti_step); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that kick and the drift are synchronized */ + if (gp->ti_drift != gp->ti_kick) + error("Error integrating g-part in time."); +#endif + + /* Prepare the values to be drifted */ + gravity_reset_predicted_values(gp); + } + } + + /* Loop over the stars particles in this cell. */ + for (int k = 0; k < scount; k++) { + + /* Get a handle on the s-part.
*/ + struct spart *restrict sp = &sparts[k]; + + /* If the s-particle needs to be kicked */ + if (spart_is_active(sp, e)) { + + const integertime_t ti_step = get_integer_timestep(sp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current, sp->time_bin); + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_begin + ti_step != ti_current) + error("Particle in wrong time-bin"); +#endif + + /* Time interval for this half-kick: from the mid-point of the step to its end */ + double dt_kick_grav; + if (with_cosmology) { + dt_kick_grav = cosmology_get_grav_kick_factor( + cosmo, ti_begin + ti_step / 2, ti_begin + ti_step); + } else { + dt_kick_grav = (ti_step / 2) * time_base; + } + + /* Finish the time-step with a second half-kick */ + kick_spart(sp, dt_kick_grav, ti_begin + ti_step / 2, + ti_begin + ti_step); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that kick and the drift are synchronized */ + if (sp->ti_drift != sp->ti_kick) + error("Error integrating s-part in time."); +#endif + + /* Prepare the values to be drifted */ + stars_reset_predicted_values(sp); + } + } + } + if (timer) TIMER_TOC(timer_kick2); +} + +/** + * @brief Computes the next time-step of all active particles in this cell + * and update the cell's statistics. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_timestep(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const int with_cosmology = (e->policy & engine_policy_cosmology); + const int count = c->hydro.count; + const int gcount = c->grav.count; + const int scount = c->stars.count; + const int bcount = c->black_holes.count; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + struct gpart *restrict gparts = c->grav.parts; + struct spart *restrict sparts = c->stars.parts; + struct bpart *restrict bparts = c->black_holes.parts; + + TIMER_TIC; + + /* Anything to do here?
*/ + if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e) && + !cell_is_active_stars(c, e) && !cell_is_active_black_holes(c, e)) { + c->hydro.updated = 0; + c->grav.updated = 0; + c->stars.updated = 0; + c->black_holes.updated = 0; + return; + } + + int updated = 0, g_updated = 0, s_updated = 0, b_updated = 0; + integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, + ti_hydro_beg_max = 0; + integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, + ti_gravity_beg_max = 0; + integertime_t ti_stars_end_min = max_nr_timesteps, ti_stars_end_max = 0, + ti_stars_beg_max = 0; + integertime_t ti_black_holes_end_min = max_nr_timesteps, + ti_black_holes_end_max = 0, ti_black_holes_beg_max = 0; + + /* No children? */ + if (!c->split) { + + /* Loop over the particles in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part. */ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; + + /* If particle needs updating */ + if (part_is_active(p, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Current end of time-step */ + const integertime_t ti_end = + get_integer_time_end(ti_current, p->time_bin); + + if (ti_end != ti_current) + error("Computing time-step of rogue particle."); +#endif + + /* Get new time-step */ + const integertime_t ti_new_step = get_part_timestep(p, xp, e); + + /* Update particle */ + p->time_bin = get_time_bin(ti_new_step); + if (p->gpart != NULL) p->gpart->time_bin = p->time_bin; + + /* Update the tracers properties */ + tracers_after_timestep(p, xp, e->internal_units, e->physical_constants, + with_cosmology, e->cosmology, + e->hydro_properties, e->cooling_func, e->time); + + /* Number of updated particles */ + updated++; + if (p->gpart != NULL) g_updated++; + + /* What is the next sync-point ? 
*/ + ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min); + ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max); + + /* What is the next starting point for this cell ? */ + ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max); + + if (p->gpart != NULL) { + + /* What is the next sync-point ? */ + ti_gravity_end_min = + min(ti_current + ti_new_step, ti_gravity_end_min); + ti_gravity_end_max = + max(ti_current + ti_new_step, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); + } + } + + else { /* part is inactive */ + + if (!part_is_inhibited(p, e)) { + + const integertime_t ti_end = + get_integer_time_end(ti_current, p->time_bin); + + const integertime_t ti_beg = + get_integer_time_begin(ti_current + 1, p->time_bin); + + /* What is the next sync-point ? */ + ti_hydro_end_min = min(ti_end, ti_hydro_end_min); + ti_hydro_end_max = max(ti_end, ti_hydro_end_max); + + /* What is the next starting point for this cell ? */ + ti_hydro_beg_max = max(ti_beg, ti_hydro_beg_max); + + if (p->gpart != NULL) { + + /* What is the next sync-point ? */ + ti_gravity_end_min = min(ti_end, ti_gravity_end_min); + ti_gravity_end_max = max(ti_end, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max); + } + } + } + } + + /* Loop over the g-particles in this cell. */ + for (int k = 0; k < gcount; k++) { + + /* Get a handle on the part. */ + struct gpart *restrict gp = &gparts[k]; + + /* If the g-particle has no counterpart */ + if (gp->type == swift_type_dark_matter || + gp->type == swift_type_dark_matter_background) { + + /* need to be updated ? 
*/ + if (gpart_is_active(gp, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Current end of time-step */ + const integertime_t ti_end = + get_integer_time_end(ti_current, gp->time_bin); + + if (ti_end != ti_current) + error("Computing time-step of rogue particle."); +#endif + + /* Get new time-step */ + const integertime_t ti_new_step = get_gpart_timestep(gp, e); + + /* Update particle */ + gp->time_bin = get_time_bin(ti_new_step); + + /* Number of updated g-particles */ + g_updated++; + + /* What is the next sync-point ? */ + ti_gravity_end_min = + min(ti_current + ti_new_step, ti_gravity_end_min); + ti_gravity_end_max = + max(ti_current + ti_new_step, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); + + } else { /* gpart is inactive */ + + if (!gpart_is_inhibited(gp, e)) { + + const integertime_t ti_end = + get_integer_time_end(ti_current, gp->time_bin); + + /* What is the next sync-point ? */ + ti_gravity_end_min = min(ti_end, ti_gravity_end_min); + ti_gravity_end_max = max(ti_end, ti_gravity_end_max); + + const integertime_t ti_beg = + get_integer_time_begin(ti_current + 1, gp->time_bin); + + /* What is the next starting point for this cell ? */ + ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max); + } + } + } + } + + /* Loop over the star particles in this cell. */ + for (int k = 0; k < scount; k++) { + + /* Get a handle on the part. */ + struct spart *restrict sp = &sparts[k]; + + /* need to be updated ? 
*/ + if (spart_is_active(sp, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Current end of time-step */ + const integertime_t ti_end = + get_integer_time_end(ti_current, sp->time_bin); + + if (ti_end != ti_current) + error("Computing time-step of rogue particle."); +#endif + /* Get new time-step */ + const integertime_t ti_new_step = get_spart_timestep(sp, e); + + /* Update particle */ + sp->time_bin = get_time_bin(ti_new_step); + sp->gpart->time_bin = get_time_bin(ti_new_step); + + /* Number of updated s-particles */ + s_updated++; + g_updated++; + + ti_stars_end_min = min(ti_current + ti_new_step, ti_stars_end_min); + ti_stars_end_max = max(ti_current + ti_new_step, ti_stars_end_max); + ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min); + ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_stars_beg_max = max(ti_current, ti_stars_beg_max); + ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); + + /* star particle is inactive but not inhibited */ + } else { + + if (!spart_is_inhibited(sp, e)) { + + const integertime_t ti_end = + get_integer_time_end(ti_current, sp->time_bin); + + const integertime_t ti_beg = + get_integer_time_begin(ti_current + 1, sp->time_bin); + + ti_stars_end_min = min(ti_end, ti_stars_end_min); + ti_stars_end_max = max(ti_end, ti_stars_end_max); + ti_gravity_end_min = min(ti_end, ti_gravity_end_min); + ti_gravity_end_max = max(ti_end, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_stars_beg_max = max(ti_beg, ti_stars_beg_max); + ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max); + } + } + } + + /* Loop over the star particles in this cell. */ + for (int k = 0; k < bcount; k++) { + + /* Get a handle on the part. */ + struct bpart *restrict bp = &bparts[k]; + + /* need to be updated ? 
*/ + if (bpart_is_active(bp, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Current end of time-step */ + const integertime_t ti_end = + get_integer_time_end(ti_current, bp->time_bin); + + if (ti_end != ti_current) + error("Computing time-step of rogue particle."); +#endif + /* Get new time-step */ + const integertime_t ti_new_step = get_bpart_timestep(bp, e); + + /* Update particle */ + bp->time_bin = get_time_bin(ti_new_step); + bp->gpart->time_bin = get_time_bin(ti_new_step); + + /* Number of updated s-particles */ + b_updated++; + g_updated++; + + ti_black_holes_end_min = + min(ti_current + ti_new_step, ti_black_holes_end_min); + ti_black_holes_end_max = + max(ti_current + ti_new_step, ti_black_holes_end_max); + ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min); + ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_black_holes_beg_max = max(ti_current, ti_black_holes_beg_max); + ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); + + /* star particle is inactive but not inhibited */ + } else { + + if (!bpart_is_inhibited(bp, e)) { + + const integertime_t ti_end = + get_integer_time_end(ti_current, bp->time_bin); + + const integertime_t ti_beg = + get_integer_time_begin(ti_current + 1, bp->time_bin); + + ti_black_holes_end_min = min(ti_end, ti_black_holes_end_min); + ti_black_holes_end_max = max(ti_end, ti_black_holes_end_max); + ti_gravity_end_min = min(ti_end, ti_gravity_end_min); + ti_gravity_end_max = max(ti_end, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_black_holes_beg_max = max(ti_beg, ti_black_holes_beg_max); + ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max); + } + } + } + + } else { + + /* Loop over the progeny. 
*/ + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *restrict cp = c->progeny[k]; + + /* Recurse */ + runner_do_timestep(r, cp, 0); + + /* And aggregate */ + updated += cp->hydro.updated; + g_updated += cp->grav.updated; + s_updated += cp->stars.updated; + b_updated += cp->black_holes.updated; + + ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min); + ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max); + ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max); + + ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min); + ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max); + ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max); + + ti_stars_end_min = min(cp->stars.ti_end_min, ti_stars_end_min); + ti_stars_end_max = max(cp->grav.ti_end_max, ti_stars_end_max); + ti_stars_beg_max = max(cp->grav.ti_beg_max, ti_stars_beg_max); + + ti_black_holes_end_min = + min(cp->black_holes.ti_end_min, ti_black_holes_end_min); + ti_black_holes_end_max = + max(cp->grav.ti_end_max, ti_black_holes_end_max); + ti_black_holes_beg_max = + max(cp->grav.ti_beg_max, ti_black_holes_beg_max); + } + } + } + + /* Store the values. 
*/ + c->hydro.updated = updated; + c->grav.updated = g_updated; + c->stars.updated = s_updated; + c->black_holes.updated = b_updated; + + c->hydro.ti_end_min = ti_hydro_end_min; + c->hydro.ti_end_max = ti_hydro_end_max; + c->hydro.ti_beg_max = ti_hydro_beg_max; + c->grav.ti_end_min = ti_gravity_end_min; + c->grav.ti_end_max = ti_gravity_end_max; + c->grav.ti_beg_max = ti_gravity_beg_max; + c->stars.ti_end_min = ti_stars_end_min; + c->stars.ti_end_max = ti_stars_end_max; + c->stars.ti_beg_max = ti_stars_beg_max; + c->black_holes.ti_end_min = ti_black_holes_end_min; + c->black_holes.ti_end_max = ti_black_holes_end_max; + c->black_holes.ti_beg_max = ti_black_holes_beg_max; + +#ifdef SWIFT_DEBUG_CHECKS + if (c->hydro.ti_end_min == e->ti_current && + c->hydro.ti_end_min < max_nr_timesteps) + error("End of next hydro step is current time!"); + if (c->grav.ti_end_min == e->ti_current && + c->grav.ti_end_min < max_nr_timesteps) + error("End of next gravity step is current time!"); + if (c->stars.ti_end_min == e->ti_current && + c->stars.ti_end_min < max_nr_timesteps) + error("End of next stars step is current time!"); + if (c->black_holes.ti_end_min == e->ti_current && + c->black_holes.ti_end_min < max_nr_timesteps) + error("End of next black holes step is current time!"); +#endif + + if (timer) TIMER_TOC(timer_timestep); +} + +/** + * @brief Apply the time-step limiter to all awaken particles in a cell + * hierarchy. + * + * @param r The task #runner. + * @param c The #cell. + * @param force Limit the particles irrespective of the #cell flags. + * @param timer Are we timing this ? + */ +void runner_do_limiter(struct runner *r, struct cell *c, int force, int timer) { + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const int count = c->hydro.count; + struct part *restrict parts = c->hydro.parts; + struct xpart *restrict xparts = c->hydro.xparts; + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that we only limit local cells. 
*/ + if (c->nodeID != engine_rank) error("Limiting dt of a foreign cell is nope."); +#endif + + integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, + ti_hydro_beg_max = 0; + integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, + ti_gravity_beg_max = 0; + + /* Limit irrespective of cell flags? */ + force = (force || cell_get_flag(c, cell_flag_do_hydro_limiter)); + + /* Early abort? */ + if (c->hydro.count == 0) { + + /* Clear the limiter flags. */ + cell_clear_flag( + c, cell_flag_do_hydro_limiter | cell_flag_do_hydro_sub_limiter); + return; + } + + /* Loop over the progeny ? */ + if (c->split && (force || cell_get_flag(c, cell_flag_do_hydro_sub_limiter))) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *restrict cp = c->progeny[k]; + + /* Recurse */ + runner_do_limiter(r, cp, force, 0); + + /* And aggregate */ + ti_hydro_end_min = min(cp->hydro.ti_end_min, ti_hydro_end_min); + ti_hydro_end_max = max(cp->hydro.ti_end_max, ti_hydro_end_max); + ti_hydro_beg_max = max(cp->hydro.ti_beg_max, ti_hydro_beg_max); + ti_gravity_end_min = min(cp->grav.ti_end_min, ti_gravity_end_min); + ti_gravity_end_max = max(cp->grav.ti_end_max, ti_gravity_end_max); + ti_gravity_beg_max = max(cp->grav.ti_beg_max, ti_gravity_beg_max); + } + } + + /* Store the updated values */ + c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min); + c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max); + c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max); + c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min); + c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max); + c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max); + + } else if (!c->split && force) { + + ti_hydro_end_min = c->hydro.ti_end_min; + ti_hydro_end_max = c->hydro.ti_end_max; + ti_hydro_beg_max = c->hydro.ti_beg_max; + ti_gravity_end_min = c->grav.ti_end_min; + ti_gravity_end_max = 
c->grav.ti_end_max; + ti_gravity_beg_max = c->grav.ti_beg_max; + + /* Loop over the gas particles in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part. */ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; + + /* Avoid inhibited particles */ + if (part_is_inhibited(p, e)) continue; + + /* If the particle will be active no need to wake it up */ + if (part_is_active(p, e) && p->wakeup != time_bin_not_awake) + p->wakeup = time_bin_not_awake; + + /* Bip, bip, bip... wake-up time */ + if (p->wakeup <= time_bin_awake) { + + /* Apply the limiter and get the new time-step size */ + const integertime_t ti_new_step = timestep_limit_part(p, xp, e); + + /* What is the next sync-point ? */ + ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min); + ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max); + + /* What is the next starting point for this cell ? */ + ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max); + + /* Also limit the gpart counter-part */ + if (p->gpart != NULL) { + + /* Register the time-bin */ + p->gpart->time_bin = p->time_bin; + + /* What is the next sync-point ? */ + ti_gravity_end_min = + min(ti_current + ti_new_step, ti_gravity_end_min); + ti_gravity_end_max = + max(ti_current + ti_new_step, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); + } + } + } + + /* Store the updated values */ + c->hydro.ti_end_min = min(c->hydro.ti_end_min, ti_hydro_end_min); + c->hydro.ti_end_max = max(c->hydro.ti_end_max, ti_hydro_end_max); + c->hydro.ti_beg_max = max(c->hydro.ti_beg_max, ti_hydro_beg_max); + c->grav.ti_end_min = min(c->grav.ti_end_min, ti_gravity_end_min); + c->grav.ti_end_max = max(c->grav.ti_end_max, ti_gravity_end_max); + c->grav.ti_beg_max = max(c->grav.ti_beg_max, ti_gravity_beg_max); + } + + /* Clear the limiter flags. 
*/ + cell_clear_flag(c, + cell_flag_do_hydro_limiter | cell_flag_do_hydro_sub_limiter); + + if (timer) TIMER_TOC(timer_do_limiter); +} diff --git a/src/timestep_limiter.h b/src/timestep_limiter.h index d8555a352c8e1a799ac13d268932c9d37f30fe33..01b72daea5599b662c38fdc4b3ada8b2ac5b3d11 100644 --- a/src/timestep_limiter.h +++ b/src/timestep_limiter.h @@ -22,6 +22,9 @@ /* Config parameters. */ #include "../config.h" +/* Local headers. */ +#include "kick.h" + /** * @brief Wakes up a particle by rewinding it's kick1 back in time and applying * a new one such that the particle becomes active again in the next time-step.