diff --git a/src/cell.h b/src/cell.h index cfb5778f06d2d436f3844e33adf2bb31761aeaaa..6beadfd571838fb1a255c2977adddcbd994a4241 100644 --- a/src/cell.h +++ b/src/cell.h @@ -671,6 +671,88 @@ int cell_can_use_pair_mm(const struct cell *ci, const struct cell *cj, int cell_can_use_pair_mm_rebuild(const struct cell *ci, const struct cell *cj, const struct engine *e, const struct space *s);
+/**
+ * @brief Compute the square of the minimal distance between any two points in
+ * two cells of the same size
+ *
+ * Along each axis the separation is the smallest distance between any pair of
+ * edges of the two cells, so cells that share an edge coordinate along an axis
+ * (same column or direct neighbours) are at distance zero along that axis.
+ *
+ * NOTE(review): this presumably relies on the cells never partially
+ * overlapping along an axis (same-size cells of a regular grid) -- confirm
+ * before using it on other cell pairs.
+ *
+ * @param ci The first #cell.
+ * @param cj The second #cell.
+ * @param periodic Are we using periodic BCs?
+ * @param dim The dimensions of the simulation volume
+ *
+ * @return The squared minimal distance between the two cells.
+ */
+__attribute__((always_inline)) INLINE static double cell_min_dist2_same_size(
+    const struct cell *restrict ci, const struct cell *restrict cj,
+    const int periodic, const double dim[3]) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+  if (ci->width[0] != cj->width[0]) error("Cells of different size!");
+  if (ci->width[1] != cj->width[1]) error("Cells of different size!");
+  if (ci->width[2] != cj->width[2]) error("Cells of different size!");
+#endif
+
+  /* Lower edges of both cells */
+  const double cix_min = ci->loc[0];
+  const double ciy_min = ci->loc[1];
+  const double ciz_min = ci->loc[2];
+  const double cjx_min = cj->loc[0];
+  const double cjy_min = cj->loc[1];
+  const double cjz_min = cj->loc[2];
+
+  /* Upper edges of both cells */
+  const double cix_max = ci->loc[0] + ci->width[0];
+  const double ciy_max = ci->loc[1] + ci->width[1];
+  const double ciz_max = ci->loc[2] + ci->width[2];
+  const double cjx_max = cj->loc[0] + cj->width[0];
+  const double cjy_max = cj->loc[1] + cj->width[1];
+  const double cjz_max = cj->loc[2] + cj->width[2];
+
+  if (periodic) {
+
+    /* Every edge difference goes through nearest() to apply the periodic BCs
+     * before the smallest one is selected */
+    const double dx = min4(fabs(nearest(cix_min - cjx_min, dim[0])),
+                           fabs(nearest(cix_min - cjx_max, dim[0])),
+                           fabs(nearest(cix_max - cjx_min, dim[0])),
+                           fabs(nearest(cix_max - cjx_max, dim[0])));
+
+    const double dy = min4(fabs(nearest(ciy_min - cjy_min, dim[1])),
+                           fabs(nearest(ciy_min - cjy_max, dim[1])),
+                           fabs(nearest(ciy_max - cjy_min, dim[1])),
+                           fabs(nearest(ciy_max - cjy_max, dim[1])));
+
+    const double dz = min4(fabs(nearest(ciz_min - cjz_min, dim[2])),
+                           fabs(nearest(ciz_min - cjz_max, dim[2])),
+                           fabs(nearest(ciz_max - cjz_min, dim[2])),
+                           fabs(nearest(ciz_max - cjz_max, dim[2])));
+
+    return dx * dx + dy * dy + dz * dz;
+
+  } else {
+
+    /* All four edge combinations are needed here as well: comparing only the
+     * opposite edges yields a full cell width, instead of zero, for two cells
+     * covering the same range along an axis */
+    const double dx = min4(fabs(cix_min - cjx_min), fabs(cix_min - cjx_max),
+                           fabs(cix_max - cjx_min), fabs(cix_max - cjx_max));
+    const double dy = min4(fabs(ciy_min - cjy_min), fabs(ciy_min - cjy_max),
+                           fabs(ciy_max - cjy_min), fabs(ciy_max - cjy_max));
+    const double dz = min4(fabs(ciz_min - cjz_min), fabs(ciz_min - cjz_max),
+                           fabs(ciz_max - cjz_min), fabs(ciz_max - cjz_max));
+
+    return dx * dx + dy * dy + dz * dz;
+  }
+}
+
 /* Inlined functions (for speed). */ /** diff --git a/src/engine.c b/src/engine.c index de1e269ca4ada16750e33fa5f897d716f65a7953..3a1c0bbe47779478ebc29dbcb400b9089d16936e 100644 --- a/src/engine.c +++ b/src/engine.c @@ -3629,13 +3629,20 @@ void engine_makeproxies(struct engine *e) { const int with_gravity = (e->policy & engine_policy_self_gravity); const double theta_crit_inv = e->gravity_properties->theta_crit_inv; const double theta_crit2 = e->gravity_properties->theta_crit2; - const double max_distance = e->mesh->r_cut_max; + const double max_mesh_dist = e->mesh->r_cut_max; + const double max_mesh_dist2 = max_mesh_dist * max_mesh_dist; - /* Maximal distance between CoMs and any particle in the cell */ - const double r_max2 = cell_width[0] * cell_width[0] + - cell_width[1] * cell_width[1] + - cell_width[2] * cell_width[2]; - const double r_max = sqrt(r_max2); + /* Distance between centre of the cell and corners */ + const double r_diag2 = cell_width[0] * cell_width[0] + + cell_width[1] * cell_width[1] + + cell_width[2] * cell_width[2]; + const double r_diag = 0.5 * sqrt(r_diag2); + + /* Maximal distance from a shifted CoM to centre of cell */ + const double delta_CoM = engine_max_proxy_centre_frac * r_diag; + + /* Maximal distance from shifted CoM to any corner */ + const double r_max = r_diag + 2. * delta_CoM; /* Prepare the proxies and the proxy index. 
*/ if (e->proxy_ind == NULL) @@ -3645,20 +3652,20 @@ void engine_makeproxies(struct engine *e) { e->nr_proxies = 0; /* Compute how many cells away we need to walk */ - int delta = 1; /*hydro case */ + int delta_cells = 1; /*hydro case */ /* Gravity needs to take the opening angle into account */ if (with_gravity) { const double distance = 2. * r_max * theta_crit_inv; - delta = (int)(distance / cells[0].dmin) + 1; + delta_cells = (int)(distance / cells[0].dmin) + 1; } /* Turn this into upper and lower bounds for loops */ - int delta_m = delta; - int delta_p = delta; + int delta_m = delta_cells; + int delta_p = delta_cells; /* Special case where every cell is in range of every other one */ - if (delta >= cdim[0] / 2) { + if (delta_cells >= cdim[0] / 2) { if (cdim[0] % 2 == 0) { delta_m = cdim[0] / 2; delta_p = cdim[0] / 2 - 1; @@ -3673,46 +3680,35 @@ void engine_makeproxies(struct engine *e) { message( "Looking for proxies up to %d top-level cells away (delta_m=%d " "delta_p=%d)", - delta, delta_m, delta_p); + delta_cells, delta_m, delta_p); /* Loop over each cell in the space. */ - int ind[3]; - for (ind[0] = 0; ind[0] < cdim[0]; ind[0]++) { - for (ind[1] = 0; ind[1] < cdim[1]; ind[1]++) { - for (ind[2] = 0; ind[2] < cdim[2]; ind[2]++) { + for (int i = 0; i < cdim[0]; i++) { + for (int j = 0; j < cdim[1]; j++) { + for (int k = 0; k < cdim[2]; k++) { /* Get the cell ID. */ - const int cid = cell_getid(cdim, ind[0], ind[1], ind[2]); - - /* and it's location */ - const double loc_i[3] = {cells[cid].loc[0], cells[cid].loc[1], - cells[cid].loc[2]}; - - /* Loop over all its neighbours (periodic). 
*/ - for (int i = -delta_m; i <= delta_p; i++) { - int ii = ind[0] + i; - if (ii >= cdim[0]) - ii -= cdim[0]; - else if (ii < 0) - ii += cdim[0]; - for (int j = -delta_m; j <= delta_p; j++) { - int jj = ind[1] + j; - if (jj >= cdim[1]) - jj -= cdim[1]; - else if (jj < 0) - jj += cdim[1]; - for (int k = -delta_m; k <= delta_p; k++) { - int kk = ind[2] + k; - if (kk >= cdim[2]) - kk -= cdim[2]; - else if (kk < 0) - kk += cdim[2]; + const int cid = cell_getid(cdim, i, j, k); + + /* Loop over all its neighbours in range. */ + for (int ii = -delta_m; ii <= delta_p; ii++) { + int iii = i + ii; + if (!periodic && (iii < 0 || iii >= cdim[0])) continue; + iii = (iii + cdim[0]) % cdim[0]; + for (int jj = -delta_m; jj <= delta_p; jj++) { + int jjj = j + jj; + if (!periodic && (jjj < 0 || jjj >= cdim[1])) continue; + jjj = (jjj + cdim[1]) % cdim[1]; + for (int kk = -delta_m; kk <= delta_p; kk++) { + int kkk = k + kk; + if (!periodic && (kkk < 0 || kkk >= cdim[2])) continue; + kkk = (kkk + cdim[2]) % cdim[2]; /* Get the cell ID. 
*/ - const int cjd = cell_getid(cdim, ii, jj, kk); + const int cjd = cell_getid(cdim, iii, jjj, kkk); - /* Early abort (same cell) */ - if (cid == cjd) continue; + /* Early abort */ + if (cid >= cjd) continue; /* Early abort (both same node) */ if (cells[cid].nodeID == nodeID && cells[cjd].nodeID == nodeID) @@ -3732,15 +3728,12 @@ void engine_makeproxies(struct engine *e) { /* This is super-ugly but checks for direct neighbours */ /* with periodic BC */ - if (((abs(ind[0] - ii) <= 1 || - abs(ind[0] - ii - cdim[0]) <= 1 || - abs(ind[0] - ii + cdim[0]) <= 1) && - (abs(ind[1] - jj) <= 1 || - abs(ind[1] - jj - cdim[1]) <= 1 || - abs(ind[1] - jj + cdim[1]) <= 1) && - (abs(ind[2] - kk) <= 1 || - abs(ind[2] - kk - cdim[2]) <= 1 || - abs(ind[2] - kk + cdim[2]) <= 1))) + if (((abs(i - iii) <= 1 || abs(i - iii - cdim[0]) <= 1 || + abs(i - iii + cdim[0]) <= 1) && + (abs(j - jjj) <= 1 || abs(j - jjj - cdim[1]) <= 1 || + abs(j - jjj + cdim[1]) <= 1) && + (abs(k - kkk) <= 1 || abs(k - kkk - cdim[2]) <= 1 || + abs(k - kkk + cdim[2]) <= 1))) proxy_type |= (int)proxy_cell_type_hydro; } @@ -3754,44 +3747,28 @@ void engine_makeproxies(struct engine *e) { for an M2L interaction and hence require a proxy as this pair of cells cannot rely on just an M2L calculation. */ - const double loc_j[3] = {cells[cjd].loc[0], cells[cjd].loc[1], - cells[cjd].loc[2]}; - - /* Start with the distance between the cell centres. 
*/ - double dx = loc_i[0] - loc_j[0]; - double dy = loc_i[1] - loc_j[1]; - double dz = loc_i[2] - loc_j[2]; - - /* Apply BC */ - if (periodic) { - dx = nearest(dx, dim[0]); - dy = nearest(dy, dim[1]); - dz = nearest(dz, dim[2]); - } - - /* Add to it for the case where the future CoMs are in the - * corners */ - dx += cell_width[0]; - dy += cell_width[1]; - dz += cell_width[2]; - - /* This is a crazy upper-bound but the best we can do */ - const double r2 = dx * dx + dy * dy + dz * dz; + /* Minimal distance between any two points in the cells */ + const double min_dist_centres2 = cell_min_dist2_same_size( + &cells[cid], &cells[cjd], periodic, dim); - /* Minimal distance between any pair of particles */ - const double min_radius = sqrt(r2) - 2. * r_max; + /* Let's now assume the CoMs will shift a bit */ + const double min_dist_CoM = + sqrt(min_dist_centres2) - 2. * delta_CoM; + const double min_dist_CoM2 = min_dist_CoM * min_dist_CoM; /* Are we beyond the distance where the truncated forces are 0 * but not too far such that M2L can be used? */ if (periodic) { - if ((min_radius < max_distance) && - (!gravity_M2L_accept(r_max, r_max, theta_crit2, r2))) + if ((min_dist_CoM2 < max_mesh_dist2) && + (!gravity_M2L_accept(r_max, r_max, theta_crit2, + min_dist_CoM2))) proxy_type |= (int)proxy_cell_type_gravity; } else { - if (!gravity_M2L_accept(r_max, r_max, theta_crit2, r2)) + if (!gravity_M2L_accept(r_max, r_max, theta_crit2, + min_dist_CoM2)) proxy_type |= (int)proxy_cell_type_gravity; } } @@ -3803,8 +3780,8 @@ void engine_makeproxies(struct engine *e) { if (cells[cid].nodeID == nodeID && cells[cjd].nodeID != nodeID) { /* Do we already have a relationship with this node? 
*/ - int pid = e->proxy_ind[cells[cjd].nodeID]; - if (pid < 0) { + int proxy_id = e->proxy_ind[cells[cjd].nodeID]; + if (proxy_id < 0) { if (e->nr_proxies == engine_maxproxies) error("Maximum number of proxies exceeded."); @@ -3814,24 +3791,31 @@ void engine_makeproxies(struct engine *e) { /* Store the information */ e->proxy_ind[cells[cjd].nodeID] = e->nr_proxies; - pid = e->nr_proxies; + proxy_id = e->nr_proxies; e->nr_proxies += 1; + + /* Check the maximal proxy limit (proxy_id is used as a bit index below) */ + if ((size_t)proxy_id >= 8 * sizeof(long long)) + error( + "Created more than %zu proxies. cell.mpi.sendto will " + "overflow.", + 8 * sizeof(long long)); } /* Add the cell to the proxy */ - proxy_addcell_in(&proxies[pid], &cells[cjd], proxy_type); - proxy_addcell_out(&proxies[pid], &cells[cid], proxy_type); + proxy_addcell_in(&proxies[proxy_id], &cells[cjd], proxy_type); + proxy_addcell_out(&proxies[proxy_id], &cells[cid], proxy_type); /* Store info about where to send the cell */ - cells[cid].mpi.sendto |= (1ULL << pid); + cells[cid].mpi.sendto |= (1ULL << proxy_id); } /* Same for the symmetric case? */ if (cells[cjd].nodeID == nodeID && cells[cid].nodeID != nodeID) { /* Do we already have a relationship with this node? */ - int pid = e->proxy_ind[cells[cid].nodeID]; - if (pid < 0) { + int proxy_id = e->proxy_ind[cells[cid].nodeID]; + if (proxy_id < 0) { if (e->nr_proxies == engine_maxproxies) error("Maximum number of proxies exceeded."); @@ -3841,16 +3825,23 @@ void engine_makeproxies(struct engine *e) { /* Store the information */ e->proxy_ind[cells[cid].nodeID] = e->nr_proxies; - pid = e->nr_proxies; + proxy_id = e->nr_proxies; e->nr_proxies += 1; + + /* Check the maximal proxy limit (proxy_id is used as a bit index below) */ + if ((size_t)proxy_id >= 8 * sizeof(long long)) + error( + "Created more than %zu proxies. 
cell.mpi.sendto will " + "overflow.", + 8 * sizeof(long long)); } /* Add the cell to the proxy */ - proxy_addcell_in(&proxies[pid], &cells[cid], proxy_type); - proxy_addcell_out(&proxies[pid], &cells[cjd], proxy_type); + proxy_addcell_in(&proxies[proxy_id], &cells[cid], proxy_type); + proxy_addcell_out(&proxies[proxy_id], &cells[cjd], proxy_type); /* Store info about where to send the cell */ - cells[cjd].mpi.sendto |= (1ULL << pid); + cells[cjd].mpi.sendto |= (1ULL << proxy_id); } } } diff --git a/src/engine.h b/src/engine.h index 50eef4b314da4c294a86e59e29542859f7f402f2..96d90a5e79d36a5daaaa66d6cca6f16aa337701e 100644 --- a/src/engine.h +++ b/src/engine.h @@ -98,6 +98,7 @@ enum engine_step_properties { #define engine_maxproxies 64 #define engine_tasksreweight 1 #define engine_parts_size_grow 1.05 +#define engine_max_proxy_centre_frac 0.2 #define engine_redistribute_alloc_margin 1.2 #define engine_default_energy_file_name "energy" #define engine_default_timesteps_file_name "timesteps" diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c index dda69319608f06bb73de58fdfa4c41155ca6fd88..279e4c95cfdb046cf2c6dc60ecfb0e923923ddc7 100644 --- a/src/engine_maketasks.c +++ b/src/engine_maketasks.c @@ -766,7 +766,7 @@ void engine_make_hierarchical_tasks_stars(struct engine *e, struct cell *c) { void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, void *extra_data) { - struct engine *e = ((struct engine **)extra_data)[0]; + struct engine *e = (struct engine *)extra_data; struct space *s = e->s; struct scheduler *sched = &e->sched; const int nodeID = e->nodeID; @@ -776,6 +776,7 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, struct cell *cells = s->cells_top; const double theta_crit = e->gravity_properties->theta_crit; const double max_distance = e->mesh->r_cut_max; + const double max_distance2 = max_distance * max_distance; /* Compute how many cells away we need to walk */ const double distance = 2.5 * 
cells[0].width[0] / theta_crit; @@ -811,91 +812,50 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, /* Skip cells without gravity particles */ if (ci->grav.count == 0) continue; - /* Is that cell local ? */ - if (ci->nodeID != nodeID) continue; - - /* If the cells is local build a self-interaction */ - scheduler_addtask(sched, task_type_self, task_subtype_grav, 0, 0, ci, NULL); - - /* Recover the multipole information */ - const struct gravity_tensors *const multi_i = ci->grav.multipole; - const double CoM_i[3] = {multi_i->CoM[0], multi_i->CoM[1], multi_i->CoM[2]}; - -#ifdef SWIFT_DEBUG_CHECKS - if (cell_getid(cdim, i, j, k) != cid) - error("Incorrect calculation of indices (i,j,k)=(%d,%d,%d) cid=%d", i, j, - k, cid); - - if (multi_i->r_max != multi_i->r_max_rebuild) - error( - "Multipole size not equal ot it's size after rebuild. But we just " - "rebuilt..."); -#endif + /* If the cell is local build a self-interaction */ + if (ci->nodeID == nodeID) { + scheduler_addtask(sched, task_type_self, task_subtype_grav, 0, 0, ci, + NULL); + } /* Loop over every other cell within (Manhattan) range delta */ - for (int x = -delta_m; x <= delta_p; x++) { - int ii = i + x; - if (ii >= cdim[0]) - ii -= cdim[0]; - else if (ii < 0) - ii += cdim[0]; - for (int y = -delta_m; y <= delta_p; y++) { - int jj = j + y; - if (jj >= cdim[1]) - jj -= cdim[1]; - else if (jj < 0) - jj += cdim[1]; - for (int z = -delta_m; z <= delta_p; z++) { - int kk = k + z; - if (kk >= cdim[2]) - kk -= cdim[2]; - else if (kk < 0) - kk += cdim[2]; + for (int ii = -delta_m; ii <= delta_p; ii++) { + int iii = i + ii; + if (!periodic && (iii < 0 || iii >= cdim[0])) continue; + iii = (iii + cdim[0]) % cdim[0]; + for (int jj = -delta_m; jj <= delta_p; jj++) { + int jjj = j + jj; + if (!periodic && (jjj < 0 || jjj >= cdim[1])) continue; + jjj = (jjj + cdim[1]) % cdim[1]; + for (int kk = -delta_m; kk <= delta_p; kk++) { + int kkk = k + kk; + if (!periodic && (kkk < 0 || kkk >= 
cdim[2])) continue; + kkk = (kkk + cdim[2]) % cdim[2]; /* Get the cell */ - const int cjd = cell_getid(cdim, ii, jj, kk); + const int cjd = cell_getid(cdim, iii, jjj, kkk); struct cell *cj = &cells[cjd]; -#ifdef SWIFT_DEBUG_CHECKS - const int iii = cjd / (cdim[1] * cdim[2]); - const int jjj = (cjd / cdim[2]) % cdim[1]; - const int kkk = cjd % cdim[2]; - - if (ii != iii || jj != jjj || kk != kkk) - error( - "Incorrect calculation of indices (iii,jjj,kkk)=(%d,%d,%d) " - "cjd=%d", - iii, jjj, kkk, cjd); -#endif - - /* Avoid duplicates of local pairs*/ - if (cid <= cjd && cj->nodeID == nodeID) continue; - - /* Skip cells without gravity particles */ - if (cj->grav.count == 0) continue; + /* Avoid duplicates, empty cells and completely foreign pairs */ + if (cid >= cjd || cj->grav.count == 0 || + (ci->nodeID != nodeID && cj->nodeID != nodeID)) + continue; /* Recover the multipole information */ - const struct gravity_tensors *const multi_j = cj->grav.multipole; - - /* Get the distance between the CoMs */ - double dx = CoM_i[0] - multi_j->CoM[0]; - double dy = CoM_i[1] - multi_j->CoM[1]; - double dz = CoM_i[2] - multi_j->CoM[2]; - - /* Apply BC */ - if (periodic) { - dx = nearest(dx, dim[0]); - dy = nearest(dy, dim[1]); - dz = nearest(dz, dim[2]); - } - const double r2 = dx * dx + dy * dy + dz * dz; + const struct gravity_tensors *multi_i = ci->grav.multipole; + const struct gravity_tensors *multi_j = cj->grav.multipole; + + if (multi_i == NULL && ci->nodeID != nodeID) + error("Multipole of ci was not exchanged properly via the proxies"); + if (multi_j == NULL && cj->nodeID != nodeID) + error("Multipole of cj was not exchanged properly via the proxies"); /* Minimal distance between any pair of particles */ - const double min_radius = - sqrt(r2) - (multi_i->r_max + multi_j->r_max); + const double min_radius2 = + cell_min_dist2_same_size(ci, cj, periodic, dim); /* Are we beyond the distance where the truncated forces are 0 ?*/ - if (periodic && min_radius > max_distance) 
continue; + if (periodic && min_radius2 > max_distance2) continue; /* Are the cells too close for a MM interaction ? */ if (!cell_can_use_pair_mm_rebuild(ci, cj, e, s)) { @@ -903,6 +863,54 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, /* Ok, we need to add a direct pair calculation */ scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0, ci, cj); + +#ifdef SWIFT_DEBUG_CHECKS +#ifdef WITH_MPI + + /* Let's cross-check that we had a proxy for that cell */ + if (ci->nodeID == nodeID && cj->nodeID != engine_rank) { + + /* Find the proxy for this node */ + const int proxy_id = e->proxy_ind[cj->nodeID]; + if (proxy_id < 0) + error("No proxy exists for that foreign node %d!", cj->nodeID); + + const struct proxy *p = &e->proxies[proxy_id]; + + /* Check whether the cell exists in the proxy */ + int n = 0; + for (; n < p->nr_cells_in; n++) + if (p->cells_in[n] == cj) { + break; + } + if (n == p->nr_cells_in) + error( + "Cell %d not found in the proxy but trying to construct " + "grav task!", + cjd); + } else if (cj->nodeID == nodeID && ci->nodeID != engine_rank) { + + /* Find the proxy for this node */ + const int proxy_id = e->proxy_ind[ci->nodeID]; + if (proxy_id < 0) + error("No proxy exists for that foreign node %d!", ci->nodeID); + + const struct proxy *p = &e->proxies[proxy_id]; + + /* Check whether the cell exists in the proxy */ + int n = 0; + for (; n < p->nr_cells_in; n++) + if (p->cells_in[n] == ci) { + break; + } + if (n == p->nr_cells_in) + error( + "Cell %d not found in the proxy but trying to construct " + "grav task!", + cid); + } +#endif /* WITH_MPI */ +#endif /* SWIFT_DEBUG_CHECKS */ } } } @@ -932,26 +940,6 @@ void engine_make_hierarchical_tasks_mapper(void *map_data, int num_elements, } } -/** - * @brief Constructs the top-level tasks for the short-range gravity - * interactions (master function). - * - * - Create the FFT task and the array of gravity ghosts. 
- * - Call the mapper function to create the other tasks. - * - * @param e The #engine. - */ -void engine_make_self_gravity_tasks(struct engine *e) { - - struct space *s = e->s; - struct task **ghosts = NULL; - - /* Create the multipole self and pair tasks. */ - void *extra_data[2] = {e, ghosts}; - threadpool_map(&e->threadpool, engine_make_self_gravity_tasks_mapper, NULL, - s->nr_cells, 1, 0, extra_data); -} - /** * @brief Constructs the top-level tasks for the external gravity. * @@ -1768,6 +1756,8 @@ void engine_make_hydroloop_tasks_mapper(void *map_data, int num_elements, /* Get the cell index. */ const int cid = (size_t)(map_data) + ind; + + /* Integer indices of the cell in the top-level grid */ const int i = cid / (cdim[1] * cdim[2]); const int j = (cid / cdim[2]) % cdim[1]; const int k = cid % cdim[2]; @@ -1778,10 +1768,11 @@ void engine_make_hydroloop_tasks_mapper(void *map_data, int num_elements, /* Skip cells without hydro particles */ if (ci->hydro.count == 0) continue; - /* If the cells is local build a self-interaction */ - if (ci->nodeID == nodeID) + /* If the cell is local build a self-interaction */ + if (ci->nodeID == nodeID) { scheduler_addtask(sched, task_type_self, task_subtype_density, 0, 0, ci, NULL); + } /* Now loop over all the neighbours of this cell */ for (int ii = -1; ii < 2; ii++) { @@ -1810,6 +1801,50 @@ void engine_make_hydroloop_tasks_mapper(void *map_data, int num_elements, const int sid = sortlistID[(kk + 1) + 3 * ((jj + 1) + 3 * (ii + 1))]; scheduler_addtask(sched, task_type_pair, task_subtype_density, sid, 0, ci, cj); + +#ifdef SWIFT_DEBUG_CHECKS +#ifdef WITH_MPI + + /* Let's cross-check that we had a proxy for that cell */ + if (ci->nodeID == nodeID && cj->nodeID != engine_rank) { + + /* Find the proxy for this node */ + const int proxy_id = e->proxy_ind[cj->nodeID]; + if (proxy_id < 0) + error("No proxy exists for that foreign node %d!", cj->nodeID); + + const struct proxy *p = &e->proxies[proxy_id]; + + /* Check whether the 
cell exists in the proxy */ + int n = 0; + for (n = 0; n < p->nr_cells_in; n++) + if (p->cells_in[n] == cj) break; + if (n == p->nr_cells_in) + error( + "Cell %d not found in the proxy but trying to construct " + "hydro task!", + cjd); + } else if (cj->nodeID == nodeID && ci->nodeID != engine_rank) { + + /* Find the proxy for this node */ + const int proxy_id = e->proxy_ind[ci->nodeID]; + if (proxy_id < 0) + error("No proxy exists for that foreign node %d!", ci->nodeID); + + const struct proxy *p = &e->proxies[proxy_id]; + + /* Check whether the cell exists in the proxy */ + int n = 0; + for (n = 0; n < p->nr_cells_in; n++) + if (p->cells_in[n] == ci) break; + if (n == p->nr_cells_in) + error( + "Cell %d not found in the proxy but trying to construct " + "hydro task!", + cid); + } +#endif /* WITH_MPI */ +#endif /* SWIFT_DEBUG_CHECKS */ } } } @@ -1908,8 +1943,17 @@ void engine_maketasks(struct engine *e) { s->nr_cells, 1, 0, e); } + if (e->verbose) + message("Making stellar feedback tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); + + tic2 = getticks(); + /* Add the self gravity tasks. */ - if (e->policy & engine_policy_self_gravity) engine_make_self_gravity_tasks(e); + if (e->policy & engine_policy_self_gravity) { + threadpool_map(&e->threadpool, engine_make_self_gravity_tasks_mapper, NULL, + s->nr_cells, 1, 0, e); + } if (e->verbose) message("Making gravity tasks took %.3f %s.", diff --git a/src/minmax.h b/src/minmax.h index 90dd87968a94d9601a87fd3b826000c166a98966..e4d7c8788ea1e43d1c296a212193049a94347949 100644 --- a/src/minmax.h +++ b/src/minmax.h @@ -71,4 +71,36 @@ max(_temp, _z); \ }) +/** + * @brief Minimum of four numbers + * + * This macro evaluates its arguments exactly once. 
+ * The two pairwise minima are stored in the type of x. */ +#define min4(x, y, z, w) \ + ({ \ + const __typeof__(x) _x = (x); \ + const __typeof__(y) _y = (y); \ + const __typeof__(z) _z = (z); \ + const __typeof__(w) _w = (w); \ + const __typeof__(x) _temp1 = min(_x, _y); \ + const __typeof__(x) _temp2 = min(_z, _w); \ + min(_temp1, _temp2); \ + }) + +/** + * @brief Maximum of four numbers + * + * This macro evaluates its arguments exactly once; the two pairwise maxima are stored in the type of x. + */ +#define max4(x, y, z, w) \ + ({ \ + const __typeof__(x) _x = (x); \ + const __typeof__(y) _y = (y); \ + const __typeof__(z) _z = (z); \ + const __typeof__(w) _w = (w); \ + const __typeof__(x) _temp1 = max(_x, _y); \ + const __typeof__(x) _temp2 = max(_z, _w); \ + max(_temp1, _temp2); \ + }) + #endif /* SWIFT_MINMAX_H */ diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 5bcd57d643911703bc0f4e19f17fdb63a11a5c12..2ed2495154195d09af2825eab4c277150b73ec01 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -1714,7 +1714,7 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, const int periodic = e->mesh->periodic; const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; const double theta_crit2 = e->gravity_properties->theta_crit2; - const double max_distance = e->mesh->r_cut_max; + const double max_distance2 = e->mesh->r_cut_max * e->mesh->r_cut_max; TIMER_TIC; @@ -1759,24 +1759,11 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, /* Skip empty cells */ if (multi_j->m_pole.M_000 == 0.f) continue; - /* Get the distance between the CoMs at the last rebuild*/ - double dx_r = CoM_rebuild_top[0] - multi_j->CoM_rebuild[0]; - double dy_r = CoM_rebuild_top[1] - multi_j->CoM_rebuild[1]; - double dz_r = CoM_rebuild_top[2] - multi_j->CoM_rebuild[2]; - - /* Apply BC */ - if (periodic) { - dx_r = nearest(dx_r, dim[0]); - dy_r = nearest(dy_r, dim[1]); - dz_r = nearest(dz_r, dim[2]); - } - const double r2_rebuild = dx_r * dx_r + dy_r * dy_r + dz_r * dz_r; - - const 
double max_radius = - sqrt(r2_rebuild) - (multi_top->r_max_rebuild + multi_j->r_max_rebuild); + /* Minimal distance between any pair of particles */ + const double min_radius2 = cell_min_dist2_same_size(ci, cj, periodic, dim); /* Are we beyond the distance where the truncated forces are 0 ?*/ - if (periodic && max_radius > max_distance) { + if (periodic && min_radius2 > max_distance2) { #ifdef SWIFT_DEBUG_CHECKS /* Need to account for the interactions we missed */ @@ -1790,6 +1777,19 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, continue; } + /* Get the distance between the CoMs at the last rebuild*/ + double dx_r = CoM_rebuild_top[0] - multi_j->CoM_rebuild[0]; + double dy_r = CoM_rebuild_top[1] - multi_j->CoM_rebuild[1]; + double dz_r = CoM_rebuild_top[2] - multi_j->CoM_rebuild[2]; + + /* Apply BC */ + if (periodic) { + dx_r = nearest(dx_r, dim[0]); + dy_r = nearest(dy_r, dim[1]); + dz_r = nearest(dz_r, dim[2]); + } + const double r2_rebuild = dx_r * dx_r + dy_r * dy_r + dz_r * dz_r; + /* Are we in charge of this cell pair? */ if (gravity_M2L_accept(multi_top->r_max_rebuild, multi_j->r_max_rebuild, theta_crit2, r2_rebuild)) {