Commit 24be404f authored by James Willis's avatar James Willis
Browse files

Merge branch 'dopair2-vectorisation' into debug_interactions

parents ea50f018 c62da58f
......@@ -55,7 +55,9 @@ tests/brute_force_125_standard.dat
tests/swift_dopair_125_standard.dat
tests/brute_force_125_perturbed.dat
tests/swift_dopair_125_perturbed.dat
tests/brute_force_active.dat
tests/brute_force_pair_active.dat
tests/brute_force_dopair2_active.dat
tests/swift_dopair2_force_active.dat
tests/brute_force_periodic_BC_perturbed.dat
tests/swift_dopair_active.dat
tests/test_nonsym_density_serial.dat
......
......@@ -30,6 +30,8 @@
#include "sort_part.h"
#include "vector.h"
#include <float.h>
#define NUM_VEC_PROC 2
#define C2_CACHE_SIZE (NUM_VEC_PROC * VEC_SIZE * 6) + (NUM_VEC_PROC * VEC_SIZE)
......@@ -407,12 +409,15 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
float fake_pix = 2.0f * parts_i[sort_i[ci->count - 1].i].x[0];
const float max_dx = max(ci->dx_max_part, cj->dx_max_part);
const float pos_padded[3] = {-(2. * ci->width[0] + max_dx), -(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)};
for (int i = ci->count - first_pi_align;
i < ci->count - first_pi_align + VEC_SIZE; i++) {
x[i] = fake_pix;
y[i] = 1.f;
z[i] = 1.f;
x[i] = pos_padded[0];
y[i] = pos_padded[1];
z[i] = pos_padded[2];
h[i] = 1.f;
m[i] = 1.f;
......@@ -477,11 +482,12 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
float fake_pjx = 2.0f * cj->parts[sort_j[cj->count - 1].i].x[0];
const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx), -(2. * cj->width[1] + max_dx),
-(2. * cj->width[2] + max_dx)};
for (int i = last_pj_align + 1; i < last_pj_align + 1 + VEC_SIZE; i++) {
xj[i] = fake_pjx;
yj[i] = 1.f;
zj[i] = 1.f;
xj[i] = pos_padded_j[0];
yj[i] = pos_padded_j[1];
zj[i] = pos_padded_j[2];
hj[i] = 1.f;
mj[i] = 1.f;
......@@ -491,6 +497,194 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
}
}
/**
* @brief Populate caches by only reading particles that are within range of
* each other within the adjoining cell.Also read the particles into the cache
* in sorted order.
*
* @param ci The i #cell.
* @param cj The j #cell.
* @param ci_cache The #cache for cell ci.
* @param cj_cache The #cache for cell cj.
* @param sort_i The array of sorted particle indices for cell ci.
* @param sort_j The array of sorted particle indices for cell ci.
* @param shift The amount to shift the particle positions to account for BCs
* @param first_pi The first particle in cell ci that is in range.
* @param last_pj The last particle in cell cj that is in range.
* @param num_vec_proc Number of vectors that will be used to process the
* interaction.
*/
__attribute__((always_inline)) INLINE void
cache_read_two_partial_cells_sorted_force(
const struct cell *const ci, const struct cell *const cj,
struct cache *const ci_cache, struct cache *const cj_cache,
const struct entry *restrict sort_i, const struct entry *restrict sort_j,
const double *const shift, int *first_pi, int *last_pj,
const int num_vec_proc) {
int idx;
/* Pad number of particles read to the vector size. */
int rem = (ci->count - *first_pi) % (num_vec_proc * VEC_SIZE);
if (rem != 0) {
int pad = (num_vec_proc * VEC_SIZE) - rem;
if (*first_pi - pad >= 0) *first_pi -= pad;
}
rem = *last_pj % (num_vec_proc * VEC_SIZE);
if (rem != 0) {
int pad = (num_vec_proc * VEC_SIZE) - rem;
if (*last_pj + pad < cj->count) *last_pj += pad;
}
int first_pi_align = *first_pi;
int last_pj_align = *last_pj;
const struct part *restrict parts_i = ci->parts;
const struct part *restrict parts_j = cj->parts;
double loc[3];
loc[0] = ci->loc[0];
loc[1] = ci->loc[1];
loc[2] = ci->loc[2];
/* Let the compiler know that the data is aligned and create pointers to the
* arrays inside the cache. */
swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, h, ci_cache->h, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, m, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, vx, ci_cache->vx, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, vy, ci_cache->vy, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, rho, ci_cache->rho, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, grad_h, ci_cache->grad_h,
SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, pOrho2, ci_cache->pOrho2,
SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, balsara, ci_cache->balsara,
SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, soundspeed, ci_cache->soundspeed,
SWIFT_CACHE_ALIGNMENT);
int ci_cache_count = ci->count - first_pi_align;
/* Shift the particles positions to a local frame (ci frame) so single
* precision
* can be
* used instead of double precision. Also shift the cell ci, particles
* positions
* due to BCs but leave cell cj. */
for (int i = 0; i < ci_cache_count; i++) {
/* Make sure ci_cache is filled from the first element. */
idx = sort_i[i + first_pi_align].i;
x[i] = (float)(parts_i[idx].x[0] - loc[0] - shift[0]);
y[i] = (float)(parts_i[idx].x[1] - loc[1] - shift[1]);
z[i] = (float)(parts_i[idx].x[2] - loc[2] - shift[2]);
h[i] = parts_i[idx].h;
m[i] = parts_i[idx].mass;
vx[i] = parts_i[idx].v[0];
vy[i] = parts_i[idx].v[1];
vz[i] = parts_i[idx].v[2];
rho[i] = parts_i[idx].rho;
grad_h[i] = parts_i[idx].force.f;
pOrho2[i] = parts_i[idx].force.P_over_rho2;
balsara[i] = parts_i[idx].force.balsara;
soundspeed[i] = parts_i[idx].force.soundspeed;
}
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
const float max_dx = max(ci->dx_max_part, cj->dx_max_part);
const float pos_padded[3] = {-(2. * ci->width[0] + max_dx), -(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)};
const float h_padded = ci->parts[0].h;
for (int i = ci->count - first_pi_align;
i < ci->count - first_pi_align + VEC_SIZE; i++) {
x[i] = pos_padded[0];
y[i] = pos_padded[1];
z[i] = pos_padded[2];
h[i] = h_padded;
m[i] = 1.f;
vx[i] = 1.f;
vy[i] = 1.f;
vz[i] = 1.f;
rho[i] = 1.f;
grad_h[i] = 1.f;
pOrho2[i] = 1.f;
balsara[i] = 1.f;
soundspeed[i] = 1.f;
}
/* Let the compiler know that the data is aligned and create pointers to the
* arrays inside the cache. */
swift_declare_aligned_ptr(float, xj, cj_cache->x, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, yj, cj_cache->y, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, zj, cj_cache->z, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, hj, cj_cache->h, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, mj, cj_cache->m, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, vxj, cj_cache->vx, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, vyj, cj_cache->vy, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, vzj, cj_cache->vz, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, rhoj, cj_cache->rho, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, grad_hj, cj_cache->grad_h,
SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, pOrho2j, cj_cache->pOrho2,
SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, balsaraj, cj_cache->balsara,
SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, soundspeedj, cj_cache->soundspeed,
SWIFT_CACHE_ALIGNMENT);
for (int i = 0; i <= last_pj_align; i++) {
idx = sort_j[i].i;
xj[i] = (float)(parts_j[idx].x[0] - loc[0]);
yj[i] = (float)(parts_j[idx].x[1] - loc[1]);
zj[i] = (float)(parts_j[idx].x[2] - loc[2]);
hj[i] = parts_j[idx].h;
mj[i] = parts_j[idx].mass;
vxj[i] = parts_j[idx].v[0];
vyj[i] = parts_j[idx].v[1];
vzj[i] = parts_j[idx].v[2];
rhoj[i] = parts_j[idx].rho;
grad_hj[i] = parts_j[idx].force.f;
pOrho2j[i] = parts_j[idx].force.P_over_rho2;
balsaraj[i] = parts_j[idx].force.balsara;
soundspeedj[i] = parts_j[idx].force.soundspeed;
}
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx), -(2. * cj->width[1] + max_dx),
-(2. * cj->width[2] + max_dx)};
const float h_padded_j = cj->parts[0].h;
for (int i = last_pj_align + 1; i < last_pj_align + 1 + VEC_SIZE; i++) {
xj[i] = pos_padded_j[0];
yj[i] = pos_padded_j[1];
zj[i] = pos_padded_j[2];
hj[i] = h_padded_j;
mj[i] = 1.f;
vxj[i] = 1.f;
vyj[i] = 1.f;
vzj[i] = 1.f;
rhoj[i] = 1.f;
grad_hj[i] = 1.f;
pOrho2j[i] = 1.f;
balsaraj[i] = 1.f;
soundspeedj[i] = 1.f;
}
}
/* @brief Clean the memory allocated by a #cache object.
*
* @param c The #cache to clean.
......
......@@ -175,6 +175,11 @@ __attribute__((always_inline)) INLINE static void hydro_timestep_extra(
__attribute__((always_inline)) INLINE static void hydro_init_part(
struct part *restrict p, const struct hydro_space *hs) {
#ifdef DEBUG_INTERACTIONS
for (int i = 0; i < NUM_OF_NEIGHBOURS; ++i) p->ids_ngbs_density[i] = -1;
p->num_ngb_density = 0;
#endif
p->rho = 0.f;
p->density.wcount = 0.f;
p->density.wcount_dh = 0.f;
......@@ -311,6 +316,11 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force(
__attribute__((always_inline)) INLINE static void hydro_reset_acceleration(
struct part *restrict p) {
#ifdef DEBUG_INTERACTIONS
for (int i = 0; i < NUM_OF_NEIGHBOURS; ++i) p->ids_ngbs_force[i] = -1;
p->num_ngb_force = 0;
#endif
/* Reset the acceleration. */
p->a_hydro[0] = 0.0f;
p->a_hydro[1] = 0.0f;
......
......@@ -103,6 +103,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
pj->density.rot_v[0] += facj * curlvr[0];
pj->density.rot_v[1] += facj * curlvr[1];
pj->density.rot_v[2] += facj * curlvr[2];
/* Update ngb counters */
#ifdef DEBUG_INTERACTIONS
if(pi->id == CHECK_PART_ID) pi->ids_ngbs_density[pi->num_ngb_density] = pj->id;
++pi->num_ngb_density;
if(pj->id == CHECK_PART_ID) pj->ids_ngbs_density[pj->num_ngb_density] = pi->id;
++pj->num_ngb_density;
#endif
}
/**
......@@ -151,6 +160,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
pi->density.rot_v[0] += fac * curlvr[0];
pi->density.rot_v[1] += fac * curlvr[1];
pi->density.rot_v[2] += fac * curlvr[2];
#ifdef DEBUG_INTERACTIONS
/* Update ngb counters */
if(pi->id == CHECK_PART_ID) pi->ids_ngbs_density[pi->num_ngb_density] = pj->id;
++pi->num_ngb_density;
#endif
}
#ifdef WITH_VECTORIZATION
......@@ -480,6 +496,19 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
/* Change in entropy */
pi->entropy_dt += mj * visc_term * dvdr;
pj->entropy_dt += mi * visc_term * dvdr;
#ifdef DEBUG_INTERACTIONS
/* Update ngb counters */
if(pi->id == CHECK_PART_ID) {
pi->ids_ngbs_force[pi->num_ngb_force] = pj->id;
}
++pi->num_ngb_force;
if(pj->id == CHECK_PART_ID) {
pj->ids_ngbs_force[pj->num_ngb_force] = pi->id;
}
++pj->num_ngb_force;
#endif
}
/**
......@@ -569,6 +598,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
/* Change in entropy */
pi->entropy_dt += mj * visc_term * dvdr;
#ifdef DEBUG_INTERACTIONS
/* Update ngb counters */
if(pi->id == CHECK_PART_ID) {
pi->ids_ngbs_force[pi->num_ngb_force] = pj->id;
}
++pi->num_ngb_force;
#endif
}
#ifdef WITH_VECTORIZATION
......@@ -634,7 +672,7 @@ runner_iact_nonsym_1_vec_force(
hjd_inv = pow_dimension_plus_one_vec(hj_inv);
xj.v = vec_mul(r.v, hj_inv.v);
/* Calculate the kernel for two particles. */
/* Calculate the kernel. */
kernel_eval_dWdx_force_vec(&xj, &wj_dx);
wj_dr.v = vec_mul(hjd_inv.v, wj_dx.v);
......
......@@ -77,6 +77,10 @@ void hydro_write_particles(struct part* parts, struct io_props* list,
*num_fields = 10;
#ifdef DEBUG_INTERACTIONS
*num_fields = 14;
#endif
/* List what we want to write */
list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH,
parts, x);
......@@ -99,6 +103,17 @@ void hydro_write_particles(struct part* parts, struct io_props* list,
parts, rho, convert_u);
list[9] = io_make_output_field_convert_part(
"Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, parts, rho, convert_P);
#ifdef DEBUG_INTERACTIONS
list[10] = io_make_output_field("Num_ngb_density", INT, 1, UNIT_CONV_NO_UNITS,
parts, num_ngb_density);
list[11] = io_make_output_field("Num_ngb_force", INT, 1, UNIT_CONV_NO_UNITS,
parts, num_ngb_force);
list[12] = io_make_output_field("Ids_ngb_density", LONGLONG, NUM_OF_NEIGHBOURS,
UNIT_CONV_NO_UNITS, parts, ids_ngbs_density);
list[13] = io_make_output_field("Ids_ngb_force", LONGLONG, NUM_OF_NEIGHBOURS,
UNIT_CONV_NO_UNITS, parts, ids_ngbs_force);
#endif
}
/**
......
......@@ -33,6 +33,10 @@
#include "cooling_struct.h"
#define NUM_OF_NEIGHBOURS 128
#define CHECK_PART_ID 999999999999999999999
//#define CHECK_PART_ID 5673486482283
/* Extra particle data not needed during the SPH loops over neighbours. */
struct xpart {
......@@ -143,6 +147,20 @@ struct part {
#endif
#ifdef DEBUG_INTERACTIONS
/*! List of interacting particles in the density SELF and PAIR */
long long ids_ngbs_density[NUM_OF_NEIGHBOURS];
/*! List of interacting particles in the force SELF and PAIR */
long long ids_ngbs_force[NUM_OF_NEIGHBOURS];
/*! Number of interactions in the density SELF and PAIR */
int num_ngb_density;
/*! Number of interactions in the force SELF and PAIR */
int num_ngb_force;
#endif
} SWIFT_STRUCT_ALIGN;
#endif /* SWIFT_GADGET2_HYDRO_PART_H */
......@@ -1885,7 +1885,7 @@ void *runner_main(void *data) {
runner_dopair1_branch_gradient(r, ci, cj);
#endif
else if (t->subtype == task_subtype_force)
runner_dopair2_force(r, ci, cj);
runner_dopair2_branch_force(r, ci, cj);
else if (t->subtype == task_subtype_grav)
runner_dopair_grav(r, ci, cj, 1);
else
......
......@@ -32,6 +32,9 @@
#define _DOPAIR1(f) PASTE(runner_dopair1, f)
#define DOPAIR1 _DOPAIR1(FUNCTION)
#define _DOPAIR2_BRANCH(f) PASTE(runner_dopair2_branch, f)
#define DOPAIR2_BRANCH _DOPAIR2_BRANCH(FUNCTION)
#define _DOPAIR2(f) PASTE(runner_dopair2, f)
#define DOPAIR2 _DOPAIR2(FUNCTION)
......@@ -778,37 +781,6 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
const struct entry *restrict sort_j = cj->sort[sid];
#ifdef SWIFT_DEBUG_CHECKS
/* Check that the dx_max_sort values in the cell are indeed an upper
bound on particle movement. */
for (int pid = 0; pid < ci->count; pid++) {
const struct part *p = &ci->parts[sort_i[pid].i];
const float d = p->x[0] * runner_shift[sid][0] +
p->x[1] * runner_shift[sid][1] +
p->x[2] * runner_shift[sid][2];
if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort >
1.0e-4 * max(fabsf(d), ci->dx_max_sort_old))
error(
"particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d "
"cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e "
"ci->dx_max_sort_old=%e",
ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort,
ci->dx_max_sort_old);
}
for (int pjd = 0; pjd < cj->count; pjd++) {
const struct part *p = &cj->parts[sort_j[pjd].i];
const float d = p->x[0] * runner_shift[sid][0] +
p->x[1] * runner_shift[sid][1] +
p->x[2] * runner_shift[sid][2];
if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort >
1.0e-4 * max(fabsf(d), cj->dx_max_sort_old))
error(
"particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d "
"ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e "
"cj->dx_max_sort_old=%e",
cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort,
cj->dx_max_sort_old);
}
/* Some constants used to checks that the parts are in the right frame */
const float shift_threshold_x =
2. * ci->width[0] + 2. * max(ci->dx_max_part, cj->dx_max_part);
......@@ -816,7 +788,6 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
2. * ci->width[1] + 2. * max(ci->dx_max_part, cj->dx_max_part);
const float shift_threshold_z =
2. * ci->width[2] + 2. * max(ci->dx_max_part, cj->dx_max_part);
#endif /* SWIFT_DEBUG_CHECKS */
/* Get some other useful values. */
......@@ -1025,6 +996,43 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) {
cj->dx_max_sort_old > space_maxreldx * cj->dmin)
error("Interacting unsorted cells.");
#ifdef SWIFT_DEBUG_CHECKS
/* Pick-out the sorted lists. */
const struct entry *restrict sort_i = ci->sort[sid];
const struct entry *restrict sort_j = cj->sort[sid];
/* Check that the dx_max_sort values in the cell are indeed an upper
bound on particle movement. */
for (int pid = 0; pid < ci->count; pid++) {
const struct part *p = &ci->parts[sort_i[pid].i];
const float d = p->x[0] * runner_shift[sid][0] +
p->x[1] * runner_shift[sid][1] +
p->x[2] * runner_shift[sid][2];
if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort >
1.0e-4 * max(fabsf(d), ci->dx_max_sort_old))
error(
"particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d "
"cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e "
"ci->dx_max_sort_old=%e",
ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort,
ci->dx_max_sort_old);
}
for (int pjd = 0; pjd < cj->count; pjd++) {
const struct part *p = &cj->parts[sort_j[pjd].i];
const float d = p->x[0] * runner_shift[sid][0] +
p->x[1] * runner_shift[sid][1] +
p->x[2] * runner_shift[sid][2];
if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort >
1.0e-4 * max(fabsf(d), cj->dx_max_sort_old))
error(
"particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d "
"ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e "
"cj->dx_max_sort_old=%e",
cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort,
cj->dx_max_sort_old);
}
#endif /* SWIFT_DEBUG_CHECKS */
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && \
(DOPAIR1_BRANCH == runner_dopair1_density_branch)
if (!sort_is_corner(sid))
......@@ -1043,7 +1051,8 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) {
* @param ci The first #cell.
* @param cj The second #cell.
*/
void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
const double *shift) {
struct engine *restrict e = r->e;
......@@ -1054,25 +1063,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
TIMER_TIC;
/* Anything to do here? */
if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
error("Interacting undrifted cells.");
/* Get the shift ID. */
double shift[3] = {0.0, 0.0, 0.0};
const int sid = space_getsid(e->s, &ci, &cj, shift);
/* Have the cells been sorted? */
if (!(ci->sorted & (1 << sid)) ||
ci->dx_max_sort_old > space_maxreldx * ci->dmin)
error("Interacting unsorted cells.");
if (!(cj->sorted & (1 << sid)) ||
cj->dx_max_sort_old > space_maxreldx * cj->dmin)
error("Interacting unsorted cells.");
/* Get the cutoff shift. */
/* Get the cutoff shift. */
double rshift = 0.0;
for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
......@@ -1490,6 +1481,87 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
TIMER_TOC(TIMER_DOPAIR);
}
/**
* @brief Determine which version of DOPAIR2 needs to be called depending on the
* orientation of the cells or whether DOPAIR2 needs to be called at all.
*
* @param r #runner
* @param ci #cell ci
* @param cj #cell cj
*
*/
void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) {
const struct engine *restrict e = r->e;
/* Anything to do here? */
if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
/* Check that cells are drifted. */
if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
error("Interacting undrifted cells.");
/* Get the sort ID. */
double shift[3] = {0.0, 0.0, 0.0};
const int sid = space_getsid(e->s, &ci, &cj, shift);
/* Have the cells been sorted? */
if (!(ci->sorted & (1 << sid)) ||
ci->dx_max_sort_old > space_maxreldx * ci->dmin)
error("Interacting unsorted cells.");
if (!(cj->sorted & (1 << sid)) ||
cj->dx_max_sort_old > space_maxreldx * cj->dmin)
error("Interacting unsorted cells.");
#ifdef SWIFT_DEBUG_CHECKS
/* Pick-out the sorted lists. */
const struct entry *restrict sort_i = ci->sort[sid];
const struct entry *restrict sort_j = cj->sort[sid];
/* Check that the dx_max_sort values in the cell are indeed an upper
bound on particle movement. */
for (int pid = 0; pid < ci->count; pid++) {
const struct part *p = &ci->parts[sort_i[pid].i];