Commit c145af2f authored by James Willis
Browse files

Created a branching function for DOPAIR1, so that the scalar version is called...

Created a branching function for DOPAIR1 so that, when the code is vectorised, corner interactions are still handled by the scalar version. Also fixed a bug in runner_dopair1_density_vec: hi_max and hj_max are now used to determine how many particles to read into the cache.
parent 9d66a8d1
......@@ -1818,13 +1818,8 @@ void *runner_main(void *data) {
break;
case task_type_pair:
if (t->subtype == task_subtype_density) {
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
runner_dopair1_density_vec(r, ci, cj);
#else
runner_dopair1_density(r, ci, cj);
#endif
}
if (t->subtype == task_subtype_density)
runner_dopair1_branch_density(r, ci, cj);
#ifdef EXTRA_HYDRO_LOOP
else if (t->subtype == task_subtype_gradient)
runner_dopair1_gradient(r, ci, cj);
......
......@@ -885,7 +885,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
* @param ci The first #cell.
* @param cj The second #cell.
*/
void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const double *shift) {
const struct engine *restrict e = r->e;
......@@ -900,22 +900,6 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
TIMER_TIC;
/* Anything to do here? */
if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
error("Interacting undrifted cells.");
/* Get the sort ID. */
double shift[3] = {0.0, 0.0, 0.0};
const int sid = space_getsid(e->s, &ci, &cj, shift);
/* Have the cells been sorted? */
if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin)
runner_do_sort(r, ci, (1 << sid), 1);
if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin)
runner_do_sort(r, cj, (1 << sid), 1);
/* Get the cutoff shift. */
double rshift = 0.0;
for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
......@@ -1116,6 +1100,49 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
TIMER_TOC(TIMER_DOPAIR);
}
/**
* @brief Prepare a pair of cells for interaction and dispatch to the
* appropriate DOPAIR1 implementation, or return early when neither cell
* is active.
*
* The function checks that at least one cell is active, aborts if either
* cell has not been drifted, obtains the sort ID and shift for the pair,
* sorts either cell along that direction if needed, and then calls either
* runner_dopair1_density_vec (non-corner interactions in the vectorised
* build) or the scalar DOPAIR1.
*
* @param r The #runner executing the task.
* @param ci The first #cell.
* @param cj The second #cell.
*
*/
void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) {
const struct engine *restrict e = r->e;
/* Nothing to do if neither cell is active. */
if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
/* Both cells must have been drifted before they can interact. */
if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
error("Interacting undrifted cells.");
/* Get the sort ID and the shift for this pair. ci and cj are passed by
 * address, so space_getsid can presumably re-order them -- verify against
 * its definition. */
double shift[3] = {0.0, 0.0, 0.0};
const int sid = space_getsid(e->s, &ci, &cj, shift);
/* (Re-)sort a cell along direction sid if it has no sort for that direction
 * yet, or if dx_max_sort exceeds space_maxreldx * dmin. */
if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin)
runner_do_sort(r, ci, (1 << sid), 1);
if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin)
runner_do_sort(r, cj, (1 << sid), 1);
/* Sanity check: both cells must now carry the sort flag for sid. */
if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid)))
error("Trying to interact unsorted cells.");
/* In the vectorised build, corner interactions go to the scalar DOPAIR1 and
 * all other orientations go to the vectorised density routine. */
/* NOTE(review): the preprocessor cannot compare function names. In an #if
 * expression any identifier remaining after macro expansion is replaced by
 * 0, so the '==' test below most likely evaluates to 0 == 0 (always true)
 * for every instantiation of this template, not only the density one. The
 * spelling 'runner_dopair1_density_branch' also differs from the
 * 'runner_dopair1_branch_density' used by the callers. Consider selecting
 * the vectorised path with a dedicated integer-valued macro -- TODO
 * confirm. */
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && (DOPAIR1_BRANCH == runner_dopair1_density_branch)
if(!sort_is_corner(sid))
runner_dopair1_density_vec(r, ci, cj, sid, shift);
else
DOPAIR1(r, ci, cj, sid, shift);
#else
DOPAIR1(r, ci, cj, sid, shift);
#endif
}
/**
* @brief Compute the interactions between a cell pair (symmetric)
*
......@@ -2291,12 +2318,7 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid,
runner_do_sort(r, cj, (1 << sid), 1);
/* Compute the interactions. */
#if (DOPAIR1 == runner_dopair1_density) && defined(WITH_VECTORIZATION) && \
defined(GADGET2_SPH)
runner_dopair1_density_vec(r, ci, cj);
#else
DOPAIR1(r, ci, cj);
#endif
DOPAIR1_BRANCH(r, ci, cj);
}
if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR);
......
......@@ -284,7 +284,8 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
__attribute__((always_inline)) INLINE static void populate_max_d_no_cache(
const struct cell *ci, const struct cell *cj,
const struct entry *restrict sort_i, const struct entry *restrict sort_j,
const float dx_max, const float rshift, float *max_di, float *max_dj,
const float dx_max, const float rshift, const double hi_max, const double hj_max,
const double di_max, const double dj_min, float *max_di, float *max_dj,
int *init_pi, int *init_pj, const struct engine *e) {
struct part *restrict parts_i = ci->parts;
......@@ -293,10 +294,6 @@ __attribute__((always_inline)) INLINE static void populate_max_d_no_cache(
float h, d;
/* Get the distance of the last pi and the first pj on the sorted axis.*/
const float di_max = sort_i[ci->count - 1].d - rshift;
const float dj_min = sort_j[0].d;
int first_pi = 0, last_pj = cj->count - 1;
/* Find the first active particle in ci to interact with any particle in cj.
......@@ -306,13 +303,13 @@ __attribute__((always_inline)) INLINE static void populate_max_d_no_cache(
for (int k = ci->count - 1; k >= 0; k--) {
p = &parts_i[sort_i[k].i];
h = p->h;
d = sort_i[k].d + h * kernel_gamma + dx_max - rshift;
d = sort_i[k].d + dx_max;
max_di[k] = d;
max_di[k] = d + h * kernel_gamma - rshift;
/* If the particle is out of range set the index to
* the last active particle within range. */
if (d < dj_min) {
if (d + hi_max < dj_min) {
first_pi = active_id;
break;
} else {
......@@ -331,13 +328,14 @@ __attribute__((always_inline)) INLINE static void populate_max_d_no_cache(
for (int k = 0; k < cj->count; k++) {
p = &parts_j[sort_j[k].i];
h = p->h;
d = sort_j[k].d - h * kernel_gamma - dx_max - rshift;
d = sort_j[k].d - dx_max;
max_dj[k] = d;
/*TODO: don't think rshift should be taken off here, waiting on Pedro. */
max_dj[k] = d - h * kernel_gamma - rshift;
/* If the particle is out of range set the index to
* the last active particle within range. */
if (d > di_max) {
if (d - hj_max > di_max) {
last_pj = active_id;
break;
} else {
......@@ -613,7 +611,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
* @param cj The second #cell.
*/
void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
struct cell *cj) {
struct cell *cj, const int sid, const double *shift) {
#ifdef WITH_VECTORIZATION
const struct engine *restrict e = r->e;
......@@ -622,22 +620,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
TIMER_TIC;
/* Anything to do here? */
if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
error("Interacting undrifted cells.");
/* Get the sort ID. */
double shift[3] = {0.0, 0.0, 0.0};
const int sid = space_getsid(e->s, &ci, &cj, shift);
/* Have the cells been sorted? */
if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin)
runner_do_sort(r, ci, (1 << sid), 1);
if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin)
runner_do_sort(r, cj, (1 << sid), 1);
/* Get the cutoff shift. */
double rshift = 0.0;
for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
......@@ -726,8 +708,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
/* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */
/* Also find the first pi that interacts with any particle in cj and the last
* pj that interacts with any particle in ci. */
populate_max_d_no_cache(ci, cj, sort_i, sort_j, dx_max, rshift, max_di,
max_dj, &first_pi, &last_pj, e);
populate_max_d_no_cache(ci, cj, sort_i, sort_j, dx_max, rshift, hi_max,
hj_max, di_max, dj_min, max_di, max_dj, &first_pi, &last_pj, e);
/* Find the maximum index into cj that is required by a particle in ci. */
/* Find the maximum index into ci that is required by a particle in cj. */
......@@ -777,6 +759,13 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
struct part *restrict pi = &parts_i[sort_i[pid].i];
if (!part_is_active(pi, e)) continue;
/* Set the cache index. */
int ci_cache_idx = pid - first_pi_align;
const float hi = ci_cache->h[ci_cache_idx];
const double di_test = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
if (di_test < dj_min) continue;
/* Determine the exit iteration of the interaction loop. */
dj = sort_j[max_ind_j].d;
while (max_ind_j > 0 && max_di[pid] < dj) {
......@@ -786,12 +775,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
}
int exit_iteration = max_ind_j + 1;
/* Set the cache index. */
int ci_cache_idx = pid - first_pi_align;
const float hi = ci_cache->h[ci_cache_idx];
const float hig2 = hi * hi * kernel_gamma2;
vector pix, piy, piz;
/* Fill particle pi vectors. */
......@@ -910,6 +895,14 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
struct part *restrict pj = &parts_j[sort_j[pjd].i];
if (!part_is_active(pj, e)) continue;
/* Set the cache index. */
int cj_cache_idx = pjd;
/*TODO: rshift term. */
const float hj = cj_cache->h[cj_cache_idx];
const double dj_test = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
if (dj_test > di_max) continue;
/* Determine the exit iteration of the interaction loop. */
di = sort_i[max_ind_i].d;
while (max_ind_i < count_i - 1 && max_dj[pjd] > di) {
......@@ -919,10 +912,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
}
int exit_iteration = max_ind_i;
/* Set the cache index. */
int cj_cache_idx = pjd;
const float hj = cj_cache->h[cj_cache_idx];
const float hjg2 = hj * hj * kernel_gamma2;
vector pjx, pjy, pjz;
......
......@@ -35,8 +35,7 @@
/* Function prototypes. */
void runner_doself1_density_vec(struct runner *r, struct cell *restrict c);
void runner_doself1_density_vec_2(struct runner *r, struct cell *restrict c);
void runner_dopair1_density_vec(struct runner *r, struct cell *restrict ci,
struct cell *restrict cj);
struct cell *restrict cj, const int sid, const double *shift);
#endif /* SWIFT_RUNNER_VEC_H */
......@@ -34,7 +34,8 @@
#if defined(WITH_VECTORIZATION)
#define DOSELF1 runner_doself1_density_vec
#define DOPAIR1 runner_dopair1_density_vec
//#define DOPAIR1 runner_dopair1_density_vec
#define DOPAIR1 runner_dopair1_branch_density
#define DOSELF1_NAME "runner_doself1_density_vec"
#define DOPAIR1_NAME "runner_dopair1_density_vec"
#endif
......@@ -45,7 +46,7 @@
#endif
#ifndef DOPAIR1
#define DOPAIR1 runner_dopair1_density
#define DOPAIR1 runner_dopair1_branch_density
#define DOPAIR1_NAME "runner_dopair1_density"
#endif
......@@ -312,9 +313,7 @@ int check_results(struct part *serial_parts, struct part *vec_parts, int count,
/* Just a forward declaration... */
void runner_doself1_density(struct runner *r, struct cell *ci);
void runner_doself1_density_vec(struct runner *r, struct cell *ci);
void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj);
void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
struct cell *cj);
void runner_dopair1_branch_density(struct runner *r, struct cell *ci, struct cell *cj);
/* And go... */
int main(int argc, char *argv[]) {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment