Commit d9ebdcfe authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

DOPAIR1() should do its business in the frame of ci

parent 8087179a
......@@ -500,8 +500,6 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
struct part *restrict parts_i, int *restrict ind, int count,
struct cell *restrict cj) {
error("FUCK");
struct engine *e = r->e;
#ifdef WITH_OLD_VECTORIZATION
......@@ -799,17 +797,8 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
const struct engine *restrict e = r->e;
// DOPAIR1_NAIVE(r, ci, cj);
// return;
#ifdef WITH_OLD_VECTORIZATION
int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16)));
float hjq[VEC_SIZE] __attribute__((aligned(16)));
float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
// DOPAIR1_NAIVE(r, ci, cj);
// return;
TIMER_TIC;
......@@ -873,28 +862,34 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
/* Get a hold of the ith part in ci. */
struct part *restrict pi = &parts_i[sort_i[pid].i];
if (!part_is_active(pi, e)) continue;
const float hi = pi->h;
/* Skip inactive particles */
if (!part_is_active(pi, e)) continue;
/* Is there anything we need to interact with ? */
const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
if (di < dj_min) continue;
double pix[3];
for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
/* Get some additional information about pi */
const float hig2 = hi * hi * kernel_gamma2;
const double pix = pi->x[0] - ci->loc[0] - shift[0];
const double piy = pi->x[1] - ci->loc[1] - shift[1];
const double piz = pi->x[2] - ci->loc[2] - shift[2];
/* Loop over the parts in cj. */
for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
/* Get a pointer to the jth particle. */
struct part *restrict pj = &parts_j[sort_j[pjd].i];
/* Recover pj */
struct part *pj = &parts_j[sort_j[pjd].i];
const float hj = pj->h;
const float pjx = pj->x[0] - ci->loc[0];
const float pjy = pj->x[1] - ci->loc[1];
const float pjz = pj->x[2] - ci->loc[2];
/* Compute the pairwise distance. */
float r2 = 0.0f;
float dx[3];
for (int k = 0; k < 3; k++) {
dx[k] = pix[k] - pj->x[k];
r2 += dx[k] * dx[k];
}
float dx[3] = {pix - pjx, piy - pjy, piz - pjz};
const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
#ifdef SWIFT_DEBUG_CHECKS
/* Check that particles have been drifted to the current time */
......@@ -907,37 +902,11 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
/* Hit or miss? */
if (r2 < hig2) {
#ifndef WITH_OLD_VECTORIZATION
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
#else
/* Add this interaction to the queue. */
r2q[icount] = r2;
dxq[3 * icount + 0] = dx[0];
dxq[3 * icount + 1] = dx[1];
dxq[3 * icount + 2] = dx[2];
hiq[icount] = hi;
hjq[icount] = pj->h;
piq[icount] = pi;
pjq[icount] = pj;
icount += 1;
/* Flush? */
if (icount == VEC_SIZE) {
IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
icount = 0;
}
#endif
IACT_NONSYM(r2, dx, hi, hj, pi, pj);
}
} /* loop over the parts in cj. */
} /* loop over the parts in ci. */
} /* Cell ci is active */
} /* loop over the parts in ci. */
} /* Cell ci is active */
if (cell_is_active(cj, e)) {
......@@ -946,29 +915,35 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
pjd++) {
/* Get a hold of the jth part in cj. */
struct part *restrict pj = &parts_j[sort_j[pjd].i];
if (!part_is_active(pj, e)) continue;
struct part *pj = &parts_j[sort_j[pjd].i];
const float hj = pj->h;
/* Skip inactive particles */
if (!part_is_active(pj, e)) continue;
/* Is there anything we need to interact with ? */
const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max + rshift;
if (dj - rshift > di_max) continue;
double pjx[3];
for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
/* Get some additional information about pj */
const float hjg2 = hj * hj * kernel_gamma2;
const float pjx = pj->x[0] - ci->loc[0];
const float pjy = pj->x[1] - ci->loc[1];
const float pjz = pj->x[2] - ci->loc[2];
/* Loop over the parts in ci. */
for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
/* Get a pointer to the jth particle. */
struct part *restrict pi = &parts_i[sort_i[pid].i];
/* Recover pi */
struct part *pi = &parts_i[sort_i[pid].i];
const float hi = pi->h;
const float pix = pi->x[0] - ci->loc[0] - shift[0];
const float piy = pi->x[1] - ci->loc[1] - shift[1];
const float piz = pi->x[2] - ci->loc[2] - shift[2];
/* Compute the pairwise distance. */
float r2 = 0.0f;
float dx[3];
for (int k = 0; k < 3; k++) {
dx[k] = pjx[k] - pi->x[k];
r2 += dx[k] * dx[k];
}
float dx[3] = {pjx - pix, pjy - piy, pjz - piz};
const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
#ifdef SWIFT_DEBUG_CHECKS
/* Check that particles have been drifted to the current time */
......@@ -981,44 +956,11 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
/* Hit or miss? */
if (r2 < hjg2) {
#ifndef WITH_OLD_VECTORIZATION
IACT_NONSYM(r2, dx, hj, pi->h, pj, pi);
#else
/* Add this interaction to the queue. */
r2q[icount] = r2;
dxq[3 * icount + 0] = dx[0];
dxq[3 * icount + 1] = dx[1];
dxq[3 * icount + 2] = dx[2];
hiq[icount] = hj;
hjq[icount] = pi->h;
piq[icount] = pj;
pjq[icount] = pi;
icount += 1;
/* Flush? */
if (icount == VEC_SIZE) {
IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
icount = 0;
}
#endif
IACT_NONSYM(r2, dx, hj, hi, pj, pi);
}
} /* loop over the parts in ci. */
} /* loop over the parts in cj. */
} /* Cell cj is active */
#ifdef WITH_OLD_VECTORIZATION
/* Pick up any leftovers. */
if (icount > 0)
for (int k = 0; k < icount; k++)
IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif
} /* loop over the parts in cj. */
} /* Cell cj is active */
TIMER_TOC(TIMER_DOPAIR);
}
......@@ -1233,7 +1175,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
first_pi -= 1;
if (first_pi < 0) first_pi = 0;
/* Get some additional information about pi */
/* Get some additional information about pj */
const float hjg2 = hj * hj * kernel_gamma2;
const float pjx = pj->x[0] - ci->loc[0];
const float pjy = pj->x[1] - ci->loc[1];
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment