Skip to content
Snippets Groups Projects
Commit bb14c7b4 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Symmetric version of the DOPAIR2 function.

parent 71ba3cd1
No related branches found
No related tags found
1 merge request!426Symmetric version of DOPAIR2
......@@ -897,8 +897,8 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
/* Pick-out the sorted lists. */
const struct entry *restrict sort_i = ci->sort[sid];
const struct entry *restrict sort_j = cj->sort[sid];
struct entry *restrict sort_i = ci->sort[sid];
struct entry *restrict sort_j = cj->sort[sid];
#ifdef SWIFT_DEBUG_CHECKS
/* Check that the dx_max_sort values in the cell are indeed an upper
......@@ -948,54 +948,121 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
const double di_max = sort_i[count_i - 1].d;
const double dj_min = sort_j[0].d;
/* Upper bound on maximal distance in the other cell */
const double max_i = max(hi_max, hj_max) * kernel_gamma + dx_max - rshift;
const double max_j = max(hi_max, hj_max) * kernel_gamma + dx_max;
/* Shifts to apply to the particles to be in a good frame */
const double shift_i[3] = {cj->loc[0] + shift[0], cj->loc[1] + shift[1],
cj->loc[2] + shift[2]};
const double shift_j[3] = {cj->loc[0], cj->loc[1], cj->loc[2]};
int count_active_i = 0, count_active_j = 0;
struct entry *restrict sort_active_i = NULL, *restrict sort_active_j = NULL;
if (cell_is_all_active(ci, e)) {
/* If everybody is active don't bother copying */
sort_active_i = sort_i;
count_active_i = count_i;
} else if (cell_is_active(ci, e)) {
if (posix_memalign((void *)&sort_active_i, SWIFT_CACHE_ALIGNMENT,
sizeof(struct entry) * count_i) != 0)
error("Failed to allocate active sortlists.");
/* Collect the active particles in ci */
for (int k = 0; k < count_i; k++) {
if (part_is_active(&parts_i[sort_i[k].i], e)) {
sort_active_i[count_active_i] = sort_i[k];
count_active_i++;
}
}
}
if (cell_is_active(ci, e)) {
if (cell_is_all_active(cj, e)) {
/* If everybody is active don't bother copying */
sort_active_j = sort_j;
count_active_j = count_j;
} else if (cell_is_active(cj, e)) {
if (posix_memalign((void *)&sort_active_j, SWIFT_CACHE_ALIGNMENT,
sizeof(struct entry) * count_j) != 0)
error("Failed to allocate active sortlists.");
/* Collect the active particles in cj */
for (int k = 0; k < count_j; k++) {
if (part_is_active(&parts_j[sort_j[k].i], e)) {
sort_active_j[count_active_j] = sort_j[k];
count_active_j++;
}
}
}
/* Loop over the parts in ci until nothing is within range in cj.
* We start from the centre and move outwards. */
for (int pid = count_i - 1; pid >= 0; pid--) {
/* Loop over *all* the parts in ci starting from the centre until
we are out of range of anything in cj (using the maximal hi). */
for (int pid = count_i - 1;
pid >= 0 &&
sort_i[pid].d + hi_max * kernel_gamma + dx_max - rshift > dj_min;
pid--) {
/* Did we go beyond the upper bound ? */
if (sort_i[pid].d + max_i < dj_min) break;
/* Get a hold of the ith part in ci. */
struct part *pi = &parts_i[sort_i[pid].i];
const float hi = pi->h;
/* Get a hold of the ith part in ci. */
struct part *pi = &parts_i[sort_i[pid].i];
const float hi = pi->h;
/* Is there anything we need to interact with (for this specific hi) ? */
const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
if (di < dj_min) continue;
/* Skip inactive particles */
if (!part_is_active(pi, e)) continue;
/* Get some additional information about pi */
const float hig2 = hi * hi * kernel_gamma2;
const float pix = pi->x[0] - shift_i[0];
const float piy = pi->x[1] - shift_i[1];
const float piz = pi->x[2] - shift_i[2];
/* Is there anything we need to interact with ? */
const double di =
sort_i[pid].d + max(hi, hj_max) * kernel_gamma + dx_max - rshift;
if (di < dj_min) continue;
/* Do we need to only check active parts in cj
(i.e. pi does not need updating) ? */
if (!part_is_active(pi, e)) {
/* Get some additional information about pi */
const float hig2 = hi * hi * kernel_gamma2;
const float pix = pi->x[0] - (cj->loc[0] + shift[0]);
const float piy = pi->x[1] - (cj->loc[1] + shift[1]);
const float piz = pi->x[2] - (cj->loc[2] + shift[2]);
/* Loop over the *active* parts in cj within range of pi */
for (int pjd = 0; pjd < count_active_j && sort_active_j[pjd].d < di;
pjd++) {
/* Now loop over the relevant particles in cj */
for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; ++pjd) {
/* Recover pj */
struct part *pj = &parts_j[sort_active_j[pjd].i];
const float hj = pj->h;
/* Get the position of pj in the right frame */
const float pjx = pj->x[0] - shift_j[0];
const float pjy = pj->x[1] - shift_j[1];
const float pjz = pj->x[2] - shift_j[2];
/* Compute the pairwise distance. */
float dx[3] = {pjx - pix, pjy - piy, pjz - piz};
const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
#ifdef SWIFT_DEBUG_CHECKS
/* Check that particles have been drifted to the current time */
if (pi->ti_drift != e->ti_current)
error("Particle pi not drifted to current time");
if (pj->ti_drift != e->ti_current)
error("Particle pj not drifted to current time");
#endif
/* Hit or miss?
(note that we will do the other condition in the reverse loop) */
if (r2 < hig2) {
IACT_NONSYM(r2, dx, hj, hi, pj, pi);
}
} /* loop over the active parts in cj. */
}
else { /* pi is active, we may need to update pi and pj */
/* Loop over *all* the parts in cj in range of pi. */
for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
/* Recover pj */
struct part *pj = &parts_j[sort_j[pjd].i];
const float hj = pj->h;
/* Early abort for this pair now that we have the real hj? */
if (sort_i[pid].d + max(hi, hj) * kernel_gamma + dx_max - rshift <
dj_min)
continue;
/* Get the position of pj in the right frame */
const float hjg2 = hj * hj * kernel_gamma2;
const float pjx = pj->x[0] - cj->loc[0];
const float pjy = pj->x[1] - cj->loc[1];
const float pjz = pj->x[2] - cj->loc[2];
const float pjx = pj->x[0] - shift_j[0];
const float pjy = pj->x[1] - shift_j[1];
const float pjz = pj->x[2] - shift_j[2];
/* Compute the pairwise distance. */
float dx[3] = {pix - pjx, piy - pjy, piz - pjz};
......@@ -1008,60 +1075,94 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
if (pj->ti_drift != e->ti_current)
error("Particle pj not drifted to current time");
#endif
/* Hit or miss?
(note that we will do the other condition in the reverse loop) */
if (r2 < hig2) {
/* Are these particles actually neighbours? */
if (r2 < hig2 || r2 < hjg2) {
IACT_NONSYM(r2, dx, hi, hj, pi, pj);
/* Does pj need to be updated too? */
if (part_is_active(pj, e))
IACT(r2, dx, hi, hj, pi, pj);
else
IACT_NONSYM(r2, dx, hi, hj, pi, pj);
}
} /* Loop over the parts in cj */
} /* Loop over the parts in ci */
} /* Cell ci is active */
if (cell_is_active(cj, e)) {
} /* loop over the parts in cj. */
} /* Is pi active? */
} /* Loop over all ci */
/* Loop over *all* the parts in cj starting from the centre until
we are out of range of anything in ci (using the maximal hj). */
for (int pjd = 0;
pjd < count_j &&
sort_j[pjd].d - hj_max * kernel_gamma + dx_max < di_max - rshift;
pjd++) {
/* Get a hold of the jth part in cj. */
struct part *pj = &parts_j[sort_j[pjd].i];
const float hj = pj->h;
/* Is there anything we need to interact with (for this specific hj) ? */
const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max;
if (dj > di_max - rshift) continue;
/* Get some additional information about pj */
const float hjg2 = hj * hj * kernel_gamma2;
const float pjx = pj->x[0] - shift_j[0];
const float pjy = pj->x[1] - shift_j[1];
const float pjz = pj->x[2] - shift_j[2];
/* Do we need to only check active parts in ci
(i.e. pj does not need updating) ? */
if (!part_is_active(pj, e)) {
/* Loop over the *active* parts in ci. */
for (int pid = count_active_i - 1;
pid >= 0 && sort_active_i[pid].d - rshift > dj; pid--) {
/* Loop over the parts in cj until nothing is within range in ci.
* We start from the centre and move outwards. */
for (int pjd = 0; pjd < count_j; pjd++) {
/* Recover pi */
struct part *pi = &parts_i[sort_active_i[pid].i];
const float hi = pi->h;
const float hig2 = hi * hi * kernel_gamma2;
/* Did we go beyond the upper bound ? */
if (sort_j[pjd].d - max_j > di_max - rshift) break;
/* Get the position of pi in the right frame */
const float pix = pi->x[0] - shift_i[0];
const float piy = pi->x[1] - shift_i[1];
const float piz = pi->x[2] - shift_i[2];
/* Get a hold of the jth part in cj. */
struct part *pj = &parts_j[sort_j[pjd].i];
const float hj = pj->h;
/* Compute the pairwise distance. */
float dx[3] = {pix - pjx, piy - pjy, piz - pjz};
const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
/* Skip inactive particles */
if (!part_is_active(pj, e)) continue;
#ifdef SWIFT_DEBUG_CHECKS
/* Check that particles have been drifted to the current time */
if (pi->ti_drift != e->ti_current)
error("Particle pi not drifted to current time");
if (pj->ti_drift != e->ti_current)
error("Particle pj not drifted to current time");
#endif
/* Is there anything we need to interact with ? */
const double dj = sort_j[pjd].d - max(hj, hi_max) * kernel_gamma - dx_max;
if (dj > di_max - rshift) continue;
/* Hit or miss?
(note that we must avoid the r2 < hig2 cases we already processed) */
if (r2 < hjg2 && r2 > hig2) {
IACT_NONSYM(r2, dx, hi, hj, pi, pj);
}
} /* loop over the active parts in ci. */
}
/* Get some additional information about pj */
const float hjg2 = hj * hj * kernel_gamma2;
const float pjx = pj->x[0] - cj->loc[0];
const float pjy = pj->x[1] - cj->loc[1];
const float pjz = pj->x[2] - cj->loc[2];
else { /* pj is active, we may need to update pj and pi */
/* Now loop over the relevant particles in ci */
/* Loop over *all* the parts in ci. */
for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d - rshift > dj;
--pid) {
pid--) {
/* Recover pi */
struct part *pi = &parts_i[sort_i[pid].i];
const float hi = pi->h;
/* Early abort for this pair now that we have the real hi? */
if (sort_j[pjd].d - max(hj, hi) * kernel_gamma - dx_max >
di_max - rshift)
continue;
const float hig2 = hi * hi * kernel_gamma2;
/* Get the position of pi in the right frame */
const float hig2 = hi * hi * kernel_gamma2;
const float pix = pi->x[0] - (cj->loc[0] + shift[0]);
const float piy = pi->x[1] - (cj->loc[1] + shift[1]);
const float piz = pi->x[2] - (cj->loc[2] + shift[2]);
const float pix = pi->x[0] - shift_i[0];
const float piy = pi->x[1] - shift_i[1];
const float piz = pi->x[2] - shift_i[2];
/* Compute the pairwise distance. */
float dx[3] = {pjx - pix, pjy - piy, pjz - piz};
......@@ -1075,14 +1176,23 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
error("Particle pj not drifted to current time");
#endif
/* Are these particles actually neighbours? */
if (r2 < hjg2 || r2 < hig2) {
/* Hit or miss?
(note that we must avoid the r2 < hig2 cases we already processed) */
if (r2 < hjg2 && r2 > hig2) {
IACT_NONSYM(r2, dx, hj, hi, pj, pi);
/* Does pi need to be updated too? */
if (part_is_active(pi, e))
IACT(r2, dx, hj, hi, pj, pi);
else
IACT_NONSYM(r2, dx, hj, hi, pj, pi);
}
} /* Loop over the parts in ci */
} /* Loop over the parts in cj */
} /* Cell cj is active */
} /* loop over the parts in ci. */
} /* Is pj active? */
} /* Loop over all cj */
/* Clean-up if necessary */
if (cell_is_active(ci, e) && !cell_is_all_active(ci, e)) free(sort_active_i);
if (cell_is_active(cj, e) && !cell_is_all_active(cj, e)) free(sort_active_j);
TIMER_TOC(TIMER_DOPAIR);
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment