diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 48a8c633e8cddf82a3989c81528f885d40028251..d5120194c47862257e9a44ca4897f1e28c7bf2b2 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -897,8 +897,8 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; /* Pick-out the sorted lists. */ - const struct entry *restrict sort_i = ci->sort[sid]; - const struct entry *restrict sort_j = cj->sort[sid]; + struct entry *restrict sort_i = ci->sort[sid]; + struct entry *restrict sort_j = cj->sort[sid]; #ifdef SWIFT_DEBUG_CHECKS /* Check that the dx_max_sort values in the cell are indeed an upper @@ -948,54 +948,121 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { const double di_max = sort_i[count_i - 1].d; const double dj_min = sort_j[0].d; - /* Upper bound on maximal distance in the other cell */ - const double max_i = max(hi_max, hj_max) * kernel_gamma + dx_max - rshift; - const double max_j = max(hi_max, hj_max) * kernel_gamma + dx_max; + /* Shifts to apply to the particles to be in a good frame */ + const double shift_i[3] = {cj->loc[0] + shift[0], cj->loc[1] + shift[1], + cj->loc[2] + shift[2]}; + const double shift_j[3] = {cj->loc[0], cj->loc[1], cj->loc[2]}; + + int count_active_i = 0, count_active_j = 0; + struct entry *restrict sort_active_i = NULL, *restrict sort_active_j = NULL; + + if (cell_is_all_active(ci, e)) { + /* If everybody is active don't bother copying */ + sort_active_i = sort_i; + count_active_i = count_i; + } else if (cell_is_active(ci, e)) { + if (posix_memalign((void *)&sort_active_i, SWIFT_CACHE_ALIGNMENT, + sizeof(struct entry) * count_i) != 0) + error("Failed to allocate active sortlists."); + + /* Collect the active particles in ci */ + for (int k = 0; k < count_i; k++) { + if (part_is_active(&parts_i[sort_i[k].i], e)) { + sort_active_i[count_active_i] = sort_i[k]; + count_active_i++; + } + } + } - if (cell_is_active(ci, e)) { + if (cell_is_all_active(cj, e)) { + /* If everybody is active don't bother copying */ + sort_active_j = sort_j; + count_active_j = count_j; + } else if (cell_is_active(cj, e)) { + if (posix_memalign((void *)&sort_active_j, SWIFT_CACHE_ALIGNMENT, + sizeof(struct entry) * count_j) != 0) + error("Failed to allocate active sortlists."); + + /* Collect the active particles in cj */ + for (int k = 0; k < count_j; k++) { + if (part_is_active(&parts_j[sort_j[k].i], e)) { + sort_active_j[count_active_j] = sort_j[k]; + count_active_j++; + } + } + } - /* Loop over the parts in ci until nothing is within range in cj. - * We start from the centre and move outwards. */ - for (int pid = count_i - 1; pid >= 0; pid--) { + /* Loop over *all* the parts in ci starting from the centre until + we are out of range of anything in cj (using the maximal hi). */ + for (int pid = count_i - 1; + pid >= 0 && + sort_i[pid].d + hi_max * kernel_gamma + dx_max - rshift > dj_min; + pid--) { - /* Did we go beyond the upper bound ? */ - if (sort_i[pid].d + max_i < dj_min) break; + /* Get a hold of the ith part in ci. */ + struct part *pi = &parts_i[sort_i[pid].i]; + const float hi = pi->h; - /* Get a hold of the ith part in ci. */ - struct part *pi = &parts_i[sort_i[pid].i]; - const float hi = pi->h; + /* Is there anything we need to interact with (for this specific hi) ? */ + const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; + if (di < dj_min) continue; - /* Skip inactive particles */ - if (!part_is_active(pi, e)) continue; + /* Get some additional information about pi */ + const float hig2 = hi * hi * kernel_gamma2; + const float pix = pi->x[0] - shift_i[0]; + const float piy = pi->x[1] - shift_i[1]; + const float piz = pi->x[2] - shift_i[2]; - /* Is there anything we need to interact with ? */ - const double di = - sort_i[pid].d + max(hi, hj_max) * kernel_gamma + dx_max - rshift; - if (di < dj_min) continue; + /* Do we need to only check active parts in cj + (i.e. pi does not need updating) ? */ + if (!part_is_active(pi, e)) { - /* Get some additional information about pi */ - const float hig2 = hi * hi * kernel_gamma2; - const float pix = pi->x[0] - (cj->loc[0] + shift[0]); - const float piy = pi->x[1] - (cj->loc[1] + shift[1]); - const float piz = pi->x[2] - (cj->loc[2] + shift[2]); + /* Loop over the *active* parts in cj within range of pi */ + for (int pjd = 0; pjd < count_active_j && sort_active_j[pjd].d < di; + pjd++) { - /* Now loop over the relevant particles in cj */ - for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; ++pjd) { + /* Recover pj */ + struct part *pj = &parts_j[sort_active_j[pjd].i]; + const float hj = pj->h; + + /* Get the position of pj in the right frame */ + const float pjx = pj->x[0] - shift_j[0]; + const float pjy = pj->x[1] - shift_j[1]; + const float pjz = pj->x[2] - shift_j[2]; + + /* Compute the pairwise distance. */ + float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? + (note that we will do the other condition in the reverse loop) */ + if (r2 < hig2) { + IACT_NONSYM(r2, dx, hj, hi, pj, pi); + } + } /* loop over the active parts in cj. */ + } + + else { /* pi is active, we may need to update pi and pj */ + + /* Loop over *all* the parts in cj in range of pi. */ + for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { /* Recover pj */ struct part *pj = &parts_j[sort_j[pjd].i]; const float hj = pj->h; - /* Early abort for this pair now that we have the real hj? */ - if (sort_i[pid].d + max(hi, hj) * kernel_gamma + dx_max - rshift < - dj_min) - continue; - /* Get the position of pj in the right frame */ - const float hjg2 = hj * hj * kernel_gamma2; - const float pjx = pj->x[0] - cj->loc[0]; - const float pjy = pj->x[1] - cj->loc[1]; - const float pjz = pj->x[2] - cj->loc[2]; + const float pjx = pj->x[0] - shift_j[0]; + const float pjy = pj->x[1] - shift_j[1]; + const float pjz = pj->x[2] - shift_j[2]; /* Compute the pairwise distance. */ float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; @@ -1008,60 +1075,94 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif + /* Hit or miss? + (note that we will do the other condition in the reverse loop) */ + if (r2 < hig2) { - /* Are these particles actually neighbours? */ - if (r2 < hig2 || r2 < hjg2) { - - IACT_NONSYM(r2, dx, hi, hj, pi, pj); + /* Does pj need to be updated too? */ + if (part_is_active(pj, e)) + IACT(r2, dx, hi, hj, pi, pj); + else + IACT_NONSYM(r2, dx, hi, hj, pi, pj); } - } /* Loop over the parts in cj */ - } /* Loop over the parts in ci */ - } /* Cell ci is active */ - - if (cell_is_active(cj, e)) { + } /* loop over the parts in cj. */ + } /* Is pi active? */ + } /* Loop over all ci */ + + /* Loop over *all* the parts in cj starting from the centre until + we are out of range of anything in ci (using the maximal hj). */ + for (int pjd = 0; + pjd < count_j && + sort_j[pjd].d - hj_max * kernel_gamma + dx_max < di_max - rshift; + pjd++) { + + /* Get a hold of the jth part in cj. */ + struct part *pj = &parts_j[sort_j[pjd].i]; + const float hj = pj->h; + + /* Is there anything we need to interact with (for this specific hj) ? */ + const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max; + if (dj > di_max - rshift) continue; + + /* Get some additional information about pj */ + const float hjg2 = hj * hj * kernel_gamma2; + const float pjx = pj->x[0] - shift_j[0]; + const float pjy = pj->x[1] - shift_j[1]; + const float pjz = pj->x[2] - shift_j[2]; + + /* Do we need to only check active parts in ci + (i.e. pj does not need updating) ? */ + if (!part_is_active(pj, e)) { + + /* Loop over the *active* parts in ci. */ + for (int pid = count_active_i - 1; + pid >= 0 && sort_active_i[pid].d - rshift > dj; pid--) { - /* Loop over the parts in cj until nothing is within range in ci. - * We start from the centre and move outwards. */ - for (int pjd = 0; pjd < count_j; pjd++) { + /* Recover pi */ + struct part *pi = &parts_i[sort_active_i[pid].i]; + const float hi = pi->h; + const float hig2 = hi * hi * kernel_gamma2; - /* Did we go beyond the upper bound ? */ - if (sort_j[pjd].d - max_j > di_max - rshift) break; + /* Get the position of pi in the right frame */ + const float pix = pi->x[0] - shift_i[0]; + const float piy = pi->x[1] - shift_i[1]; + const float piz = pi->x[2] - shift_i[2]; - /* Get a hold of the jth part in cj. */ - struct part *pj = &parts_j[sort_j[pjd].i]; - const float hj = pj->h; + /* Compute the pairwise distance. */ + float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - /* Skip inactive particles */ - if (!part_is_active(pj, e)) continue; +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif - /* Is there anything we need to interact with ? */ - const double dj = sort_j[pjd].d - max(hj, hi_max) * kernel_gamma - dx_max; - if (dj > di_max - rshift) continue; + /* Hit or miss? + (note that we must avoid the r2 < hig2 cases we already processed) */ + if (r2 < hjg2 && r2 > hig2) { + IACT_NONSYM(r2, dx, hi, hj, pi, pj); + } + } /* loop over the active parts in ci. */ + } - /* Get some additional information about pj */ - const float hjg2 = hj * hj * kernel_gamma2; - const float pjx = pj->x[0] - cj->loc[0]; - const float pjy = pj->x[1] - cj->loc[1]; - const float pjz = pj->x[2] - cj->loc[2]; + else { /* pj is active, we may need to update pj and pi */ - /* Now loop over the relevant particles in ci */ + /* Loop over *all* the parts in ci. */ for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d - rshift > dj; - --pid) { + pid--) { /* Recover pi */ struct part *pi = &parts_i[sort_i[pid].i]; const float hi = pi->h; - - /* Early abort for this pair now that we have the real hi? */ - if (sort_j[pjd].d - max(hj, hi) * kernel_gamma - dx_max > - di_max - rshift) - continue; + const float hig2 = hi * hi * kernel_gamma2; /* Get the position of pi in the right frame */ - const float hig2 = hi * hi * kernel_gamma2; - const float pix = pi->x[0] - (cj->loc[0] + shift[0]); - const float piy = pi->x[1] - (cj->loc[1] + shift[1]); - const float piz = pi->x[2] - (cj->loc[2] + shift[2]); + const float pix = pi->x[0] - shift_i[0]; + const float piy = pi->x[1] - shift_i[1]; + const float piz = pi->x[2] - shift_i[2]; /* Compute the pairwise distance. */ float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; @@ -1075,14 +1176,23 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { error("Particle pj not drifted to current time"); #endif - /* Are these particles actually neighbours? */ - if (r2 < hjg2 || r2 < hig2) { + /* Hit or miss? + (note that we must avoid the r2 < hig2 cases we already processed) */ + if (r2 < hjg2 && r2 > hig2) { - IACT_NONSYM(r2, dx, hj, hi, pj, pi); + /* Does pi need to be updated too? */ + if (part_is_active(pi, e)) + IACT(r2, dx, hj, hi, pj, pi); + else + IACT_NONSYM(r2, dx, hj, hi, pj, pi); } - } /* Loop over the parts in ci */ - } /* Loop over the parts in cj */ - } /* Cell cj is active */ + } /* loop over the parts in ci. */ + } /* Is pj active? */ + } /* Loop over all cj */ + + /* Clean-up if necessary */ + if (cell_is_active(ci, e) && !cell_is_all_active(ci, e)) free(sort_active_i); + if (cell_is_active(cj, e) && !cell_is_all_active(cj, e)) free(sort_active_j); TIMER_TOC(TIMER_DOPAIR); }