diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 4f7c3b02bdb7193769142a7808c9fa80eb729b4a..737418dabfb7a96c8c61600e2df09dce87cfc843 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -156,11 +156,11 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts_i[pid]; const float hi = pi->h; - const int pi_active = part_is_active(pi, e); - - double pix[3]; - for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - ci->loc[k] - shift[k]; const float hig2 = hi * hi * kernel_gamma2; + const int pi_active = part_is_active(pi, e); + const double pix[3] = {pi->x[0] - ci->loc[0] - shift[0], + pi->x[1] - ci->loc[1] - shift[1], + pi->x[2] - ci->loc[2] - shift[2]}; /* Loop over the parts in cj. */ for (int pjd = 0; pjd < count_j; pjd++) { @@ -168,18 +168,14 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[pjd]; const float hj = pj->h; + const float hjg2 = hj * hj * kernel_gamma2; const int pj_active = part_is_active(pj, e); /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - double pjx[3]; - for (int k = 0; k < 3; k++) { - pjx[k] = pj->x[k] - ci->loc[k]; - dx[k] = pix[k] - pjx[k]; - r2 += dx[k] * dx[k]; - } - const float hjg2 = hj * hj * kernel_gamma2; + const double pjx[3] = {pj->x[0] - ci->loc[0], pj->x[1] - ci->loc[1], + pj->x[2] - ci->loc[2]}; + float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; /* Hit or miss? */ if (r2 < hig2 && pi_active) { @@ -244,11 +240,11 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts_i[pid]; const float hi = pi->h; - const int pi_active = part_is_active(pi, e); - - double pix[3]; - for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - ci->loc[k] - shift[k]; const float hig2 = hi * hi * kernel_gamma2; + const int pi_active = part_is_active(pi, e); + const double pix[3] = {pi->x[0] - ci->loc[0] - shift[0], + pi->x[1] - ci->loc[1] - shift[1], + pi->x[2] - ci->loc[2] - shift[2]}; /* Loop over the parts in cj. */ for (int pjd = 0; pjd < count_j; pjd++) { @@ -256,18 +252,14 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[pjd]; const float hj = pj->h; + const float hjg2 = hj * hj * kernel_gamma2; const int pj_active = part_is_active(pj, e); /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - double pjx[3]; - for (int k = 0; k < 3; k++) { - pjx[k] = pj->x[k] - ci->loc[k]; - dx[k] = pix[k] - pjx[k]; - r2 += dx[k] * dx[k]; - } - const float hjg2 = hj * hj * kernel_gamma2; + const double pjx[3] = {pj->x[0] - ci->loc[0], pj->x[1] - ci->loc[1], + pj->x[2] - ci->loc[2]}; + float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; /* Hit or miss? */ if (r2 < hig2 || r2 < hjg2) { @@ -509,7 +501,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, struct cell *restrict cj) { error("FUCK"); - + struct engine *e = r->e; #ifdef WITH_OLD_VECTORIZATION @@ -807,8 +799,8 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const struct engine *restrict e = r->e; - //DOPAIR1_NAIVE(r, ci, cj); - //return; +// DOPAIR1_NAIVE(r, ci, cj); +// return; #ifdef WITH_OLD_VECTORIZATION int icount = 0; @@ -1031,198 +1023,6 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, TIMER_TOC(TIMER_DOPAIR); } -void DOPAIR1_old(struct runner *r, struct cell *ci, struct cell *cj) { - - struct engine *restrict e = r->e; - - DOPAIR1_NAIVE(r, ci, cj); - return; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - - if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) - error("Interacting undrifted cells."); - - /* Get the shift ID. */ - double shift[3] = {0.0, 0.0, 0.0}; - const int sid = space_getsid(e->s, &ci, &cj, shift); - - /* Have the cells been sorted? */ - if (!(ci->sorted & (1 << sid)) || - ci->dx_max_sort_old > space_maxreldx * ci->dmin) - error("Interacting unsorted cells."); - if (!(cj->sorted & (1 << sid)) || - cj->dx_max_sort_old > space_maxreldx * cj->dmin) - error("Interacting unsorted cells."); - - /* Get the cutoff shift. */ - double rshift = 0.0; - for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; - - /* Pick-out the sorted lists. */ - const struct entry *restrict sort_i = ci->sort[sid]; - const struct entry *restrict sort_j = cj->sort[sid]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that the dx_max_sort values in the cell are indeed an upper - bound on particle movement. */ - for (int pid = 0; pid < ci->count; pid++) { - const struct part *p = &ci->parts[sort_i[pid].i]; - const float d = p->x[0] * runner_shift[sid][0] + - p->x[1] * runner_shift[sid][1] + - p->x[2] * runner_shift[sid][2]; - if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort > - 1.0e-4 * max(fabsf(d), ci->dx_max_sort_old)) - error( - "particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d " - "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e " - "ci->dx_max_sort_old=%e", - ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort, - ci->dx_max_sort_old); - } - for (int pjd = 0; pjd < cj->count; pjd++) { - const struct part *p = &cj->parts[sort_j[pjd].i]; - const float d = p->x[0] * runner_shift[sid][0] + - p->x[1] * runner_shift[sid][1] + - p->x[2] * runner_shift[sid][2]; - if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort > - 1.0e-4 * max(fabsf(d), cj->dx_max_sort_old)) - error( - "particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d " - "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e " - "cj->dx_max_sort_old=%e", - cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort, - cj->dx_max_sort_old); - } -#endif /* SWIFT_DEBUG_CHECKS */ - - /* Get some other useful values. */ - const double hi_max = ci->h_max; - const double hj_max = cj->h_max; - const int count_i = ci->count; - const int count_j = cj->count; - struct part *restrict parts_i = ci->parts; - struct part *restrict parts_j = cj->parts; - const double di_max = sort_i[count_i - 1].d; - const double dj_min = sort_j[0].d; - const double dx_max = (ci->dx_max_sort + cj->dx_max_sort); - - if (cell_is_active(ci, e)) { - - int last_pj = count_j - 1; - - /* Loop over the parts in ci until nothing is within range in cj. - * We start from the centre and move outwards. */ - for (int pid = count_i - 1; pid >= 0; pid--) { - - /* Get a hold of the ith part in ci. */ - struct part *pi = &parts_i[sort_i[pid].i]; - const float hi = pi->h; - - /* Skip inactive particles */ - if (!part_is_active(pi, e)) continue; - - /* Is there anything we need to interact with ? */ - const double di = - sort_i[pid].d + hj_max * kernel_gamma + dx_max - rshift; - if (di < dj_min) continue; - - /* Where do we have to stop when looping over cell j? */ - while (sort_j[last_pj].d > di && last_pj > 0) last_pj--; - - last_pj += 1; - if (last_pj >= count_j) last_pj = count_j - 1; - - /* Get some additional information about pi */ - const float hig2 = hi * hi * kernel_gamma2; - const float pix = pi->x[0] - ci->loc[0] - shift[0]; - const float piy = pi->x[1] - ci->loc[1] - shift[1]; - const float piz = pi->x[2] - ci->loc[2] - shift[2]; - - /* Now loop over the relevant particles in cj */ - for (int pjd = 0; pjd <= last_pj; ++pjd) { - - /* Recover pj */ - struct part *pj = &parts_j[sort_j[pjd].i]; - const float hj = pj->h; - const float pjx = pj->x[0] - ci->loc[0]; - const float pjy = pj->x[1] - ci->loc[1]; - const float pjz = pj->x[2] - ci->loc[2]; - - /* Compute the pairwise distance. */ - float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - - if (r2 < hig2) { - - IACT_NONSYM(r2, dx, hi, hj, pi, pj); - } - } /* Loop over the parts in cj */ - } /* Loop over the parts in ci */ - } /* Cell ci is active */ - - if (cell_is_active(cj, e)) { - - int first_pi = 0; - - /* Loop over the parts in cj until nothing is within range in ci. - * We start from the centre and move outwards. */ - for (int pjd = 0; pjd < count_j; pjd++) { - - /* Get a hold of the jth part in cj. */ - struct part *pj = &parts_j[sort_j[pjd].i]; - const float hj = pj->h; - - /* Skip inactive particles */ - if (!part_is_active(pj, e)) continue; - - /* Is there anything we need to interact with ? */ - const double dj = sort_j[pjd].d - hi_max * kernel_gamma - dx_max; - if (dj > di_max - rshift) continue; - - /* Where do we have to stop when looping over cell i? */ - while (sort_i[first_pi].d - rshift < dj && first_pi < count_i - 1) - first_pi++; - - first_pi -= 1; - if (first_pi < 0) first_pi = 0; - - /* Get some additional information about pi */ - const float hjg2 = hj * hj * kernel_gamma2; - const float pjx = pj->x[0] - ci->loc[0]; - const float pjy = pj->x[1] - ci->loc[1]; - const float pjz = pj->x[2] - ci->loc[2]; - - /* Now loop over the relevant particles in ci */ - for (int pid = count_i - 1; pid >= first_pi; --pid) { - - /* Recover pi */ - struct part *pi = &parts_i[sort_i[pid].i]; - const float hi = pi->h; - const float pix = pi->x[0] - ci->loc[0] - shift[0]; - const float piy = pi->x[1] - ci->loc[1] - shift[1]; - const float piz = pi->x[2] - ci->loc[2] - shift[2]; - - /* Compute the pairwise distance. */ - float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - - if (r2 < hjg2) { - - IACT_NONSYM(r2, dx, hj, hi, pj, pi); - } - } /* Loop over the parts in ci */ - } /* Loop over the parts in cj */ - } /* Cell cj is active */ - - TIMER_TOC(TIMER_DOPAIR); -} - - - /** * @brief Determine which version of DOPAIR1 needs to be called depending on the * orientation of the cells or whether DOPAIR1 needs to be called at all. @@ -1264,8 +1064,6 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { #else DOPAIR1(r, ci, cj, sid, shift); #endif - -// DOPAIR1(r, ci, cj); } /** @@ -1275,502 +1073,12 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { * @param ci The first #cell. * @param cj The second #cell. */ -void DOPAIR2_old(struct runner *r, struct cell *ci, struct cell *cj) { +void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { struct engine *restrict e = r->e; - DOPAIR2_NAIVE(r, ci, cj); - return; - -#ifdef WITH_OLD_VECTORIZATION - int icount1 = 0; - float r2q1[VEC_SIZE] __attribute__((aligned(16))); - float hiq1[VEC_SIZE] __attribute__((aligned(16))); - float hjq1[VEC_SIZE] __attribute__((aligned(16))); - float dxq1[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq1[VEC_SIZE], *pjq1[VEC_SIZE]; - int icount2 = 0; - float r2q2[VEC_SIZE] __attribute__((aligned(16))); - float hiq2[VEC_SIZE] __attribute__((aligned(16))); - float hjq2[VEC_SIZE] __attribute__((aligned(16))); - float dxq2[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq2[VEC_SIZE], *pjq2[VEC_SIZE]; -#endif - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - - if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) - error("Interacting undrifted cells."); - - /* Get the shift ID. */ - double shift[3] = {0.0, 0.0, 0.0}; - const int sid = space_getsid(e->s, &ci, &cj, shift); - - /* Have the cells been sorted? */ - if (!(ci->sorted & (1 << sid)) || - ci->dx_max_sort_old > space_maxreldx * ci->dmin) - error("Interacting unsorted cells."); - if (!(cj->sorted & (1 << sid)) || - cj->dx_max_sort_old > space_maxreldx * cj->dmin) - error("Interacting unsorted cells."); - - /* Get the cutoff shift. */ - double rshift = 0.0; - for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; - - /* Pick-out the sorted lists. */ - struct entry *restrict sort_i = ci->sort[sid]; - struct entry *restrict sort_j = cj->sort[sid]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that the dx_max_sort values in the cell are indeed an upper - bound on particle movement. */ - for (int pid = 0; pid < ci->count; pid++) { - const struct part *p = &ci->parts[sort_i[pid].i]; - const float d = p->x[0] * runner_shift[sid][0] + - p->x[1] * runner_shift[sid][1] + - p->x[2] * runner_shift[sid][2]; - if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort > - 1.0e-4 * max(fabsf(d), ci->dx_max_sort_old)) - error( - "particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d " - "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e " - "ci->dx_max_sort_old=%e", - ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort, - ci->dx_max_sort_old); - } - for (int pjd = 0; pjd < cj->count; pjd++) { - const struct part *p = &cj->parts[sort_j[pjd].i]; - const float d = p->x[0] * runner_shift[sid][0] + - p->x[1] * runner_shift[sid][1] + - p->x[2] * runner_shift[sid][2]; - if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort > - 1.0e-4 * max(fabsf(d), cj->dx_max_sort_old)) - error( - "particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d " - "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e " - "cj->dx_max_sort_old=%e", - cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort, - cj->dx_max_sort_old); - } -#endif /* SWIFT_DEBUG_CHECKS */ - - /* Get some other useful values. */ - const double hi_max = ci->h_max * kernel_gamma - rshift; - const double hj_max = cj->h_max * kernel_gamma; - const int count_i = ci->count; - const int count_j = cj->count; - struct part *restrict parts_i = ci->parts; - struct part *restrict parts_j = cj->parts; - const double di_max = sort_i[count_i - 1].d - rshift; - const double dj_min = sort_j[0].d; - const double dx_max = (ci->dx_max_sort + cj->dx_max_sort); - - /* Collect the number of parts left and right below dt. */ - int countdt_i = 0, countdt_j = 0; - struct entry *restrict sortdt_i = NULL, *restrict sortdt_j = NULL; - if (cell_is_all_active(ci, e)) { - sortdt_i = sort_i; - countdt_i = count_i; - } else if (cell_is_active(ci, e)) { - if (posix_memalign((void *)&sortdt_i, VEC_SIZE * sizeof(float), - sizeof(struct entry) * count_i) != 0) - error("Failed to allocate dt sortlists."); - for (int k = 0; k < count_i; k++) - if (part_is_active(&parts_i[sort_i[k].i], e)) { - sortdt_i[countdt_i] = sort_i[k]; - countdt_i += 1; - } - } - if (cell_is_all_active(cj, e)) { - sortdt_j = sort_j; - countdt_j = count_j; - } else if (cell_is_active(cj, e)) { - if (posix_memalign((void *)&sortdt_j, VEC_SIZE * sizeof(float), - sizeof(struct entry) * count_j) != 0) - error("Failed to allocate dt sortlists."); - for (int k = 0; k < count_j; k++) - if (part_is_active(&parts_j[sort_j[k].i], e)) { - sortdt_j[countdt_j] = sort_j[k]; - countdt_j += 1; - } - } - - /* Loop over the parts in ci. */ - for (int pid = count_i - 1; - pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { - - /* Get a hold of the ith part in ci. */ - struct part *restrict pi = &parts_i[sort_i[pid].i]; - const float hi = pi->h; - const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; - if (di < dj_min) continue; - - double pix[3]; - for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; - const float hig2 = hi * hi * kernel_gamma2; - - /* Look at valid dt parts only? */ - if (!part_is_active(pi, e)) { - - /* Loop over the parts in cj within dt. */ - for (int pjd = 0; pjd < countdt_j && sortdt_j[pjd].d < di; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[sortdt_j[pjd].i]; - const float hj = pj->h; - - /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pj->x[k] - pix[k]; - r2 += dx[k] * dx[k]; - } - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) - error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hig2) { - -#ifndef WITH_OLD_VECTORIZATION - - if (pj->id == 142801) message("PAIR"); - - IACT_NONSYM(r2, dx, hj, hi, pj, pi); - -#else - - /* Add this interaction to the queue. */ - r2q1[icount1] = r2; - dxq1[3 * icount1 + 0] = dx[0]; - dxq1[3 * icount1 + 1] = dx[1]; - dxq1[3 * icount1 + 2] = dx[2]; - hiq1[icount1] = hj; - hjq1[icount1] = hi; - piq1[icount1] = pj; - pjq1[icount1] = pi; - icount1 += 1; - - /* Flush? */ - if (icount1 == VEC_SIZE) { - IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1); - icount1 = 0; - } - -#endif - } - - } /* loop over the parts in cj. */ - - } - - /* Otherwise, look at all parts. */ - else { - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[sort_j[pjd].i]; - const float hj = pj->h; - - /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; - } - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) - error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hig2) { - -#ifndef WITH_OLD_VECTORIZATION - - /* Does pj need to be updated too? */ - if (part_is_active(pj, e)) { - if (pi->id == 142801) message("PAIR"); - - // IACT(r2, dx, hi, hj, pi, pj); - IACT_NONSYM(r2, dx, hi, hj, pi, pj); - dx[0] = -dx[0]; - dx[1] = -dx[1]; - dx[2] = -dx[2]; - - if (pj->id == 142801) message("PAIR"); - - IACT_NONSYM(r2, dx, hj, hi, pj, pi); - } else { - if (pi->id == 142801) message("PAIR"); - - IACT_NONSYM(r2, dx, hi, hj, pi, pj); - } - -#else - - /* Does pj need to be updated too? */ - if (part_is_active(pj, e)) { - - /* Add this interaction to the symmetric queue. */ - r2q2[icount2] = r2; - dxq2[3 * icount2 + 0] = dx[0]; - dxq2[3 * icount2 + 1] = dx[1]; - dxq2[3 * icount2 + 2] = dx[2]; - hiq2[icount2] = hi; - hjq2[icount2] = hj; - piq2[icount2] = pi; - pjq2[icount2] = pj; - icount2 += 1; - - /* Flush? */ - if (icount2 == VEC_SIZE) { - IACT_VEC(r2q2, dxq2, hiq2, hjq2, piq2, pjq2); - icount2 = 0; - } - - } else { - - /* Add this interaction to the non-symmetric queue. */ - r2q1[icount1] = r2; - dxq1[3 * icount1 + 0] = dx[0]; - dxq1[3 * icount1 + 1] = dx[1]; - dxq1[3 * icount1 + 2] = dx[2]; - hiq1[icount1] = hi; - hjq1[icount1] = hj; - piq1[icount1] = pi; - pjq1[icount1] = pj; - icount1 += 1; - - /* Flush? */ - if (icount1 == VEC_SIZE) { - IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1); - icount1 = 0; - } - } - -#endif - } - - } /* loop over the parts in cj. */ - } - - } /* loop over the parts in ci. */ - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max; - pjd++) { - - /* Get a hold of the jth part in cj. */ - struct part *restrict pj = &parts_j[sort_j[pjd].i]; - const float hj = pj->h; - const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max + rshift; - if (dj - rshift > di_max) continue; - - double pjx[3]; - for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k]; - const float hjg2 = hj * hj * kernel_gamma2; - - /* Is this particle outside the dt? */ - if (!part_is_active(pj, e)) { - - /* Loop over the parts in ci. */ - for (int pid = countdt_i - 1; pid >= 0 && sortdt_i[pid].d > dj; pid--) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pi = &parts_i[sortdt_i[pid].i]; - const float hi = pi->h; - - /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pi->x[k] - pjx[k]; - r2 += dx[k] * dx[k]; - } - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) - error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) { - -#ifndef WITH_OLD_VECTORIZATION - - if (pi->id == 142801) message("PAIR"); - - IACT_NONSYM(r2, dx, hi, hj, pi, pj); - -#else - - /* Add this interaction to the queue. */ - r2q1[icount1] = r2; - dxq1[3 * icount1 + 0] = dx[0]; - dxq1[3 * icount1 + 1] = dx[1]; - dxq1[3 * icount1 + 2] = dx[2]; - hiq1[icount1] = hi; - hjq1[icount1] = hj; - piq1[icount1] = pi; - pjq1[icount1] = pj; - icount1 += 1; - - /* Flush? */ - if (icount1 == VEC_SIZE) { - IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1); - icount1 = 0; - } - -#endif - } - - } /* loop over the parts in cj. */ - } - - /* Otherwise, interact with all particles in cj. */ - else { - - /* Loop over the parts in ci. */ - for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pi = &parts_i[sort_i[pid].i]; - const float hi = pi->h; - - /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pjx[k] - pi->x[k]; - r2 += dx[k] * dx[k]; - } - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) - error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) { - -#ifndef WITH_OLD_VECTORIZATION - - /* Does pi need to be updated too? */ - if (part_is_active(pi, e)) { - - if (pj->id == 142801) message("PAIR"); - - // IACT(r2, dx, hj, hi, pj, pi); - IACT_NONSYM(r2, dx, hj, hi, pj, pi); - dx[0] = -dx[0]; - dx[1] = -dx[1]; - dx[2] = -dx[2]; - - if (pi->id == 142801) message("PAIR"); - - IACT_NONSYM(r2, dx, hi, hj, pi, pj); - } else { - if (pj->id == 142801) message("PAIR"); - - IACT_NONSYM(r2, dx, hj, hi, pj, pi); - } -#else - - /* Does pi need to be updated too? */ - if (part_is_active(pi, e)) { - - /* Add this interaction to the symmetric queue. */ - r2q2[icount2] = r2; - dxq2[3 * icount2 + 0] = dx[0]; - dxq2[3 * icount2 + 1] = dx[1]; - dxq2[3 * icount2 + 2] = dx[2]; - hiq2[icount2] = hj; - hjq2[icount2] = hi; - piq2[icount2] = pj; - pjq2[icount2] = pi; - icount2 += 1; - - /* Flush? */ - if (icount2 == VEC_SIZE) { - IACT_VEC(r2q2, dxq2, hiq2, hjq2, piq2, pjq2); - icount2 = 0; - } - - } else { - - /* Add this interaction to the non-symmetric queue. */ - r2q1[icount1] = r2; - dxq1[3 * icount1 + 0] = dx[0]; - dxq1[3 * icount1 + 1] = dx[1]; - dxq1[3 * icount1 + 2] = dx[2]; - hiq1[icount1] = hj; - hjq1[icount1] = hi; - piq1[icount1] = pj; - pjq1[icount1] = pi; - icount1 += 1; - - /* Flush? */ - if (icount1 == VEC_SIZE) { - IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1); - icount1 = 0; - } - } - -#endif - } - - } /* loop over the parts in cj. */ - } - - } /* loop over the parts in ci. */ - -#ifdef WITH_OLD_VECTORIZATION - /* Pick up any leftovers. */ - if (icount1 > 0) - for (int k = 0; k < icount1; k++) - IACT_NONSYM(r2q1[k], &dxq1[3 * k], hiq1[k], hjq1[k], piq1[k], pjq1[k]); - if (icount2 > 0) - for (int k = 0; k < icount2; k++) - IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]); -#endif - - /* Clean-up if necessary */ - if (cell_is_active(ci, e) && !cell_is_all_active(ci, e)) free(sortdt_i); - if (cell_is_active(cj, e) && !cell_is_all_active(cj, e)) free(sortdt_j); - - TIMER_TOC(TIMER_DOPAIR); -} - -void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { - - struct engine *restrict e = r->e; - - //DOPAIR2_NAIVE(r, ci, cj); - //return; + // DOPAIR2_NAIVE(r, ci, cj); + // return; TIMER_TIC;