diff --git a/src/runner_doiact.h b/src/runner_doiact.h index e78d87e4b803b441f86fdd4db4fdcb0b9f61e091..b27c8bc2412dc174d1c18e7850c876bcafffee8b 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -112,7 +112,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, error("Don't use in actual runs ! Slow code !"); -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -167,7 +167,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT(r2, dx, hi, pj->h, pi, pj); @@ -197,7 +197,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -213,7 +213,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { error("Don't use in actual runs ! Slow code !"); -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -256,7 +256,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT(r2, dx, hi, pj->h, pi, pj); @@ -286,7 +286,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -315,7 +315,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, error("Don't use in actual runs ! Slow code !"); -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -365,7 +365,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -395,7 +395,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -422,7 +422,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, struct engine *e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -497,7 +497,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -562,7 +562,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -593,7 +593,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, } /* loop over the parts in ci. */ } -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -616,7 +616,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, struct part *restrict parts, int *restrict ind, int count) { -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -656,7 +656,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 > 0.0f && r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -686,7 +686,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -707,7 +707,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { const struct engine *restrict e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -791,7 +791,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -861,7 +861,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hjg2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hj, pi->h, pj, pi); @@ -891,7 +891,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -912,7 +912,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { struct engine *restrict e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); float hiq1[VEC_SIZE] __attribute__((aligned(16))); @@ -1035,7 +1035,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hj, hi, pj, pi); @@ -1094,7 +1094,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION /* Does pj need to be updated too? */ if (part_is_active(pj, e)) @@ -1195,7 +1195,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, hj, pi, pj); @@ -1253,7 +1253,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION /* Does pi need to be updated too? */ if (part_is_active(pi, e)) @@ -1311,7 +1311,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount1 > 0) for (int k = 0; k < icount1; k++) @@ -1338,7 +1338,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { const struct engine *e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); float hiq1[VEC_SIZE] __attribute__((aligned(16))); @@ -1415,7 +1415,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hj * hj * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hj, hi, pj, pi); @@ -1479,7 +1479,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hig2 || doj) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION /* Which parts need to be updated? */ if (r2 < hig2 && doj) @@ -1562,7 +1562,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { } /* loop over all particles. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount1 > 0) for (int k = 0; k < icount1; k++) @@ -1587,7 +1587,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { const struct engine *e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); float hiq1[VEC_SIZE] __attribute__((aligned(16))); @@ -1664,7 +1664,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hj, hi, pj, pi); @@ -1726,7 +1726,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION /* Does pj need to be updated too? */ if (part_is_active(pj, e)) @@ -1784,7 +1784,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { } /* loop over all particles. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount1 > 0) for (int k = 0; k < icount1; k++) @@ -2079,8 +2079,13 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { } /* Otherwise, compute self-interaction. */ - else + else { +#if (DOSELF1 == runner_doself1_density) && defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) + runner_doself1_density_vec(r, ci); +#else DOSELF1(r, ci); +#endif + } if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF); }