diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 308764e755806a124f1cc234dfae253c57e0eda6..a54fb2b602a98413e46b206f3b23d6d4586d90c4 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -112,7 +112,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, error("Don't use in actual runs ! Slow code !"); -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -167,7 +167,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_BAD_VECTORIZATION IACT(r2, dx, hi, pj->h, pi, pj); @@ -197,7 +197,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -213,7 +213,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { error("Don't use in actual runs ! Slow code !"); -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -256,7 +256,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_BAD_VECTORIZATION IACT(r2, dx, hi, pj->h, pi, pj); @@ -286,7 +286,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -315,7 +315,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, error("Don't use in actual runs ! Slow code !"); -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -365,7 +365,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_BAD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -395,7 +395,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -422,7 +422,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, struct engine *e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -497,7 +497,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_BAD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -562,7 +562,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_BAD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -593,7 +593,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, } /* loop over the parts in ci. */ } -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -616,7 +616,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, struct part *restrict parts, int *restrict ind, int count) { -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -656,7 +656,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 > 0.0f && r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_BAD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -686,7 +686,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -707,7 +707,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { const struct engine *restrict e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -785,7 +785,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_BAD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -847,7 +847,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hjg2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_BAD_VECTORIZATION IACT_NONSYM(r2, dx, hj, pi->h, pj, pi); @@ -877,7 +877,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -898,7 +898,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { struct engine *restrict e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); float hiq1[VEC_SIZE] __attribute__((aligned(16))); @@ -1015,7 +1015,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_BAD_VECTORIZATION IACT_NONSYM(r2, dx, hj, hi, pj, pi); @@ -1066,7 +1066,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_BAD_VECTORIZATION /* Does pj need to be updated too? */ if (part_is_active(pj, e)) @@ -1159,7 +1159,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_BAD_VECTORIZATION IACT_NONSYM(r2, dx, hi, hj, pi, pj); @@ -1209,7 +1209,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_BAD_VECTORIZATION /* Does pi need to be updated too? */ if (part_is_active(pi, e)) @@ -1267,7 +1267,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION /* Pick up any leftovers. */ if (icount1 > 0) for (int k = 0; k < icount1; k++) @@ -1294,7 +1294,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { const struct engine *e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); float hiq1[VEC_SIZE] __attribute__((aligned(16))); @@ -1365,7 +1365,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hj * hj * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_BAD_VECTORIZATION IACT_NONSYM(r2, dx, hj, hi, pj, pi); @@ -1421,7 +1421,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hig2 || doj) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_BAD_VECTORIZATION /* Which parts need to be updated? */ if (r2 < hig2 && doj) @@ -1504,7 +1504,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { } /* loop over all particles. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION /* Pick up any leftovers. */ if (icount1 > 0) for (int k = 0; k < icount1; k++) @@ -1529,7 +1529,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { const struct engine *e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); float hiq1[VEC_SIZE] __attribute__((aligned(16))); @@ -1600,7 +1600,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_BAD_VECTORIZATION IACT_NONSYM(r2, dx, hj, hi, pj, pi); @@ -1654,7 +1654,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_BAD_VECTORIZATION /* Does pj need to be updated too? */ if (part_is_active(pj, e)) @@ -1712,7 +1712,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { } /* loop over all particles. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_BAD_VECTORIZATION /* Pick up any leftovers. */ if (icount1 > 0) for (int k = 0; k < icount1; k++)