Commit a853b793 authored by James Willis
Browse files

Re-enable vectorisation, as the bug in the force interaction function was found and fixed.

parent 8cc3ff5b
......@@ -112,7 +112,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
error("Don't use in actual runs ! Slow code !");
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16)));
......@@ -167,7 +167,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
/* Hit or miss? */
if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {
#ifndef WITH_BAD_VECTORIZATION
#ifndef WITH_VECTORIZATION
IACT(r2, dx, hi, pj->h, pi, pj);
......@@ -197,7 +197,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
} /* loop over the parts in ci. */
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
/* Pick up any leftovers. */
if (icount > 0)
for (int k = 0; k < icount; k++)
......@@ -213,7 +213,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
error("Don't use in actual runs ! Slow code !");
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16)));
......@@ -256,7 +256,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
/* Hit or miss? */
if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {
#ifndef WITH_BAD_VECTORIZATION
#ifndef WITH_VECTORIZATION
IACT(r2, dx, hi, pj->h, pi, pj);
......@@ -286,7 +286,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
} /* loop over the parts in ci. */
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
/* Pick up any leftovers. */
if (icount > 0)
for (int k = 0; k < icount; k++)
......@@ -315,7 +315,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
error("Don't use in actual runs ! Slow code !");
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16)));
......@@ -365,7 +365,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
/* Hit or miss? */
if (r2 < hig2) {
#ifndef WITH_BAD_VECTORIZATION
#ifndef WITH_VECTORIZATION
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
......@@ -395,7 +395,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
} /* loop over the parts in ci. */
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
/* Pick up any leftovers. */
if (icount > 0)
for (int k = 0; k < icount; k++)
......@@ -422,7 +422,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
struct engine *e = r->e;
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16)));
......@@ -497,7 +497,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
/* Hit or miss? */
if (r2 < hig2) {
#ifndef WITH_BAD_VECTORIZATION
#ifndef WITH_VECTORIZATION
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
......@@ -562,7 +562,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
/* Hit or miss? */
if (r2 < hig2) {
#ifndef WITH_BAD_VECTORIZATION
#ifndef WITH_VECTORIZATION
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
......@@ -593,7 +593,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
} /* loop over the parts in ci. */
}
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
/* Pick up any leftovers. */
if (icount > 0)
for (int k = 0; k < icount; k++)
......@@ -616,7 +616,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
struct part *restrict parts, int *restrict ind, int count) {
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16)));
......@@ -656,7 +656,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
/* Hit or miss? */
if (r2 > 0.0f && r2 < hig2) {
#ifndef WITH_BAD_VECTORIZATION
#ifndef WITH_VECTORIZATION
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
......@@ -686,7 +686,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
} /* loop over the parts in ci. */
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
/* Pick up any leftovers. */
if (icount > 0)
for (int k = 0; k < icount; k++)
......@@ -707,7 +707,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
const struct engine *restrict e = r->e;
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16)));
......@@ -785,7 +785,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
/* Hit or miss? */
if (r2 < hig2) {
#ifndef WITH_BAD_VECTORIZATION
#ifndef WITH_VECTORIZATION
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
......@@ -847,7 +847,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
/* Hit or miss? */
if (r2 < hjg2) {
#ifndef WITH_BAD_VECTORIZATION
#ifndef WITH_VECTORIZATION
IACT_NONSYM(r2, dx, hj, pi->h, pj, pi);
......@@ -877,7 +877,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
} /* loop over the parts in ci. */
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
/* Pick up any leftovers. */
if (icount > 0)
for (int k = 0; k < icount; k++)
......@@ -898,7 +898,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
struct engine *restrict e = r->e;
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
int icount1 = 0;
float r2q1[VEC_SIZE] __attribute__((aligned(16)));
float hiq1[VEC_SIZE] __attribute__((aligned(16)));
......@@ -1015,7 +1015,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
/* Hit or miss? */
if (r2 < hig2) {
#ifndef WITH_BAD_VECTORIZATION
#ifndef WITH_VECTORIZATION
IACT_NONSYM(r2, dx, hj, hi, pj, pi);
......@@ -1066,7 +1066,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
/* Hit or miss? */
if (r2 < hig2) {
#ifndef WITH_BAD_VECTORIZATION
#ifndef WITH_VECTORIZATION
/* Does pj need to be updated too? */
if (part_is_active(pj, e))
......@@ -1159,7 +1159,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
/* Hit or miss? */
if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) {
#ifndef WITH_BAD_VECTORIZATION
#ifndef WITH_VECTORIZATION
IACT_NONSYM(r2, dx, hi, hj, pi, pj);
......@@ -1209,7 +1209,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
/* Hit or miss? */
if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) {
#ifndef WITH_BAD_VECTORIZATION
#ifndef WITH_VECTORIZATION
/* Does pi need to be updated too? */
if (part_is_active(pi, e))
......@@ -1267,7 +1267,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
} /* loop over the parts in ci. */
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
/* Pick up any leftovers. */
if (icount1 > 0)
for (int k = 0; k < icount1; k++)
......@@ -1294,7 +1294,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
const struct engine *e = r->e;
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
int icount1 = 0;
float r2q1[VEC_SIZE] __attribute__((aligned(16)));
float hiq1[VEC_SIZE] __attribute__((aligned(16)));
......@@ -1365,7 +1365,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
/* Hit or miss? */
if (r2 < hj * hj * kernel_gamma2) {
#ifndef WITH_BAD_VECTORIZATION
#ifndef WITH_VECTORIZATION
IACT_NONSYM(r2, dx, hj, hi, pj, pi);
......@@ -1421,7 +1421,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
/* Hit or miss? */
if (r2 < hig2 || doj) {
#ifndef WITH_BAD_VECTORIZATION
#ifndef WITH_VECTORIZATION
/* Which parts need to be updated? */
if (r2 < hig2 && doj)
......@@ -1504,7 +1504,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
} /* loop over all particles. */
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
/* Pick up any leftovers. */
if (icount1 > 0)
for (int k = 0; k < icount1; k++)
......@@ -1529,7 +1529,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
const struct engine *e = r->e;
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
int icount1 = 0;
float r2q1[VEC_SIZE] __attribute__((aligned(16)));
float hiq1[VEC_SIZE] __attribute__((aligned(16)));
......@@ -1600,7 +1600,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
/* Hit or miss? */
if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) {
#ifndef WITH_BAD_VECTORIZATION
#ifndef WITH_VECTORIZATION
IACT_NONSYM(r2, dx, hj, hi, pj, pi);
......@@ -1654,7 +1654,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
/* Hit or miss? */
if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) {
#ifndef WITH_BAD_VECTORIZATION
#ifndef WITH_VECTORIZATION
/* Does pj need to be updated too? */
if (part_is_active(pj, e))
......@@ -1712,7 +1712,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
} /* loop over all particles. */
#ifdef WITH_BAD_VECTORIZATION
#ifdef WITH_VECTORIZATION
/* Pick up any leftovers. */
if (icount1 > 0)
for (int k = 0; k < icount1; k++)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment