Commit 0d8e65e0 authored by Matthieu Schaller
Browse files

Merge branch 'fix_vectorisation' into 'master'

Call `runner_doself1_density_vec` in `DOSUB_SELF1` if vectorisation is enabled (`WITH_VECTORIZATION`) and `GADGET2_SPH` is defined.

Changed all `WITH_VECTORIZATION` macros to `WITH_OLD_VECTORIZATION` in `runner_doiact.h` so that Pedro's old vectorised functions are not called.

See merge request !308
parents 4d592c18 3305c5b8
......@@ -112,7 +112,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
error("Don't use in actual runs ! Slow code !");
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16)));
......@@ -167,7 +167,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
/* Hit or miss? */
if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {
#ifndef WITH_VECTORIZATION
#ifndef WITH_OLD_VECTORIZATION
IACT(r2, dx, hi, pj->h, pi, pj);
......@@ -197,7 +197,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
} /* loop over the parts in ci. */
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
/* Pick up any leftovers. */
if (icount > 0)
for (int k = 0; k < icount; k++)
......@@ -213,7 +213,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
error("Don't use in actual runs ! Slow code !");
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16)));
......@@ -256,7 +256,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
/* Hit or miss? */
if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {
#ifndef WITH_VECTORIZATION
#ifndef WITH_OLD_VECTORIZATION
IACT(r2, dx, hi, pj->h, pi, pj);
......@@ -286,7 +286,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
} /* loop over the parts in ci. */
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
/* Pick up any leftovers. */
if (icount > 0)
for (int k = 0; k < icount; k++)
......@@ -315,7 +315,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
error("Don't use in actual runs ! Slow code !");
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16)));
......@@ -365,7 +365,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
/* Hit or miss? */
if (r2 < hig2) {
#ifndef WITH_VECTORIZATION
#ifndef WITH_OLD_VECTORIZATION
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
......@@ -395,7 +395,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
} /* loop over the parts in ci. */
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
/* Pick up any leftovers. */
if (icount > 0)
for (int k = 0; k < icount; k++)
......@@ -422,7 +422,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
struct engine *e = r->e;
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16)));
......@@ -497,7 +497,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
/* Hit or miss? */
if (r2 < hig2) {
#ifndef WITH_VECTORIZATION
#ifndef WITH_OLD_VECTORIZATION
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
......@@ -562,7 +562,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
/* Hit or miss? */
if (r2 < hig2) {
#ifndef WITH_VECTORIZATION
#ifndef WITH_OLD_VECTORIZATION
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
......@@ -593,7 +593,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
} /* loop over the parts in ci. */
}
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
/* Pick up any leftovers. */
if (icount > 0)
for (int k = 0; k < icount; k++)
......@@ -616,7 +616,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
struct part *restrict parts, int *restrict ind, int count) {
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16)));
......@@ -656,7 +656,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
/* Hit or miss? */
if (r2 > 0.0f && r2 < hig2) {
#ifndef WITH_VECTORIZATION
#ifndef WITH_OLD_VECTORIZATION
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
......@@ -686,7 +686,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
} /* loop over the parts in ci. */
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
/* Pick up any leftovers. */
if (icount > 0)
for (int k = 0; k < icount; k++)
......@@ -707,7 +707,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
const struct engine *restrict e = r->e;
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16)));
......@@ -791,7 +791,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
/* Hit or miss? */
if (r2 < hig2) {
#ifndef WITH_VECTORIZATION
#ifndef WITH_OLD_VECTORIZATION
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
......@@ -861,7 +861,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
/* Hit or miss? */
if (r2 < hjg2) {
#ifndef WITH_VECTORIZATION
#ifndef WITH_OLD_VECTORIZATION
IACT_NONSYM(r2, dx, hj, pi->h, pj, pi);
......@@ -891,7 +891,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
} /* loop over the parts in ci. */
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
/* Pick up any leftovers. */
if (icount > 0)
for (int k = 0; k < icount; k++)
......@@ -912,7 +912,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
struct engine *restrict e = r->e;
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
int icount1 = 0;
float r2q1[VEC_SIZE] __attribute__((aligned(16)));
float hiq1[VEC_SIZE] __attribute__((aligned(16)));
......@@ -1035,7 +1035,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
/* Hit or miss? */
if (r2 < hig2) {
#ifndef WITH_VECTORIZATION
#ifndef WITH_OLD_VECTORIZATION
IACT_NONSYM(r2, dx, hj, hi, pj, pi);
......@@ -1094,7 +1094,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
/* Hit or miss? */
if (r2 < hig2) {
#ifndef WITH_VECTORIZATION
#ifndef WITH_OLD_VECTORIZATION
/* Does pj need to be updated too? */
if (part_is_active(pj, e))
......@@ -1195,7 +1195,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
/* Hit or miss? */
if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) {
#ifndef WITH_VECTORIZATION
#ifndef WITH_OLD_VECTORIZATION
IACT_NONSYM(r2, dx, hi, hj, pi, pj);
......@@ -1253,7 +1253,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
/* Hit or miss? */
if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) {
#ifndef WITH_VECTORIZATION
#ifndef WITH_OLD_VECTORIZATION
/* Does pi need to be updated too? */
if (part_is_active(pi, e))
......@@ -1311,7 +1311,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
} /* loop over the parts in ci. */
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
/* Pick up any leftovers. */
if (icount1 > 0)
for (int k = 0; k < icount1; k++)
......@@ -1338,7 +1338,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
const struct engine *e = r->e;
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
int icount1 = 0;
float r2q1[VEC_SIZE] __attribute__((aligned(16)));
float hiq1[VEC_SIZE] __attribute__((aligned(16)));
......@@ -1415,7 +1415,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
/* Hit or miss? */
if (r2 < hj * hj * kernel_gamma2) {
#ifndef WITH_VECTORIZATION
#ifndef WITH_OLD_VECTORIZATION
IACT_NONSYM(r2, dx, hj, hi, pj, pi);
......@@ -1479,7 +1479,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
/* Hit or miss? */
if (r2 < hig2 || doj) {
#ifndef WITH_VECTORIZATION
#ifndef WITH_OLD_VECTORIZATION
/* Which parts need to be updated? */
if (r2 < hig2 && doj)
......@@ -1562,7 +1562,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) {
} /* loop over all particles. */
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
/* Pick up any leftovers. */
if (icount1 > 0)
for (int k = 0; k < icount1; k++)
......@@ -1587,7 +1587,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
const struct engine *e = r->e;
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
int icount1 = 0;
float r2q1[VEC_SIZE] __attribute__((aligned(16)));
float hiq1[VEC_SIZE] __attribute__((aligned(16)));
......@@ -1664,7 +1664,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
/* Hit or miss? */
if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) {
#ifndef WITH_VECTORIZATION
#ifndef WITH_OLD_VECTORIZATION
IACT_NONSYM(r2, dx, hj, hi, pj, pi);
......@@ -1726,7 +1726,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
/* Hit or miss? */
if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) {
#ifndef WITH_VECTORIZATION
#ifndef WITH_OLD_VECTORIZATION
/* Does pj need to be updated too? */
if (part_is_active(pj, e))
......@@ -1784,7 +1784,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) {
} /* loop over all particles. */
#ifdef WITH_VECTORIZATION
#ifdef WITH_OLD_VECTORIZATION
/* Pick up any leftovers. */
if (icount1 > 0)
for (int k = 0; k < icount1; k++)
......@@ -2079,8 +2079,13 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) {
}
/* Otherwise, compute self-interaction. */
else
else {
#if (DOSELF1 == runner_doself1_density) && defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
runner_doself1_density_vec(r, ci);
#else
DOSELF1(r, ci);
#endif
}
if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF);
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment