Commit 0d8d60f5 authored by James Willis's avatar James Willis
Browse files

Removed code that is not required in the master branch.

parent 64ca63cb
......@@ -275,56 +275,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
pi->density.rot_v[2] += fac * curlvr[2];
}
/**
 * @brief Density interaction (non-symmetric, scalar arguments).
 *
 * Accumulates the contribution of one neighbour j into the running sums of
 * particle i. All inputs are passed by value and all results are added to
 * (or subtracted from) the values behind the output pointers.
 *
 * @param r2 Squared separation of the pair.
 * @param hig2 Threshold on r2; pairs with r2 not below it contribute nothing.
 * @param dx,dy,dz Components of the separation vector.
 * @param h_inv Factor multiplied with r to form the kernel argument.
 * @param hj Not used by this function.
 * @param vi_x,vi_y,vi_z Velocity components of particle i.
 * @param vj_x,vj_y,vj_z Velocity components of particle j.
 * @param mj Mass of particle j.
 * @param rho,rho_dh,wcount,wcount_dh,div_v,curl_vx,curl_vy,curl_vz
 *        Accumulators updated in place.
 */
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_density_jsw(
    const float r2, const float hig2, const float dx, const float dy,
    const float dz, const float h_inv, const float hj, const float vi_x,
    const float vi_y, const float vi_z, const float vj_x, const float vj_y,
    const float vj_z, const float mj, float *const restrict rho,
    float *const restrict rho_dh, float *const restrict wcount,
    float *const restrict wcount_dh, float *const restrict div_v,
    float *const restrict curl_vx, float *const restrict curl_vy,
    float *const restrict curl_vz) {

  /* Written as a negated '<' so that a NaN r2 is skipped as well. */
  if (!(r2 < hig2)) return;

  /* Separation and its inverse. */
  const float dist = sqrtf(r2);
  const float inv_dist = 1.0f / dist;

  /* Kernel evaluation at u = r * h_inv; kernel_deval is defined elsewhere
   * and fills both outputs (presumably the kernel and its derivative). */
  const float u = dist * h_inv;
  float w, dw;
  kernel_deval(u, &w, &dw);

  /* Common prefactor of the velocity divergence and curl terms. */
  const float weight = mj * dw * inv_dist;

  /* Relative velocity of i with respect to j. */
  const float rel_vx = vi_x - vj_x;
  const float rel_vy = vi_y - vj_y;
  const float rel_vz = vi_z - vj_z;

  /* Velocity divergence: dv . dr */
  const float dv_dot_dr = rel_vx * dx + rel_vy * dy + rel_vz * dz;
  *div_v -= weight * dv_dot_dr;

  /* Density and its h-derivative term. NOTE(review): the factor 3.0f is
   * hard-coded here, whereas the vectorised variants use hydro_dimension. */
  *rho += mj * w;
  *rho_dh -= mj * (3.0f * w + u * dw);

  /* Neighbour count and its h-derivative term. */
  *wcount += w;
  *wcount_dh -= u * dw;

  /* Velocity curl: dv x dr */
  const float cross_x = rel_vy * dz - rel_vz * dy;
  const float cross_y = rel_vz * dx - rel_vx * dz;
  const float cross_z = rel_vx * dy - rel_vy * dx;
  *curl_vx += weight * cross_x;
  *curl_vy += weight * cross_y;
  *curl_vz += weight * cross_z;
}
/**
* @brief Density loop (non-symmetric vectorized version)
*/
......@@ -425,8 +375,13 @@ runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
}
#ifdef WITH_VECTORIZATION
/**
* @brief Density interaction computed using 1 vector
* (non-symmetric vectorized version).
*/
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_intrinsic_vec_density(
runner_iact_nonsym_1_vec_density(
vector *r2, vector *dx, vector *dy, vector *dz, vector hi_inv, vector vix,
vector viy, vector viz, float *Vjx, float *Vjy, float *Vjz, float *Mj,
vector *rhoSum, vector *rho_dhSum, vector *wcountSum, vector *wcount_dhSum,
......@@ -518,220 +473,6 @@ runner_iact_nonsym_intrinsic_vec_density(
#endif
}
/**
 * @brief Density interaction (non-symmetric, vectorised) that gathers the
 * j-particle data from a cache through an index list.
 *
 * Mass and velocity of the neighbours are read from cj_cache at the
 * positions given by indices[0..7]; r2, dx, dy and dz are supplied by the
 * caller as vectors. The contributions are accumulated into the *Sum
 * vectors, restricted to the selected lanes: knlMask is used when
 * HAVE_AVX512_F is defined, mask otherwise.
 *
 * NOTE(review): exactly eight indices are gathered per call, which assumes
 * vec_set() takes eight lanes -- confirm for wider vector types.
 */
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_intrinsic_vec_2_density(
const struct cache *const cj_cache, const int *const indices, vector *r2,
vector *dx, vector *dy, vector *dz, vector hi_inv, vector vix, vector viy,
vector viz, vector *rhoSum, vector *rho_dhSum, vector *wcountSum,
vector *wcount_dhSum, vector *div_vSum, vector *curlvxSum,
vector *curlvySum, vector *curlvzSum, vector mask, int knlMask) {
/* r2 is a parameter of this function, so it is not declared locally. */
vector r, ri, xi, wi, wi_dx;
vector mj;
/* dx, dy and dz are parameters as well; only the velocity differences are
 * local. */
vector dvx, dvy, dvz;
vector vjx, vjy, vjz;
vector dvdr;
vector curlvrx, curlvry, curlvrz;
/* Gather mass and velocity of the eight indexed neighbours from the cache. */
mj.v = vec_set(cj_cache->m[indices[0]], cj_cache->m[indices[1]],
cj_cache->m[indices[2]], cj_cache->m[indices[3]],
cj_cache->m[indices[4]], cj_cache->m[indices[5]],
cj_cache->m[indices[6]], cj_cache->m[indices[7]]);
vjx.v = vec_set(cj_cache->vx[indices[0]], cj_cache->vx[indices[1]],
cj_cache->vx[indices[2]], cj_cache->vx[indices[3]],
cj_cache->vx[indices[4]], cj_cache->vx[indices[5]],
cj_cache->vx[indices[6]], cj_cache->vx[indices[7]]);
vjy.v = vec_set(cj_cache->vy[indices[0]], cj_cache->vy[indices[1]],
cj_cache->vy[indices[2]], cj_cache->vy[indices[3]],
cj_cache->vy[indices[4]], cj_cache->vy[indices[5]],
cj_cache->vy[indices[6]], cj_cache->vy[indices[7]]);
vjz.v = vec_set(cj_cache->vz[indices[0]], cj_cache->vz[indices[1]],
cj_cache->vz[indices[2]], cj_cache->vz[indices[3]],
cj_cache->vz[indices[4]], cj_cache->vz[indices[5]],
cj_cache->vz[indices[6]], cj_cache->vz[indices[7]]);
/* Get the radius and inverse radius: ri = 1/sqrt(r2), r = r2 * ri. */
ri = vec_reciprocal_sqrt(*r2);
r.v = vec_mul(r2->v, ri.v);
xi.v = vec_mul(r.v, hi_inv.v);
/* Evaluate the kernel for this single vector of neighbours;
 * kernel_deval_1_vec (defined elsewhere) fills wi and wi_dx. */
kernel_deval_1_vec(&xi, &wi, &wi_dx);
/* Compute dv. */
dvx.v = vec_sub(vix.v, vjx.v);
dvy.v = vec_sub(viy.v, vjy.v);
dvz.v = vec_sub(viz.v, vjz.v);
/* Compute dv dot r, then scale by 1/r. */
dvdr.v = vec_fma(dvx.v, dx->v, vec_fma(dvy.v, dy->v, vec_mul(dvz.v, dz->v)));
dvdr.v = vec_mul(dvdr.v, ri.v);
/* Compute dv cross r; the subtraction is expressed as an fma with the
 * second product multiplied by -1. Each component is then scaled by 1/r. */
curlvrx.v =
vec_fma(dvy.v, dz->v, vec_mul(vec_set1(-1.0f), vec_mul(dvz.v, dy->v)));
curlvry.v =
vec_fma(dvz.v, dx->v, vec_mul(vec_set1(-1.0f), vec_mul(dvx.v, dz->v)));
curlvrz.v =
vec_fma(dvx.v, dy->v, vec_mul(vec_set1(-1.0f), vec_mul(dvy.v, dx->v)));
curlvrx.v = vec_mul(curlvrx.v, ri.v);
curlvry.v = vec_mul(curlvry.v, ri.v);
curlvrz.v = vec_mul(curlvrz.v, ri.v);
/* Mask updates to intermediate vector sums for particle pi. */
#ifdef HAVE_AVX512_F
/* Masked add/sub: the running sum is also passed as the pass-through source,
 * so lanes not selected by knlMask keep their previous value. */
rhoSum->v =
_mm512_mask_add_ps(rhoSum->v, knlMask, vec_mul(mj.v, wi.v), rhoSum->v);
rho_dhSum->v =
_mm512_mask_sub_ps(rho_dhSum->v, knlMask, rho_dhSum->v,
vec_mul(mj.v, vec_fma(vec_set1(hydro_dimension), wi.v,
vec_mul(xi.v, wi_dx.v))));
wcountSum->v = _mm512_mask_add_ps(wcountSum->v, knlMask, wi.v, wcountSum->v);
wcount_dhSum->v = _mm512_mask_sub_ps(wcount_dhSum->v, knlMask,
wcount_dhSum->v, vec_mul(xi.v, wi_dx.v));
div_vSum->v = _mm512_mask_sub_ps(div_vSum->v, knlMask, div_vSum->v,
vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v)));
curlvxSum->v = _mm512_mask_add_ps(curlvxSum->v, knlMask,
vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v)),
curlvxSum->v);
curlvySum->v = _mm512_mask_add_ps(curlvySum->v, knlMask,
vec_mul(mj.v, vec_mul(curlvry.v, wi_dx.v)),
curlvySum->v);
curlvzSum->v = _mm512_mask_add_ps(curlvzSum->v, knlMask,
vec_mul(mj.v, vec_mul(curlvrz.v, wi_dx.v)),
curlvzSum->v);
#else
/* Each contribution is combined with mask.v through vec_and before being
 * accumulated (presumably zeroing the lanes that must not contribute). */
rhoSum->v += vec_and(vec_mul(mj.v, wi.v), mask.v);
rho_dhSum->v -= vec_and(vec_mul(mj.v, vec_fma(vec_set1(hydro_dimension), wi.v,
vec_mul(xi.v, wi_dx.v))),
mask.v);
wcountSum->v += vec_and(wi.v, mask.v);
wcount_dhSum->v -= vec_and(vec_mul(xi.v, wi_dx.v), mask.v);
div_vSum->v -= vec_and(vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v)), mask.v);
curlvxSum->v += vec_and(vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v)), mask.v);
curlvySum->v += vec_and(vec_mul(mj.v, vec_mul(curlvry.v, wi_dx.v)), mask.v);
curlvzSum->v += vec_and(vec_mul(mj.v, vec_mul(curlvrz.v, wi_dx.v)), mask.v);
#endif
}
/**
* @brief Density interaction computed using 2 interleaved vectors
* (non-symmetric vectorized version).
*/
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_1_vec_density(
float *R2, float *Dx, float *Dy, float *Dz, vector hi_inv, vector vix,
vector viy, vector viz, float *Vjx, float *Vjy, float *Vjz, float *Mj,
vector *rhoSum, vector *rho_dhSum, vector *wcountSum, vector *wcount_dhSum,
vector *div_vSum, vector *curlvxSum, vector *curlvySum, vector *curlvzSum,
vector mask, vector mask2, int knlMask, int knlMask2) {
/* This variant processes a single vector of neighbours: every input array
 * (R2, Dx, Dy, Dz, Vjx, Vjy, Vjz, Mj) is loaded once with vec_load and the
 * contributions are accumulated into the *Sum vectors for the selected
 * lanes (knlMask when HAVE_AVX512_F is defined, mask otherwise).
 * NOTE(review): mask2 and knlMask2 are never read in this body. */
vector r, ri, r2, xi, wi, wi_dx;
vector mj;
vector dx, dy, dz, dvx, dvy, dvz;
vector vjx, vjy, vjz;
vector dvdr;
vector curlvrx, curlvry, curlvrz;
/* Fill the vectors. */
mj.v = vec_load(Mj);
vjx.v = vec_load(Vjx);
vjy.v = vec_load(Vjy);
vjz.v = vec_load(Vjz);
dx.v = vec_load(Dx);
dy.v = vec_load(Dy);
dz.v = vec_load(Dz);
/* Get the radius and inverse radius: ri = 1/sqrt(r2), r = r2 * ri. */
r2.v = vec_load(R2);
ri = vec_reciprocal_sqrt(r2);
r.v = vec_mul(r2.v, ri.v);
xi.v = vec_mul(r.v, hi_inv.v);
/* Evaluate the kernel for this single vector of neighbours;
 * kernel_deval_1_vec (defined elsewhere) fills wi and wi_dx. */
kernel_deval_1_vec(&xi, &wi, &wi_dx);
/* Compute dv. */
dvx.v = vec_sub(vix.v, vjx.v);
dvy.v = vec_sub(viy.v, vjy.v);
dvz.v = vec_sub(viz.v, vjz.v);
/* Compute dv dot r, then scale by 1/r. */
dvdr.v = vec_fma(dvx.v, dx.v, vec_fma(dvy.v, dy.v, vec_mul(dvz.v, dz.v)));
dvdr.v = vec_mul(dvdr.v, ri.v);
/* Compute dv cross r; the subtraction is expressed as an fma with the
 * second product multiplied by -1. Each component is then scaled by 1/r. */
curlvrx.v =
vec_fma(dvy.v, dz.v, vec_mul(vec_set1(-1.0f), vec_mul(dvz.v, dy.v)));
curlvry.v =
vec_fma(dvz.v, dx.v, vec_mul(vec_set1(-1.0f), vec_mul(dvx.v, dz.v)));
curlvrz.v =
vec_fma(dvx.v, dy.v, vec_mul(vec_set1(-1.0f), vec_mul(dvy.v, dx.v)));
curlvrx.v = vec_mul(curlvrx.v, ri.v);
curlvry.v = vec_mul(curlvry.v, ri.v);
curlvrz.v = vec_mul(curlvrz.v, ri.v);
/* Mask updates to intermediate vector sums for particle pi. */
#ifdef HAVE_AVX512_F
/* Masked add/sub: the running sum is also passed as the pass-through source,
 * so lanes not selected by knlMask keep their previous value. */
rhoSum->v =
_mm512_mask_add_ps(rhoSum->v, knlMask, vec_mul(mj.v, wi.v), rhoSum->v);
rho_dhSum->v =
_mm512_mask_sub_ps(rho_dhSum->v, knlMask, rho_dhSum->v,
vec_mul(mj.v, vec_fma(vec_set1(hydro_dimension), wi.v,
vec_mul(xi.v, wi_dx.v))));
wcountSum->v = _mm512_mask_add_ps(wcountSum->v, knlMask, wi.v, wcountSum->v);
wcount_dhSum->v = _mm512_mask_sub_ps(wcount_dhSum->v, knlMask,
wcount_dhSum->v, vec_mul(xi.v, wi_dx.v));
div_vSum->v = _mm512_mask_sub_ps(div_vSum->v, knlMask, div_vSum->v,
vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v)));
curlvxSum->v = _mm512_mask_add_ps(curlvxSum->v, knlMask,
vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v)),
curlvxSum->v);
curlvySum->v = _mm512_mask_add_ps(curlvySum->v, knlMask,
vec_mul(mj.v, vec_mul(curlvry.v, wi_dx.v)),
curlvySum->v);
curlvzSum->v = _mm512_mask_add_ps(curlvzSum->v, knlMask,
vec_mul(mj.v, vec_mul(curlvrz.v, wi_dx.v)),
curlvzSum->v);
#else
/* Each contribution is combined with mask.v through vec_and before being
 * accumulated (presumably zeroing the lanes that must not contribute). */
rhoSum->v += vec_and(vec_mul(mj.v, wi.v), mask.v);
rho_dhSum->v -= vec_and(vec_mul(mj.v, vec_fma(vec_set1(hydro_dimension), wi.v,
vec_mul(xi.v, wi_dx.v))),
mask.v);
wcountSum->v += vec_and(wi.v, mask.v);
wcount_dhSum->v -= vec_and(vec_mul(xi.v, wi_dx.v), mask.v);
div_vSum->v -= vec_and(vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v)), mask.v);
curlvxSum->v += vec_and(vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v)), mask.v);
curlvySum->v += vec_and(vec_mul(mj.v, vec_mul(curlvry.v, wi_dx.v)), mask.v);
curlvzSum->v += vec_and(vec_mul(mj.v, vec_mul(curlvrz.v, wi_dx.v)), mask.v);
#endif
}
#endif
#ifdef WITH_VECTORIZATION
/**
* @brief Density interaction computed using 2 interleaved vectors
* (non-symmetric vectorized version).
......
This diff is collapsed.
......@@ -39,7 +39,5 @@ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c);
void runner_doself1_density_vec_2(struct runner *r, struct cell *restrict c);
void runner_dopair1_density_vec(struct runner *r, struct cell *restrict ci,
struct cell *restrict cj);
void runner_dopair1_density_vec_2(struct runner *r, struct cell *restrict ci,
struct cell *restrict cj);
#endif /* SWIFT_RUNNER_VEC_H */
......@@ -23,8 +23,6 @@
/* Have I already read this file? */
#ifndef VEC_MACRO
/*TODO: Tidy this file up with comments.*/
#include "../config.h"
#ifdef WITH_VECTORIZATION
......
......@@ -34,7 +34,9 @@
#if defined(WITH_VECTORIZATION)
#define DOSELF1 runner_doself1_density_vec
#define DOPAIR1 runner_dopair1_density_vec
#define DOSELF1_NAME "runner_doself1_density_vec"
#define DOPAIR1_NAME "runner_dopair1_density_vec"
#endif
#ifndef DOSELF1
......@@ -42,41 +44,6 @@
#define DOSELF1_NAME "runner_doself1_density"
#endif
#if defined(WITH_VECTORIZATION) && defined(DOPAIR1_VEC)
#define DOPAIR1 runner_dopair1_density_vec
#define DOPAIR1_NAME "runner_dopair1_density_vec"
#endif
#if defined(WITH_VECTORIZATION) && defined(DOPAIR1_VEC_1)
#define DOPAIR1 runner_dopair1_density_vec_1
#define DOPAIR1_NAME "runner_dopair1_density_vec_1"
#endif
#if defined(WITH_VECTORIZATION) && defined(DOPAIR1_VEC_2)
#define DOPAIR1 runner_dopair1_density_vec_2
#define DOPAIR1_NAME "runner_dopair1_density_vec_2"
#endif
#if defined(WITH_VECTORIZATION) && defined(DOPAIR1_VEC_3)
#define DOPAIR1 runner_dopair1_density_vec_3
#define DOPAIR1_NAME "runner_dopair1_density_vec_3"
#endif
#if defined(WITH_VECTORIZATION) && defined(DOPAIR1_VEC_4)
#define DOPAIR1 runner_dopair1_density_vec_4
#define DOPAIR1_NAME "runner_dopair1_density_vec_4"
#endif
#if defined(WITH_VECTORIZATION) && defined(DOPAIR1_AUTO_VEC)
#define DOPAIR1 runner_dopair1_density_auto_vec
#define DOPAIR1_NAME "runner_dopair1_density_auto_vec"
#endif
#if defined(DOPAIR1_NOSORT_JSW)
#define DOPAIR1 runner_dopair1_nosort_density
#define DOPAIR1_NAME "runner_dopair1_nosort_density"
#endif
#ifndef DOPAIR1
#define DOPAIR1 runner_dopair1_density
#define DOPAIR1_NAME "runner_dopair1_density"
......@@ -334,23 +301,11 @@ int check_results(struct part *serial_parts, struct part *vec_parts, int count,
}
/* Just a forward declaration... */
void runner_doself1_density(struct runner *r, struct cell *ci);
void runner_doself1_density_vec(struct runner *r, struct cell *ci);
void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj);
void runner_dopair1_nosort_density(struct runner *r, struct cell *ci,
struct cell *cj);
void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
struct cell *cj);
void runner_dopair1_density_vec_1(struct runner *r, struct cell *ci,
struct cell *cj);
void runner_dopair1_density_vec_2(struct runner *r, struct cell *ci,
struct cell *cj);
void runner_dopair1_density_vec_3(struct runner *r, struct cell *ci,
struct cell *cj);
void runner_dopair1_density_vec_4(struct runner *r, struct cell *ci,
struct cell *cj);
void runner_dopair1_density_auto_vec(struct runner *r, struct cell *ci,
struct cell *cj);
void runner_doself1_density(struct runner *r, struct cell *ci);
void runner_doself1_density_vec(struct runner *r, struct cell *ci);
/* And go... */
int main(int argc, char *argv[]) {
......@@ -492,8 +447,6 @@ int main(int argc, char *argv[]) {
cache_init(&runner.ci_cache, 512);
runner.cj_cache.count = 0;
cache_init(&runner.cj_cache, 512);
// cj_cache.count = 0;
// cache_init(&cj_cache, 512);
#endif
/* Run all the pairs */
......@@ -582,9 +535,9 @@ int main(int argc, char *argv[]) {
dump_particle_fields(outputFileName, main_cell, cells);
/* Check serial results against the vectorised results. */
// if (check_results(main_cell->parts, vec_parts, main_cell->count,
// threshold))
// message("Differences found...");
if (check_results(main_cell->parts, vec_parts, main_cell->count,
threshold))
message("Differences found...");
/* Output timing */
message("Brute force calculation took : %15lli ticks.", toc - tic);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment