Commit bebc572a authored by James Willis's avatar James Willis
Browse files

Created an inline function to perform vector loads.

parent ea3ac791
......@@ -170,17 +170,15 @@ runner_iact_nonsym_1_vec_density(vector *r2, vector *dx, vector *dy, vector *dz,
mask_t mask) {
vector r, ri, ui, wi, wi_dx;
vector mj;
vector dvx, dvy, dvz;
vector vjx, vjy, vjz;
vector dvdr;
vector curlvrx, curlvry, curlvrz;
/* Fill the vectors. */
mj.v = vec_load(Mj);
vjx.v = vec_load(Vjx);
vjy.v = vec_load(Vjy);
vjz.v = vec_load(Vjz);
const vector mj = vector_load(Mj);
const vector vjx = vector_load(Vjx);
const vector vjy = vector_load(Vjy);
const vector vjz = vector_load(Vjz);
/* Get the radius and inverse radius. */
ri = vec_reciprocal_sqrt(*r2);
......@@ -245,38 +243,34 @@ runner_iact_nonsym_2_vec_density(float *R2, float *Dx, float *Dy, float *Dz,
vector *curlvySum, vector *curlvzSum,
mask_t mask, mask_t mask2, short mask_cond) {
vector r, ri, r2, ui, wi, wi_dx;
vector mj;
vector dx, dy, dz, dvx, dvy, dvz;
vector vjx, vjy, vjz;
vector r, ri, ui, wi, wi_dx;
vector dvx, dvy, dvz;
vector dvdr;
vector curlvrx, curlvry, curlvrz;
vector r_2, ri2, r2_2, ui2, wi2, wi_dx2;
vector mj2;
vector dx2, dy2, dz2, dvx2, dvy2, dvz2;
vector vjx2, vjy2, vjz2;
vector r_2, ri2, ui2, wi2, wi_dx2;
vector dvx2, dvy2, dvz2;
vector dvdr2;
vector curlvrx2, curlvry2, curlvrz2;
/* Fill the vectors. */
mj.v = vec_load(Mj);
mj2.v = vec_load(&Mj[VEC_SIZE]);
vjx.v = vec_load(Vjx);
vjx2.v = vec_load(&Vjx[VEC_SIZE]);
vjy.v = vec_load(Vjy);
vjy2.v = vec_load(&Vjy[VEC_SIZE]);
vjz.v = vec_load(Vjz);
vjz2.v = vec_load(&Vjz[VEC_SIZE]);
dx.v = vec_load(Dx);
dx2.v = vec_load(&Dx[VEC_SIZE]);
dy.v = vec_load(Dy);
dy2.v = vec_load(&Dy[VEC_SIZE]);
dz.v = vec_load(Dz);
dz2.v = vec_load(&Dz[VEC_SIZE]);
const vector mj = vector_load(Mj);
const vector mj2 = vector_load(&Mj[VEC_SIZE]);
const vector vjx = vector_load(Vjx);
const vector vjx2 = vector_load(&Vjx[VEC_SIZE]);
const vector vjy = vector_load(Vjy);
const vector vjy2 = vector_load(&Vjy[VEC_SIZE]);
const vector vjz = vector_load(Vjz);
const vector vjz2 = vector_load(&Vjz[VEC_SIZE]);
const vector dx = vector_load(Dx);
const vector dx2 = vector_load(&Dx[VEC_SIZE]);
const vector dy = vector_load(Dy);
const vector dy2 = vector_load(&Dy[VEC_SIZE]);
const vector dz = vector_load(Dz);
const vector dz2 = vector_load(&Dz[VEC_SIZE]);
/* Get the radius and inverse radius. */
r2.v = vec_load(R2);
r2_2.v = vec_load(&R2[VEC_SIZE]);
const vector r2 = vector_load(R2);
const vector r2_2 = vector_load(&R2[VEC_SIZE]);
ri = vec_reciprocal_sqrt(r2);
ri2 = vec_reciprocal_sqrt(r2_2);
r.v = vec_mul(r2.v, ri.v);
......@@ -592,8 +586,7 @@ runner_iact_nonsym_1_vec_force(
#ifdef WITH_VECTORIZATION
vector r, ri;
vector vjx, vjy, vjz, dvx, dvy, dvz;
vector pjrho, grad_hj, pjPOrho2, balsara_j, cj, mj;
vector dvx, dvy, dvz;
vector xi, xj;
vector hid_inv, hjd_inv;
vector wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
......@@ -604,16 +597,15 @@ runner_iact_nonsym_1_vec_force(
vector rho_ij, visc, visc_term, sph_term, acc, entropy_dt;
/* Fill vectors. */
vjx.v = vec_load(Vjx);
vjy.v = vec_load(Vjy);
vjz.v = vec_load(Vjz);
mj.v = vec_load(Mj);
pjrho.v = vec_load(Pjrho);
grad_hj.v = vec_load(Grad_hj);
pjPOrho2.v = vec_load(PjPOrho2);
balsara_j.v = vec_load(Balsara_j);
cj.v = vec_load(Cj);
const vector vjx = vector_load(Vjx);
const vector vjy = vector_load(Vjy);
const vector vjz = vector_load(Vjz);
const vector mj = vector_load(Mj);
const vector pjrho = vector_load(Pjrho);
const vector grad_hj = vector_load(Grad_hj);
const vector pjPOrho2 = vector_load(PjPOrho2);
const vector balsara_j = vector_load(Balsara_j);
const vector cj = vector_load(Cj);
fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */
......@@ -720,10 +712,8 @@ runner_iact_nonsym_2_vec_force(
#ifdef WITH_VECTORIZATION
vector r, r2, ri;
vector dx, dy, dz, dvx, dvy, dvz;
vector vjx, vjy, vjz;
vector pjrho, grad_hj, pjPOrho2, balsara_j, cj, mj, hj_inv;
vector r, ri;
vector dvx, dvy, dvz;
vector ui, uj;
vector hid_inv, hjd_inv;
vector wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
......@@ -733,10 +723,8 @@ runner_iact_nonsym_2_vec_force(
vector omega_ij, mu_ij, fac_mu, balsara;
vector rho_ij, visc, visc_term, sph_term, acc, entropy_dt;
vector r_2, r2_2, ri_2;
vector dx_2, dy_2, dz_2, dvx_2, dvy_2, dvz_2;
vector vjx_2, vjy_2, vjz_2;
vector pjrho_2, grad_hj_2, pjPOrho2_2, balsara_j_2, cj_2, mj_2, hj_inv_2;
vector r_2, ri_2;
vector dvx_2, dvy_2, dvz_2;
vector ui_2, uj_2;
vector hjd_inv_2;
vector wi_dx_2, wj_dx_2, wi_dr_2, wj_dr_2, dvdr_2;
......@@ -747,42 +735,42 @@ runner_iact_nonsym_2_vec_force(
vector rho_ij_2, visc_2, visc_term_2, sph_term_2, acc_2, entropy_dt_2;
/* Fill vectors. */
mj.v = vec_load(Mj);
mj_2.v = vec_load(&Mj[VEC_SIZE]);
vjx.v = vec_load(Vjx);
vjx_2.v = vec_load(&Vjx[VEC_SIZE]);
vjy.v = vec_load(Vjy);
vjy_2.v = vec_load(&Vjy[VEC_SIZE]);
vjz.v = vec_load(Vjz);
vjz_2.v = vec_load(&Vjz[VEC_SIZE]);
dx.v = vec_load(Dx);
dx_2.v = vec_load(&Dx[VEC_SIZE]);
dy.v = vec_load(Dy);
dy_2.v = vec_load(&Dy[VEC_SIZE]);
dz.v = vec_load(Dz);
dz_2.v = vec_load(&Dz[VEC_SIZE]);
const vector mj = vector_load(Mj);
const vector mj_2 = vector_load(&Mj[VEC_SIZE]);
const vector vjx = vector_load(Vjx);
const vector vjx_2 = vector_load(&Vjx[VEC_SIZE]);
const vector vjy = vector_load(Vjy);
const vector vjy_2 = vector_load(&Vjy[VEC_SIZE]);
const vector vjz = vector_load(Vjz);
const vector vjz_2 = vector_load(&Vjz[VEC_SIZE]);
const vector dx = vector_load(Dx);
const vector dx_2 = vector_load(&Dx[VEC_SIZE]);
const vector dy = vector_load(Dy);
const vector dy_2 = vector_load(&Dy[VEC_SIZE]);
const vector dz = vector_load(Dz);
const vector dz_2 = vector_load(&Dz[VEC_SIZE]);
/* Get the radius and inverse radius. */
r2.v = vec_load(R2);
r2_2.v = vec_load(&R2[VEC_SIZE]);
const vector r2 = vector_load(R2);
const vector r2_2 = vector_load(&R2[VEC_SIZE]);
ri = vec_reciprocal_sqrt(r2);
ri_2 = vec_reciprocal_sqrt(r2_2);
r.v = vec_mul(r2.v, ri.v);
r_2.v = vec_mul(r2_2.v, ri_2.v);
/* Get remaining properties. */
pjrho.v = vec_load(Pjrho);
pjrho_2.v = vec_load(&Pjrho[VEC_SIZE]);
grad_hj.v = vec_load(Grad_hj);
grad_hj_2.v = vec_load(&Grad_hj[VEC_SIZE]);
pjPOrho2.v = vec_load(PjPOrho2);
pjPOrho2_2.v = vec_load(&PjPOrho2[VEC_SIZE]);
balsara_j.v = vec_load(Balsara_j);
balsara_j_2.v = vec_load(&Balsara_j[VEC_SIZE]);
cj.v = vec_load(Cj);
cj_2.v = vec_load(&Cj[VEC_SIZE]);
hj_inv.v = vec_load(Hj_inv);
hj_inv_2.v = vec_load(&Hj_inv[VEC_SIZE]);
const vector pjrho = vector_load(Pjrho);
const vector pjrho_2 = vector_load(&Pjrho[VEC_SIZE]);
const vector grad_hj = vector_load(Grad_hj);
const vector grad_hj_2 = vector_load(&Grad_hj[VEC_SIZE]);
const vector pjPOrho2 = vector_load(PjPOrho2);
const vector pjPOrho2_2 = vector_load(&PjPOrho2[VEC_SIZE]);
const vector balsara_j = vector_load(Balsara_j);
const vector balsara_j_2 = vector_load(&Balsara_j[VEC_SIZE]);
const vector cj = vector_load(Cj);
const vector cj_2 = vector_load(&Cj[VEC_SIZE]);
const vector hj_inv = vector_load(Hj_inv);
const vector hj_inv_2 = vector_load(&Hj_inv[VEC_SIZE]);
fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */
......
......@@ -615,21 +615,18 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
}
}
vector v_pjx, v_pjy, v_pjz;
vector v_pjx2, v_pjy2, v_pjz2;
/* Find all of particle pi's interacions and store needed values in the
* secondary cache.*/
for (int pjd = 0; pjd < count_align; pjd += (num_vec_proc * VEC_SIZE)) {
/* Load 2 sets of vectors from the particle cache. */
v_pjx.v = vec_load(&cell_cache->x[pjd]);
v_pjy.v = vec_load(&cell_cache->y[pjd]);
v_pjz.v = vec_load(&cell_cache->z[pjd]);
const vector v_pjx = vector_load(&cell_cache->x[pjd]);
const vector v_pjy = vector_load(&cell_cache->y[pjd]);
const vector v_pjz = vector_load(&cell_cache->z[pjd]);
v_pjx2.v = vec_load(&cell_cache->x[pjd + VEC_SIZE]);
v_pjy2.v = vec_load(&cell_cache->y[pjd + VEC_SIZE]);
v_pjz2.v = vec_load(&cell_cache->z[pjd + VEC_SIZE]);
const vector v_pjx2 = vector_load(&cell_cache->x[pjd + VEC_SIZE]);
const vector v_pjy2 = vector_load(&cell_cache->y[pjd + VEC_SIZE]);
const vector v_pjz2 = vector_load(&cell_cache->z[pjd + VEC_SIZE]);
/* Compute the pairwise distance. */
vector v_dx, v_dy, v_dz;
......@@ -848,11 +845,11 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
for (int pjd = 0; pjd < count_align; pjd += (num_vec_proc * VEC_SIZE)) {
/* Load 1 set of vectors from the particle cache. */
vector v_pjx, v_pjy, v_pjz, hj, hjg2;
v_pjx.v = vec_load(&cell_cache->x[pjd]);
v_pjy.v = vec_load(&cell_cache->y[pjd]);
v_pjz.v = vec_load(&cell_cache->z[pjd]);
hj.v = vec_load(&cell_cache->h[pjd]);
vector hjg2;
const vector v_pjx = vector_load(&cell_cache->x[pjd]);
const vector v_pjy = vector_load(&cell_cache->y[pjd]);
const vector v_pjz = vector_load(&cell_cache->z[pjd]);
const vector hj = vector_load(&cell_cache->h[pjd]);
hjg2.v = vec_mul(vec_mul(hj.v, hj.v), kernel_gamma2_vec.v);
/* Compute the pairwise distance. */
......@@ -1094,7 +1091,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
/* Get the cache index to the jth particle. */
const int cj_cache_idx = pjd;
vector v_pjx, v_pjy, v_pjz;
vector v_dx, v_dy, v_dz, v_r2;
#ifdef SWIFT_DEBUG_CHECKS
......@@ -1106,9 +1102,9 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
#endif
/* Load 2 sets of vectors from the particle cache. */
v_pjx.v = vec_load(&cj_cache->x[cj_cache_idx]);
v_pjy.v = vec_load(&cj_cache->y[cj_cache_idx]);
v_pjz.v = vec_load(&cj_cache->z[cj_cache_idx]);
const vector v_pjx = vector_load(&cj_cache->x[cj_cache_idx]);
const vector v_pjy = vector_load(&cj_cache->y[cj_cache_idx]);
const vector v_pjz = vector_load(&cj_cache->z[cj_cache_idx]);
/* Compute the pairwise distance. */
v_dx.v = vec_sub(v_pix.v, v_pjx.v);
......@@ -1224,13 +1220,12 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
}
#endif
vector v_pix, v_piy, v_piz;
vector v_dx, v_dy, v_dz, v_r2;
/* Load 2 sets of vectors from the particle cache. */
v_pix.v = vec_load(&ci_cache->x[ci_cache_idx]);
v_piy.v = vec_load(&ci_cache->y[ci_cache_idx]);
v_piz.v = vec_load(&ci_cache->z[ci_cache_idx]);
const vector v_pix = vector_load(&ci_cache->x[ci_cache_idx]);
const vector v_piy = vector_load(&ci_cache->y[ci_cache_idx]);
const vector v_piz = vector_load(&ci_cache->z[ci_cache_idx]);
/* Compute the pairwise distance. */
v_dx.v = vec_sub(v_pjx.v, v_pix.v);
......@@ -1469,8 +1464,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
/* Get the cache index to the jth particle. */
const int cj_cache_idx = pjd;
vector v_dx, v_dy, v_dz;
vector v_pjx, v_pjy, v_pjz, v_hj, v_hjg2, v_r2;
vector v_dx, v_dy, v_dz, v_r2;
vector v_hjg2;
#ifdef SWIFT_DEBUG_CHECKS
if (cj_cache_idx % VEC_SIZE != 0 || cj_cache_idx < 0 ||
......@@ -1481,10 +1476,10 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
#endif
/* Load 2 sets of vectors from the particle cache. */
v_pjx.v = vec_load(&cj_cache->x[cj_cache_idx]);
v_pjy.v = vec_load(&cj_cache->y[cj_cache_idx]);
v_pjz.v = vec_load(&cj_cache->z[cj_cache_idx]);
v_hj.v = vec_load(&cj_cache->h[cj_cache_idx]);
const vector v_pjx = vector_load(&cj_cache->x[cj_cache_idx]);
const vector v_pjy = vector_load(&cj_cache->y[cj_cache_idx]);
const vector v_pjz = vector_load(&cj_cache->z[cj_cache_idx]);
const vector v_hj = vector_load(&cj_cache->h[cj_cache_idx]);
v_hjg2.v = vec_mul(vec_mul(v_hj.v, v_hj.v), kernel_gamma2_vec.v);
/* Compute the pairwise distance. */
......@@ -1610,14 +1605,14 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
}
#endif
vector v_pix, v_piy, v_piz, v_hi, v_hig2;
vector v_hig2;
vector v_dx, v_dy, v_dz, v_r2;
/* Load 2 sets of vectors from the particle cache. */
v_pix.v = vec_load(&ci_cache->x[ci_cache_idx]);
v_piy.v = vec_load(&ci_cache->y[ci_cache_idx]);
v_piz.v = vec_load(&ci_cache->z[ci_cache_idx]);
v_hi.v = vec_load(&ci_cache->h[ci_cache_idx]);
const vector v_pix = vector_load(&ci_cache->x[ci_cache_idx]);
const vector v_piy = vector_load(&ci_cache->y[ci_cache_idx]);
const vector v_piz = vector_load(&ci_cache->z[ci_cache_idx]);
const vector v_hi = vector_load(&ci_cache->h[ci_cache_idx]);
v_hig2.v = vec_mul(vec_mul(v_hi.v, v_hi.v), kernel_gamma2_vec.v);
/* Compute the pairwise distance. */
......
......@@ -431,6 +431,21 @@ __attribute__((always_inline)) INLINE vector vec_reciprocal_sqrt(vector x) {
return x_inv;
}
/**
* @brief Loads a vector from memory.
*
* @param *x memory to load from.
* @return temp loaded #vector.
*/
__attribute__((always_inline)) INLINE vector vector_load(float *const x) {
vector temp;
temp.v = vec_load(x);
return temp;
}
#else
/* Needed for cache alignment. */
#define VEC_SIZE 8
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment