Commit 25dc403c authored by James Willis's avatar James Willis
Browse files

Formatting.

parent 050c2040
......@@ -433,13 +433,12 @@ runner_iact_nonsym_1_vec_density(vector *r2, vector *dx, vector *dy, vector *dz,
curlvrz.v = vec_mul(curlvrz.v, ri.v);
vector scaleFactor;
scaleFactor.v = vec_fma(vec_set1(hydro_dimension), wi.v,
vec_mul(ui.v, wi_dx.v));
scaleFactor.v =
vec_fma(vec_set1(hydro_dimension), wi.v, vec_mul(ui.v, wi_dx.v));
/* Mask updates to intermediate vector sums for particle pi. */
/* Mask updates to intermediate vector sums for particle pi. */
rhoSum->v = vec_mask_add(rhoSum->v, vec_mul(mj.v, wi.v), mask);
rho_dhSum->v = vec_mask_sub(
rho_dhSum->v, vec_mul(mj.v, scaleFactor.v), mask);
rho_dhSum->v = vec_mask_sub(rho_dhSum->v, vec_mul(mj.v, scaleFactor.v), mask);
wcountSum->v = vec_mask_add(wcountSum->v, wi.v, mask);
wcount_dhSum->v = vec_mask_sub(wcount_dhSum->v, scaleFactor.v, mask);
div_vSum->v =
......@@ -457,11 +456,13 @@ runner_iact_nonsym_1_vec_density(vector *r2, vector *dx, vector *dy, vector *dz,
* (non-symmetric vectorized version).
*/
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_2_vec_density(
float *R2, float *Dx, float *Dy, float *Dz, vector hi_inv, vector vix,
vector viy, vector viz, float *Vjx, float *Vjy, float *Vjz, float *Mj,
vector *rhoSum, vector *rho_dhSum, vector *wcountSum, vector *wcount_dhSum,
vector *div_vSum, vector *curlvxSum, vector *curlvySum, vector *curlvzSum,
runner_iact_nonsym_2_vec_density(float *R2, float *Dx, float *Dy, float *Dz,
vector hi_inv, vector vix, vector viy,
vector viz, float *Vjx, float *Vjy, float *Vjz,
float *Mj, vector *rhoSum, vector *rho_dhSum,
vector *wcountSum, vector *wcount_dhSum,
vector *div_vSum, vector *curlvxSum,
vector *curlvySum, vector *curlvzSum,
mask_t mask, mask_t mask2, short mask_cond) {
vector r, ri, r2, ui, wi, wi_dx;
......@@ -543,20 +544,20 @@ runner_iact_nonsym_2_vec_density(
curlvrz2.v = vec_mul(curlvrz2.v, ri2.v);
vector scaleFactor, scaleFactor2;
scaleFactor.v = vec_fma(vec_set1(hydro_dimension), wi.v,
vec_mul(ui.v, wi_dx.v));
scaleFactor2.v = vec_fma(vec_set1(hydro_dimension), wi2.v,
vec_mul(ui2.v, wi_dx2.v));
scaleFactor.v =
vec_fma(vec_set1(hydro_dimension), wi.v, vec_mul(ui.v, wi_dx.v));
scaleFactor2.v =
vec_fma(vec_set1(hydro_dimension), wi2.v, vec_mul(ui2.v, wi_dx2.v));
/* Mask updates to intermediate vector sums for particle pi. */
/* Mask only when needed. */
if (mask_cond) {
rhoSum->v = vec_mask_add(rhoSum->v, vec_mul(mj.v, wi.v), mask);
rhoSum->v = vec_mask_add(rhoSum->v, vec_mul(mj2.v, wi2.v), mask2);
rho_dhSum->v = vec_mask_sub(
rho_dhSum->v, vec_mul(mj.v, scaleFactor.v), mask);
rho_dhSum->v = vec_mask_sub(
rho_dhSum->v, vec_mul(mj2.v, scaleFactor2.v), mask2);
rho_dhSum->v =
vec_mask_sub(rho_dhSum->v, vec_mul(mj.v, scaleFactor.v), mask);
rho_dhSum->v =
vec_mask_sub(rho_dhSum->v, vec_mul(mj2.v, scaleFactor2.v), mask2);
wcountSum->v = vec_mask_add(wcountSum->v, wi.v, mask);
wcountSum->v = vec_mask_add(wcountSum->v, wi2.v, mask2);
wcount_dhSum->v = vec_mask_sub(wcount_dhSum->v, scaleFactor.v, mask);
......@@ -587,13 +588,20 @@ runner_iact_nonsym_2_vec_density(
wcount_dhSum->v = vec_sub(wcount_dhSum->v, scaleFactor.v);
wcount_dhSum->v = vec_sub(wcount_dhSum->v, scaleFactor2.v);
div_vSum->v = vec_sub(div_vSum->v, vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v)));
div_vSum->v = vec_sub(div_vSum->v, vec_mul(mj2.v, vec_mul(dvdr2.v, wi_dx2.v)));
curlvxSum->v = vec_add(curlvxSum->v, vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v)));
curlvxSum->v = vec_add(curlvxSum->v, vec_mul(mj2.v, vec_mul(curlvrx2.v, wi_dx2.v)));
curlvySum->v = vec_add(curlvySum->v, vec_mul(mj.v, vec_mul(curlvry.v, wi_dx.v)));
curlvySum->v = vec_add(curlvySum->v, vec_mul(mj2.v, vec_mul(curlvry2.v, wi_dx2.v)));
curlvzSum->v = vec_add(curlvzSum->v, vec_mul(mj.v, vec_mul(curlvrz.v, wi_dx.v)));
curlvzSum->v = vec_add(curlvzSum->v, vec_mul(mj2.v, vec_mul(curlvrz2.v, wi_dx2.v)));
div_vSum->v =
vec_sub(div_vSum->v, vec_mul(mj2.v, vec_mul(dvdr2.v, wi_dx2.v)));
curlvxSum->v =
vec_add(curlvxSum->v, vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v)));
curlvxSum->v =
vec_add(curlvxSum->v, vec_mul(mj2.v, vec_mul(curlvrx2.v, wi_dx2.v)));
curlvySum->v =
vec_add(curlvySum->v, vec_mul(mj.v, vec_mul(curlvry.v, wi_dx.v)));
curlvySum->v =
vec_add(curlvySum->v, vec_mul(mj2.v, vec_mul(curlvry2.v, wi_dx2.v)));
curlvzSum->v =
vec_add(curlvzSum->v, vec_mul(mj.v, vec_mul(curlvrz.v, wi_dx.v)));
curlvzSum->v =
vec_add(curlvzSum->v, vec_mul(mj2.v, vec_mul(curlvrz2.v, wi_dx2.v)));
}
}
#endif
......
......@@ -373,7 +373,8 @@ __attribute__((always_inline)) INLINE static void kernel_deval_vec(
/* Load x and get the interval id. */
vector ind;
ind.m = vec_ftoi(vec_fmin(vec_mul(x.v, kernel_ivals_vec.v), kernel_ivals_vec.v));
ind.m =
vec_ftoi(vec_fmin(vec_mul(x.v, kernel_ivals_vec.v), kernel_ivals_vec.v));
/* load the coefficients. */
vector c[kernel_degree + 1];
......@@ -392,8 +393,10 @@ __attribute__((always_inline)) INLINE static void kernel_deval_vec(
}
/* Return everything */
w->v = vec_mul(w->v, vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_vec.v));
dw_dx->v = vec_mul(dw_dx->v, vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_plus_one_vec.v));
w->v =
vec_mul(w->v, vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_vec.v));
dw_dx->v = vec_mul(dw_dx->v, vec_mul(kernel_constant_vec.v,
kernel_gamma_inv_dim_plus_one_vec.v));
}
/* Define constant vectors for the Wendland C2 and Cubic Spline kernel
......@@ -615,14 +618,16 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec(
w2->v = vec_blend(mask_reg2_v2, w2->v, w2_2.v);
dw_dx->v = vec_blend(mask_reg2, dw_dx->v, dw_dx_2.v);
dw_dx2->v = vec_blend(mask_reg2_v2, dw_dx2->v, dw_dx2_2.v);
/* Return everything */
w->v = vec_mul(w->v, vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_vec.v));
w2->v = vec_mul(w2->v, vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_vec.v));
dw_dx->v =
vec_mul(dw_dx->v, vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_plus_one_vec.v));
dw_dx2->v =
vec_mul(dw_dx2->v, vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_plus_one_vec.v));
w->v =
vec_mul(w->v, vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_vec.v));
w2->v = vec_mul(w2->v,
vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_vec.v));
dw_dx->v = vec_mul(dw_dx->v, vec_mul(kernel_constant_vec.v,
kernel_gamma_inv_dim_plus_one_vec.v));
dw_dx2->v = vec_mul(dw_dx2->v, vec_mul(kernel_constant_vec.v,
kernel_gamma_inv_dim_plus_one_vec.v));
#endif
}
......@@ -658,7 +663,7 @@ __attribute__((always_inline)) INLINE static void kernel_eval_W_vec(vector *u,
/* Form a mask for each part of the kernel. */
vec_create_mask(mask_reg2, vec_cmp_gte(x.v, cond.v)); /* 0.5 < x < 1 */
/* Work out w for both regions of the kernel and combine the results together
* using masks. */
......@@ -739,7 +744,7 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_vec(
/* Mask out result for particles that lie outside of the kernel function. */
mask_t mask;
vec_create_mask(mask, vec_cmp_lt(x.v, vec_set1(1.f))); /* x < 1 */
vec_create_mask(mask, vec_cmp_lt(x.v, vec_set1(1.f))); /* x < 1 */
dw_dx->v = vec_and_mask(dw_dx->v, mask);
......@@ -787,9 +792,9 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_force_2_vec(
mask_t mask_reg2_v2;
/* Form a mask for each part of the kernel. */
vec_create_mask(mask_reg2, vec_cmp_gte(x.v, cond.v)); /* 0.5 < x < 1 */
vec_create_mask(mask_reg2, vec_cmp_gte(x.v, cond.v)); /* 0.5 < x < 1 */
vec_create_mask(mask_reg2_v2, vec_cmp_gte(x_2.v, cond.v)); /* 0.5 < x < 1 */
/* Work out w for both regions of the kernel and combine the results together
* using masks. */
......@@ -815,8 +820,8 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_force_2_vec(
/* Mask out result for particles that lie outside of the kernel function. */
mask_t mask, mask_2;
vec_create_mask(mask, vec_cmp_lt(x.v, vec_set1(1.f))); /* x < 1 */
vec_create_mask(mask_2, vec_cmp_lt(x_2.v, vec_set1(1.f))); /* x < 1 */
vec_create_mask(mask, vec_cmp_lt(x.v, vec_set1(1.f))); /* x < 1 */
vec_create_mask(mask_2, vec_cmp_lt(x_2.v, vec_set1(1.f))); /* x < 1 */
dw_dx->v = vec_and_mask(dw_dx->v, mask);
dw_dx_2->v = vec_and_mask(dw_dx_2->v, mask_2);
......
......@@ -83,7 +83,8 @@
#define vec_form_int_mask(a) a
#define vec_and(a, b) _mm512_and_ps(a, b)
#define vec_mask_and(a, b) _mm512_kand(a, b)
#define vec_and_mask(a, mask) _mm512_maskz_expand_ps(mask, a) /* TODO: Alternative needs to be found. */
#define vec_and_mask(a, mask) \
_mm512_maskz_expand_ps(mask, a) /* TODO: Alternative needs to be found. */
#define vec_init_mask(mask) mask = 0xFFFF
#define vec_zero_mask(mask) mask = 0
#define vec_create_mask(mask, cond) mask = cond
......
......@@ -42,13 +42,12 @@
* @param hdfFileName
* @return the basename part of hdfFileName.
*/
static const char *xmf_basename(const char *hdfFileName) {
static const char* xmf_basename(const char* hdfFileName) {
static char buffer[FILENAME_BUFFER_SIZE];
strcpy(buffer, hdfFileName);
return basename(buffer);
}
/**
* @brief Prepare the XMF file corresponding to a snapshot.
*
......@@ -277,7 +276,7 @@ void xmf_write_line(FILE* xmfFile, const char* fileName,
fprintf(xmfFile,
"<DataItem Dimensions=\"%zu %d\" NumberType=\"%s\" "
"Precision=\"%d\" Format=\"HDF\">%s:%s/%s</DataItem>\n",
N, dim, xmf_type(type), xmf_precision(type),
xmf_basename(fileName), partTypeGroupName, name);
N, dim, xmf_type(type), xmf_precision(type), xmf_basename(fileName),
partTypeGroupName, name);
fprintf(xmfFile, "</Attribute>\n");
}
......@@ -68,7 +68,7 @@ int main() {
vx.f[j] = (i + j) * 2.25f / numPoints;
}
vx_h.v = vec_mul(vx.v, vec_set1(1.f/h));
vx_h.v = vec_mul(vx.v, vec_set1(1.f / h));
kernel_deval_1_vec(&vx_h, &W_vec, &dW_vec);
......@@ -106,8 +106,8 @@ int main() {
vx_2.f[j] = (i + j) * 2.25f / numPoints;
}
vx_h.v = vec_mul(vx.v, vec_set1(1.f/h));
vx_h_2.v = vec_mul(vx_2.v, vec_set1(1.f/h));
vx_h.v = vec_mul(vx.v, vec_set1(1.f / h));
vx_h_2.v = vec_mul(vx_2.v, vec_set1(1.f / h));
kernel_deval_2_vec(&vx_h, &W_vec, &dW_vec, &vx_h_2, &W_vec_2, &dW_vec_2);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment