diff --git a/src/dimension.h b/src/dimension.h index 92b56ac25a1027891cecaca696186c985e01a48b..11234fd2c167db8c04351aaec6537a9fefceb170 100644 --- a/src/dimension.h +++ b/src/dimension.h @@ -105,8 +105,15 @@ __attribute__((always_inline)) INLINE static float pow_dimension_plus_one( #endif } +/* ------------------------------------------------------------------------- */ #ifdef WITH_VECTORIZATION +/** + * @brief Returns the argument to the power given by the dimension (vector + * version) + * + * Computes \f$x^d\f$. + */ __attribute__((always_inline)) INLINE static vector pow_dimension_vec( vector x) { @@ -130,6 +137,12 @@ __attribute__((always_inline)) INLINE static vector pow_dimension_vec( #endif } +/** + * @brief Returns the argument to the power given by the dimension plus one + * (vector version) + * + * Computes \f$x^{d+1}\f$. + */ __attribute__((always_inline)) INLINE static vector pow_dimension_plus_one_vec( vector x) { diff --git a/src/hydro/Default/hydro.h b/src/hydro/Default/hydro.h index 7fe78f75fc00569ee8d34ea30a822bac394531e2..7abcfefbe8d1320bce70cc4601ef7e61b696f9e5 100644 --- a/src/hydro/Default/hydro.h +++ b/src/hydro/Default/hydro.h @@ -173,33 +173,33 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( /* Some smoothing length multiples. */ const float h = p->h; - const float ih = 1.0f / h; - const float ih2 = ih * ih; - const float ih4 = ih2 * ih2; + const float h_inv = 1.0f / h; /* 1/h */ + const float h_inv_dim = pow_dimension(h_inv); /* 1/h^d */ + const float h_inv_dim_plus_one = h_inv_dim * h_inv; /* 1/h^(d+1) */ /* Final operation on the density (add self-contribution). */ p->rho += p->mass * kernel_root; - p->rho_dh -= 3.0f * p->mass * kernel_root; + p->rho_dh -= hydro_dimension * p->mass * kernel_root; p->density.wcount += kernel_root; /* Finish the calculation by inserting the missing h-factors */ - p->rho *= ih * ih2; - p->rho_dh *= ih4; + p->rho *= h_inv_dim; + p->rho_dh *= h_inv_dim_plus_one; p->density.wcount *= kernel_norm; - p->density.wcount_dh *= ih * kernel_gamma * kernel_norm; + p->density.wcount_dh *= h_inv * kernel_gamma * kernel_norm; const float irho = 1.f / p->rho; /* Compute the derivative term */ - p->rho_dh = 1.f / (1.f + 0.33333333f * p->h * p->rho_dh * irho); + p->rho_dh = 1.f / (1.f + hydro_dimension_inv * p->h * p->rho_dh * irho); /* Finish calculation of the velocity curl components */ - p->density.rot_v[0] *= ih4 * irho; - p->density.rot_v[1] *= ih4 * irho; - p->density.rot_v[2] *= ih4 * irho; + p->density.rot_v[0] *= hydro_dimension_inv * irho; + p->density.rot_v[1] *= hydro_dimension_inv * irho; + p->density.rot_v[2] *= hydro_dimension_inv * irho; /* Finish calculation of the velocity divergence */ - p->density.div_v *= ih4 * irho; + p->density.div_v *= hydro_dimension_inv * irho; } /** @@ -217,7 +217,7 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( /* Some smoothing length multiples. */ const float h = p->h; - const float ih = 1.0f / h; + const float h_inv = 1.0f / h; /* Pre-compute some stuff for the balsara switch. */ const float normDiv_v = fabs(p->density.div_v); @@ -231,11 +231,11 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( sqrtf(hydro_gamma * hydro_gamma_minus_one * u); /* Compute the P/Omega/rho2. */ - xp->omega = 1.0f + 0.3333333333f * h * p->rho_dh / p->rho; + xp->omega = 1.0f + hydro_dimension_inv * h * p->rho_dh / p->rho; p->force.P_over_rho2 = u * hydro_gamma_minus_one / (p->rho * xp->omega); /* Balsara switch */ - p->force.balsara = normDiv_v / (normDiv_v + normRot_v + 0.0001f * fc * ih); + p->force.balsara = normDiv_v / (normDiv_v + normRot_v + 0.0001f * fc * h_inv); /* Viscosity parameter decay time */ const float tau = h / (2.f * const_viscosity_length * p->force.soundspeed); @@ -309,7 +309,7 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( */ __attribute__((always_inline)) INLINE static void hydro_end_force( struct part *restrict p) { - p->force.h_dt *= p->h * 0.333333333f; + p->force.h_dt *= p->h * hydro_dimension_inv; } /** diff --git a/src/hydro/Default/hydro_iact.h b/src/hydro/Default/hydro_iact.h index 141ab8f884da7f120f2b636933904f0d795a5d8b..4975cac35aaa3d4b4b9f99319dfd061998014453 100644 --- a/src/hydro/Default/hydro_iact.h +++ b/src/hydro/Default/hydro_iact.h @@ -75,7 +75,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( kernel_deval(xi, &wi, &wi_dx); pi->rho += mj * wi; - pi->rho_dh -= mj * (3.0f * wi + xi * wi_dx); + pi->rho_dh -= mj * (hydro_dimension * wi + xi * wi_dx); pi->density.wcount += wi; pi->density.wcount_dh -= xi * wi_dx; @@ -88,7 +88,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( kernel_deval(xj, &wj, &wj_dx); pj->rho += mi * wj; - pj->rho_dh -= mi * (3.0f * wj + xj * wj_dx); + pj->rho_dh -= mi * (hydro_dimension * wj + xj * wj_dx); pj->density.wcount += wj; pj->density.wcount_dh -= xj * wj_dx; @@ -174,14 +174,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_density( for (k = 0; k < 3; k++) curlvr[k].v *= ri.v; rhoi.v = mj.v * wi.v; - rhoi_dh.v = mj.v * (vec_set1(3.0f) * wi.v + xi.v * wi_dx.v); + rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + xi.v * wi_dx.v); wcounti.v = wi.v; wcounti_dh.v = xi.v * wi_dx.v; div_vi.v = mj.v * dvdr.v * wi_dx.v; for (k = 0; k < 3; k++) rot_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; rhoj.v = mi.v * wj.v; - rhoj_dh.v = mi.v * (vec_set1(3.0f) * wj.v + xj.v * wj_dx.v); + rhoj_dh.v = mi.v * (vec_set1(hydro_dimension) * wj.v + xj.v * wj_dx.v); wcountj.v = wj.v; wcountj_dh.v = xj.v * wj_dx.v; div_vj.v = mi.v * dvdr.v * wj_dx.v; @@ -250,7 +250,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( kernel_deval(xi, &wi, &wi_dx); pi->rho += mj * wi; - pi->rho_dh -= mj * (3.0f * wi + xi * wi_dx); + pi->rho_dh -= mj * (hydro_dimension * wi + xi * wi_dx); pi->density.wcount += wi; pi->density.wcount_dh -= xi * wi_dx; @@ -327,7 +327,7 @@ runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, for (k = 0; k < 3; k++) curlvr[k].v *= ri.v; rhoi.v = mj.v * wi.v; - rhoi_dh.v = mj.v * (vec_set1(3.0f) * wi.v + xi.v * wi_dx.v); + rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + xi.v * wi_dx.v); wcounti.v = wi.v; wcounti_dh.v = xi.v * wi_dx.v; div_vi.v = mj.v * dvdr.v * wi_dx.v; @@ -358,8 +358,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( float r = sqrtf(r2), ri = 1.0f / r; float xi, xj; - float hi_inv, hi2_inv; - float hj_inv, hj2_inv; + float hi_inv, hid_inv; + float hj_inv, hjd_inv; float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr; float mi, mj, POrho2i, POrho2j, rhoi, rhoj; float v_sig, omega_ij, Pi_ij, alpha_ij, tc, v_sig_u; @@ -376,17 +376,17 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /* Get the kernel for hi. */ hi_inv = 1.0f / hi; - hi2_inv = hi_inv * hi_inv; + hid_inv = pow_dimension_plus_one(hi_inv); /* 1/h^(d+1) */ xi = r * hi_inv; kernel_deval(xi, &wi, &wi_dx); - wi_dr = hi2_inv * hi2_inv * wi_dx; + wi_dr = hid_inv * wi_dx; /* Get the kernel for hj. */ hj_inv = 1.0f / hj; - hj2_inv = hj_inv * hj_inv; + hjd_inv = pow_dimension_plus_one(hj_inv); /* 1/h^(d+1) */ xj = r * hj_inv; kernel_deval(xj, &wj, &wj_dx); - wj_dr = hj2_inv * hj2_inv * wj_dx; + wj_dr = hjd_inv * wj_dx; /* Compute dv dot r. */ dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] + @@ -457,7 +457,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( vector r, r2, ri; vector xi, xj; vector hi, hj, hi_inv, hj_inv; - vector hi2_inv, hj2_inv; + vector hid_inv, hjd_inv; vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; vector w; vector piPOrho2, pjPOrho2, pirho, pjrho, piu, pju; @@ -564,19 +564,19 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( hi.v = vec_load(Hi); hi_inv.v = vec_rcp(hi.v); hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f)); - hi2_inv.v = hi_inv.v * hi_inv.v; + hid_inv = pow_dimension_plus_one_vec(hi_inv); xi.v = r.v * hi_inv.v; kernel_deval_vec(&xi, &wi, &wi_dx); - wi_dr.v = hi2_inv.v * hi2_inv.v * wi_dx.v; + wi_dr.v = hid_inv.v * wi_dx.v; /* Get the kernel for hj. */ hj.v = vec_load(Hj); hj_inv.v = vec_rcp(hj.v); hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f)); - hj2_inv.v = hj_inv.v * hj_inv.v; + hjd_inv = pow_dimension_plus_one_vec(hj_inv); xj.v = r.v * hj_inv.v; kernel_deval_vec(&xj, &wj, &wj_dx); - wj_dr.v = hj2_inv.v * hj2_inv.v * wj_dx.v; + wj_dr.v = hjd_inv.v * wj_dx.v; /* Compute dv dot r. */ dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + @@ -659,8 +659,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( float r = sqrtf(r2), ri = 1.0f / r; float xi, xj; - float hi_inv, hi2_inv; - float hj_inv, hj2_inv; + float hi_inv, hid_inv; + float hj_inv, hjd_inv; float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr; float /*mi,*/ mj, POrho2i, POrho2j, rhoi, rhoj; float v_sig, omega_ij, Pi_ij, alpha_ij, tc, v_sig_u; @@ -677,17 +677,17 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Get the kernel for hi. */ hi_inv = 1.0f / hi; - hi2_inv = hi_inv * hi_inv; + hid_inv = pow_dimension_plus_one(hi_inv); /* 1/h^(d+1) */ xi = r * hi_inv; kernel_deval(xi, &wi, &wi_dx); - wi_dr = hi2_inv * hi2_inv * wi_dx; + wi_dr = hid_inv * wi_dx; /* Get the kernel for hj. */ hj_inv = 1.0f / hj; - hj2_inv = hj_inv * hj_inv; + hjd_inv = pow_dimension_plus_one(hj_inv); /* 1/h^(d+1) */ xj = r * hj_inv; kernel_deval(xj, &wj, &wj_dx); - wj_dr = hj2_inv * hj2_inv * wj_dx; + wj_dr = hjd_inv * wj_dx; /* Compute dv dot r. */ dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] + @@ -752,7 +752,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( vector r, r2, ri; vector xi, xj; vector hi, hj, hi_inv, hj_inv; - vector hi2_inv, hj2_inv; + vector hid_inv, hjd_inv; vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; vector w; vector piPOrho2, pjPOrho2, pirho, pjrho, piu, pju; @@ -856,19 +856,19 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( hi.v = vec_load(Hi); hi_inv.v = vec_rcp(hi.v); hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f)); - hi2_inv.v = hi_inv.v * hi_inv.v; + hid_inv = pow_dimension_plus_one_vec(hi_inv); xi.v = r.v * hi_inv.v; kernel_deval_vec(&xi, &wi, &wi_dx); - wi_dr.v = hi2_inv.v * hi2_inv.v * wi_dx.v; + wi_dr.v = hid_inv.v * wi_dx.v; /* Get the kernel for hj. */ hj.v = vec_load(Hj); hj_inv.v = vec_rcp(hj.v); hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f)); - hj2_inv.v = hj_inv.v * hj_inv.v; + hjd_inv = pow_dimension_plus_one_vec(hj_inv); xj.v = r.v * hj_inv.v; kernel_deval_vec(&xj, &wj, &wj_dx); - wj_dr.v = hj2_inv.v * hj2_inv.v * wj_dx.v; + wj_dr.v = hjd_inv.v * wj_dx.v; /* Compute dv dot r. */ dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h index 444920ab2bec940af9c10ea0a6ba471b2ab79762..1433136ad906beba491636716f38b98da2699b26 100644 --- a/src/hydro/Minimal/hydro.h +++ b/src/hydro/Minimal/hydro.h @@ -182,25 +182,25 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( /* Some smoothing length multiples. */ const float h = p->h; - const float ih = 1.0f / h; - const float ih2 = ih * ih; - const float ih4 = ih2 * ih2; + const float h_inv = 1.0f / h; /* 1/h */ + const float h_inv_dim = pow_dimension(h_inv); /* 1/h^d */ + const float h_inv_dim_plus_one = h_inv_dim * h_inv; /* 1/h^(d+1) */ /* Final operation on the density (add self-contribution). */ p->rho += p->mass * kernel_root; - p->rho_dh -= 3.0f * p->mass * kernel_root; + p->rho_dh -= hydro_dimension * p->mass * kernel_root; p->density.wcount += kernel_root; /* Finish the calculation by inserting the missing h-factors */ - p->rho *= ih * ih2; - p->rho_dh *= ih4; + p->rho *= h_inv_dim; + p->rho_dh *= h_inv_dim_plus_one; p->density.wcount *= kernel_norm; - p->density.wcount_dh *= ih * kernel_gamma * kernel_norm; + p->density.wcount_dh *= h_inv * kernel_gamma * kernel_norm; const float irho = 1.f / p->rho; /* Compute the derivative term */ - p->rho_dh = 1.f / (1.f + 0.33333333f * p->h * p->rho_dh * irho); + p->rho_dh = 1.f / (1.f + hydro_dimension_inv * p->h * p->rho_dh * irho); } /** @@ -281,7 +281,7 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( __attribute__((always_inline)) INLINE static void hydro_end_force( struct part *restrict p) { - p->force.h_dt *= p->h * 0.333333333f; + p->force.h_dt *= p->h * hydro_dimension_inv; } /** diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h index 311bdc90744059ea1272c2ca6cf419e5fbc1ba82..9eda45760248ebc0dde18e503abe0b1288659518 100644 --- a/src/hydro/Minimal/hydro_iact.h +++ b/src/hydro/Minimal/hydro_iact.h @@ -48,7 +48,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( kernel_deval(xi, &wi, &wi_dx); pi->rho += mj * wi; - pi->rho_dh -= mj * (3.f * wi + xi * wi_dx); + pi->rho_dh -= mj * (hydro_dimension * wi + xi * wi_dx); pi->density.wcount += wi; pi->density.wcount_dh -= xi * wi_dx; @@ -58,7 +58,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( kernel_deval(xj, &wj, &wj_dx); pj->rho += mi * wj; - pj->rho_dh -= mi * (3.f * wj + xj * wj_dx); + pj->rho_dh -= mi * (hydro_dimension * wj + xj * wj_dx); pj->density.wcount += wj; pj->density.wcount_dh -= xj * wj_dx; } @@ -93,7 +93,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( kernel_deval(xi, &wi, &wi_dx); pi->rho += mj * wi; - pi->rho_dh -= mj * (3.f * wi + xi * wi_dx); + pi->rho_dh -= mj * (hydro_dimension * wi + xi * wi_dx); pi->density.wcount += wi; pi->density.wcount_dh -= xi * wi_dx; } @@ -128,19 +128,19 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /* Get the kernel for hi. */ const float hi_inv = 1.0f / hi; - const float hi2_inv = hi_inv * hi_inv; + const float hid_inv = pow_dimension_plus_one(hi_inv); /* 1/h^(d+1) */ const float xi = r * hi_inv; float wi, wi_dx; kernel_deval(xi, &wi, &wi_dx); - const float wi_dr = hi2_inv * hi2_inv * wi_dx; + const float wi_dr = hid_inv * wi_dx; /* Get the kernel for hj. */ const float hj_inv = 1.0f / hj; - const float hj2_inv = hj_inv * hj_inv; + const float hjd_inv = pow_dimension_plus_one(hj_inv); /* 1/h^(d+1) */ const float xj = r * hj_inv; float wj, wj_dx; kernel_deval(xj, &wj, &wj_dx); - const float wj_dr = hj2_inv * hj2_inv * wj_dx; + const float wj_dr = hjd_inv * wj_dx; /* Compute gradient terms */ const float P_over_rho_i = pressurei / (rhoi * rhoi) * pi->rho_dh; @@ -211,19 +211,19 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Get the kernel for hi. */ const float hi_inv = 1.0f / hi; - const float hi2_inv = hi_inv * hi_inv; + const float hid_inv = pow_dimension_plus_one(hi_inv); /* 1/h^(d+1) */ const float xi = r * hi_inv; float wi, wi_dx; kernel_deval(xi, &wi, &wi_dx); - const float wi_dr = hi2_inv * hi2_inv * wi_dx; + const float wi_dr = hid_inv * wi_dx; /* Get the kernel for hj. */ const float hj_inv = 1.0f / hj; - const float hj2_inv = hj_inv * hj_inv; + const float hjd_inv = pow_dimension_plus_one(hj_inv); /* 1/h^(d+1) */ const float xj = r * hj_inv; float wj, wj_dx; kernel_deval(xj, &wj, &wj_dx); - const float wj_dr = hj2_inv * hj2_inv * wj_dx; + const float wj_dr = hjd_inv * wj_dx; /* Compute gradient terms */ const float P_over_rho_i = pressurei / (rhoi * rhoi) * pi->rho_dh;