diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h index b18634b377719fa9c2f81273c06beb4fb50b480b..4543b18aabfa7762f98d79937f8f08ddbfe93538 100644 --- a/src/hydro/Minimal/hydro.h +++ b/src/hydro/Minimal/hydro.h @@ -17,6 +17,8 @@ * ******************************************************************************/ +#include "approx_math.h" + /** * @brief Computes the hydro time-step of a given particle * @@ -28,14 +30,9 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep( struct part* p, struct xpart* xp) { /* CFL condition */ - float dt_cfl = 2.f * const_cfl * kernel_gamma * p->h / p->force.v_sig; - - /* Limit change in u */ - float dt_u_change = (p->force.u_dt != 0.0f) - ? fabsf(const_max_u_change * p->u / p->force.u_dt) - : FLT_MAX; + const float dt_cfl = 2.f * const_cfl * kernel_gamma * p->h / p->force.v_sig; - return fminf(dt_cfl, fminf(dt_h_change, dt_u_change)); + return dt_cfl; } /** @@ -52,10 +49,6 @@ __attribute__((always_inline)) p->density.wcount_dh = 0.f; p->rho = 0.f; p->rho_dh = 0.f; - p->density.div_v = 0.f; - p->density.curl_v[0] = 0.f; - p->density.curl_v[1] = 0.f; - p->density.curl_v[2] = 0.f; } /** @@ -76,13 +69,16 @@ __attribute__((always_inline)) const float ih2 = ih * ih; const float ih4 = ih2 * ih2; - /* Final operation on the density. */ - p->rho = ih * ih2 * (p->rho + p->mass * kernel_root); - p->rho_dh = (p->rho_dh - 3.0f * p->mass * kernel_root) * ih4; - p->density.wcount = - (p->density.wcount + kernel_root) * (4.0f / 3.0 * M_PI * kernel_gamma3); - p->density.wcount_dh = - p->density.wcount_dh * ih * (4.0f / 3.0 * M_PI * kernel_gamma3); + /* Final operation on the density (add self-contribution). */ + p->rho += p->mass * kernel_root; + p->rho_dh -= 3.0f * p->mass * kernel_root * kernel_igamma; + p->density.wcount += kernel_root; + + /* Finish the calculation by inserting the missing h-factors */ + p->rho *= ih * ih2; + p->rho_dh *= ih4; + p->density.wcount *= (4.0f / 3.0f * M_PI * kernel_gamma3); + p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma3); } /** @@ -94,46 +90,10 @@ __attribute__((always_inline)) * @param xp The extended particle data to act upon * @param time The current time */ -__attribute__((always_inline)) -INLINE static void hydro_prepare_force(struct part* p, struct xpart* xp, float time) { - - /* Some smoothing length multiples. */ - const float h = p->h; - const float ih = 1.0f / h; - const float ih2 = ih * ih; - const float ih4 = ih2 * ih2; - - /* Pre-compute some stuff for the balsara switch. */ - const float normDiv_v = fabs(p->density.div_v / p->rho * ih4); - const float normCurl_v = sqrtf(p->density.curl_v[0] * p->density.curl_v[0] + - p->density.curl_v[1] * p->density.curl_v[1] + - p->density.curl_v[2] * p->density.curl_v[2]) / - p->rho * ih4; - - /* Compute this particle's sound speed. */ - const float u = p->u; - const float fc = p->force.c = - sqrtf(const_hydro_gamma * (const_hydro_gamma - 1.0f) * u); - - /* Compute the P/Omega/rho2. */ - xp->omega = 1.0f + 0.3333333333f * h * p->rho_dh / p->rho; - p->force.POrho2 = u * (const_hydro_gamma - 1.0f) / (p->rho * xp->omega); - - /* Balsara switch */ - p->force.balsara = normDiv_v / (normDiv_v + normCurl_v + 0.0001f * fc * ih); - - /* Viscosity parameter decay time */ - const float tau = h / (2.f * const_viscosity_length * p->force.c); - - /* Viscosity source term */ - const float S = fmaxf(-normDiv_v, 0.f); - - /* Compute the particle's viscosity parameter time derivative */ - const float alpha_dot = (const_viscosity_alpha_min - p->alpha) / tau + - (const_viscosity_alpha_max - p->alpha) * S; +__attribute__((always_inline)) INLINE static void hydro_prepare_force( + struct part* p, struct xpart* xp, float time) { - /* Update particle's viscosity paramter */ - p->alpha += alpha_dot * (p->t_end - p->t_begin); + p->force.pressure = p->rho * p->u * (const_hydro_gamma - 1.f); } /** @@ -153,7 +113,7 @@ __attribute__((always_inline)) p->a_hydro[2] = 0.0f; /* Reset the time derivatives. */ - p->force.u_dt = 0.0f; + p->u_dt = 0.0f; p->h_dt = 0.0f; p->force.v_sig = 0.0f; } @@ -168,20 +128,18 @@ __attribute__((always_inline)) */ __attribute__((always_inline)) INLINE static void hydro_predict_extra( struct part* p, struct xpart* xp, float t0, float t1) { - float u, w; const float dt = t1 - t0; /* Predict internal energy */ - w = p->force.u_dt / p->u * dt; - if (fabsf(w) < 0.01f) /* 1st order expansion of exp(w) */ - u = p->u *= - 1.0f + w * (1.0f + w * (0.5f + w * (1.0f / 6.0f + 1.0f / 24.0f * w))); + const float w = p->u_dt / p->u * dt; + if (fabsf(w) < 0.2f) + p->u *= approx_expf(w); /* 4th order expansion of exp(w) */ else - u = p->u *= expf(w); + p->u *= expf(w); - /* Predict gradient term */ - p->force.POrho2 = u * (const_hydro_gamma - 1.0f) / (p->rho * xp->omega); + /* Need to recompute the pressure as well */ + p->force.pressure = p->rho * p->u * (const_hydro_gamma - 1.f); } /** diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h index b5b631501b2f9c398cf1f7e5ee32fd5c962ba86e..02436c2f541cc8b33972b77c4be1ef45aaf1a589 100644 --- a/src/hydro/Minimal/hydro_iact.h +++ b/src/hydro/Minimal/hydro_iact.h @@ -48,32 +48,16 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { - float r = sqrtf(r2), ri = 1.0f / r; + float r = sqrtf(r2); float xi, xj; float h_inv; float wi, wj, wi_dx, wj_dx; float mi, mj; - float dvdr; - float dv[3], curlvr[3]; - int k; /* Get the masses. */ mi = pi->mass; mj = pj->mass; - /* Compute dv dot r */ - dv[0] = pi->v[0] - pj->v[0]; - dv[1] = pi->v[1] - pj->v[1]; - dv[2] = pi->v[2] - pj->v[2]; - dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2]; - dvdr *= ri; - - /* Compute dv cross r */ - curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1]; - curlvr[1] = dv[2] * dx[0] - dv[0] * dx[2]; - curlvr[2] = dv[0] * dx[1] - dv[1] * dx[0]; - for (k = 0; k < 3; k++) curlvr[k] *= ri; - /* Compute density of pi. */ h_inv = 1.0 / hi; xi = r * h_inv; @@ -84,11 +68,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( pi->density.wcount += wi; pi->density.wcount_dh -= xi * wi_dx; - pi->density.div_v += mj * dvdr * wi_dx; - for (k = 0; k < 3; k++) pi->density.curl_v[k] += mj * curlvr[k] * wi_dx; - /* Compute density of pj. */ - h_inv = 1.0 / hj; + h_inv = 1.f / hj; xj = r * h_inv; kernel_deval(xj, &wj, &wj_dx); @@ -96,123 +77,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( pj->rho_dh -= mi * (3.0 * wj + xj * wj_dx); pj->density.wcount += wj; pj->density.wcount_dh -= xj * wj_dx; - - pj->density.div_v += mi * dvdr * wj_dx; - for (k = 0; k < 3; k++) pj->density.curl_v[k] += mi * curlvr[k] * wj_dx; -} - -/** - * @brief Density loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_density( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef VECTORIZE - - vector r, ri, r2, xi, xj, hi, hj, hi_inv, hj_inv, wi, wj, wi_dx, wj_dx; - vector rhoi, rhoj, rhoi_dh, rhoj_dh, wcounti, wcountj, wcounti_dh, wcountj_dh; - vector mi, mj; - vector dx[3], dv[3]; - vector vi[3], vj[3]; - vector dvdr, div_vi, div_vj; - vector curlvr[3], curl_vi[3], curl_vj[3]; - int k, j; - -#if VEC_SIZE == 8 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, - pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); -#elif VEC_SIZE == 4 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); - r.v = r2.v * ri.v; - - hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f)); - xi.v = r.v * hi_inv.v; - - hj.v = vec_load(Hj); - hj_inv.v = vec_rcp(hj.v); - hj_inv.v = hj_inv.v - hj_inv.v * (hj_inv.v * hj.v - vec_set1(1.0f)); - xj.v = r.v * hj_inv.v; - - kernel_deval_vec(&xi, &wi, &wi_dx); - kernel_deval_vec(&xj, &wj, &wj_dx); - - /* Compute dv. */ - dv[0].v = vi[0].v - vj[0].v; - dv[1].v = vi[1].v - vj[1].v; - dv[2].v = vi[2].v - vj[2].v; - - /* Compute dv dot r */ - dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Compute dv cross r */ - curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v; - curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v; - curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v; - for (k = 0; k < 3; k++) curlvr[k].v *= ri.v; - - rhoi.v = mj.v * wi.v; - rhoi_dh.v = mj.v * (vec_set1(3.0f) * wi.v + xi.v * wi_dx.v); - wcounti.v = wi.v; - wcounti_dh.v = xi.v * wi_dx.v; - div_vi.v = mj.v * dvdr.v * wi_dx.v; - for (k = 0; k < 3; k++) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; - - rhoj.v = mi.v * wj.v; - rhoj_dh.v = mi.v * (vec_set1(3.0f) * wj.v + xj.v * wj_dx.v); - wcountj.v = wj.v; - wcountj_dh.v = xj.v * wj_dx.v; - div_vj.v = mi.v * dvdr.v * wj_dx.v; - for (k = 0; k < 3; k++) curl_vj[k].v = mi.v * curlvr[k].v * wj_dx.v; - - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->rho += rhoi.f[k]; - pi[k]->rho_dh -= rhoi_dh.f[k]; - pi[k]->density.wcount += wcounti.f[k]; - pi[k]->density.wcount_dh -= wcounti_dh.f[k]; - pi[k]->density.div_v += div_vi.f[k]; - for (j = 0; j < 3; j++) pi[k]->density.curl_v[j] += curl_vi[j].f[k]; - pj[k]->rho += rhoj.f[k]; - pj[k]->rho_dh -= rhoj_dh.f[k]; - pj[k]->density.wcount += wcountj.f[k]; - pj[k]->density.wcount_dh -= wcountj_dh.f[k]; - pj[k]->density.div_v += div_vj.f[k]; - for (j = 0; j < 3; j++) pj[k]->density.curl_v[j] += curl_vj[j].f[k]; - } - -#else - - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_density(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]); - -#endif } /** @@ -222,36 +86,19 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_density( __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { - float r, ri; + float r; float xi; float h_inv; float wi, wi_dx; float mj; - float dvdr; - float dv[3], curlvr[3]; - int k; /* Get the masses. */ mj = pj->mass; /* Get r and r inverse. */ r = sqrtf(r2); - ri = 1.0f / r; - - /* Compute dv dot r */ - dv[0] = pi->v[0] - pj->v[0]; - dv[1] = pi->v[1] - pj->v[1]; - dv[2] = pi->v[2] - pj->v[2]; - dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2]; - dvdr *= ri; - /* Compute dv cross r */ - curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1]; - curlvr[1] = dv[2] * dx[0] - dv[0] * dx[2]; - curlvr[2] = dv[0] * dx[1] - dv[1] * dx[0]; - for (k = 0; k < 3; k++) curlvr[k] *= ri; - - h_inv = 1.0 / hi; + h_inv = 1.f / hi; xi = r * h_inv; kernel_deval(xi, &wi, &wi_dx); @@ -259,104 +106,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( pi->rho_dh -= mj * (3.0 * wi + xi * wi_dx); pi->density.wcount += wi; pi->density.wcount_dh -= xi * wi_dx; - - pi->density.div_v += mj * dvdr * wi_dx; - for (k = 0; k < 3; k++) pi->density.curl_v[k] += mj * curlvr[k] * wi_dx; -} - -/** - * @brief Density loop (non-symmetric vectorized version) - */ - -__attribute__((always_inline)) - INLINE static void runner_iact_nonsym_vec_density(float *R2, float *Dx, - float *Hi, float *Hj, - struct part **pi, - struct part **pj) { - -#ifdef VECTORIZE - - vector r, ri, r2, xi, hi, hi_inv, wi, wi_dx; - vector rhoi, rhoi_dh, wcounti, wcounti_dh, div_vi; - vector mj; - vector dx[3], dv[3]; - vector vi[3], vj[3]; - vector dvdr; - vector curlvr[3], curl_vi[3]; - int k, j; - -#if VEC_SIZE == 8 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); -#elif VEC_SIZE == 4 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); - r.v = r2.v * ri.v; - - hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f)); - xi.v = r.v * hi_inv.v; - - kernel_deval_vec(&xi, &wi, &wi_dx); - - /* Compute dv. */ - dv[0].v = vi[0].v - vj[0].v; - dv[1].v = vi[1].v - vj[1].v; - dv[2].v = vi[2].v - vj[2].v; - - /* Compute dv dot r */ - dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Compute dv cross r */ - curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v; - curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v; - curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v; - for (k = 0; k < 3; k++) curlvr[k].v *= ri.v; - - rhoi.v = mj.v * wi.v; - rhoi_dh.v = mj.v * (vec_set1(3.0f) * wi.v + xi.v * wi_dx.v); - wcounti.v = wi.v; - wcounti_dh.v = xi.v * wi_dx.v; - div_vi.v = mj.v * dvdr.v * wi_dx.v; - for (k = 0; k < 3; k++) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; - - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->rho += rhoi.f[k]; - pi[k]->rho_dh -= rhoi_dh.f[k]; - pi[k]->density.wcount += wcounti.f[k]; - pi[k]->density.wcount_dh -= wcounti_dh.f[k]; - pi[k]->density.div_v += div_vi.f[k]; - for (j = 0; j < 3; j++) pi[k]->density.curl_v[j] += curl_vi[j].f[k]; - } - -#else - - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_nonsym_density(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]); - -#endif } /** @@ -366,314 +115,77 @@ __attribute__((always_inline)) __attribute__((always_inline)) INLINE static void runner_iact_force( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { - float r = sqrtf(r2), ri = 1.0f / r; - float xi, xj; - float hi_inv, hi2_inv; - float hj_inv, hj2_inv; - float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr; - float mi, mj, POrho2i, POrho2j, rhoi, rhoj; - float v_sig, omega_ij, Pi_ij, alpha_ij, tc, v_sig_u; - float f; - int k; - - /* Get some values in local variables. */ - mi = pi->mass; - mj = pj->mass; - rhoi = pi->rho; - rhoj = pj->rho; - POrho2i = pi->force.POrho2; - POrho2j = pj->force.POrho2; + const float r = sqrtf(r2); + const float r_inv = 1.0f / r; + + /* Recover some data */ + const float mi = pi->mass; + const float mj = pj->mass; + const float rhoi = pi->rho; + const float rhoj = pj->rho; + const float pressurei = pi->force.pressure; + const float pressurej = pj->force.pressure; /* Get the kernel for hi. */ - hi_inv = 1.0f / hi; - hi2_inv = hi_inv * hi_inv; - xi = r * hi_inv; + const float hi_inv = 1.0f / hi; + const float hi2_inv = hi_inv * hi_inv; + const float xi = r * hi_inv; + float wi, wi_dx; kernel_deval(xi, &wi, &wi_dx); - wi_dr = hi2_inv * hi2_inv * wi_dx; + const float wi_dr = hi2_inv * hi2_inv * wi_dx; /* Get the kernel for hj. */ - hj_inv = 1.0f / hj; - hj2_inv = hj_inv * hj_inv; - xj = r * hj_inv; + const float hj_inv = 1.0f / hj; + const float hj2_inv = hj_inv * hj_inv; + const float xj = r * hj_inv; + float wj, wj_dx; kernel_deval(xj, &wj, &wj_dx); - wj_dr = hj2_inv * hj2_inv * wj_dx; + const float wj_dr = hj2_inv * hj2_inv * wj_dx; + + /* Compute gradient terms */ + const float P_over_rho_i = pressurei / (rhoi * rhoi) * pi->rho_dh; + const float P_over_rho_j = pressurej / (rhoj * rhoj) * pj->rho_dh; /* Compute dv dot r. */ - dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] + - (pi->v[2] - pj->v[2]) * dx[2]; - dvdr *= ri; + float dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] + + (pi->v[2] - pj->v[2]) * dx[2]; + dvdr *= r_inv; /* Compute the relative velocity. (This is 0 if the particles move away from * each other and negative otherwise) */ - omega_ij = fminf(dvdr, 0.f); + const float omega_ij = fminf(dvdr, 0.f); - /* Compute signal velocity */ - v_sig = pi->force.c + pj->force.c - 2.0f * omega_ij; + /* Compute sound speeds */ + const float ci = sqrtf(const_hydro_gamma * pressurei / rhoi); + const float cj = sqrtf(const_hydro_gamma * pressurej / rhoj); + float v_sig = ci + cj + 3.f * omega_ij; - /* Compute viscosity parameter */ - alpha_ij = -0.5f * (pi->alpha + pj->alpha); + /* SPH acceleration term */ + const float sph_term = + mj * (P_over_rho_i * wi_dr + P_over_rho_j * wj_dr) * r_inv; - /* Compute viscosity tensor */ - Pi_ij = alpha_ij * v_sig * omega_ij / (rhoi + rhoj); + /* Use the force Luke ! */ + pi->a_hydro[0] -= sph_term * dx[0]; + pi->a_hydro[1] -= sph_term * dx[1]; + pi->a_hydro[2] -= sph_term * dx[2]; - /* Apply balsara switch */ - Pi_ij *= (pi->force.balsara + pj->force.balsara); - - /* Thermal conductivity */ - v_sig_u = sqrtf(2.f * (const_hydro_gamma - 1.f) * - fabs(rhoi * pi->u - rhoj * pj->u) / (rhoi + rhoj)); - tc = const_conductivity_alpha * v_sig_u / (rhoi + rhoj); - tc *= (wi_dr + wj_dr); - - /* Get the common factor out. */ - w = ri * - ((POrho2i * wi_dr + POrho2j * wj_dr) + 0.25f * Pi_ij * (wi_dr + wj_dr)); - - /* Use the force, Luke! */ - for (k = 0; k < 3; k++) { - f = dx[k] * w; - pi->a_hydro[k] -= mj * f; - pj->a_hydro[k] += mi * f; - } + pj->a_hydro[0] += sph_term * dx[0]; + pj->a_hydro[1] += sph_term * dx[1]; + pj->a_hydro[2] += sph_term * dx[2]; /* Get the time derivative for u. */ - pi->force.u_dt += - mj * dvdr * (POrho2i * wi_dr + 0.125f * Pi_ij * (wi_dr + wj_dr)); - pj->force.u_dt += - mi * dvdr * (POrho2j * wj_dr + 0.125f * Pi_ij * (wi_dr + wj_dr)); - - /* Add the thermal conductivity */ - pi->force.u_dt += mj * tc * (pi->u - pj->u); - pj->force.u_dt += mi * tc * (pj->u - pi->u); + pi->u_dt += P_over_rho_i * mj * dvdr * wi_dr; + pj->u_dt += P_over_rho_j * mi * dvdr * wj_dr; /* Get the time derivative for h. */ - pi->h_dt -= mj * dvdr / rhoj * wi_dr; - pj->h_dt -= mi * dvdr / rhoi * wj_dr; + pi->h_dt -= mj * dvdr * r_inv / rhoj * wi_dr; + pj->h_dt -= mi * dvdr * r_inv / rhoi * wj_dr; /* Update the signal velocity. */ pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); pj->force.v_sig = fmaxf(pj->force.v_sig, v_sig); } -/** - * @brief Force loop (Vectorized version) - */ - -__attribute__((always_inline)) INLINE static void runner_iact_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef VECTORIZE - - vector r, r2, ri; - vector xi, xj; - vector hi, hj, hi_inv, hj_inv; - vector hi2_inv, hj2_inv; - vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; - vector w; - vector piPOrho2, pjPOrho2, pirho, pjrho, piu, pju; - vector mi, mj; - vector f; - vector dx[3]; - vector vi[3], vj[3]; - vector pia[3], pja[3]; - vector piu_dt, pju_dt; - vector pih_dt, pjh_dt; - vector ci, cj, v_sig, vi_sig, vj_sig; - vector omega_ij, Pi_ij, balsara; - vector pialpha, pjalpha, alpha_ij, v_sig_u, tc; - int j, k; - -/* Load stuff. */ -#if VEC_SIZE == 8 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, - pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho2.v = - vec_set(pi[0]->force.POrho2, pi[1]->force.POrho2, pi[2]->force.POrho2, - pi[3]->force.POrho2, pi[4]->force.POrho2, pi[5]->force.POrho2, - pi[6]->force.POrho2, pi[7]->force.POrho2); - pjPOrho2.v = - vec_set(pj[0]->force.POrho2, pj[1]->force.POrho2, pj[2]->force.POrho2, - pj[3]->force.POrho2, pj[4]->force.POrho2, pj[5]->force.POrho2, - pj[6]->force.POrho2, pj[7]->force.POrho2); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, - pi[5]->rho, pi[6]->rho, pi[7]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, - pj[5]->rho, pj[6]->rho, pj[7]->rho); - piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u, pi[4]->u, pi[5]->u, - pi[6]->u, pi[7]->u); - pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u, - pj[6]->u, pj[7]->u); - ci.v = - vec_set(pi[0]->force.c, pi[1]->force.c, pi[2]->force.c, pi[3]->force.c, - pi[4]->force.c, pi[5]->force.c, pi[6]->force.c, pi[7]->force.c); - cj.v = - vec_set(pj[0]->force.c, pj[1]->force.c, pj[2]->force.c, pj[3]->force.c, - pj[4]->force.c, pj[5]->force.c, pj[6]->force.c, pj[7]->force.c); - vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, - pi[3]->force.v_sig, pi[4]->force.v_sig, pi[5]->force.v_sig, - pi[6]->force.v_sig, pi[7]->force.v_sig); - vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig, - pj[3]->force.v_sig, pj[4]->force.v_sig, pj[5]->force.v_sig, - pj[6]->force.v_sig, pj[7]->force.v_sig); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); - balsara.v = - vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara, - pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara, - pi[6]->force.balsara, pi[7]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara, - pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara, - pj[6]->force.balsara, pj[7]->force.balsara); - pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha, - pi[4]->alpha, pi[5]->alpha, pi[6]->alpha, pi[7]->alpha); - pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha, - pj[4]->alpha, pj[5]->alpha, pj[6]->alpha, pj[7]->alpha); -#elif VEC_SIZE == 4 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - piPOrho2.v = vec_set(pi[0]->force.POrho2, pi[1]->force.POrho2, - pi[2]->force.POrho2, pi[3]->force.POrho2); - pjPOrho2.v = vec_set(pj[0]->force.POrho2, pj[1]->force.POrho2, - pj[2]->force.POrho2, pj[3]->force.POrho2); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); - piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u); - pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u); - ci.v = - vec_set(pi[0]->force.c, pi[1]->force.c, pi[2]->force.c, pi[3]->force.c); - cj.v = - vec_set(pj[0]->force.c, pj[1]->force.c, pj[2]->force.c, pj[3]->force.c); - vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, - pi[3]->force.v_sig); - vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig, - pj[3]->force.v_sig); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); - balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara, - pi[2]->force.balsara, pi[3]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, - pj[2]->force.balsara, pj[3]->force.balsara); - pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha); - pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha); -#else -#error -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); - r.v = r2.v * ri.v; - - /* Get the kernel for hi. */ - hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f)); - hi2_inv.v = hi_inv.v * hi_inv.v; - xi.v = r.v * hi_inv.v; - kernel_deval_vec(&xi, &wi, &wi_dx); - wi_dr.v = hi2_inv.v * hi2_inv.v * wi_dx.v; - - /* Get the kernel for hj. */ - hj.v = vec_load(Hj); - hj_inv.v = vec_rcp(hj.v); - hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f)); - hj2_inv.v = hj_inv.v * hj_inv.v; - xj.v = r.v * hj_inv.v; - kernel_deval_vec(&xj, &wj, &wj_dx); - wj_dr.v = hj2_inv.v * hj2_inv.v * wj_dx.v; - - /* Compute dv dot r. */ - dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + - ((vi[2].v - vj[2].v) * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Get the time derivative for h. */ - pih_dt.v = mj.v / pjrho.v * dvdr.v * wi_dr.v; - pjh_dt.v = mi.v / pirho.v * dvdr.v * wj_dr.v; - - /* Compute the relative velocity. (This is 0 if the particles move away from - * each other and negative otherwise) */ - omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); - - /* Compute signal velocity */ - v_sig.v = ci.v + cj.v - vec_set1(2.0f) * omega_ij.v; - - /* Compute viscosity parameter */ - alpha_ij.v = vec_set1(-0.5f) * (pialpha.v + pjalpha.v); - - /* Compute viscosity tensor */ - Pi_ij.v = balsara.v * alpha_ij.v * v_sig.v * omega_ij.v / (pirho.v + pjrho.v); - Pi_ij.v *= (wi_dr.v + wj_dr.v); - - /* Thermal conductivity */ - v_sig_u.v = vec_sqrt(vec_set1(2.f * (const_hydro_gamma - 1.f)) * - vec_fabs(pirho.v * piu.v - pjrho.v * pju.v) / - (pirho.v + pjrho.v)); - tc.v = vec_set1(const_conductivity_alpha) * v_sig_u.v / (pirho.v + pjrho.v); - tc.v *= (wi_dr.v + wj_dr.v); - - /* Get the common factor out. */ - w.v = ri.v * ((piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v) + - vec_set1(0.25f) * Pi_ij.v); - - /* Use the force, Luke! */ - for (k = 0; k < 3; k++) { - f.v = dx[k].v * w.v; - pia[k].v = mj.v * f.v; - pja[k].v = mi.v * f.v; - } - - /* Get the time derivative for u. */ - piu_dt.v = - mj.v * dvdr.v * (piPOrho2.v * wi_dr.v + vec_set1(0.125f) * Pi_ij.v); - pju_dt.v = - mi.v * dvdr.v * (pjPOrho2.v * wj_dr.v + vec_set1(0.125f) * Pi_ij.v); - - /* Add the thermal conductivity */ - piu_dt.v += mj.v * tc.v * (piu.v - pju.v); - pju_dt.v += mi.v * tc.v * (pju.v - piu.v); - - /* compute the signal velocity (this is always symmetrical). */ - vi_sig.v = vec_fmax(vi_sig.v, v_sig.v); - vj_sig.v = vec_fmax(vj_sig.v, v_sig.v); - - /* Store the forces back on the particles. */ - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->force.u_dt += piu_dt.f[k]; - pj[k]->force.u_dt += pju_dt.f[k]; - pi[k]->h_dt -= pih_dt.f[k]; - pj[k]->h_dt -= pjh_dt.f[k]; - pi[k]->force.v_sig = vi_sig.f[k]; - pj[k]->force.v_sig = vj_sig.f[k]; - for (j = 0; j < 3; j++) { - pi[k]->a[j] -= pia[j].f[k]; - pj[k]->a[j] += pja[j].f[k]; - } - } - -#else - - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_force(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]); - -#endif -} - /** * @brief Force loop (non-symmetric version) */ @@ -681,294 +193,68 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { - float r = sqrtf(r2), ri = 1.0f / r; - float xi, xj; - float hi_inv, hi2_inv; - float hj_inv, hj2_inv; - float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr; - float /*mi,*/ mj, POrho2i, POrho2j, rhoi, rhoj; - float v_sig, omega_ij, Pi_ij, alpha_ij, tc, v_sig_u; - float f; - int k; - - /* Get some values in local variables. */ - // mi = pi->mass; - mj = pj->mass; - rhoi = pi->rho; - rhoj = pj->rho; - POrho2i = pi->force.POrho2; - POrho2j = pj->force.POrho2; + const float r = sqrtf(r2); + const float r_inv = 1.0f / r; + + /* Recover some data */ + // const float mi = pi->mass; + const float mj = pj->mass; + const float rhoi = pi->rho; + const float rhoj = pj->rho; + const float pressurei = pi->force.pressure; + const float pressurej = pj->force.pressure; /* Get the kernel for hi. */ - hi_inv = 1.0f / hi; - hi2_inv = hi_inv * hi_inv; - xi = r * hi_inv; + const float hi_inv = 1.0f / hi; + const float hi2_inv = hi_inv * hi_inv; + const float xi = r * hi_inv; + float wi, wi_dx; kernel_deval(xi, &wi, &wi_dx); - wi_dr = hi2_inv * hi2_inv * wi_dx; + const float wi_dr = hi2_inv * hi2_inv * wi_dx; /* Get the kernel for hj. */ - hj_inv = 1.0f / hj; - hj2_inv = hj_inv * hj_inv; - xj = r * hj_inv; + const float hj_inv = 1.0f / hj; + const float hj2_inv = hj_inv * hj_inv; + const float xj = r * hj_inv; + float wj, wj_dx; kernel_deval(xj, &wj, &wj_dx); - wj_dr = hj2_inv * hj2_inv * wj_dx; + const float wj_dr = hj2_inv * hj2_inv * wj_dx; + + /* Compute gradient terms */ + const float P_over_rho_i = pressurei / (rhoi * rhoi) * pi->rho_dh; + const float P_over_rho_j = pressurej / (rhoj * rhoj) * pj->rho_dh; /* Compute dv dot r. */ - dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] + - (pi->v[2] - pj->v[2]) * dx[2]; - dvdr *= ri; + float dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] + + (pi->v[2] - pj->v[2]) * dx[2]; + dvdr *= r_inv; /* Compute the relative velocity. (This is 0 if the particles move away from * each other and negative otherwise) */ - omega_ij = fminf(dvdr, 0.f); - - /* Compute signal velocity */ - v_sig = pi->force.c + pj->force.c - 2.0f * omega_ij; - - /* Compute viscosity parameter */ - alpha_ij = -0.5f * (pi->alpha + pj->alpha); + const float omega_ij = fminf(dvdr, 0.f); - /* Compute viscosity tensor */ - Pi_ij = alpha_ij * v_sig * omega_ij / (rhoi + rhoj); + /* Compute sound speeds */ + const float ci = sqrtf(const_hydro_gamma * pressurei / rhoi); + const float cj = sqrtf(const_hydro_gamma * pressurej / rhoj); + float v_sig = ci + cj + 3.f * omega_ij; - /* Apply balsara switch */ - Pi_ij *= (pi->force.balsara + pj->force.balsara); + /* SPH acceleration term */ + const float sph_term = + mj * (P_over_rho_i * wi_dr + P_over_rho_j * wj_dr) * r_inv; - /* Thermal conductivity */ - v_sig_u = sqrtf(2.f * (const_hydro_gamma - 1.f) * - fabs(rhoi * pi->u - rhoj * pj->u) / (rhoi + rhoj)); - tc = const_conductivity_alpha * v_sig_u / (rhoi + rhoj); - tc *= (wi_dr + wj_dr); - - /* Get the common factor out. */ - w = ri * - ((POrho2i * wi_dr + POrho2j * wj_dr) + 0.25f * Pi_ij * (wi_dr + wj_dr)); - - /* Use the force, Luke! */ - for (k = 0; k < 3; k++) { - f = dx[k] * w; - pi->a_hydro[k] -= mj * f; - } + /* Use the force Luke ! */ + pi->a_hydro[0] -= sph_term * dx[0]; + pi->a_hydro[1] -= sph_term * dx[1]; + pi->a_hydro[2] -= sph_term * dx[2]; /* Get the time derivative for u. */ - pi->force.u_dt += - mj * dvdr * (POrho2i * wi_dr + 0.125f * Pi_ij * (wi_dr + wj_dr)); - - /* Add the thermal conductivity */ - pi->force.u_dt += mj * tc * (pi->u - pj->u); + pi->u_dt += P_over_rho_i * mj * dvdr * wi_dr; /* Get the time derivative for h. */ - pi->h_dt -= mj * dvdr / rhoj * wi_dr; + pi->h_dt -= mj * dvdr * r_inv / rhoj * wi_dr; /* Update the signal velocity. */ pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); - pj->force.v_sig = fmaxf(pj->force.v_sig, v_sig); -} - -/** - * @brief Force loop (Vectorized non-symmetric version) - */ - -__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef VECTORIZE - - vector r, r2, ri; - vector xi, xj; - vector hi, hj, hi_inv, hj_inv; - vector hi2_inv, hj2_inv; - vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; - vector w; - vector piPOrho2, pjPOrho2, pirho, pjrho, piu, pju; - vector mj; - vector f; - vector dx[3]; - vector vi[3], vj[3]; - vector pia[3]; - vector piu_dt; - vector pih_dt; - vector ci, cj, v_sig, vi_sig, vj_sig; - vector omega_ij, Pi_ij, balsara; - vector pialpha, pjalpha, alpha_ij, v_sig_u, tc; - int j, k; - -/* Load stuff. */ -#if VEC_SIZE == 8 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho2.v = - vec_set(pi[0]->force.POrho2, pi[1]->force.POrho2, pi[2]->force.POrho2, - pi[3]->force.POrho2, pi[4]->force.POrho2, pi[5]->force.POrho2, - pi[6]->force.POrho2, pi[7]->force.POrho2); - pjPOrho2.v = - vec_set(pj[0]->force.POrho2, pj[1]->force.POrho2, pj[2]->force.POrho2, - pj[3]->force.POrho2, pj[4]->force.POrho2, pj[5]->force.POrho2, - pj[6]->force.POrho2, pj[7]->force.POrho2); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, - pi[5]->rho, pi[6]->rho, pi[7]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, - pj[5]->rho, pj[6]->rho, pj[7]->rho); - piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u, pi[4]->u, pi[5]->u, - pi[6]->u, pi[7]->u); - pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u, - pj[6]->u, pj[7]->u); - ci.v = - vec_set(pi[0]->force.c, pi[1]->force.c, pi[2]->force.c, pi[3]->force.c, - pi[4]->force.c, pi[5]->force.c, pi[6]->force.c, pi[7]->force.c); - cj.v = - vec_set(pj[0]->force.c, pj[1]->force.c, pj[2]->force.c, pj[3]->force.c, - pj[4]->force.c, pj[5]->force.c, pj[6]->force.c, pj[7]->force.c); - vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, - pi[3]->force.v_sig, pi[4]->force.v_sig, pi[5]->force.v_sig, - pi[6]->force.v_sig, pi[7]->force.v_sig); - vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig, - pj[3]->force.v_sig, pj[4]->force.v_sig, pj[5]->force.v_sig, - pj[6]->force.v_sig, pj[7]->force.v_sig); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); - balsara.v = - vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara, - pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara, - pi[6]->force.balsara, pi[7]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara, - pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara, - pj[6]->force.balsara, pj[7]->force.balsara); - pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha, - pi[4]->alpha, pi[5]->alpha, pi[6]->alpha, pi[7]->alpha); - pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha, - pj[4]->alpha, pj[5]->alpha, pj[6]->alpha, pj[7]->alpha); -#elif VEC_SIZE == 4 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - piPOrho2.v = vec_set(pi[0]->force.POrho2, pi[1]->force.POrho2, - pi[2]->force.POrho2, pi[3]->force.POrho2); - pjPOrho2.v = vec_set(pj[0]->force.POrho2, pj[1]->force.POrho2, - pj[2]->force.POrho2, pj[3]->force.POrho2); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); - piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u); - pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u); - ci.v = - vec_set(pi[0]->force.c, pi[1]->force.c, pi[2]->force.c, pi[3]->force.c); - cj.v = - vec_set(pj[0]->force.c, pj[1]->force.c, pj[2]->force.c, pj[3]->force.c); - vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, - pi[3]->force.v_sig); - vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig, - pj[3]->force.v_sig); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); - balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara, - pi[2]->force.balsara, pi[3]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, - pj[2]->force.balsara, pj[3]->force.balsara); - pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha); - pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha); -#else -#error -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); - r.v = r2.v * ri.v; - - /* Get the kernel for hi. */ - hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f)); - hi2_inv.v = hi_inv.v * hi_inv.v; - xi.v = r.v * hi_inv.v; - kernel_deval_vec(&xi, &wi, &wi_dx); - wi_dr.v = hi2_inv.v * hi2_inv.v * wi_dx.v; - - /* Get the kernel for hj. */ - hj.v = vec_load(Hj); - hj_inv.v = vec_rcp(hj.v); - hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f)); - hj2_inv.v = hj_inv.v * hj_inv.v; - xj.v = r.v * hj_inv.v; - kernel_deval_vec(&xj, &wj, &wj_dx); - wj_dr.v = hj2_inv.v * hj2_inv.v * wj_dx.v; - - /* Compute dv dot r. */ - dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + - ((vi[2].v - vj[2].v) * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Get the time derivative for h. */ - pih_dt.v = mj.v / pjrho.v * dvdr.v * wi_dr.v; - - /* Compute the relative velocity. (This is 0 if the particles move away from - * each other and negative otherwise) */ - omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); - - /* Compute signal velocity */ - v_sig.v = ci.v + cj.v - vec_set1(2.0f) * omega_ij.v; - - /* Compute viscosity parameter */ - alpha_ij.v = vec_set1(-0.5f) * (pialpha.v + pjalpha.v); - - /* Compute viscosity tensor */ - Pi_ij.v = balsara.v * alpha_ij.v * v_sig.v * omega_ij.v / (pirho.v + pjrho.v); - Pi_ij.v *= (wi_dr.v + wj_dr.v); - - /* Thermal conductivity */ - v_sig_u.v = vec_sqrt(vec_set1(2.f * (const_hydro_gamma - 1.f)) * - vec_fabs(pirho.v * piu.v - pjrho.v * pju.v) / - (pirho.v + pjrho.v)); - tc.v = vec_set1(const_conductivity_alpha) * v_sig_u.v / (pirho.v + pjrho.v); - tc.v *= (wi_dr.v + wj_dr.v); - - /* Get the common factor out. */ - w.v = ri.v * ((piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v) + - vec_set1(0.25f) * Pi_ij.v); - - /* Use the force, Luke! */ - for (k = 0; k < 3; k++) { - f.v = dx[k].v * w.v; - pia[k].v = mj.v * f.v; - } - - /* Get the time derivative for u. */ - piu_dt.v = - mj.v * dvdr.v * (piPOrho2.v * wi_dr.v + vec_set1(0.125f) * Pi_ij.v); - - /* Add the thermal conductivity */ - piu_dt.v += mj.v * tc.v * (piu.v - pju.v); - - /* compute the signal velocity (this is always symmetrical). */ - vi_sig.v = vec_fmax(vi_sig.v, v_sig.v); - vj_sig.v = vec_fmax(vj_sig.v, v_sig.v); - - /* Store the forces back on the particles. */ - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->force.u_dt += piu_dt.f[k]; - pi[k]->h_dt -= pih_dt.f[k]; - pi[k]->force.v_sig = vi_sig.f[k]; - pj[k]->force.v_sig = vj_sig.f[k]; - for (j = 0; j < 3; j++) pi[k]->a[j] -= pia[j].f[k]; - } - -#else - - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_nonsym_force(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]); - -#endif } #endif /* SWIFT_RUNNER_IACT_H */ diff --git a/src/hydro/Minimal/hydro_part.h b/src/hydro/Minimal/hydro_part.h index b84fc2700b353e1aadc597b6fa7fe1e30676a26d..eef9c3df3ee075e7153b8ed36ebb46c65a2e5435 100644 --- a/src/hydro/Minimal/hydro_part.h +++ b/src/hydro/Minimal/hydro_part.h @@ -26,9 +26,6 @@ struct xpart { /* Velocity at the last full step. */ float v_full[3]; - /* Old density. */ - float omega; - } __attribute__((aligned(xpart_align))); /* Data of a single particle. */ @@ -43,6 +40,9 @@ struct part { /* Particle acceleration. */ float a_hydro[3]; + /* Particle mass. */ + float mass; + /* Particle cutoff radius. */ float h; @@ -58,6 +58,9 @@ struct part { /* Particle internal energy. */ float u; + /* Thermal energy time derivative */ + float u_dt; + /* Particle density. */ float rho; @@ -65,50 +68,29 @@ struct part { */ float rho_dh; - /* Particle viscosity parameter */ - float alpha; - /* Store density/force specific stuff. */ - // union { - - struct { + union { - /* Particle velocity divergence. */ - float div_v; + struct { - /* Derivative of particle number density. */ - float wcount_dh; + /* Particle number density. */ + float wcount; - /* Particle velocity curl. */ - float curl_v[3]; + /* Derivative of particle number density. */ + float wcount_dh; - /* Particle number density. */ - float wcount; + } density; - } density; + struct { - struct { + /* Pressure */ + float pressure; - /* Balsara switch */ - float balsara; + /* Signal velocity */ + float v_sig; - /* Aggregate quantities. */ - float POrho2; - - /* Change in particle energy over time. */ - float u_dt; - - /* Signal velocity */ - float v_sig; - - /* Sound speed */ - float c; - - } force; - //}; - - /* Particle mass. */ - float mass; + } force; + }; /* Particle ID. */ unsigned long long id;