Skip to content
Snippets Groups Projects
Commit 036faeff authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

First iteration of the "improved SPH" implementation. The code now uses a...

First iteration of the "improved SPH" implementation. The code now uses a (Rosswog et al, 2000) viscosity term with a (Balsara , 1995) switch and the parameters given by (Price, 2004). 
Thermal conductivity is implemented following (Price, 2008) without the switch.
Both scalar and vectorized versions have been implemented. 
The code now gives a much better answer on the Sod shock. More testing will be required on more advanced tests to tune the parameters.

When "LEGACY_GADGET2_SPH" is defined, the code defaults to the old gadget-2 version of SPH.




Former-commit-id: 840aa8ecfea0a60816f64b8e5818162f2f9006d2
parent 0da1f81f
Branches
Tags
No related merge requests found
......@@ -20,8 +20,16 @@
/* Hydrodynamical constants. */
#define const_hydro_gamma (5.0f/3.0f)
#define const_viscosity_alpha 0.8f
#define const_hydro_gamma (5.0f/3.0f)
/* SPH Viscosity constants. */
#define const_viscosity_alpha 0.8f /* Used in the legacy gadget-2 SPH mode only */
#define const_viscosity_alpha_min 0.1f /* Values taken from (Price,2004), not used in legacy gadget mode */
#define const_viscosity_alpha_max 2.0f /* Values taken from (Price,2004), not used in legacy gadget mode */
#define const_viscosity_length 0.1f /* Values taken from (Price,2004), not used in legacy gadget mode */
/* SPH Thermal conductivity constants. */
#define const_conductivity_alpha 1.f /* Value taken from (Price,2008), not used in legacy gadget mode */
/* Time integration constants. */
#define const_cfl 0.3f
......@@ -32,3 +40,6 @@
#define const_eta_kernel 1.2349f /* Corresponds to 48 ngbs with the cubic spline kernel */
#define const_delta_nwneigh 1.f
#define CUBIC_SPLINE_KERNEL
/* SPH variant to use */
#define LEGACY_GADGET2_SPH
......@@ -48,9 +48,15 @@
#include "scheduler.h"
#include "engine.h"
#include "runner.h"
#include "runner_iact.h"
#include "error.h"
#ifdef LEGACY_GADGET2_SPH
#include "runner_iact_legacy.h"
#else
#include "runner_iact.h"
#endif
/* Convert cell location to ID. */
#define cell_getid( cdim , i , j , k ) ( (int)(k) + (cdim)[2]*( (int)(j) + (cdim)[1]*(int)(i) ) )
......
......@@ -738,7 +738,22 @@ void write_output (struct engine *e)
writeAttribute_f(h_grpsph, "Weighted N_ngb", kernel_nwneigh);
writeAttribute_f(h_grpsph, "Delta N_ngb", const_delta_nwneigh);
writeAttribute_f(h_grpsph, "Hydro gamma", const_hydro_gamma);
#ifdef LEGACY_GADGET2_SPH
writeAttribute_s(h_grpsph, "Thermal Conductivity Model", "(No treatment) Legacy Gadget-2 as in Springel (2005)");
writeAttribute_s(h_grpsph, "Viscosity Model", "Legacy Gadget-2 as in Springel (2005)");
writeAttribute_f(h_grpsph, "Viscosity alpha", const_viscosity_alpha);
writeAttribute_f(h_grpsph, "Viscosity beta", 3.f);
#else
writeAttribute_s(h_grpsph, "Thermal Conductivity Model", "Price (2008) without switch");
writeAttribute_f(h_grpsph, "Thermal Conductivity alpha", const_conductivity_alpha);
writeAttribute_s(h_grpsph, "Viscosity Model", "Morris & Monaghan (1997), Rosswog, Davies, Thielemann & Piran (2000) with additional Balsara (1995) switch");
writeAttribute_f(h_grpsph, "Viscosity alpha_min", const_viscosity_alpha_min);
writeAttribute_f(h_grpsph, "Viscosity alpha_max", const_viscosity_alpha_max);
writeAttribute_f(h_grpsph, "Viscosity beta", 2.f);
writeAttribute_f(h_grpsph, "Viscosity decay length", const_viscosity_length);
#endif
writeAttribute_f(h_grpsph, "CFL parameter", const_cfl);
writeAttribute_f(h_grpsph, "Maximal ln(Delta h) change over dt", const_ln_max_h_change);
writeAttribute_f(h_grpsph, "Maximal Delta h change over dt", exp(const_ln_max_h_change));
......
......@@ -72,7 +72,12 @@ struct part {
/* Derivative of the density with respect to this particle's smoothing length. */
float rho_dh;
#ifndef LEGACY_GADGET2_SPH
/* Particle viscosity parameter */
float alpha;
#endif
/* Store density/force specific stuff. */
union {
......
......@@ -44,9 +44,15 @@
#include "scheduler.h"
#include "engine.h"
#include "runner.h"
#include "runner_iact.h"
#include "error.h"
/* Include the right variant of the SPH interactions */
#ifdef LEGACY_GADGET2_SPH
#include "runner_iact_legacy.h"
#else
#include "runner_iact.h"
#endif
/* Convert cell location to ID. */
#define cell_getid( cdim , i , j , k ) ( (int)(k) + (cdim)[2]*( (int)(j) + (cdim)[1]*(int)(i) ) )
......@@ -336,6 +342,9 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
int *pid;
float h, ih, ih2, ih4, h_corr, rho, wcount, rho_dh, wcount_dh, u, fc;
float normDiv_v, normCurl_v;
#ifndef LEGACY_GADGET2_SPH
float alpha_dot, tau, S;
#endif
float dt_step = r->e->dt_step;
TIMER_TIC
......@@ -376,13 +385,13 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Is this part within the timestep? */
if ( p->dt <= dt_step ) {
/* Some smoothing length multiples. */
h = p->h;
/* Some smoothing length multiples. */
h = p->h;
ih = 1.0f / h;
ih2 = ih * ih;
ih4 = ih2 * ih2;
/* Final operation on the density. */
/* Final operation on the density. */
p->rho = rho = ih * ih2 * ( p->rho + p->mass*kernel_root );
p->rho_dh = rho_dh = ( p->rho_dh - 3.0f*p->mass*kernel_root ) * ih4;
wcount = ( p->density.wcount + kernel_root ) * ( 4.0f / 3.0 * M_PI * kernel_gamma3 );
......@@ -416,15 +425,15 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
p->density.wcount_dh = 0.0;
p->rho = 0.0;
p->rho_dh = 0.0;
p->density.div_v = 0.0;
for ( k=0 ; k < 3 ; k++)
p->density.curl_v[k] = 0.0;
p->density.div_v = 0.0;
for ( k=0 ; k < 3 ; k++)
p->density.curl_v[k] = 0.0;
continue;
}
/* Pre-compute some stuff for the balsara switch. */
normDiv_v = fabs( p->density.div_v / rho * ih4 );
normCurl_v = sqrtf( p->density.curl_v[0] * p->density.curl_v[0] + p->density.curl_v[1] * p->density.curl_v[1] + p->density.curl_v[2] * p->density.curl_v[2] ) / rho * ih4;
normDiv_v = fabs( p->density.div_v / rho * ih4 );
normCurl_v = sqrtf( p->density.curl_v[0] * p->density.curl_v[0] + p->density.curl_v[1] * p->density.curl_v[1] + p->density.curl_v[2] * p->density.curl_v[2] ) / rho * ih4;
/* As of here, particle force variables will be set. Do _NOT_
try to read any particle density variables! */
......@@ -436,9 +445,24 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Compute the P/Omega/rho2. */
p->force.POrho2 = u * ( const_hydro_gamma - 1.0f ) / ( rho + h * rho_dh / 3.0f );
/* Balsara switch */
p->force.balsara = normDiv_v / ( normDiv_v + normCurl_v + 0.0001f * fc * ih );
/* Balsara switch */
p->force.balsara = normDiv_v / ( normDiv_v + normCurl_v + 0.0001f * fc * ih );
#ifndef LEGACY_GADGET2_SPH
/* Viscosity parameter decay time */
tau = h / ( 2.f * const_viscosity_length * p->force.c );
/* Viscosity source term */
S = fmaxf( -normDiv_v, 0.f );
/* Compute the particle's viscosity parameter time derivative */
alpha_dot = ( const_viscosity_alpha_min - p->alpha ) / tau + ( const_viscosity_alpha_max - p->alpha ) * S;
/* Update particle's viscosity paramter */
p->alpha += alpha_dot * p->dt;
#endif
/* Reset the acceleration. */
for ( k = 0 ; k < 3 ; k++ )
p->a[k] = 0.0f;
......
......@@ -359,7 +359,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
float hj_inv, hj2_inv;
float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr;
float mi, mj, POrho2i, POrho2j, rhoi, rhoj;
float v_sig, omega_ij, Pi_ij;
float v_sig, omega_ij, Pi_ij, alpha_ij, tc, v_sig_u;
// float dt_max;
float f;
int k;
......@@ -392,14 +392,22 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
omega_ij = fminf( dvdr , 0.f );
/* Compute signal velocity */
v_sig = pi->force.c + pj->force.c - 3.0f*omega_ij;
v_sig = pi->force.c + pj->force.c - 2.0f*omega_ij;
/* Compute viscosity parameter */
alpha_ij = -0.5f * ( pi->alpha + pj->alpha );
/* Compute viscosity tensor */
Pi_ij = -const_viscosity_alpha * v_sig * omega_ij / ( rhoi + rhoj );
Pi_ij = alpha_ij * v_sig * omega_ij / ( rhoi + rhoj );
/* Apply balsara switch */
Pi_ij *= ( pi->force.balsara + pj->force.balsara );
/* Termal conductivity */
v_sig_u = sqrtf( 2.f * ( const_hydro_gamma - 1.f ) * fabs( rhoi * pi->u - rhoj * pj->u ) / ( rhoi + rhoj ) );
tc = const_conductivity_alpha * v_sig_u / ( rhoi + rhoj );
tc *= ( wi_dr + wj_dr );
/* Get the common factor out. */
w = ri * ( ( POrho2i * wi_dr + POrho2j * wj_dr ) + 0.25f * Pi_ij * ( wi_dr + wj_dr ) );
......@@ -409,10 +417,14 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
pi->a[k] -= mj * f;
pj->a[k] += mi * f;
}
/* Get the time derivative for u. */
pi->force.u_dt += mj * dvdr * ( POrho2i * wi_dr + 0.125f * Pi_ij * ( wi_dr + wj_dr ) );
pi->force.u_dt += mj * dvdr * ( POrho2i * wi_dr + 0.125f * Pi_ij * ( wi_dr + wj_dr ) );
pj->force.u_dt += mi * dvdr * ( POrho2j * wj_dr + 0.125f * Pi_ij * ( wi_dr + wj_dr ) );
/* Add the thermal conductivity */
pi->force.u_dt += mj * tc * ( pi->u - pj->u );
pj->force.u_dt += mi * tc * ( pj->u - pi->u );
/* Get the time derivative for h. */
pi->force.h_dt -= mj * dvdr / rhoj * wi_dr;
......@@ -439,7 +451,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
vector hi2_inv, hj2_inv;
vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
vector w;
vector piPOrho2, pjPOrho2, pirho, pjrho;
vector piPOrho2, pjPOrho2, pirho, pjrho, piu, pju;
vector mi, mj;
vector f;
vector dx[3];
......@@ -449,6 +461,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
vector pih_dt, pjh_dt;
vector ci, cj, v_sig, vi_sig, vj_sig;
vector omega_ij, Pi_ij, balsara;
vector pialpha, pjalpha, alpha_ij, v_sig_u, tc;
int j, k;
/* Load stuff. */
......@@ -459,6 +472,8 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
pjPOrho2.v = vec_set( pj[0]->force.POrho2 , pj[1]->force.POrho2 , pj[2]->force.POrho2 , pj[3]->force.POrho2 , pj[4]->force.POrho2 , pj[5]->force.POrho2 , pj[6]->force.POrho2 , pj[7]->force.POrho2 );
pirho.v = vec_set( pi[0]->rho , pi[1]->rho , pi[2]->rho , pi[3]->rho , pi[4]->rho , pi[5]->rho , pi[6]->rho , pi[7]->rho );
pjrho.v = vec_set( pj[0]->rho , pj[1]->rho , pj[2]->rho , pj[3]->rho , pj[4]->rho , pj[5]->rho , pj[6]->rho , pj[7]->rho );
piu.v = vec_set( pi[0]->u , pi[1]->u , pi[2]->u , pi[3]->u , pi[4]->u , pi[5]->u , pi[6]->u , pi[7]->u );
pju.v = vec_set( pj[0]->u , pj[1]->u , pj[2]->u , pj[3]->u , pj[4]->u , pj[5]->u , pj[6]->u , pj[7]->u );
ci.v = vec_set( pi[0]->force.c , pi[1]->force.c , pi[2]->force.c , pi[3]->force.c , pi[4]->force.c , pi[5]->force.c , pi[6]->force.c , pi[7]->force.c );
cj.v = vec_set( pj[0]->force.c , pj[1]->force.c , pj[2]->force.c , pj[3]->force.c , pj[4]->force.c , pj[5]->force.c , pj[6]->force.c , pj[7]->force.c );
vi_sig.v = vec_set( pi[0]->force.v_sig , pi[1]->force.v_sig , pi[2]->force.v_sig , pi[3]->force.v_sig , pi[4]->force.v_sig , pi[5]->force.v_sig , pi[6]->force.v_sig , pi[7]->force.v_sig );
......@@ -469,8 +484,10 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
}
for ( k = 0 ; k < 3 ; k++ )
dx[k].v = vec_set( Dx[0+k] , Dx[3+k] , Dx[6+k] , Dx[9+k] , Dx[12+k] , Dx[15+k] , Dx[18+k] , Dx[21+k] );
balsara.v = vec_set( pi[0]->force.balsara , pi[1]->force.balsara , pi[2]->force.balsara , pi[3]->force.balsara , pi[4]->force.balsara , pi[5]->force.balsara , pi[6]->force.balsara , pi[7]->force.balsara ) +
vec_set( pj[0]->force.balsara , pj[1]->force.balsara , pj[2]->force.balsara , pj[3]->force.balsara , pj[4]->force.balsara , pj[5]->force.balsara , pj[6]->force.balsara , pj[7]->force.balsara );
balsara.v = vec_set( pi[0]->force.balsara , pi[1]->force.balsara , pi[2]->force.balsara , pi[3]->force.balsara , pi[4]->force.balsara , pi[5]->force.balsara , pi[6]->force.balsara , pi[7]->force.balsara ) +
vec_set( pj[0]->force.balsara , pj[1]->force.balsara , pj[2]->force.balsara , pj[3]->force.balsara , pj[4]->force.balsara , pj[5]->force.balsara , pj[6]->force.balsara , pj[7]->force.balsara );
pialpha.v = vec_set( pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha, pi[4]->alpha, pi[5]->alpha , pi[6]->alpha, pi[7]->alpha );
pjalpha.v = vec_set( pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha, pj[4]->alpha, pj[5]->alpha , pj[6]->alpha, pj[7]->alpha );
#elif VEC_SIZE==4
mi.v = vec_set( pi[0]->mass , pi[1]->mass , pi[2]->mass , pi[3]->mass );
mj.v = vec_set( pj[0]->mass , pj[1]->mass , pj[2]->mass , pj[3]->mass );
......@@ -478,6 +495,8 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
pjPOrho2.v = vec_set( pj[0]->force.POrho2 , pj[1]->force.POrho2 , pj[2]->force.POrho2 , pj[3]->force.POrho2 );
pirho.v = vec_set( pi[0]->rho , pi[1]->rho , pi[2]->rho , pi[3]->rho );
pjrho.v = vec_set( pj[0]->rho , pj[1]->rho , pj[2]->rho , pj[3]->rho );
piu.v = vec_set( pi[0]->u , pi[1]->u , pi[2]->u , pi[3]->u );
pju.v = vec_set( pj[0]->u , pj[1]->u , pj[2]->u , pj[3]->u );
ci.v = vec_set( pi[0]->force.c , pi[1]->force.c , pi[2]->force.c , pi[3]->force.c );
cj.v = vec_set( pj[0]->force.c , pj[1]->force.c , pj[2]->force.c , pj[3]->force.c );
vi_sig.v = vec_set( pi[0]->force.v_sig , pi[1]->force.v_sig , pi[2]->force.v_sig , pi[3]->force.v_sig );
......@@ -490,6 +509,8 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
dx[k].v = vec_set( Dx[0+k] , Dx[3+k] , Dx[6+k] , Dx[9+k] );
balsara.v = vec_set( pi[0]->force.balsara , pi[1]->force.balsara , pi[2]->force.balsara , pi[3]->force.balsara ) +
vec_set( pj[0]->force.balsara , pj[1]->force.balsara , pj[2]->force.balsara , pj[3]->force.balsara );
pialpha.v = vec_set( pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha );
pjalpha.v = vec_set( pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha );
#else
#error
#endif
......@@ -530,12 +551,20 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
omega_ij.v = vec_fmin( dvdr.v , vec_set1( 0.0f ) );
/* Compute signal velocity */
v_sig.v = ci.v + cj.v - vec_set1( 3.0f )*omega_ij.v;
v_sig.v = ci.v + cj.v - vec_set1( 2.0f )*omega_ij.v;
/* Compute viscosity parameter */
alpha_ij.v = vec_set1(-0.5f) * ( pialpha.v + pjalpha.v );
/* Compute viscosity tensor */
Pi_ij.v = -balsara.v * vec_set1( const_viscosity_alpha ) * v_sig.v * omega_ij.v / (pirho.v + pjrho.v);
Pi_ij.v = balsara.v * alpha_ij.v * v_sig.v * omega_ij.v / (pirho.v + pjrho.v);
Pi_ij.v *= ( wi_dr.v + wj_dr.v );
/* Termal conductivity */
v_sig_u.v = vec_sqrt( vec_set1( 2.f * ( const_hydro_gamma - 1.f ) ) * vec_fabs( pirho.v * piu.v - pjrho.v * pju.v ) / ( pirho.v + pjrho.v ) );
tc.v = vec_set1( const_conductivity_alpha ) * v_sig_u.v / ( pirho.v + pjrho.v );
tc.v *= ( wi_dr.v + wj_dr.v );
/* Get the common factor out. */
w.v = ri.v * ( ( piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v ) + vec_set1( 0.25f ) * Pi_ij.v );
......@@ -549,6 +578,10 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
/* Get the time derivative for u. */
piu_dt.v = mj.v * dvdr.v * ( piPOrho2.v * wi_dr.v + vec_set1( 0.125f ) * Pi_ij.v );
pju_dt.v = mi.v * dvdr.v * ( pjPOrho2.v * wj_dr.v + vec_set1( 0.125f ) * Pi_ij.v );
/* Add the thermal conductivity */
piu_dt.v += mj.v * tc.v * ( piu.v - pju.v );
pju_dt.v += mi.v * tc.v * ( pju.v - piu.v );
/* compute the signal velocity (this is always symmetrical). */
vi_sig.v = vec_fmax( vi_sig.v , v_sig.v );
......@@ -590,7 +623,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
float hj_inv, hj2_inv;
float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr;
float /*mi,*/ mj, POrho2i, POrho2j, rhoi, rhoj;
float v_sig, omega_ij, Pi_ij;
float v_sig, omega_ij, Pi_ij, alpha_ij, tc, v_sig_u;
// float dt_max;
float f;
int k;
......@@ -624,14 +657,22 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
omega_ij = fminf( dvdr , 0.f );
/* Compute signal velocity */
v_sig = pi->force.c + pj->force.c - 3.0f*omega_ij;
v_sig = pi->force.c + pj->force.c - 2.0f*omega_ij;
/* Compute viscosity parameter */
alpha_ij = -0.5f * ( pi->alpha + pj->alpha );
/* Compute viscosity tensor */
Pi_ij = -const_viscosity_alpha * v_sig * omega_ij / ( rhoi + rhoj );
Pi_ij = alpha_ij * v_sig * omega_ij / ( rhoi + rhoj );
/* Apply balsara switch */
Pi_ij *= ( pi->force.balsara + pj->force.balsara );
/* Termal conductivity */
v_sig_u = sqrtf( 2.f * ( const_hydro_gamma - 1.f ) * fabs( rhoi * pi->u - rhoj * pj->u ) / ( rhoi + rhoj ) );
tc = const_conductivity_alpha * v_sig_u / ( rhoi + rhoj );
tc *= ( wi_dr + wj_dr );
/* Get the common factor out. */
w = ri * ( ( POrho2i * wi_dr + POrho2j * wj_dr ) + 0.25f * Pi_ij * ( wi_dr + wj_dr ) );
......@@ -644,6 +685,9 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
/* Get the time derivative for u. */
pi->force.u_dt += mj * dvdr * ( POrho2i * wi_dr + 0.125f * Pi_ij * ( wi_dr + wj_dr ) );
/* Add the thermal conductivity */
pi->force.u_dt += mj * tc * ( pi->u - pj->u );
/* Get the time derivative for h. */
pi->force.h_dt -= mj * dvdr / rhoj * wi_dr;
......@@ -668,7 +712,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
vector hi2_inv, hj2_inv;
vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
vector w;
vector piPOrho2, pjPOrho2, pirho, pjrho;
vector piPOrho2, pjPOrho2, pirho, pjrho, piu, pju;
vector mj;
vector f;
vector dx[3];
......@@ -678,6 +722,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
vector pih_dt;
vector ci, cj, v_sig, vi_sig, vj_sig;
vector omega_ij, Pi_ij, balsara;
vector pialpha, pjalpha, alpha_ij, v_sig_u, tc;
int j, k;
/* Load stuff. */
......@@ -687,6 +732,8 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
pjPOrho2.v = vec_set( pj[0]->force.POrho2 , pj[1]->force.POrho2 , pj[2]->force.POrho2 , pj[3]->force.POrho2 , pj[4]->force.POrho2 , pj[5]->force.POrho2 , pj[6]->force.POrho2 , pj[7]->force.POrho2 );
pirho.v = vec_set( pi[0]->rho , pi[1]->rho , pi[2]->rho , pi[3]->rho , pi[4]->rho , pi[5]->rho , pi[6]->rho , pi[7]->rho );
pjrho.v = vec_set( pj[0]->rho , pj[1]->rho , pj[2]->rho , pj[3]->rho , pj[4]->rho , pj[5]->rho , pj[6]->rho , pj[7]->rho );
piu.v = vec_set( pi[0]->u , pi[1]->u , pi[2]->u , pi[3]->u , pi[4]->u , pi[5]->u , pi[6]->u , pi[7]->u );
pju.v = vec_set( pj[0]->u , pj[1]->u , pj[2]->u , pj[3]->u , pj[4]->u , pj[5]->u , pj[6]->u , pj[7]->u );
ci.v = vec_set( pi[0]->force.c , pi[1]->force.c , pi[2]->force.c , pi[3]->force.c , pi[4]->force.c , pi[5]->force.c , pi[6]->force.c , pi[7]->force.c );
cj.v = vec_set( pj[0]->force.c , pj[1]->force.c , pj[2]->force.c , pj[3]->force.c , pj[4]->force.c , pj[5]->force.c , pj[6]->force.c , pj[7]->force.c );
vi_sig.v = vec_set( pi[0]->force.v_sig , pi[1]->force.v_sig , pi[2]->force.v_sig , pi[3]->force.v_sig , pi[4]->force.v_sig , pi[5]->force.v_sig , pi[6]->force.v_sig , pi[7]->force.v_sig );
......@@ -699,12 +746,16 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
dx[k].v = vec_set( Dx[0+k] , Dx[3+k] , Dx[6+k] , Dx[9+k] , Dx[12+k] , Dx[15+k] , Dx[18+k] , Dx[21+k] );
balsara.v = vec_set( pi[0]->force.balsara , pi[1]->force.balsara , pi[2]->force.balsara , pi[3]->force.balsara , pi[4]->force.balsara , pi[5]->force.balsara , pi[6]->force.balsara , pi[7]->force.balsara ) +
vec_set( pj[0]->force.balsara , pj[1]->force.balsara , pj[2]->force.balsara , pj[3]->force.balsara , pj[4]->force.balsara , pj[5]->force.balsara , pj[6]->force.balsara , pj[7]->force.balsara );
pialpha.v = vec_set( pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha, pi[4]->alpha, pi[5]->alpha , pi[6]->alpha, pi[7]->alpha );
pjalpha.v = vec_set( pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha, pj[4]->alpha, pj[5]->alpha , pj[6]->alpha, pj[7]->alpha );
#elif VEC_SIZE==4
mj.v = vec_set( pj[0]->mass , pj[1]->mass , pj[2]->mass , pj[3]->mass );
piPOrho2.v = vec_set( pi[0]->force.POrho2 , pi[1]->force.POrho2 , pi[2]->force.POrho2 , pi[3]->force.POrho2 );
pjPOrho2.v = vec_set( pj[0]->force.POrho2 , pj[1]->force.POrho2 , pj[2]->force.POrho2 , pj[3]->force.POrho2 );
pirho.v = vec_set( pi[0]->rho , pi[1]->rho , pi[2]->rho , pi[3]->rho );
pjrho.v = vec_set( pj[0]->rho , pj[1]->rho , pj[2]->rho , pj[3]->rho );
piu.v = vec_set( pi[0]->u , pi[1]->u , pi[2]->u , pi[3]->u );
pju.v = vec_set( pj[0]->u , pj[1]->u , pj[2]->u , pj[3]->u );
ci.v = vec_set( pi[0]->force.c , pi[1]->force.c , pi[2]->force.c , pi[3]->force.c );
cj.v = vec_set( pj[0]->force.c , pj[1]->force.c , pj[2]->force.c , pj[3]->force.c );
vi_sig.v = vec_set( pi[0]->force.v_sig , pi[1]->force.v_sig , pi[2]->force.v_sig , pi[3]->force.v_sig );
......@@ -717,6 +768,8 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
dx[k].v = vec_set( Dx[0+k] , Dx[3+k] , Dx[6+k] , Dx[9+k] );
balsara.v = vec_set( pi[0]->force.balsara , pi[1]->force.balsara , pi[2]->force.balsara , pi[3]->force.balsara ) +
vec_set( pj[0]->force.balsara , pj[1]->force.balsara , pj[2]->force.balsara , pj[3]->force.balsara );
pialpha.v = vec_set( pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha );
pjalpha.v = vec_set( pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha );
#else
#error
#endif
......@@ -756,12 +809,20 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
omega_ij.v = vec_fmin( dvdr.v , vec_set1( 0.0f ) );
/* Compute signal velocity */
v_sig.v = ci.v + cj.v - vec_set1( 3.0f )*omega_ij.v;
v_sig.v = ci.v + cj.v - vec_set1( 2.0f )*omega_ij.v;
/* Compute viscosity parameter */
alpha_ij.v = vec_set1(-0.5f) * ( pialpha.v + pjalpha.v );
/* Compute viscosity tensor */
Pi_ij.v = -balsara.v * vec_set1( const_viscosity_alpha ) * v_sig.v * omega_ij.v / (pirho.v + pjrho.v);
Pi_ij.v = balsara.v * alpha_ij.v * v_sig.v * omega_ij.v / (pirho.v + pjrho.v);
Pi_ij.v *= ( wi_dr.v + wj_dr.v );
/* Termal conductivity */
v_sig_u.v = vec_sqrt( vec_set1( 2.f * ( const_hydro_gamma - 1.f ) ) * vec_fabs( pirho.v * piu.v - pjrho.v * pju.v ) / ( pirho.v + pjrho.v ) );
tc.v = vec_set1( const_conductivity_alpha ) * v_sig_u.v / ( pirho.v + pjrho.v );
tc.v *= ( wi_dr.v + wj_dr.v );
/* Get the common factor out. */
w.v = ri.v * ( ( piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v ) + vec_set1( 0.25f ) * Pi_ij.v );
......@@ -773,6 +834,9 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
/* Get the time derivative for u. */
piu_dt.v = mj.v * dvdr.v * ( piPOrho2.v * wi_dr.v + vec_set1( 0.125f ) * Pi_ij.v );
/* Add the thermal conductivity */
piu_dt.v += mj.v * tc.v * ( piu.v - pju.v );
/* compute the signal velocity (this is always symmetrical). */
vi_sig.v = vec_fmax( vi_sig.v , v_sig.v );
......
This diff is collapsed.
......@@ -33,7 +33,12 @@
#include "space.h"
#include "queue.h"
#include "runner.h"
#include "runner_iact.h"
#include "engine.h"
#include "io.h"
#include "debug.h"
#ifdef LEGACY_GADGET2_SPH
#include "runner_iact_legacy.h"
#else
#include "runner_iact.h"
#endif
......@@ -43,6 +43,7 @@
#define vec_ftoi(a) _mm256_cvttps_epi32(a)
#define vec_fmin(a,b) _mm256_min_ps(a,b)
#define vec_fmax(a,b) _mm256_max_ps(a,b)
#define vec_fabs(a) _mm256_andnot_ps(_mm256_set1_ps(-0.f), a)
#define vec_todbl_lo(a) _mm256_cvtps_pd(_mm256_extract128_ps(a,0))
#define vec_todbl_hi(a) _mm256_cvtps_pd(_mm256_extract128_ps(a,1))
#define vec_dbl_tofloat(a,b) _mm256_insertf128( _mm256_castps128_ps256(a) , b , 1 )
......@@ -54,7 +55,7 @@
#define vec_dbl_ftoi(a) _mm256_cvttpd_epi32(a)
#define vec_dbl_fmin(a,b) _mm256_min_pd(a,b)
#define vec_dbl_fmax(a,b) _mm256_max_pd(a,b)
#elif defined( NO__SSE2__ )
#elif defined( __SSE2__ )
#define VECTORIZE
#define VEC_SIZE 4
#define VEC_FLOAT __m128
......@@ -70,6 +71,7 @@
#define vec_ftoi(a) _mm_cvttps_epi32(a)
#define vec_fmin(a,b) _mm_min_ps(a,b)
#define vec_fmax(a,b) _mm_max_ps(a,b)
#define vec_fabs(a) _mm_andnot_ps(_mm_set1_ps(-0.f), a)
#define vec_todbl_lo(a) _mm_cvtps_pd(a)
#define vec_todbl_hi(a) _mm_cvtps_pd(_mm_movehl_ps(a,a))
#define vec_dbl_tofloat(a,b) _mm_movelh_ps( _mm_cvtpd_ps(a) , _mm_cvtpd_ps(b) )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment