Commit aa89030c authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

several bug fixes.


Former-commit-id: 896c798930a15c2269b8d990aa54c7b3e10788fb
parent 0dea4d65
......@@ -176,7 +176,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
struct engine *e = (struct engine *)data;
float pdt, dt_step = e->dt_step, dt = e->dt, hdt = 0.5f*dt;
float dt_min, dt_max, h_max, dx, h2dx_max, dx_max;
float a[3], v[3], u, u_dt, h, h_dt, rho, v_old[3];
float a[3], v[3], u, u_dt, h, h_dt, rho, v_old[3], w;
double x[3], x_old[3];
struct part *restrict p;
struct xpart *restrict xp;
......@@ -235,12 +235,16 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
p->v[0] = v[0] += dt * a[0];
p->v[1] = v[1] += dt * a[1];
p->v[2] = v[2] += dt * a[2];
p->u = u += u_dt * dt;
// p->h = h += h_dt * dt;
{
float w = h_dt / h *dt;
p->h = h *= 1.0f + w*( -1.0f + w*( 0.5f + w*(-1.0f/6.0f + 1.0f/24.0*w ) ) );
}
w = u_dt / u * dt;
if ( w > 0.9f && w < 1.1f )
p->u = u *= 1.0f + w*( -1.0f + w*( 0.5f - w*1.0f/6.0f ) );
else
p->u = u *= expf( w );
w = h_dt / h * dt;
if ( w > 0.9f && w < 1.1f )
p->h = h *= 1.0f + w*( -1.0f + w*( 0.5f - w*1.0f/6.0f ) );
else
p->h = h *= expf( w );
h_max = fmaxf( h_max , h );
......@@ -421,7 +425,6 @@ void engine_step ( struct engine *e , int sort_queues ) {
float dt = e->dt, dt_step, dt_max = 0.0f, dt_min = FLT_MAX;
float epot = 0.0, ekin = 0.0, mom[3] = { 0.0 , 0.0 , 0.0 };
float ang[3] = { 0.0 , 0.0 , 0.0 };
// double lent, ent = 0.0;
int count = 0;
struct cell *c;
......@@ -518,6 +521,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
}
else {
if ( e->dt == 0 ) {
// e->nullstep += 1;
e->dt = e->dt_orig;
while ( dt_min < e->dt )
e->dt *= 0.5;
......@@ -540,7 +544,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
}
/* Set the system time. */
e->time = e->dt * e->step;
e->time = e->dt * (e->step - e->nullstep);
TIMER_TOC2(timer_step);
......@@ -573,6 +577,8 @@ void engine_init ( struct engine *e , struct space *s , float dt , int nr_thread
e->nr_queues = nr_queues;
e->policy = policy;
e->step = 0;
e->nullstep = 0;
e->time = 0.0;
/* First of all, init the barrier and lock it. */
if ( pthread_mutex_init( &e->barrier_mutex , NULL ) != 0 )
......
......@@ -61,7 +61,7 @@ struct engine {
float dt, dt_orig;
/* The current step number. */
int step;
int step, nullstep;
/* The current system time. */
float time;
......
......@@ -432,7 +432,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Balsara switch */
p->force.balsara = normCurl_v / ( normDiv_v + normCurl_v + 0.0001f * fc * ihg );
/* Reset the acceleration. */
for ( k = 0 ; k < 3 ; k++ )
p->a[k] = 0.0f;
......
......@@ -362,16 +362,16 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
int k;
/* Get the kernel for hi. */
hi_inv = 1.0f / hi;
hi_inv = kernel_igamma / hi;
hi2_inv = hi_inv * hi_inv;
xi = r * hi_inv * kernel_igamma;
xi = r * hi_inv;
kernel_deval( xi , &wi , &wi_dx );
wi_dr = hi2_inv * hi2_inv * wi_dx;
/* Get the kernel for hj. */
hj_inv = 1.0f / hj;
hj_inv = kernel_igamma / hj;
hj2_inv = hj_inv * hj_inv;
xj = r * hj_inv * kernel_igamma;
xj = r * hj_inv;
kernel_deval( xj , &wj , &wj_dx );
wj_dr = hj2_inv * hj2_inv * wj_dx;
......@@ -389,7 +389,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
Pi_ij = -const_viscosity_alpha * v_sig * omega_ij / (pi->rho + pj->rho);
/* Apply balsara switch */
Pi_ij *= (pi->force.balsara + pj->force.balsara);
Pi_ij *= ( pi->force.balsara + pj->force.balsara );
/* Get the common factor out. */
w = ri * ( ( pi->force.POrho2 * wi_dr + pj->force.POrho2 * wj_dr ) + 0.25f * Pi_ij * ( wi_dr + wj_dr ) );
......@@ -434,8 +434,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
vector r, r2, ri;
vector xi, xj;
vector hi, hj, hi_inv, hj_inv;
vector hig_inv, hi2_inv;
vector hjg_inv, hj2_inv;
vector hi2_inv, hj2_inv;
vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
vector w;
vector piPOrho2, pjPOrho2, pirho, pjrho;
......@@ -500,22 +499,20 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
r.v = r2.v * ri.v;
/* Get the kernel for hi. */
hi.v = vec_load( Hi );
hi.v = vec_set1( kernel_gamma ) * vec_load( Hi );
hi_inv.v = vec_rcp( hi.v );
hi_inv.v = hi_inv.v - hi_inv.v * ( hi.v * hi_inv.v - vec_set1( 1.0f ) );
hig_inv.v = vec_set1( kernel_igamma ) * hi_inv.v;
hi2_inv.v = hi_inv.v * hi_inv.v;
xi.v = r.v * hig_inv.v;
xi.v = r.v * hi_inv.v;
kernel_deval_vec( &xi , &wi , &wi_dx );
wi_dr.v = hi2_inv.v * hi2_inv.v * wi_dx.v;
/* Get the kernel for hj. */
hj.v = vec_load( Hj );
hj.v = vec_set1( kernel_gamma ) * vec_load( Hj );
hj_inv.v = vec_rcp( hj.v );
hj_inv.v = hj_inv.v - hj_inv.v * ( hj.v * hj_inv.v - vec_set1( 1.0f ) );
hjg_inv.v = vec_set1( kernel_igamma ) * hj_inv.v;
hj2_inv.v = hj_inv.v * hj_inv.v;
xj.v = r.v * hjg_inv.v;
xj.v = r.v * hj_inv.v;
kernel_deval_vec( &xj , &wj , &wj_dx );
wj_dr.v = hj2_inv.v * hj2_inv.v * wj_dx.v;
......@@ -595,16 +592,16 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
int k;
/* Get the kernel for hi. */
hi_inv = 1.0f / hi;
hi_inv = kernel_igamma / hi;
hi2_inv = hi_inv * hi_inv;
xi = r * hi_inv * kernel_igamma;
xi = r * hi_inv;
kernel_deval( xi , &wi , &wi_dx );
wi_dr = hi2_inv * hi2_inv * wi_dx;
/* Get the kernel for hj. */
hj_inv = 1.0f / hj;
hj_inv = kernel_igamma / hj;
hj2_inv = hj_inv * hj_inv;
xj = r * hj_inv * kernel_igamma;
xj = r * hj_inv;
kernel_deval( xj , &wj , &wj_dx );
wj_dr = hj2_inv * hj2_inv * wj_dx;
......@@ -664,8 +661,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
vector r, r2, ri;
vector xi, xj;
vector hi, hj, hi_inv, hj_inv;
vector hig_inv, hi2_inv;
vector hjg_inv, hj2_inv;
vector hi2_inv, hj2_inv;
vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
vector w;
vector piPOrho2, pjPOrho2, pirho, pjrho;
......@@ -728,22 +724,20 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
r.v = r2.v * ri.v;
/* Get the kernel for hi. */
hi.v = vec_load( Hi );
hi.v = vec_set1( kernel_gamma ) * vec_load( Hi );
hi_inv.v = vec_rcp( hi.v );
hi_inv.v = hi_inv.v - hi_inv.v * ( hi.v * hi_inv.v - vec_set1( 1.0f ) );
hig_inv.v = vec_set1( kernel_igamma ) * hi_inv.v;
hi2_inv.v = hi_inv.v * hi_inv.v;
xi.v = r.v * hig_inv.v;
xi.v = r.v * hi_inv.v;
kernel_deval_vec( &xi , &wi , &wi_dx );
wi_dr.v = hi2_inv.v * hi2_inv.v * wi_dx.v;
/* Get the kernel for hj. */
hj.v = vec_load( Hj );
hj.v = vec_set1( kernel_gamma ) * vec_load( Hj );
hj_inv.v = vec_rcp( hj.v );
hj_inv.v = hj_inv.v - hj_inv.v * ( hj.v * hj_inv.v - vec_set1( 1.0f ) );
hjg_inv.v = vec_set1( kernel_igamma ) * hj_inv.v;
hj2_inv.v = hj_inv.v * hj_inv.v;
xj.v = r.v * hjg_inv.v;
xj.v = r.v * hj_inv.v;
kernel_deval_vec( &xj , &wj , &wj_dx );
wj_dr.v = hj2_inv.v * hj2_inv.v * wj_dx.v;
......
......@@ -27,7 +27,7 @@
#define VEC_MACRO(elcount, type) __attribute__((vector_size((elcount)*sizeof(type)))) type
/* So what will the vector size be? */
#ifdef NO__AVX__
#ifdef __AVX__
#define VECTORIZE
#define VEC_SIZE 8
#define VEC_FLOAT __m256
......@@ -54,7 +54,7 @@
#define vec_dbl_ftoi(a) _mm256_cvttpd_epi32(a)
#define vec_dbl_fmin(a,b) _mm256_min_pd(a,b)
#define vec_dbl_fmax(a,b) _mm256_max_pd(a,b)
#elif defined( NO__SSE2__ )
#elif defined( __SSE2__ )
#define VECTORIZE
#define VEC_SIZE 4
#define VEC_FLOAT __m128
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment