diff --git a/src/engine.c b/src/engine.c index da540a5ed656f4ac393933042fc1cf99a2d6da54..11a56b450b85826474d0123895e3eaccd1179234 100644 --- a/src/engine.c +++ b/src/engine.c @@ -431,8 +431,8 @@ void engine_single_force ( double *dim , long long int pid , struct part *__rest p.force.u_dt = 0.0f; p.force.h_dt = 0.0f; p.force.v_sig = 0.0f; /* Loop over all particle pairs (force). */ - // for ( k = 0 ; k < N ; k++ ) { - for ( k = N-1 ; k >= 0 ; k-- ) { + for ( k = 0 ; k < N ; k++ ) { + // for ( k = N-1 ; k >= 0 ; k-- ) { if ( parts[k].id == p.id ) continue; for ( i = 0 ; i < 3 ; i++ ) { @@ -447,13 +447,7 @@ void engine_single_force ( double *dim , long long int pid , struct part *__rest } r2 = fdx[0]*fdx[0] + fdx[1]*fdx[1] + fdx[2]*fdx[2]; if ( r2 < p.h*p.h*kernel_gamma2 || r2 < parts[k].h*parts[k].h*kernel_gamma2 ) { - p.a[0] = 0.0f; p.a[1] = 0.0f; p.a[2] = 0.0f; - p.force.u_dt = 0.0f; p.force.h_dt = 0.0f; p.force.v_sig = 0.0f; runner_iact_nonsym_force( r2 , fdx , p.h , parts[k].h , &p , &parts[k] ); - /* printf( "pairs_simple: interacting particles %lli and %lli (rho=%.3e,r=%e): a=[%.3e,%.3e,%.3e], u_dt=%.3e, h_dt=%.3e, v_sig=%.3e.\n" , - pid , p2.id , p2.rho , sqrtf(r2) , - p.a[0] , p.a[1] , p.a[2] , - p.force.u_dt , p.force.h_dt , p.force.v_sig ); */ } } @@ -520,7 +514,7 @@ void engine_step ( struct engine *e , int sort_queues ) { } } - // engine_single_density( e->s->dim , 6178 , e->s->parts , e->s->nr_parts , e->s->periodic ); + // engine_single_density( e->s->dim , 494748 , e->s->parts , e->s->nr_parts , e->s->periodic ); /* Start the clock. */ TIMER_TIC_ND @@ -538,12 +532,12 @@ void engine_step ( struct engine *e , int sort_queues ) { /* Stop the clock. */ TIMER_TOC(timer_runners); - // engine_single_force( e->s->dim , 6178 , e->s->parts , e->s->nr_parts , e->s->periodic ); + // engine_single_force( e->s->dim , 494748 , e->s->parts , e->s->nr_parts , e->s->periodic ); // for(k=0; k<10; ++k) // printParticle(parts, k); // printParticle( parts , 432626 ); - // printParticle( e->s->parts , 6178 , e->s->nr_parts ); + // printParticle( e->s->parts , 494748 , e->s->nr_parts ); /* Collect the cell data from the second kick. */ for ( k = 0 ; k < e->s->nr_cells ; k++ ) { diff --git a/src/kernel.h b/src/kernel.h index 5996e182611ecaf985e3b7f05fad1d7e4bd1bc29..8c856b23b9bf91406be51e036bcf9521d1ea3327 100644 --- a/src/kernel.h +++ b/src/kernel.h @@ -35,6 +35,7 @@ #define kernel_gamma 2.0f #define kernel_gamma2 4.0f #define kernel_gamma3 8.0f +#define kernel_igamma 0.5f static float kernel_coeffs[ (kernel_degree + 1) * (kernel_ivals + 1) ] __attribute__ ((aligned (16))) = { 3.0/4.0*M_1_PI , -3.0/2.0*M_1_PI , 0.0 , M_1_PI , -0.25*M_1_PI , 3.0/2.0*M_1_PI , -3.0*M_1_PI , M_2_PI , diff --git a/src/runner_iact.h b/src/runner_iact.h index ab0887be2674accafc510c814cfd833d09118bf1..9d6c68a5b9942a12112ea8866e59874278845e0a 100644 --- a/src/runner_iact.h +++ b/src/runner_iact.h @@ -229,44 +229,40 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density ( float dv[3], curlvr[3]; int k; - if ( r2 < hi*hi*kernel_gamma2 ) { - - /* Get the masses. */ - mj = pj->mass; - - /* Get r and r inverse. */ - r = sqrtf( r2 ); - ri = 1.0f / r; - - /* Compute dv dot r */ - dv[0] = pi->v[0] - pj->v[0]; - dv[1] = pi->v[1] - pj->v[1]; - dv[2] = pi->v[2] - pj->v[2]; - dvdr = dv[0]*dx[0] + dv[1]*dx[1] + dv[2]*dx[2]; - dvdr *= ri; - - /* Compute dv cross r */ - curlvr[0] = dv[1]*dx[2] - dv[2]*dx[1]; - curlvr[1] = dv[2]*dx[0] - dv[0]*dx[2]; - curlvr[2] = dv[0]*dx[1] - dv[1]*dx[0]; - for ( k = 0 ; k < 3 ; k++ ) - curlvr[k] *= ri; - - h_inv = 1.0 / hi; - xi = r * h_inv; - kernel_deval( xi , &wi , &wi_dx ); - - pi->rho += mj * wi; - pi->rho_dh -= mj * ( 3.0*wi + xi*wi_dx ); - pi->density.wcount += wi; - pi->density.wcount_dh -= xi * wi_dx; + /* Get the masses. */ + mj = pj->mass; - pi->density.div_v += mj * dvdr * wi_dx; - for ( k = 0 ; k < 3 ; k++ ) - pi->density.curl_v[k] += mj * curlvr[k] * wi_dx; - - } + /* Get r and r inverse. */ + r = sqrtf( r2 ); + ri = 1.0f / r; + /* Compute dv dot r */ + dv[0] = pi->v[0] - pj->v[0]; + dv[1] = pi->v[1] - pj->v[1]; + dv[2] = pi->v[2] - pj->v[2]; + dvdr = dv[0]*dx[0] + dv[1]*dx[1] + dv[2]*dx[2]; + dvdr *= ri; + + /* Compute dv cross r */ + curlvr[0] = dv[1]*dx[2] - dv[2]*dx[1]; + curlvr[1] = dv[2]*dx[0] - dv[0]*dx[2]; + curlvr[2] = dv[0]*dx[1] - dv[1]*dx[0]; + for ( k = 0 ; k < 3 ; k++ ) + curlvr[k] *= ri; + + h_inv = 1.0 / hi; + xi = r * h_inv; + kernel_deval( xi , &wi , &wi_dx ); + + pi->rho += mj * wi; + pi->rho_dh -= mj * ( 3.0*wi + xi*wi_dx ); + pi->density.wcount += wi; + pi->density.wcount_dh -= xi * wi_dx; + + pi->density.div_v += mj * dvdr * wi_dx; + for ( k = 0 ; k < 3 ; k++ ) + pi->density.curl_v[k] += mj * curlvr[k] * wi_dx; + } /** diff --git a/src/space.h b/src/space.h index 51ddbd5c01e945176679529f11e7bc9729e5f0c6..d5cb42f14dff889d9c3f885461099bb6014f4ff8 100644 --- a/src/space.h +++ b/src/space.h @@ -26,7 +26,7 @@ #define space_splitratio 0.875 #define space_splitsize_default 400 #define space_subsize_default 5000 -#define space_dosub 0 +#define space_dosub 1 #define space_stretch 1.05 #define space_maxtaskspercell 31