diff --git a/src/Makefile.am b/src/Makefile.am index 53fc189f38d20616bdc75a8ef8a2abcf49e8c87e..4ab052fabb82f0a7466a564e4c06ffed9c6b6aba 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -22,7 +22,7 @@ AUTOMAKE_OPTIONS=gnu # Add the debug flag to the whole thing AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \ -funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) \ - -DTIMER -DCOUNTER -DCPU_TPS=2.67e9 + -DTIMER -DCOUNTER -DCPU_TPS=2.40e9 # AM_CFLAGS = -Wall -Werror $(OPENMP_CFLAGS) \ # -DTIMER -DCOUNTER -DCPU_TPS=2.67e9 diff --git a/src/engine.c b/src/engine.c index 67ab021e0dd8440e8c69f77e3db71767e16e8bb6..a9ee7ecc385adaeb529c6bc41ba11e261a38f96f 100644 --- a/src/engine.c +++ b/src/engine.c @@ -235,7 +235,11 @@ void engine_map_kick_first ( struct cell *c , void *data ) { p->v[1] = v[1] += dt * a[1]; p->v[2] = v[2] += dt * a[2]; p->u = u += u_dt * dt; - p->h = h += h_dt * dt; + // p->h = h += h_dt * dt; + { + float w = h_dt / h *dt; + p->h = h *= 1.0f + w*( -1.0f + w*( 0.5f + w*(-1.0f/6.0f + 1.0f/24.0*w ) ) ); + } h_max = fmaxf( h_max , h ); @@ -504,10 +508,12 @@ void engine_step ( struct engine *e , int sort_queues ) { // printf( "engine_step: total entropic function is %e .\n", ent ); fflush(stdout); printf( "engine_step: updated %i parts (dt_step=%.3e).\n" , count , dt_step ); fflush(stdout); + /* Increase the step. */ + e->step += 1; + /* Does the time step need adjusting? */ if ( e->policy & engine_policy_fixdt ) { e->dt = e->dt_orig; - e->step += 1; } else { if ( e->dt == 0 ) { @@ -529,7 +535,6 @@ void engine_step ( struct engine *e , int sort_queues ) { e->step /= 2; printf( "engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.\n" , e->dt ); } - e->step += 1; } } diff --git a/src/space.c b/src/space.c index 1810713d8bee7de035da633e5ea0703cd27fb6d8..7baf6499225f3877ae8fa3ca4efba228c2d8b9cd 100644 --- a/src/space.c +++ b/src/space.c @@ -944,7 +944,7 @@ void space_splittasks ( struct space *s ) { { -1 , -1 , -1 , -1 , -1 , -1 , -1 , 12 } }; /* Loop through the tasks... */ - #pragma omp parallel default(none) shared(s,tid,pts,space_subsize) private(ind,j,k,t,t_old,redo,ci,cj,hi,hj,sid,shift) + // #pragma omp parallel default(none) shared(s,tid,pts,space_subsize) private(ind,j,k,t,t_old,redo,ci,cj,hi,hj,sid,shift) { redo = 0; t_old = t = NULL; while ( 1 ) { @@ -1334,7 +1334,7 @@ void space_maketasks ( struct space *s , int do_sort ) { Each force task depends on the cell ghosts and unlocks the kick2 task of its super-cell. */ kk = s->nr_tasks; - #pragma omp parallel for private(t,t2) + // #pragma omp parallel for private(t,t2) for ( k = 0 ; k < kk ; k++ ) { /* Get a pointer to the task. */ diff --git a/src/vector.h b/src/vector.h index 39f1385e57b5fec3cfc30911c9ed900d6ba78524..65f0d30c67601e8d17d8347e508aacdd30fffcc2 100644 --- a/src/vector.h +++ b/src/vector.h @@ -27,7 +27,7 @@ #define VEC_MACRO(elcount, type) __attribute__((vector_size((elcount)*sizeof(type)))) type /* So what will the vector size be? */ - #ifdef __AVX__ + #ifdef NO__AVX__ #define VECTORIZE #define VEC_SIZE 8 #define VEC_FLOAT __m256 @@ -54,7 +54,7 @@ #define vec_dbl_ftoi(a) _mm256_cvttpd_epi32(a) #define vec_dbl_fmin(a,b) _mm256_min_pd(a,b) #define vec_dbl_fmax(a,b) _mm256_max_pd(a,b) - #elif defined( __SSE2__ ) + #elif defined( NO__SSE2__ ) #define VECTORIZE #define VEC_SIZE 4 #define VEC_FLOAT __m128