diff --git a/src/cache.h b/src/cache.h index 47324924c56ef6773febf965be4e997ef6ff8daf..e0ef43b628a718bbc30f2982222792c670ae077a 100644 --- a/src/cache.h +++ b/src/cache.h @@ -84,35 +84,17 @@ struct cache { /* Particle z velocity. */ float *restrict vz __attribute__((aligned(CACHE_ALIGN))); + + /* Maximum distance of pi particles into cj. */ + float *restrict max_di __attribute__((aligned(CACHE_ALIGN))); + + /* Maximum distance of pj particles into ci. */ + float *restrict max_dj __attribute__((aligned(CACHE_ALIGN))); /* Cache size. */ int count; #endif - /* Particle x position. */ - //float *restrict rho __attribute__((aligned(sizeof(float) * VEC_SIZE))); - - ///* Particle y position. */ - //float *restrict rho_dh __attribute__((aligned(sizeof(float) * VEC_SIZE))); - - ///* Particle z position. */ - //float *restrict wcount __attribute__((aligned(sizeof(float) * VEC_SIZE))); - - ///* Particle smoothing length. */ - //float *restrict wcount_dh __attribute__((aligned(sizeof(float) * VEC_SIZE))); - - ///* Particle mass. */ - //float *restrict div_v __attribute__((aligned(sizeof(float) * VEC_SIZE))); - - ///* Particle x velocity. */ - //float *restrict curl_vx __attribute__((aligned(sizeof(float) * VEC_SIZE))); - - ///* Particle y velocity. */ - //float *restrict curl_vy __attribute__((aligned(sizeof(float) * VEC_SIZE))); - - ///* Particle z velocity. */ - //float *restrict curl_vz __attribute__((aligned(sizeof(float) * VEC_SIZE))); - }; #ifdef DOPAIR1_AUTO_VEC @@ -175,6 +157,8 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c, free(c->vy); free(c->vz); free(c->h); + free(c->max_di); + free(c->max_dj); } error += posix_memalign((void **)&c->x, alignment, sizeBytes); @@ -185,15 +169,9 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c, error += posix_memalign((void **)&c->vy, alignment, sizeBytes); error += posix_memalign((void **)&c->vz, alignment, sizeBytes); error += posix_memalign((void **)&c->h, alignment, sizeBytes); - //error += posix_memalign((void **)&c->rho, alignment, sizeBytes); - //error += posix_memalign((void **)&c->rho_dh, alignment, sizeBytes); - //error += posix_memalign((void **)&c->wcount, alignment, sizeBytes); - //error += posix_memalign((void **)&c->wcount_dh, alignment, sizeBytes); - //error += posix_memalign((void **)&c->div_v, alignment, sizeBytes); - //error += posix_memalign((void **)&c->curl_vx, alignment, sizeBytes); - //error += posix_memalign((void **)&c->curl_vy, alignment, sizeBytes); - //error += posix_memalign((void **)&c->curl_vz, alignment, sizeBytes); - + error += posix_memalign((void **)&c->max_di, alignment, sizeBytes); + error += posix_memalign((void **)&c->max_dj, alignment, sizeBytes); + if (error != 0) error("Couldn't allocate cache, no. of particles: %d", (int)count); c->count = count; diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index d1501787dc139f9a3e9e39c78ffda8dbdd15d200..f8dec7c235ec114f4b4ea0d936ad337ee7eaa191 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -972,9 +972,6 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec_2( #endif /* WITH_VECTORIZATION */ } -float max_di[MAX_NO_OF_PARTS] __attribute__((aligned(sizeof(VEC_SIZE * sizeof(float))))); /* max distance into ci */ -float max_dj[MAX_NO_OF_PARTS] __attribute__((aligned(sizeof(VEC_SIZE * sizeof(float))))); /* max distance into cj */ - void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *cj) { #ifdef WITH_VECTORIZATION @@ -1029,7 +1026,12 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * } int first_pi, last_pj; - + float *max_di __attribute__((aligned(sizeof(float) * VEC_SIZE))); + float *max_dj __attribute__((aligned(sizeof(float) * VEC_SIZE))); + + max_di = r->par_cache.max_di; + max_dj = r->par_cache.max_dj; + /* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */ /* For particles in ci */ populate_max_d_no_cache(ci, cj, sort_i, sort_j, dx_max, rshift, max_di, max_dj, &first_pi, &last_pj); @@ -1409,6 +1411,12 @@ void runner_dopair1_density_vec_1(struct runner *r, struct cell *ci, struct cell cache_read_two_cells_sorted(ci, cj, ci_cache, &cj_cache, sort_i, sort_j, shift); + float *max_di __attribute__((aligned(sizeof(float) * VEC_SIZE))); + float *max_dj __attribute__((aligned(sizeof(float) * VEC_SIZE))); + + max_di = r->par_cache.max_di; + max_dj = r->par_cache.max_dj; + /* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */ /* For particles in ci */ populate_max_d(ci, cj, sort_i, sort_j, ci_cache, &cj_cache, dx_max, rshift, max_di, max_dj); @@ -1833,6 +1841,12 @@ void runner_dopair1_density_vec_2(struct runner *r, struct cell *ci, struct cell cache_init(&cj_cache, count_j); } + float *max_di __attribute__((aligned(sizeof(float) * VEC_SIZE))); + float *max_dj __attribute__((aligned(sizeof(float) * VEC_SIZE))); + + max_di = r->par_cache.max_di; + max_dj = r->par_cache.max_dj; + int first_pi, last_pj; /* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */ /* For particles in ci */ @@ -2401,6 +2415,12 @@ void runner_dopair1_density_vec_3(struct runner *r, struct cell *ci, struct cell cache_read_two_cells(ci, cj, ci_cache, &cj_cache, shift); //cache_read_two_cells_sorted(ci, cj, ci_cache, &cj_cache, sort_i, sort_j, shift); + float *max_di __attribute__((aligned(sizeof(float) * VEC_SIZE))); + float *max_dj __attribute__((aligned(sizeof(float) * VEC_SIZE))); + + max_di = r->par_cache.max_di; + max_dj = r->par_cache.max_dj; + /* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */ /* For particles in ci */ populate_max_d(ci, cj, sort_i, sort_j, ci_cache, &cj_cache, dx_max, rshift, max_di, max_dj); @@ -2898,6 +2918,12 @@ void runner_dopair1_density_vec_4(struct runner *r, struct cell *ci, struct cell cache_read_cell_sorted(cj, ci_cache, sort_j, loc, shift_cj); + float *max_di __attribute__((aligned(sizeof(float) * VEC_SIZE))); + float *max_dj __attribute__((aligned(sizeof(float) * VEC_SIZE))); + + max_di = r->par_cache.max_di; + max_dj = r->par_cache.max_dj; + /* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */ /* For particles in ci */ float h = parts_i[sort_i[0].i].h;