Commit 08a7e2fd authored by James Willis
Browse files

Added max_di and max_dj arrays to the cache.

parent f40c4d49
......@@ -84,35 +84,17 @@ struct cache {
/* Particle z velocity. */
float *restrict vz __attribute__((aligned(CACHE_ALIGN)));
/* Maximum distance of pi particles into cj. */
float *restrict max_di __attribute__((aligned(CACHE_ALIGN)));
/* Maximum distance of pj particles into ci. */
float *restrict max_dj __attribute__((aligned(CACHE_ALIGN)));
/* Cache size. */
int count;
#endif
/* Particle x position. */
//float *restrict rho __attribute__((aligned(sizeof(float) * VEC_SIZE)));
///* Particle y position. */
//float *restrict rho_dh __attribute__((aligned(sizeof(float) * VEC_SIZE)));
///* Particle z position. */
//float *restrict wcount __attribute__((aligned(sizeof(float) * VEC_SIZE)));
///* Particle smoothing length. */
//float *restrict wcount_dh __attribute__((aligned(sizeof(float) * VEC_SIZE)));
///* Particle mass. */
//float *restrict div_v __attribute__((aligned(sizeof(float) * VEC_SIZE)));
///* Particle x velocity. */
//float *restrict curl_vx __attribute__((aligned(sizeof(float) * VEC_SIZE)));
///* Particle y velocity. */
//float *restrict curl_vy __attribute__((aligned(sizeof(float) * VEC_SIZE)));
///* Particle z velocity. */
//float *restrict curl_vz __attribute__((aligned(sizeof(float) * VEC_SIZE)));
};
#ifdef DOPAIR1_AUTO_VEC
......@@ -175,6 +157,8 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c,
free(c->vy);
free(c->vz);
free(c->h);
free(c->max_di);
free(c->max_dj);
}
error += posix_memalign((void **)&c->x, alignment, sizeBytes);
......@@ -185,15 +169,9 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c,
error += posix_memalign((void **)&c->vy, alignment, sizeBytes);
error += posix_memalign((void **)&c->vz, alignment, sizeBytes);
error += posix_memalign((void **)&c->h, alignment, sizeBytes);
//error += posix_memalign((void **)&c->rho, alignment, sizeBytes);
//error += posix_memalign((void **)&c->rho_dh, alignment, sizeBytes);
//error += posix_memalign((void **)&c->wcount, alignment, sizeBytes);
//error += posix_memalign((void **)&c->wcount_dh, alignment, sizeBytes);
//error += posix_memalign((void **)&c->div_v, alignment, sizeBytes);
//error += posix_memalign((void **)&c->curl_vx, alignment, sizeBytes);
//error += posix_memalign((void **)&c->curl_vy, alignment, sizeBytes);
//error += posix_memalign((void **)&c->curl_vz, alignment, sizeBytes);
error += posix_memalign((void **)&c->max_di, alignment, sizeBytes);
error += posix_memalign((void **)&c->max_dj, alignment, sizeBytes);
if (error != 0)
error("Couldn't allocate cache, no. of particles: %d", (int)count);
c->count = count;
......
......@@ -972,9 +972,6 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec_2(
#endif /* WITH_VECTORIZATION */
}
float max_di[MAX_NO_OF_PARTS] __attribute__((aligned(sizeof(VEC_SIZE * sizeof(float))))); /* max distance into ci */
float max_dj[MAX_NO_OF_PARTS] __attribute__((aligned(sizeof(VEC_SIZE * sizeof(float))))); /* max distance into cj */
void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *cj) {
#ifdef WITH_VECTORIZATION
......@@ -1029,7 +1026,12 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
}
int first_pi, last_pj;
float *max_di __attribute__((aligned(sizeof(float) * VEC_SIZE)));
float *max_dj __attribute__((aligned(sizeof(float) * VEC_SIZE)));
max_di = r->par_cache.max_di;
max_dj = r->par_cache.max_dj;
/* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */
/* For particles in ci */
populate_max_d_no_cache(ci, cj, sort_i, sort_j, dx_max, rshift, max_di, max_dj, &first_pi, &last_pj);
......@@ -1409,6 +1411,12 @@ void runner_dopair1_density_vec_1(struct runner *r, struct cell *ci, struct cell
cache_read_two_cells_sorted(ci, cj, ci_cache, &cj_cache, sort_i, sort_j, shift);
float *max_di __attribute__((aligned(sizeof(float) * VEC_SIZE)));
float *max_dj __attribute__((aligned(sizeof(float) * VEC_SIZE)));
max_di = r->par_cache.max_di;
max_dj = r->par_cache.max_dj;
/* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */
/* For particles in ci */
populate_max_d(ci, cj, sort_i, sort_j, ci_cache, &cj_cache, dx_max, rshift, max_di, max_dj);
......@@ -1833,6 +1841,12 @@ void runner_dopair1_density_vec_2(struct runner *r, struct cell *ci, struct cell
cache_init(&cj_cache, count_j);
}
float *max_di __attribute__((aligned(sizeof(float) * VEC_SIZE)));
float *max_dj __attribute__((aligned(sizeof(float) * VEC_SIZE)));
max_di = r->par_cache.max_di;
max_dj = r->par_cache.max_dj;
int first_pi, last_pj;
/* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */
/* For particles in ci */
......@@ -2401,6 +2415,12 @@ void runner_dopair1_density_vec_3(struct runner *r, struct cell *ci, struct cell
cache_read_two_cells(ci, cj, ci_cache, &cj_cache, shift);
//cache_read_two_cells_sorted(ci, cj, ci_cache, &cj_cache, sort_i, sort_j, shift);
float *max_di __attribute__((aligned(sizeof(float) * VEC_SIZE)));
float *max_dj __attribute__((aligned(sizeof(float) * VEC_SIZE)));
max_di = r->par_cache.max_di;
max_dj = r->par_cache.max_dj;
/* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */
/* For particles in ci */
populate_max_d(ci, cj, sort_i, sort_j, ci_cache, &cj_cache, dx_max, rshift, max_di, max_dj);
......@@ -2898,6 +2918,12 @@ void runner_dopair1_density_vec_4(struct runner *r, struct cell *ci, struct cell
cache_read_cell_sorted(cj, ci_cache, sort_j, loc, shift_cj);
float *max_di __attribute__((aligned(sizeof(float) * VEC_SIZE)));
float *max_dj __attribute__((aligned(sizeof(float) * VEC_SIZE)));
max_di = r->par_cache.max_di;
max_dj = r->par_cache.max_dj;
/* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */
/* For particles in ci */
float h = parts_i[sort_i[0].i].h;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment