Skip to content
Snippets Groups Projects
Commit 2b50cf09 authored by James Willis's avatar James Willis
Browse files

Use unsorted cache for particle properties in first loop.

parent 8200f3d1
No related branches found
No related tags found
1 merge request!320Dopair1 vectorisation merge
...@@ -926,6 +926,19 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * ...@@ -926,6 +926,19 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
const double dj_min = sort_j[0].d; const double dj_min = sort_j[0].d;
const float dx_max = (ci->dx_max + cj->dx_max); const float dx_max = (ci->dx_max + cj->dx_max);
/* Get the particle cache from the runner and re-allocate
* the cache if it is not big enough for the cell. */
struct cache *restrict ci_cache = &r->par_cache;
if (ci_cache->count < count_i) {
cache_init(ci_cache, count_i);
}
if (cj_cache.count < count_j) {
cache_init(&cj_cache, count_j);
}
cache_read_two_cells(ci, cj, ci_cache, &cj_cache, shift);
/* Loop over the parts in ci. */ /* Loop over the parts in ci. */
for (int pid = count_i - 1; for (int pid = count_i - 1;
pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
...@@ -933,12 +946,19 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * ...@@ -933,12 +946,19 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
/* Get a hold of the ith part in ci. */ /* Get a hold of the ith part in ci. */
struct part *restrict pi = &parts_i[sort_i[pid].i]; struct part *restrict pi = &parts_i[sort_i[pid].i];
if (!part_is_active(pi, e)) continue; if (!part_is_active(pi, e)) continue;
const float hi = pi->h;
int ci_cache_idx = sort_i[pid].i;
//const float hi = pi->h;
const float hi = ci_cache->h[ci_cache_idx];
const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
if (di < dj_min) continue; if (di < dj_min) continue;
double pix[3]; double pix[3];
for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; //for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
pix[0] = ci_cache->x[ci_cache_idx];
pix[1] = ci_cache->y[ci_cache_idx];
pix[2] = ci_cache->z[ci_cache_idx];
const float hig2 = hi * hi * kernel_gamma2; const float hig2 = hi * hi * kernel_gamma2;
//vector pix, piy, piz; //vector pix, piy, piz;
...@@ -975,19 +995,30 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * ...@@ -975,19 +995,30 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
curlvySum.v = vec_setzero(); curlvySum.v = vec_setzero();
curlvzSum.v = vec_setzero(); curlvzSum.v = vec_setzero();
//int exit_iteration = count_j;
//for (int pjd = 0; pjd < count_j ; pjd++) {
// if(sort_j[pjd].d >= di) exit_iteration = pjd;
//}
/* Loop over the parts in cj. */ /* Loop over the parts in cj. */
for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
//for (int pjd = 0; pjd < exit_iteration; pjd++) {
int cj_cache_idx = sort_j[pjd].i;
/* Get a pointer to the jth particle. */ /* Get a pointer to the jth particle. */
struct part *restrict pj = &parts_j[sort_j[pjd].i]; //struct part *restrict pj = &parts_j[sort_j[pjd].i];
/* Compute the pairwise distance. */ /* Compute the pairwise distance. */
float r2 = 0.0f; float r2;// = 0.0f;
float dx[3]; float dx[3];
for (int k = 0; k < 3; k++) { //for (int k = 0; k < 3; k++) {
dx[k] = pix[k] - pj->x[k]; // dx[k] = pix[k] - pj->x[k] + ci->loc[k];
r2 += dx[k] * dx[k]; // r2 += dx[k] * dx[k];
} //}
dx[0] = pix[0] - cj_cache.x[cj_cache_idx];
dx[1] = pix[1] - cj_cache.y[cj_cache_idx];
dx[2] = pix[2] - cj_cache.z[cj_cache_idx];
r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
/* Hit or miss? */ /* Hit or miss? */
if (r2 < hig2) { if (r2 < hig2) {
...@@ -997,15 +1028,12 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * ...@@ -997,15 +1028,12 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
int_cache.dxq[icount] = dx[0]; int_cache.dxq[icount] = dx[0];
int_cache.dyq[icount] = dx[1]; int_cache.dyq[icount] = dx[1];
int_cache.dzq[icount] = dx[2]; int_cache.dzq[icount] = dx[2];
int_cache.mq[icount] = pj->mass; int_cache.mq[icount] = cj_cache.m[cj_cache_idx];//pj->mass;
int_cache.vxq[icount] = pj->v[0]; int_cache.vxq[icount] = cj_cache.vx[cj_cache_idx];//pj->v[0];
int_cache.vyq[icount] = pj->v[1]; int_cache.vyq[icount] = cj_cache.vy[cj_cache_idx];//pj->v[1];
int_cache.vzq[icount] = pj->v[2]; int_cache.vzq[icount] = cj_cache.vz[cj_cache_idx];//pj->v[2];
hjq[icount] = pj->h; icount++;
pjq[icount] = pj;
icount += 1;
} }
} /* loop over the parts in cj. */ } /* loop over the parts in cj. */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment