Skip to content
Snippets Groups Projects
Commit 2b50cf09 authored by James Willis's avatar James Willis
Browse files

Use unsorted cache for particle properties in first loop.

parent 8200f3d1
Branches
Tags
1 merge request!320Dopair1 vectorisation merge
...@@ -926,6 +926,19 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * ...@@ -926,6 +926,19 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
const double dj_min = sort_j[0].d; const double dj_min = sort_j[0].d;
const float dx_max = (ci->dx_max + cj->dx_max); const float dx_max = (ci->dx_max + cj->dx_max);
/* Get the particle cache from the runner and re-allocate
* the cache if it is not big enough for the cell. */
struct cache *restrict ci_cache = &r->par_cache;
if (ci_cache->count < count_i) {
cache_init(ci_cache, count_i);
}
if (cj_cache.count < count_j) {
cache_init(&cj_cache, count_j);
}
cache_read_two_cells(ci, cj, ci_cache, &cj_cache, shift);
/* Loop over the parts in ci. */ /* Loop over the parts in ci. */
for (int pid = count_i - 1; for (int pid = count_i - 1;
pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
...@@ -933,12 +946,19 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * ...@@ -933,12 +946,19 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
/* Get a hold of the ith part in ci. */ /* Get a hold of the ith part in ci. */
struct part *restrict pi = &parts_i[sort_i[pid].i]; struct part *restrict pi = &parts_i[sort_i[pid].i];
if (!part_is_active(pi, e)) continue; if (!part_is_active(pi, e)) continue;
const float hi = pi->h;
int ci_cache_idx = sort_i[pid].i;
//const float hi = pi->h;
const float hi = ci_cache->h[ci_cache_idx];
const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
if (di < dj_min) continue; if (di < dj_min) continue;
double pix[3]; double pix[3];
for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; //for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
pix[0] = ci_cache->x[ci_cache_idx];
pix[1] = ci_cache->y[ci_cache_idx];
pix[2] = ci_cache->z[ci_cache_idx];
const float hig2 = hi * hi * kernel_gamma2; const float hig2 = hi * hi * kernel_gamma2;
//vector pix, piy, piz; //vector pix, piy, piz;
...@@ -975,19 +995,30 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * ...@@ -975,19 +995,30 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
curlvySum.v = vec_setzero(); curlvySum.v = vec_setzero();
curlvzSum.v = vec_setzero(); curlvzSum.v = vec_setzero();
//int exit_iteration = count_j;
//for (int pjd = 0; pjd < count_j ; pjd++) {
// if(sort_j[pjd].d >= di) exit_iteration = pjd;
//}
/* Loop over the parts in cj. */ /* Loop over the parts in cj. */
for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
//for (int pjd = 0; pjd < exit_iteration; pjd++) {
int cj_cache_idx = sort_j[pjd].i;
/* Get a pointer to the jth particle. */ /* Get a pointer to the jth particle. */
struct part *restrict pj = &parts_j[sort_j[pjd].i]; //struct part *restrict pj = &parts_j[sort_j[pjd].i];
/* Compute the pairwise distance. */ /* Compute the pairwise distance. */
float r2 = 0.0f; float r2;// = 0.0f;
float dx[3]; float dx[3];
for (int k = 0; k < 3; k++) { //for (int k = 0; k < 3; k++) {
dx[k] = pix[k] - pj->x[k]; // dx[k] = pix[k] - pj->x[k] + ci->loc[k];
r2 += dx[k] * dx[k]; // r2 += dx[k] * dx[k];
} //}
dx[0] = pix[0] - cj_cache.x[cj_cache_idx];
dx[1] = pix[1] - cj_cache.y[cj_cache_idx];
dx[2] = pix[2] - cj_cache.z[cj_cache_idx];
r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
/* Hit or miss? */ /* Hit or miss? */
if (r2 < hig2) { if (r2 < hig2) {
...@@ -997,15 +1028,12 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * ...@@ -997,15 +1028,12 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
int_cache.dxq[icount] = dx[0]; int_cache.dxq[icount] = dx[0];
int_cache.dyq[icount] = dx[1]; int_cache.dyq[icount] = dx[1];
int_cache.dzq[icount] = dx[2]; int_cache.dzq[icount] = dx[2];
int_cache.mq[icount] = pj->mass; int_cache.mq[icount] = cj_cache.m[cj_cache_idx];//pj->mass;
int_cache.vxq[icount] = pj->v[0]; int_cache.vxq[icount] = cj_cache.vx[cj_cache_idx];//pj->v[0];
int_cache.vyq[icount] = pj->v[1]; int_cache.vyq[icount] = cj_cache.vy[cj_cache_idx];//pj->v[1];
int_cache.vzq[icount] = pj->v[2]; int_cache.vzq[icount] = cj_cache.vz[cj_cache_idx];//pj->v[2];
hjq[icount] = pj->h; icount++;
pjq[icount] = pj;
icount += 1;
} }
} /* loop over the parts in cj. */ } /* loop over the parts in cj. */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment