From 2b50cf09ab8648d923c69a7704f0fe848afa8628 Mon Sep 17 00:00:00 2001 From: James Willis <james.s.willis@durham.ac.uk> Date: Wed, 14 Dec 2016 11:23:12 +0000 Subject: [PATCH] Use unsorted cache for particle properties in first loop. --- src/runner_doiact_vec.c | 60 ++++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index db4d3a32cf..9e9eaf0864 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -926,6 +926,19 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * const double dj_min = sort_j[0].d; const float dx_max = (ci->dx_max + cj->dx_max); + /* Get the particle cache from the runner and re-allocate + * the cache if it is not big enough for the cell. */ + struct cache *restrict ci_cache = &r->par_cache; + + if (ci_cache->count < count_i) { + cache_init(ci_cache, count_i); + } + if (cj_cache.count < count_j) { + cache_init(&cj_cache, count_j); + } + + cache_read_two_cells(ci, cj, ci_cache, &cj_cache, shift); + /* Loop over the parts in ci. */ for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { @@ -933,12 +946,19 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts_i[sort_i[pid].i]; if (!part_is_active(pi, e)) continue; - const float hi = pi->h; + + int ci_cache_idx = sort_i[pid].i; + + //const float hi = pi->h; + const float hi = ci_cache->h[ci_cache_idx]; const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; if (di < dj_min) continue; double pix[3]; - for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + //for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + pix[0] = ci_cache->x[ci_cache_idx]; + pix[1] = ci_cache->y[ci_cache_idx]; + pix[2] = ci_cache->z[ci_cache_idx]; const float hig2 = hi * hi * kernel_gamma2; //vector pix, piy, piz; @@ -975,19 +995,30 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * curlvySum.v = vec_setzero(); curlvzSum.v = vec_setzero(); + //int exit_iteration = count_j; + //for (int pjd = 0; pjd < count_j ; pjd++) { + // if(sort_j[pjd].d >= di) exit_iteration = pjd; + //} + /* Loop over the parts in cj. */ for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { + //for (int pjd = 0; pjd < exit_iteration; pjd++) { + int cj_cache_idx = sort_j[pjd].i; /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[sort_j[pjd].i]; + //struct part *restrict pj = &parts_j[sort_j[pjd].i]; /* Compute the pairwise distance. */ - float r2 = 0.0f; + float r2;// = 0.0f; float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; - } + //for (int k = 0; k < 3; k++) { + // dx[k] = pix[k] - pj->x[k] + ci->loc[k]; + // r2 += dx[k] * dx[k]; + //} + dx[0] = pix[0] - cj_cache.x[cj_cache_idx]; + dx[1] = pix[1] - cj_cache.y[cj_cache_idx]; + dx[2] = pix[2] - cj_cache.z[cj_cache_idx]; + r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; /* Hit or miss? */ if (r2 < hig2) { @@ -997,15 +1028,12 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell * int_cache.dxq[icount] = dx[0]; int_cache.dyq[icount] = dx[1]; int_cache.dzq[icount] = dx[2]; - int_cache.mq[icount] = pj->mass; - int_cache.vxq[icount] = pj->v[0]; - int_cache.vyq[icount] = pj->v[1]; - int_cache.vzq[icount] = pj->v[2]; + int_cache.mq[icount] = cj_cache.m[cj_cache_idx];//pj->mass; + int_cache.vxq[icount] = cj_cache.vx[cj_cache_idx];//pj->v[0]; + int_cache.vyq[icount] = cj_cache.vy[cj_cache_idx];//pj->v[1]; + int_cache.vzq[icount] = cj_cache.vz[cj_cache_idx];//pj->v[2]; - hjq[icount] = pj->h; - pjq[icount] = pj; - - icount += 1; + icount++; } } /* loop over the parts in cj. */ -- GitLab