From 3184d4a642cecbb9db87ad82ee16cfe57f1391ce Mon Sep 17 00:00:00 2001 From: Pedro Gonnet <pedro.gonnet@durham.ac.uk> Date: Mon, 6 Oct 2014 20:48:29 +0000 Subject: [PATCH] added a new iact_pair_direct_sorted which uses local copies of the sorted particles. tested for correctness, not yet for speed. --- examples/test_bh_sorted.c | 265 +++++++++++++++++++++++++++++++++++++- 1 file changed, 264 insertions(+), 1 deletion(-) diff --git a/examples/test_bh_sorted.c b/examples/test_bh_sorted.c index e0f3e3f..c10920d 100644 --- a/examples/test_bh_sorted.c +++ b/examples/test_bh_sorted.c @@ -1100,7 +1100,7 @@ static inline void iact_pair_direct_unsorted(struct cell *ci, struct cell *cj) { * @param ci The #cell containing the particles. * @param cj The #cell containing the other particles */ -static inline void iact_pair_direct_sorted(struct cell *ci, struct cell *cj) { +static inline void iact_pair_direct_sorted_old(struct cell *ci, struct cell *cj) { int i, j, k, l; int count_i, count_j; @@ -1326,6 +1326,269 @@ static inline void iact_pair_direct_sorted(struct cell *ci, struct cell *cj) { } } +static inline void iact_pair_direct_sorted(struct cell *ci, struct cell *cj) { + + struct part_local { + double x[3]; + float a[3]; + float mass, d; + }; + + int i, j, k, l; + int count_i, count_j; + struct part_local *parts_i, *parts_j; + double cjh = cj->h; + double xi[3]; + float dx[3], ai[3], mi, mj, r2, w, ir; + +#ifdef SANITY_CHECKS + + /* Bad stuff will happen if cell sizes are different */ + if (ci->h != cj->h) + error("Non matching cell sizes !! h_i=%f h_j=%f\n", ci->h, cj->h); + +#endif + + /* Get the sorted indices and stuff. */ + struct index *ind_i, *ind_j; + double com[4][3] = {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0}}; + float com_mass[4] = {0.0, 0.0, 0.0, 0.0}; + float orth1[4], orth2[4]; + int num_orth_planes = 0; + get_axis(&ci, &cj, &ind_i, &ind_j, &num_orth_planes, orth1, orth2); + cjh = cj->h; + + /* Allocate and fill-in the local parts. */ + count_i = ci->count; + count_j = cj->count; + if ((parts_i = (struct part_local *)alloca(sizeof(struct part_local) * count_i)) == NULL || + (parts_j = (struct part_local *)alloca(sizeof(struct part_local) * count_j)) == NULL) + error("Failed to allocate local part arrays."); + for (i = 0; i < count_i; i++) { + int pid = ind_i[i].ind; + parts_i[i].d = ind_i[i].d; + for (k = 0; k < 3; k++) { + parts_i[i].x[k] = ci->parts[pid].x[k]; + parts_i[i].a[k] = 0.0f; + } + parts_i[i].mass = ci->parts[pid].mass; + } + for (j = 0; j < count_j; j++) { + int pjd = ind_j[j].ind; + parts_j[j].d = ind_j[j].d; + for (k = 0; k < 3; k++) { + parts_j[j].x[k] = cj->parts[pjd].x[k]; + parts_j[j].a[k] = 0.0f; + } + parts_j[j].mass = cj->parts[pjd].mass; + } + +#if ICHECK >= 0 + for (k = 0; k < count_i; k++) + if (parts_i[k].id == ICHECK) + message("[DEBUG] interacting cells loc=[%f,%f,%f], h=%f and " + "loc=[%f,%f,%f], h=%f.", + ci->loc[0], ci->loc[1], ci->loc[2], ci->h, cj->loc[0], cj->loc[1], + cj->loc[2], cj->h); + for (k = 0; k < count_j; k++) + if (parts_j[k].id == ICHECK) + message("[DEBUG] interacting cells loc=[%f,%f,%f], h=%f and " + "loc=[%f,%f,%f], h=%f.", + cj->loc[0], cj->loc[1], cj->loc[2], cj->h, ci->loc[0], ci->loc[1], + ci->loc[2], ci->h); +#endif + + /* Distance along the axis as of which we will use a multipole. */ + float d_max = dist_cutoff_ratio * cjh; + + /* Loop over all particles in ci... */ + for (i = count_i - 1; i >= 0; i--) { + + /* Get a local copy of the distance along the axis. */ + float di = parts_i[i].d; + + /* Init the ith particle's data. */ + for (k = 0; k < 3; k++) { + xi[k] = parts_i[i].x[k]; + ai[k] = 0.0; + } + mi = parts_i[i].mass; + + /* Loop over every following particle within d_max along the axis. */ + for (j = 0; j < count_j && (parts_j[j].d - di) < d_max; j++) { + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = xi[k] - parts_j[j].x[k]; + r2 += dx[k] * dx[k]; + } + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir; + mj = parts_j[j].mass; + for (k = 0; k < 3; k++) { + float wdx = w * dx[k]; + parts_j[j].a[k] += wdx * mi; + ai[k] -= wdx * mj; + } + +#if ICHECK >= 0 + if (parts_i[i].id == ICHECK) + message("Interaction with part %d - d= %f", parts_j[j].id, sqrt(r2)); +#endif + +#ifdef COUNTERS + ++count_direct_sorted_pp; +#endif + +#if ICHECK >= 0 && 0 + if (parts_i[i].id == ICHECK) + printf("[NEW] Interaction with particle id= %d (pair i)\n", + parts_j[pjd].id); + + if (parts_j[j].id == ICHECK) + printf("[NEW] Interaction with particle id= %d (pair j) h_i= %f h_j= " + "%f ci= %p cj= %p count_i= %d count_j= %d d_i= %d d_j= %d\n", + parts_i[pid].id, ci->h, cj->h, ci, cj, count_i, count_j, ci->res, + cj->res); +#endif + + } /* loop over every other particle. */ + + /* Add any remaining particles to the COM. */ + for (int jj = j; jj < count_j; jj++) { + mj = parts_j[jj].mass; + + l = 0; +#ifdef MANY_MULTIPOLES + if (num_orth_planes > 0) { + float n1 = parts_j[jj].x[0] * orth1[0] + parts_j[jj].x[1] * orth1[1] + + parts_j[jj].x[2] * orth1[2] + orth1[3]; + l = 2 * l + ((n1 < 0.0) ? 0 : 1); + if (num_orth_planes > 1) { + float n2 = + parts_j[jj].x[0] * orth2[0] + parts_j[jj].x[1] * orth2[1] + + parts_j[jj].x[2] * orth2[2] + orth2[3]; + l = 2 * l + ((n2 < 0.0) ? 0 : 1); + } + } +#endif + + com[l][0] += mj * parts_j[jj].x[0]; + com[l][1] += mj * parts_j[jj].x[1]; + com[l][2] += mj * parts_j[jj].x[2]; + com_mass[l] += mj; + } + + /* Shrink count_j to the latest valid particle. */ + count_j = j; + + /* Interact part_i with the center of mass. */ + for (l = 0; l < (1 << num_orth_planes); ++l) { + if (com_mass[l] > 0.0) { + float icom_mass = 1.0f / com_mass[l]; + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = xi[k] - com[l][k] * icom_mass; + r2 += dx[k] * dx[k]; + } + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir; + for (k = 0; k < 3; k++) ai[k] -= w * dx[k] * com_mass[l]; + +#if ICHECK >= 0 + if (parts_i[i].id == ICHECK) + message("Interaction with multipole"); //, parts_j[j].id ); +#endif + +#ifdef COUNTERS + ++count_direct_sorted_pm_i; +#endif + } + } + + /* Store the accumulated acceleration on the ith part. */ + for (k = 0; k < 3; k++) parts_i[i].a[k] += ai[k]; + + } /* loop over all particles in ci. */ + + /* Re-init some values. */ + count_j = cj->count; + int last_i = 0; + for (l = 0; l < (1 << num_orth_planes); ++l) { + com[l][0] = 0.0; + com[l][1] = 0.0; + com[l][2] = 0.0; + com_mass[l] = 0.0f; + } + + /* Loop over the particles in cj, catch the COM interactions. */ + for (j = 0; j < count_j; j++) { + + /* Get the sorted index. */ + float dj = parts_j[j].d; + + /* Fill the COM with any new particles. */ + for (i = last_i; i < count_i && (dj - parts_i[i].d) > d_max; i++) { + mi = parts_i[i].mass; + + l = 0; +#ifdef MANY_MULTIPOLES + if (num_orth_planes > 0) { + float n1 = parts_i[i].x[0] * orth1[0] + parts_i[i].x[1] * orth1[1] + + parts_i[i].x[2] * orth1[2] + orth1[3]; + l = 2 * l + ((n1 < 0) ? 0 : 1); + if (num_orth_planes > 1) { + float n2 = + parts_i[i].x[0] * orth2[0] + parts_i[i].x[1] * orth2[1] + + parts_i[i].x[2] * orth2[2] + orth2[3]; + l = 2 * l + ((n2 < 0) ? 0 : 1); + } + } +#endif + + com[l][0] += parts_i[i].x[0] * mi; + com[l][1] += parts_i[i].x[1] * mi; + com[l][2] += parts_i[i].x[2] * mi; + com_mass[l] += mi; + } + + /* Set the new last_i to the last particle checked. */ + last_i = i; + + /* Interact part_j with the COM. */ + for (l = 0; l < (1 << num_orth_planes); ++l) { + if (com_mass[l] > 0.0) { + float icom_mass = 1.0f / com_mass[l]; + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = com[l][k] * icom_mass - parts_j[j].x[k]; + r2 += dx[k] * dx[k]; + } + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir; + for (k = 0; k < 3; k++) parts_j[j].a[k] += w * dx[k] * com_mass[l]; + +#ifdef COUNTERS + ++count_direct_sorted_pm_j; +#endif + } + } + } + + /* Copy the accelerations back to the original particles. */ + for (i = 0; i < count_i; i++) { + int pid = ind_i[i].ind; + for (k = 0; k < 3; k++) + ci->parts[pid].a[k] += parts_i[i].a[k]; + } + for (j = 0; j < count_j; j++) { + int pjd = ind_j[j].ind; + for (k = 0; k < 3; k++) + cj->parts[pjd].a[k] += parts_j[j].a[k]; + } +} + /** * @brief Decides whether two cells use the direct summation interaction or the * multipole interactions -- GitLab