diff --git a/examples/test_bh_sorted.c b/examples/test_bh_sorted.c index b8832653e9d395c41a87a3dbca7ba7b520154966..f01b0cf0c2c8a66167fb299dd5d6aa38257e1ec9 100644 --- a/examples/test_bh_sorted.c +++ b/examples/test_bh_sorted.c @@ -42,7 +42,7 @@ #define const_G 1 // 6.6738e-8 #define dist_min 0.5 /* Used for legacy walk only */ #define dist_cutoff_ratio 1.5 -#define iact_pair_direct iact_pair_direct_sorted +#define iact_pair_direct iact_pair_direct_sorted_tripole #define ICHECK -1 #define NO_SANITY_CHECKS @@ -181,6 +181,103 @@ static inline void dipole_iact(const struct dipole *d, const float *x, for (k = 0; k < 3; k++) a[k] -= w * dx[k]; } +/** Tripole stuff. Moment-matching multipole along a given direction. */ +struct tripole { + float axis1[3], axis2[3]; + float x[3]; + float mass; + float da, da2, db, db2; +}; + +static inline void tripole_init(struct tripole *d, const float *axis1, const float *axis2) { + for (int k = 0; k < 3; k++) { + d->axis1[k] = axis1[k]; + d->axis2[k] = axis2[k]; + d->x[k] = 0.0; + } + d->mass = 0.0; + d->da = 0.0; d->db = 0.0; + d->da2 = 0.0; d->db2 = 0.0; +} + +static inline void tripole_reset(struct tripole *d) { + for (int k = 0; k < 3; k++) d->x[k] = 0.0; + d->mass = 0.0; + d->da = 0.0; d->db = 0.0; + d->da2 = 0.0; d->db2 = 0.0; +} + +static inline void tripole_add(struct tripole *d, const float *x, float mass, + float da, float db) { + for (int k = 0; k < 3; k++) d->x[k] += x[k] * mass; + d->mass += mass; + d->da += da * mass; + d->da2 += da * da * mass; + d->db += db * mass; + d->db2 += db * db * mass; +} + +static inline void tripole_iact(const struct tripole *d, const float *x, + float *a) { + // Bail early if no mass. + if (d->mass == 0.0) return; + + float inv_mass = 1.0f / d->mass; + float dx[3], r2, ir, w; + int k; + + /* Get the offset along the dipole axis. This looks weird, but negative + offsets can happen due to rounding error. */ + float offset1 = (d->da2 - 2 * d->da * d->da * inv_mass) * inv_mass; + if (offset1 > 0) { + offset1 = sqrtf(offset1); + } else { + offset1 = 0.0f; + } + float offset2 = (d->db2 - 2 * d->db * d->db * inv_mass) * inv_mass; + if (offset2 > 0) { + offset2 = sqrtf(offset2); + } else { + offset2 = 0.0f; + } + + /* Compute the first dipole. */ + for (r2 = 0.0f, k = 0; k < 3; k++) { + dx[k] = x[k] - (d->x[k] * inv_mass + offset1 * d->axis1[k]); + r2 += dx[k] * dx[k]; + } + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir * d->mass * 0.25f; + for (k = 0; k < 3; k++) a[k] -= w * dx[k]; + + /* Compute the second dipole. */ + for (r2 = 0.0f, k = 0; k < 3; k++) { + dx[k] = x[k] - (d->x[k] * inv_mass - offset1 * d->axis1[k]); + r2 += dx[k] * dx[k]; + } + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir * d->mass * 0.25f; + for (k = 0; k < 3; k++) a[k] -= w * dx[k]; + + /* Compute the first dipole. */ + for (r2 = 0.0f, k = 0; k < 3; k++) { + dx[k] = x[k] - (d->x[k] * inv_mass + offset2 * d->axis2[k]); + r2 += dx[k] * dx[k]; + } + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir * d->mass * 0.25f; + for (k = 0; k < 3; k++) a[k] -= w * dx[k]; + + /* Compute the second dipole. */ + for (r2 = 0.0f, k = 0; k < 3; k++) { + dx[k] = x[k] - (d->x[k] * inv_mass - offset2 * d->axis2[k]); + r2 += dx[k] * dx[k]; + } + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir * d->mass * 0.25f; + for (k = 0; k < 3; k++) a[k] -= w * dx[k]; +} + #ifdef COUNTERS int count_direct_unsorted; int count_direct_sorted_pp; @@ -197,9 +294,11 @@ struct cell *cell_pool = NULL; /** Constants for the sorting axes. */ const float axis_shift[13 * 3] = { 5.773502691896258e-01, 5.773502691896258e-01, 5.773502691896258e-01, - 7.071067811865475e-01, 7.071067811865475e-01, 0.0, 5.773502691896258e-01, - 5.773502691896258e-01, -5.773502691896258e-01, 7.071067811865475e-01, 0.0, - 7.071067811865475e-01, 1.0, 0.0, 0.0, 7.071067811865475e-01, 0.0, + 7.071067811865475e-01, 7.071067811865475e-01, 0.0, + 5.773502691896258e-01, 5.773502691896258e-01, -5.773502691896258e-01, + 7.071067811865475e-01, 0.0, 7.071067811865475e-01, + 1.0, 0.0, 0.0, + 7.071067811865475e-01, 0.0, -7.071067811865475e-01, 5.773502691896258e-01, -5.773502691896258e-01, 5.773502691896258e-01, 7.071067811865475e-01, -7.071067811865475e-01, 0.0, 5.773502691896258e-01, -5.773502691896258e-01, -5.773502691896258e-01, 0.0, @@ -207,17 +306,22 @@ const float axis_shift[13 * 3] = { 7.071067811865475e-01, -7.071067811865475e-01, 0.0, 0.0, 1.0, }; const float axis_orth_first[13 * 3] = { - 7.071067811865475e-01, -7.071067811865475e-01, 0.0, 0.0, 0.0, 1.0, - 7.071067811865475e-01, -7.071067811865475e-01, 0.0, 0, 1.0, 0, 0.0, 1.0, 0.0, - 0.0, 1.0, 0, 7.071067811865475e-01, 0.0, -7.071067811865475e-01, 0, 0.0, 1.0, + 7.071067811865475e-01, -7.071067811865475e-01, 0.0, + 0.0, 0.0, 1.0, + 7.071067811865475e-01, -7.071067811865475e-01, 0.0, + 0.0, 1.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 1.0, 0.0, 7.071067811865475e-01, 0.0, -7.071067811865475e-01, 0, 0.0, 1.0, 7.071067811865475e-01, 7.071067811865475e-01, 0.0, 1.0, 0, 0, 1.0, 0.0, 0.0, 1.0, 0, 0, 1.0, 0.0, 0.0, }; const float axis_orth_second[13 * 3] = { 4.08248290463862e-01, 4.08248290463858e-01, -8.16496580927729e-01, - 7.071067811865475e-01, -7.071067811865475e-01, 0.0, 4.08248290463863e-01, - 4.08248290463863e-01, 8.16496580927726e-01, 7.071067811865475e-01, 0.0, - -7.071067811865475e-01, 0.0, 0.0, 1.0, 7.071067811865475e-01, 0.0, + 7.071067811865475e-01, -7.071067811865475e-01, 0.0, + 4.08248290463863e-01, 4.08248290463863e-01, 8.16496580927726e-01, + 7.071067811865475e-01, 0.0, -7.071067811865475e-01, + 0.0, 0.0, 1.0, + 7.071067811865475e-01, 0.0, 7.071067811865475e-01, 4.08248290463863e-01, 8.16496580927726e-01, 4.08248290463863e-01, 7.071067811865475e-01, 7.071067811865475e-01, 0.0, 4.08248290463864e-01, -4.08248290463862e-01, 8.16496580927726e-01, 0.0, @@ -900,12 +1004,6 @@ static inline void make_interact_pc(struct cell *leaf, struct cell *cj) { */ static inline int is_inside(struct cell *leaf, struct cell *c) { return (leaf->parts >= c->parts) && (leaf->parts < c->parts + c->count); - /* if (leaf->loc[0] >= c->loc[0] && leaf->loc[0] < c->loc[0] + c->h && - leaf->loc[1] >= c->loc[1] && leaf->loc[1] < c->loc[1] + c->h && - leaf->loc[2] >= c->loc[2] && leaf->loc[2] < c->loc[2] + c->h) - return 1; - else - return 0; */ } /** @@ -1458,6 +1556,7 @@ static inline void iact_pair_direct_sorted_dipole(struct cell *ci, float axis[3], orth1[4], orth2[4]; int num_orth_planes = 0; get_axis(&ci, &cj, &ind_i, &ind_j, axis, &num_orth_planes, orth1, orth2); + for (k = 0; k < 3; k++) axis[k] = M_SQRT1_2 * (orth1[k] + orth2[k]); for (k = 0; k < (1 << num_orth_planes); k++) dipole_init(&dip[k], axis); cjh = cj->h; @@ -1580,7 +1679,9 @@ static inline void iact_pair_direct_sorted_dipole(struct cell *ci, } #endif - dipole_add(&dip[l], parts_j[jj].x, parts_j[jj].mass, parts_j[jj].d); + float da = 0.0f; + for (k = 0; k < 3; k++) da += parts_j[jj].x[k] * axis[k]; + dipole_add(&dip[l], parts_j[jj].x, parts_j[jj].mass, da); } /* Shrink count_j to the latest valid particle. */ @@ -1642,8 +1743,9 @@ static inline void iact_pair_direct_sorted_dipole(struct cell *ci, } } #endif - - dipole_add(&dip[l], parts_i[i].x, parts_i[i].mass, parts_i[i].d); + float da = 0.0f; + for (k = 0; k < 3; k++) da += parts_i[i].x[k] * axis[k]; + dipole_add(&dip[l], parts_i[i].x, parts_i[i].mass, da); } /* Set the new last_i to the last particle checked. */ @@ -1669,6 +1771,256 @@ static inline void iact_pair_direct_sorted_dipole(struct cell *ci, } } +static inline void iact_pair_direct_sorted_tripole(struct cell *ci, + struct cell *cj) { + + struct part_local { + float x[3]; + float a[3]; + float mass, d; + }; + + int i, j, k, l; + int count_i, count_j; + struct part_local *parts_i, *parts_j; + float cjh = cj->h; + float xi[3]; + float dx[3], ai[3], mi, mj, r2, w, ir; + +#ifdef SANITY_CHECKS + + /* Bad stuff will happen if cell sizes are different */ + if (ci->h != cj->h) + error("Non matching cell sizes !! h_i=%f h_j=%f\n", ci->h, cj->h); + +#endif + + /* Get the sorted indices and stuff. */ + struct index *ind_i, *ind_j; + struct tripole dip[4]; + float axis[3], orth1[4], orth2[4]; + int num_orth_planes = 0; + get_axis(&ci, &cj, &ind_i, &ind_j, axis, &num_orth_planes, orth1, orth2); + for (k = 0; k < (1 << num_orth_planes); k++) tripole_init(&dip[k], orth1, orth2); + cjh = cj->h; + + /* Allocate and fill-in the local parts. */ + count_i = ci->count; + count_j = cj->count; + if ((parts_i = + (struct part_local *)alloca(sizeof(struct part_local) *count_i)) == + NULL || + (parts_j = + (struct part_local *)alloca(sizeof(struct part_local) * count_j)) == + NULL) + error("Failed to allocate local part arrays."); + for (i = 0; i < count_i; i++) { + int pid = ind_i[i].ind; + parts_i[i].d = ind_i[i].d; + for (k = 0; k < 3; k++) { + parts_i[i].x[k] = ci->parts[pid].x[k] - cj->loc[k]; + parts_i[i].a[k] = 0.0f; + } + parts_i[i].mass = ci->parts[pid].mass; + } + for (j = 0; j < count_j; j++) { + int pjd = ind_j[j].ind; + parts_j[j].d = ind_j[j].d; + for (k = 0; k < 3; k++) { + parts_j[j].x[k] = cj->parts[pjd].x[k] - cj->loc[k]; + parts_j[j].a[k] = 0.0f; + } + parts_j[j].mass = cj->parts[pjd].mass; + } + +#if ICHECK >= 0 + for (k = 0; k < count_i; k++) + if (parts_i[k].id == ICHECK) + message("[DEBUG] interacting cells loc=[%f,%f,%f], h=%f and " + "loc=[%f,%f,%f], h=%f.", + ci->loc[0], ci->loc[1], ci->loc[2], ci->h, cj->loc[0], cj->loc[1], + cj->loc[2], cj->h); + for (k = 0; k < count_j; k++) + if (parts_j[k].id == ICHECK) + message("[DEBUG] interacting cells loc=[%f,%f,%f], h=%f and " + "loc=[%f,%f,%f], h=%f.", + cj->loc[0], cj->loc[1], cj->loc[2], cj->h, ci->loc[0], ci->loc[1], + ci->loc[2], ci->h); +#endif + + /* Distance along the axis as of which we will use a multipole. */ + float d_max = dist_cutoff_ratio * cjh; + + /* Loop over all particles in ci... */ + for (i = count_i - 1; i >= 0; i--) { + + /* Get a local copy of the distance along the axis. */ + float di = parts_i[i].d; + + /* Init the ith particle's data. */ + for (k = 0; k < 3; k++) { + xi[k] = parts_i[i].x[k]; + ai[k] = 0.0; + } + mi = parts_i[i].mass; + + /* Loop over every following particle within d_max along the axis. */ + for (j = 0; j < count_j && (parts_j[j].d - di) < d_max; j++) { + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = xi[k] - parts_j[j].x[k]; + r2 += dx[k] * dx[k]; + } + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir; + mj = parts_j[j].mass; + for (k = 0; k < 3; k++) { + float wdx = w * dx[k]; + parts_j[j].a[k] += wdx * mi; + ai[k] -= wdx * mj; + } + +#if ICHECK >= 0 + if (parts_i[i].id == ICHECK) + message("Interaction with part %d - d= %f", parts_j[j].id, sqrt(r2)); +#endif + +#ifdef COUNTERS + ++count_direct_sorted_pp; +#endif + +#if ICHECK >= 0 && 0 + if (parts_i[i].id == ICHECK) + printf("[NEW] Interaction with particle id= %d (pair i)\n", + parts_j[pjd].id); + + if (parts_j[j].id == ICHECK) + printf("[NEW] Interaction with particle id= %d (pair j) h_i= %f h_j= " + "%f ci= %p cj= %p count_i= %d count_j= %d d_i= %d d_j= %d\n", + parts_i[pid].id, ci->h, cj->h, ci, cj, count_i, count_j, ci->res, + cj->res); +#endif + + } /* loop over every other particle. */ + + /* Add any remaining particles to the COM. */ + for (int jj = j; jj < count_j; jj++) { + + l = 0; +#ifdef MANY_MULTIPOLES + if (num_orth_planes > 0) { + float n1 = parts_j[jj].x[0] * orth1[0] + parts_j[jj].x[1] * orth1[1] + + parts_j[jj].x[2] * orth1[2] + orth1[3]; + l = 2 * l + ((n1 < 0.0) ? 0 : 1); + if (num_orth_planes > 1) { + float n2 = parts_j[jj].x[0] * orth2[0] + parts_j[jj].x[1] * orth2[1] + + parts_j[jj].x[2] * orth2[2] + orth2[3]; + l = 2 * l + ((n2 < 0.0) ? 0 : 1); + } + } +#endif + + float da = 0.0f, db = 0.0f; + for (k = 0; k < 3; k++) { + da += parts_j[jj].x[k] * orth1[k]; + db += parts_j[jj].x[k] * orth2[k]; + } + tripole_add(&dip[l], parts_j[jj].x, parts_j[jj].mass, da, db); + } + + /* Shrink count_j to the latest valid particle. */ + count_j = j; + + /* Interact part_i with the center of mass. */ + for (l = 0; l < (1 << num_orth_planes); ++l) { + tripole_iact(&dip[l], xi, ai); +#if ICHECK >= 0 + if (parts_i[i].id == ICHECK) + message("Interaction with multipole"); //, parts_j[j].id ); +#endif + +#ifdef COUNTERS + ++count_direct_sorted_pm_i; +#endif + } + + /* Store the accumulated acceleration on the ith part. */ + for (k = 0; k < 3; k++) parts_i[i].a[k] += ai[k]; + + } /* loop over all particles in ci. */ + + /* Dump extensive info on the dipole. */ + /* message("dip[0] has x=[%e,%e,%e], axis=[%e,%e,%e], da=%e, da2=%e, mass=%e.", + dip[0].x[0], dip[0].x[1], dip[0].x[2], + dip[0].axis[0], dip[0].axis[1], dip[0].axis[2], + dip[0].da, dip[0].da2, dip[0].mass); + for (j = count_j; j < cj->count; j++) { + message(" particle x=[%e,%e,%e], mass=%e.", + parts_j[j].x[0], parts_j[j].x[1], parts_j[j].x[2], parts_j[j].mass); + } */ + + /* Re-init some values. */ + count_j = cj->count; + int last_i = 0; + for (l = 0; l < (1 << num_orth_planes); ++l) { + tripole_reset(&dip[l]); + } + + /* Loop over the particles in cj, catch the COM interactions. */ + for (j = 0; j < count_j; j++) { + + /* Get the sorted index. */ + float dj = parts_j[j].d; + + /* Fill the COM with any new particles. */ + for (i = last_i; i < count_i && (dj - parts_i[i].d) > d_max; i++) { + l = 0; +#ifdef MANY_MULTIPOLES + if (num_orth_planes > 0) { + float n1 = parts_i[i].x[0] * orth1[0] + parts_i[i].x[1] * orth1[1] + + parts_i[i].x[2] * orth1[2] + orth1[3]; + l = 2 * l + ((n1 < 0) ? 0 : 1); + if (num_orth_planes > 1) { + float n2 = parts_i[i].x[0] * orth2[0] + parts_i[i].x[1] * orth2[1] + + parts_i[i].x[2] * orth2[2] + orth2[3]; + l = 2 * l + ((n2 < 0) ? 0 : 1); + } + } +#endif + float da = 0.0f, db = 0.0f; + for (k = 0; k < 3; k++) { + da += parts_i[i].x[k] * orth1[k]; + db += parts_i[i].x[k] * orth2[k]; + } + tripole_add(&dip[l], parts_i[i].x, parts_i[i].mass, da, db); + } + + /* Set the new last_i to the last particle checked. */ + last_i = i; + + /* Interact part_j with the COM. */ + for (l = 0; l < (1 << num_orth_planes); ++l) { + tripole_iact(&dip[l], parts_j[j].x, parts_j[j].a); +#ifdef COUNTERS + ++count_direct_sorted_pm_j; +#endif + } + } + + /* Copy the accelerations back to the original particles. */ + for (i = 0; i < count_i; i++) { + int pid = ind_i[i].ind; + for (k = 0; k < 3; k++) ci->parts[pid].a[k] += parts_i[i].a[k]; + } + for (j = 0; j < count_j; j++) { + int pjd = ind_j[j].ind; + for (k = 0; k < 3; k++) cj->parts[pjd].a[k] += parts_j[j].a[k]; + } +} + /** * @brief Decides whether two cells use the direct summation interaction or the * multipole interactions