diff --git a/examples/test_bh.c b/examples/test_bh.c index 4c410a518c64d43e948fc4acbe6d6a3bfa95c062..70897a1689c621663985bc3efe8207511b4596a8 100644 --- a/examples/test_bh.c +++ b/examples/test_bh.c @@ -619,13 +619,13 @@ static inline void iact_self_pc(struct cell *c, struct cell *leaf) { * @param ci The #cell containing the particles. * @param cj The #cell containing the other particles */ -static inline void iact_pair_direct(struct cell *ci, struct cell *cj) { +static inline void iact_pair_direct_dp(struct cell *ci, struct cell *cj) { int i, j, k; int count_i = ci->count, count_j = cj->count; - struct part *parts_i = ci->parts, *parts_j = cj->parts; double xi[3]; float dx[3], ai[3], mi, mj, r2, w, ir; + struct part *parts_i = ci->parts, *parts_j = cj->parts; #ifdef SANITY_CHECKS @@ -688,6 +688,112 @@ static inline void iact_pair_direct(struct cell *ci, struct cell *cj) { } /* loop over all particles. */ } +static inline void iact_pair_direct(struct cell *ci, struct cell *cj) { + + int i, j, k; + int count_i = ci->count, count_j = cj->count; + float xi[3]; + float dx[3], ai[3], mi, mj, r2, w, ir; + double com[3]; + struct part_local { + float x[3]; + float a[3]; + float mass; + } parts_i[count_i], parts_j[count_j] __attribute__((aligned(64)));; + + +#ifdef SANITY_CHECKS + + /* Bad stuff will happen if cell sizes are different */ + if (ci->h != cj->h) + error("Non matching cell sizes !! h_i=%f h_j=%f\n", ci->h, cj->h); + +#endif + + /* Find the center point of the interaction. */ + for (k = 0; k < 3; k++) { + com[k] = 0.5 * (ci->loc[k] + cj->loc[k] + ci->h); + } + + /* Init the local copies of the particles. */ + for (k = 0; k < count_i; k++) { + for (j = 0; j < 3; j++) { + parts_i[k].x[j] = ci->parts[k].x[j] - com[j]; + parts_i[k].a[j] = 0.0f; + } + parts_i[k].mass = ci->parts[k].mass; + } + for (k = 0; k < count_j; k++) { + for (j = 0; j < 3; j++) { + parts_j[k].x[j] = cj->parts[k].x[j] - com[j]; + parts_j[k].a[j] = 0.0f; + } + parts_j[k].mass = ci->parts[k].mass; + } + + /* Loop over all particles in ci... */ + for (i = 0; i < count_i; i++) { + + /* Init the ith particle's data. */ + for (k = 0; k < 3; k++) { + xi[k] = parts_i[i].x[k]; + ai[k] = 0.0f; + } + mi = parts_i[i].mass; + + /* Loop over every particle in the other cell. */ + for (j = 0; j < count_j; j++) { + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = xi[k] - parts_j[j].x[k]; + r2 += dx[k] * dx[k]; + } + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir; + mj = parts_j[j].mass; + for (k = 0; k < 3; k++) { + float wdx = w * dx[k]; + parts_j[j].a[k] += wdx * mi; + ai[k] -= wdx * mj; + } + +#ifdef COUNTERS + ++count_direct_unsorted; +#endif + +#if ICHECK >= 0 && 0 + if (parts_i[i].id == ICHECK) + printf( + "[NEW] Interaction with particle id= %d (pair i) a=( %e %e %e )\n", + parts_j[j].id, -mi * w * dx[0], -mi * w * dx[1], -mi * w * dx[2]); + + if (parts_j[j].id == ICHECK) + printf( + "[NEW] Interaction with particle id= %d (pair j) a=( %e %e %e )\n", + parts_i[i].id, mj * w * dx[0], mj * w * dx[1], mj * w * dx[2]); +#endif + + } /* loop over every other particle. */ + + /* Store the accumulated acceleration on the ith part. */ + for (k = 0; k < 3; k++) parts_i[i].a[k] += ai[k]; + + } /* loop over all particles. */ + + /* Copy the local particle data back. */ + for (k = 0; k < count_i; k++) { + for (j = 0; j < 3; j++) + ci->parts[k].a[j] = parts_i[k].a[j]; + } + for (k = 0; k < count_j; k++) { + for (j = 0; j < 3; j++) + cj->parts[k].a[j] = parts_j[k].a[j]; + } +} + /** * @brief Decides whether two cells use the direct summation interaction or the * multipole interactions @@ -742,12 +848,92 @@ void iact_pair(struct cell *ci, struct cell *cj) { * * @param c The #cell. */ -void iact_self_direct(struct cell *c) { +void iact_self_direct_dp(struct cell *c) { int i, j, k, count = c->count; double xi[3] = {0.0, 0.0, 0.0}; float ai[3] = {0.0, 0.0, 0.0}, mi, mj, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; + struct cell *cp, *cps; struct part *parts = c->parts; + +#ifdef SANITY_CHECKS + + /* Early abort? */ + if (count == 0) error("Empty cell !"); + +#endif + + /* If the cell is split, interact each progeny with itself, and with + each of its siblings. */ + if (c->split) { + for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { + iact_self_direct_dp(cp); + for (cps = cp->sibling; cps != c->sibling; cps = cps->sibling) + iact_pair(cp, cps); + } + + /* Otherwise, compute the interactions directly. */ + } else { + + /* Loop over all particles... */ + for (i = 0; i < count; i++) { + + /* Init the ith particle's data. */ + for (k = 0; k < 3; k++) { + xi[k] = parts[i].x[k]; + ai[k] = 0.0; + } + mi = parts[i].mass; + + /* Loop over every following particle. */ + for (j = i + 1; j < count; j++) { + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = xi[k] - parts[j].x[k]; + r2 += dx[k] * dx[k]; + } + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir; + mj = parts[j].mass; + for (k = 0; k < 3; k++) { + float wdx = w * dx[k]; + parts[j].a[k] += wdx * mi; + ai[k] -= wdx * mj; + } + +#if ICHECK >= 0 + if (parts[i].id == ICHECK) + message("[NEW] Interaction with particle id= %d (self i)", + parts[j].id); + + if (parts[j].id == ICHECK) + message("[NEW] Interaction with particle id= %d (self j)", + parts[i].id); +#endif + + } /* loop over every other particle. */ + + /* Store the accumulated acceleration on the ith part. */ + for (k = 0; k < 3; k++) parts[i].a[k] += ai[k]; + + } /* loop over all particles. */ + + } /* otherwise, compute interactions directly. */ +} + +void iact_self_direct(struct cell *c) { + int i, j, k, count = c->count; + float xi[3] = {0.0, 0.0, 0.0}; + float ai[3] = {0.0, 0.0, 0.0}, mi, mj, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; struct cell *cp, *cps; + struct part_local { + float x[3]; + float a[3]; + float mass; + } parts[count] __attribute__((aligned(64)));; + double loc[3]; #ifdef SANITY_CHECKS @@ -768,6 +954,16 @@ void iact_self_direct(struct cell *c) { /* Otherwise, compute the interactions directly. */ } else { + /* Init the local copies of the particles. */ + loc[0] = c->loc[0]; loc[1] = c->loc[1]; loc[2] = c->loc[2]; + for (k = 0; k < count; k++) { + for (j = 0; j < 3; j++) { + parts[k].x[j] = c->parts[k].x[j] - loc[j]; + parts[k].a[j] = 0.0f; + } + parts[k].mass = c->parts[k].mass; + } + /* Loop over all particles... */ for (i = 0; i < count; i++) { @@ -814,6 +1010,11 @@ void iact_self_direct(struct cell *c) { } /* loop over all particles. */ + /* Copy the local particle data back. */ + for (k = 0; k < count; k++) { + for (j = 0; j < 3; j++) + c->parts[k].a[j] = parts[k].a[j]; + } } /* otherwise, compute interactions directly. */ } @@ -1212,8 +1413,7 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { for (k = 0; k < task_type_count; k++) task_timers[k] = 0; /* Initialize the scheduler. */ - qsched_init(&s, nr_threads, qsched_flag_noreown | - qsched_flag_pthread); + qsched_init(&s, nr_threads, qsched_flag_noreown); /* Init and fill the particle array. */ if ((parts = (struct part *)malloc(sizeof(struct part) * N)) == NULL)