diff --git a/examples/Makefile.am b/examples/Makefile.am index 11013c9414642beb4672e8cec435854ef39507ec..e9a7a59112fa3cea392ef5c7b035454b00aa4884 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -31,7 +31,7 @@ AM_CFLAGS = -g -O3 -Wall -Werror -I../src -ffast-math -fstrict-aliasing \ AM_LDFLAGS = -lm # -fsanitize=address # Set-up the library -bin_PROGRAMS = test test_qr test_bh test_bh_2 test_bh_3 test_bh_sorted +bin_PROGRAMS = test test_qr test_bh test_bh_sorted # Sources for test test_SOURCES = test.c @@ -48,16 +48,6 @@ test_bh_SOURCES = test_bh.c test_bh_CFLAGS = $(AM_CFLAGS) test_bh_LDADD = ../src/.libs/libquicksched.a -# Sources for test_bh_2 -test_bh_2_SOURCES = test_bh_2.c -test_bh_2_CFLAGS = $(AM_CFLAGS) -test_bh_2_LDADD = ../src/.libs/libquicksched.a - -# Sources for test_bh_3 -test_bh_3_SOURCES = test_bh_3.c -test_bh_3_CFLAGS = $(AM_CFLAGS) -test_bh_3_LDADD = ../src/.libs/libquicksched.a - # Sources for test_bh_sorted test_bh_sorted_SOURCES = test_bh_sorted.c test_bh_sorted_CFLAGS = $(AM_CFLAGS) diff --git a/examples/test_bh.c b/examples/test_bh.c new file mode 100644 index 0000000000000000000000000000000000000000..63b592c18cebf23d080258cd43530d72d6947904 --- /dev/null +++ b/examples/test_bh.c @@ -0,0 +1,1515 @@ +/******************************************************************************* + * This file is part of QuickSched. + * Coypright (c) 2014 Pedro Gonnet (pedro.gonnet@durham.ac.uk), + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * +* *****************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Standard includes. */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <math.h> +#include <float.h> +#include <limits.h> +#include <omp.h> +#include <fenv.h> + +/* Local includes. */ +#include "quicksched.h" + +/* Some local constants. */ +#define cell_pool_grow 1000 +#define cell_maxparts 1 +#define task_limit 1e8 +#define const_G 1 // 6.6738e-8 +#define dist_min 0.5 /* Used for legacy walk only */ +#define dist_cutoff_ratio 1.5 +#define iact_pair_direct iact_pair_direct_sorted + +#define ICHECK -1 +#define NO_SANITY_CHECKS +#define NO_COM_AS_TASK +#define COUNTERS +#define MANY_MULTIPOLES + +/** Data structure for the particles. */ +struct part { + double x[3]; + // union { + float a[3]; + float a_legacy[3]; + float a_exact[3]; + // }; + float mass; + int id; +}; // __attribute__((aligned(64))); + +/** Data structure for the sorted particle positions. */ +struct index { + int ind; + float d; +}; + +struct multipole { + double com[3]; + float mass; +}; + +/** Data structure for the BH tree cell. */ +struct cell { + double loc[3]; + double h; + + int count; + unsigned short int split, sorted; + struct part *parts; + + struct cell *firstchild; /* Next node if opening */ + struct cell *sibling; /* Next node */ + + /* We keep both CoMs and masses to make sure the comp_com calculation is + * correct (use an anonymous union to keep variable names compact). */ + union { + + /* Information for the legacy walk */ + struct multipole legacy; + + /* Information for the QuickShed walk */ + struct multipole new; + }; + + int res, com_tid; + struct index *indices; + +} __attribute__((aligned(128))); + +/** Task types. */ +enum task_type { + task_type_self = 0, + task_type_pair, + task_type_self_pc, + task_type_pair_direct, + task_type_com, + task_type_count +}; + +#ifdef COUNTERS +int count_direct_unsorted; +int count_direct_sorted_pp; +int count_direct_sorted_pm_i; +int count_direct_sorted_pm_j; +#endif + +/** Per-type timers. */ +ticks task_timers[task_type_count]; + +/** Global variable for the pool of allocated cells. */ +struct cell *cell_pool = NULL; + +/** + * @brief Get a #cell from the pool. + */ +struct cell *cell_get() { + + struct cell *res; + int k; + + /* Allocate a new batch? */ + if (cell_pool == NULL) { + + /* Allocate the cell array. */ + if ((cell_pool = + (struct cell *)calloc(cell_pool_grow, sizeof(struct cell))) == + NULL) + error("Failed to allocate fresh batch of cells."); + + /* Link them up via their progeny pointers. */ + for (k = 1; k < cell_pool_grow; k++) + cell_pool[k - 1].firstchild = &cell_pool[k]; + } + + /* Pick a cell off the pool. */ + res = cell_pool; + cell_pool = cell_pool->firstchild; + + /* Clean up a few things. */ + res->res = qsched_res_none; + res->firstchild = 0; + + /* Return the cell. */ + return res; +} + +/** + * @brief Compute the center of mass of a given cell. + * + * @param c The #cell. + */ +void comp_com(struct cell *c) { + + int k, count = c->count; + struct cell *cp; + struct part *parts = c->parts; + double com[3] = {0.0, 0.0, 0.0}, mass = 0.0; + + if (c->split) { + + /* Loop over the projenitors and collect the multipole information. */ + for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { + float cp_mass = cp->new.mass; + com[0] += cp->new.com[0] * cp_mass; + com[1] += cp->new.com[1] * cp_mass; + com[2] += cp->new.com[2] * cp_mass; + mass += cp_mass; + } + + /* Otherwise, collect the multipole from the particles. */ + } else { + + for (k = 0; k < count; k++) { + float p_mass = parts[k].mass; + com[0] += parts[k].x[0] * p_mass; + com[1] += parts[k].x[1] * p_mass; + com[2] += parts[k].x[2] * p_mass; + mass += p_mass; + } + } + + /* Store the COM data, if any was collected. */ + if (mass > 0.0) { + float imass = 1.0f / mass; + c->new.com[0] = com[0] * imass; + c->new.com[1] = com[1] * imass; + c->new.com[2] = com[2] * imass; + c->new.mass = mass; + } else { + c->new.com[0] = 0.0; + c->new.com[1] = 0.0; + c->new.com[2] = 0.0; + c->new.mass = 0.0; + } +} + +/** + * @brief Sort the parts into eight bins along the given pivots and + * fill the multipoles. Also adds the hierarchical resources + * to the sched. + * + * @param c The #cell to be split. + * @param N The total number of parts. + * @param s The #sched to store the resources. + */ +void cell_split(struct cell *c, struct qsched *s) { + + int i, j, k, kk, count = c->count; + struct part temp, *parts = c->parts; + struct cell *cp; + int left[8], right[8]; + double pivot[3]; + static struct cell *root = NULL; + struct cell *progenitors[8]; + + /* Set the root cell. */ + if (root == NULL) { + root = c; + c->sibling = 0; + } + + /* Add a resource for this cell if it doesn't have one yet. */ + if (c->res == qsched_res_none) + c->res = qsched_addres(s, qsched_owner_none, qsched_res_none); + +/* Attach a center-of-mass task to the cell. */ +#ifdef COM_AS_TASK + if (count > 0) + c->com_tid = qsched_addtask(s, task_type_com, task_flag_none, &c, + sizeof(struct cell *), 1); +#endif + + /* Does this cell need to be split? */ + if (count > cell_maxparts) { + + /* Mark this cell as split. */ + c->split = 1; + + /* Create the progeny. */ + for (k = 0; k < 8; k++) { + progenitors[k] = cp = cell_get(); + cp->loc[0] = c->loc[0]; + cp->loc[1] = c->loc[1]; + cp->loc[2] = c->loc[2]; + cp->h = c->h * 0.5; + cp->res = qsched_addres(s, qsched_owner_none, c->res); + if (k & 4) cp->loc[0] += cp->h; + if (k & 2) cp->loc[1] += cp->h; + if (k & 1) cp->loc[2] += cp->h; + } + + /* Init the pivots. */ + for (k = 0; k < 3; k++) pivot[k] = c->loc[k] + c->h * 0.5; + + /* Split along the x-axis. */ + i = 0; + j = count - 1; + while (i <= j) { + while (i <= count - 1 && parts[i].x[0] < pivot[0]) i += 1; + while (j >= 0 && parts[j].x[0] >= pivot[0]) j -= 1; + if (i < j) { + temp = parts[i]; + parts[i] = parts[j]; + parts[j] = temp; + } + } + left[1] = i; + right[1] = count - 1; + left[0] = 0; + right[0] = j; + + /* Split along the y axis, twice. */ + for (k = 1; k >= 0; k--) { + i = left[k]; + j = right[k]; + while (i <= j) { + while (i <= right[k] && parts[i].x[1] < pivot[1]) i += 1; + while (j >= left[k] && parts[j].x[1] >= pivot[1]) j -= 1; + if (i < j) { + temp = parts[i]; + parts[i] = parts[j]; + parts[j] = temp; + } + } + left[2 * k + 1] = i; + right[2 * k + 1] = right[k]; + left[2 * k] = left[k]; + right[2 * k] = j; + } + + /* Split along the z axis, four times. */ + for (k = 3; k >= 0; k--) { + i = left[k]; + j = right[k]; + while (i <= j) { + while (i <= right[k] && parts[i].x[2] < pivot[2]) i += 1; + while (j >= left[k] && parts[j].x[2] >= pivot[2]) j -= 1; + if (i < j) { + temp = parts[i]; + parts[i] = parts[j]; + parts[j] = temp; + } + } + left[2 * k + 1] = i; + right[2 * k + 1] = right[k]; + left[2 * k] = left[k]; + right[2 * k] = j; + } + + /* Store the counts and offsets. */ + for (k = 0; k < 8; k++) { + progenitors[k]->count = right[k] - left[k] + 1; + progenitors[k]->parts = &c->parts[left[k]]; + } + + /* Find the first non-empty progenitor */ + for (k = 0; k < 8; k++) + if (progenitors[k]->count > 0) { + c->firstchild = progenitors[k]; + break; + } + +#ifdef SANITY_CHECKS + if (c->firstchild == NULL) + error("Cell has been split but all progenitors have 0 particles"); +#endif + + /* Prepare the pointers. */ + for (k = 0; k < 8; k++) { + + /* Find the next non-empty sibling */ + for (kk = k + 1; kk < 8; ++kk) { + if (progenitors[kk]->count > 0) { + progenitors[k]->sibling = progenitors[kk]; + break; + } + } + + /* No non-empty sibling ? Go back a level */ + if (kk == 8) progenitors[k]->sibling = c->sibling; + } + + /* Recurse. */ + for (k = 0; k < 8; k++) + if (progenitors[k]->count > 0) cell_split(progenitors[k], s); + +/* Link the COM tasks. */ +#ifdef COM_AS_TASK + for (k = 0; k < 8; k++) + if (progenitors[k]->count > 0) + qsched_addunlock(s, progenitors[k]->com_tid, c->com_tid); +#endif + + /* Otherwise, we're at a leaf, so create the cell's particle-cell task. */ + } else { + struct cell *data[2] = {root, c}; + int tid = qsched_addtask(s, task_type_self_pc, task_flag_none, data, + 2 * sizeof(struct cell *), 1); + qsched_addlock(s, tid, c->res); +#ifdef COM_AS_TASK + qsched_addunlock(s, root->com_tid, tid); +#endif + } /* does the cell need to be split? */ + +/* Compute the cell's center of mass. */ +#ifndef COM_AS_TASK + comp_com(c); +#endif + + /* Set this cell's resources ownership. */ + qsched_res_own(s, c->res, + s->nr_queues * (c->parts - root->parts) / root->count); +} + +/* -------------------------------------------------------------------------- */ +/* New tree walk */ +/* -------------------------------------------------------------------------- */ + +/** + * @brief Interacts all particles in ci with the monopole in cj + */ +static inline void make_interact_pc(struct cell *leaf, struct cell *cj) { + + int j, k; + double com[3] = {0.0, 0.0, 0.0}; + float mcom, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; + +#ifdef SANITY_CHECKS + + /* Sanity checks */ + if (leaf->count == 0) error("Empty cell!"); + + /* Sanity check. */ + if (cj->new.mass == 0.0) { + message("%e %e %e %d %p %d %p", cj->new.com[0], cj->new.com[1], + cj->new.com[2], cj->count, cj, cj->split, cj->sibling); + + for (j = 0; j < cj->count; ++j) + message("part %d mass=%e id=%d", j, cj->parts[j].mass, cj->parts[j].id); + + error("com does not seem to have been set."); + } + +#endif + + /* Init the com's data. */ + for (k = 0; k < 3; k++) com[k] = cj->new.com[k]; + mcom = cj->new.mass; + + /* Loop over every particle in leaf. */ + for (j = 0; j < leaf->count; j++) { + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = com[k] - leaf->parts[j].x[k]; + r2 += dx[k] * dx[k]; + } + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = mcom * const_G * ir * ir * ir; + for (k = 0; k < 3; k++) leaf->parts[j].a[k] += w * dx[k]; + +#if ICHECK >= 0 + if (leaf->parts[j].id == ICHECK) + printf("[DEBUG] cell [%f,%f,%f] interacting with cj->loc=[%f,%f,%f] " + "m=%f h=%f\n", + leaf->loc[0], leaf->loc[1], leaf->loc[2], cj->loc[0], cj->loc[1], + cj->loc[2], mcom, cj->h); + + if (leaf->parts[j].id == ICHECK) + printf("[NEW] Interaction with monopole a=( %e %e %e ) h= %f Nj= %d m_j= " + "%f\n", + w * dx[0], w * dx[1], w * dx[2], cj->h, cj->count, mcom); +#endif + + } /* loop over every particle. */ +} + +/** + * @brief Checks whether the cell leaf is a subvolume of the cell c + */ +static inline int is_inside(struct cell *leaf, struct cell *c) { + return (leaf->parts >= c->parts) && (leaf->parts < c->parts + c->count); +} + +/** + * @brief Checks whether the cells are direct neighbours ot not + */ +static inline int are_neighbours_different_size(struct cell *ci, + struct cell *cj) { + + int k; + float dx[3]; + double cih = ci->h, cjh = cj->h; + + /* Maximum allowed distance */ + float min_dist = 0.5 * (cih + cjh); + + /* (Manhattan) Distance between the cells */ + for (k = 0; k < 3; k++) { + float center_i = ci->loc[k] + 0.5 * cih; + float center_j = cj->loc[k] + 0.5 * cjh; + dx[k] = fabs(center_i - center_j); + } + + return (dx[0] <= min_dist) && (dx[1] <= min_dist) && (dx[2] <= min_dist); +} + +/** + * @brief Checks whether the cells are direct neighbours ot not. Both cells have + * to be of the same size + */ +static inline int are_neighbours(struct cell *ci, struct cell *cj) { + + int k; + float dx[3]; + +#ifdef SANITY_CHECKS + if (ci->h != cj->h) + error(" Cells of different size in distance calculation."); +#endif + + /* Maximum allowed distance */ + float min_dist = ci->h; + + /* (Manhattan) Distance between the cells */ + for (k = 0; k < 3; k++) { + float center_i = ci->loc[k]; + float center_j = cj->loc[k]; + dx[k] = fabs(center_i - center_j); + } + + return (dx[0] <= min_dist) && (dx[1] <= min_dist) && (dx[2] <= min_dist); +} + +/** + * @brief Compute the interactions between all particles in a cell leaf + * and the center of mass of all the cells in a part of the tree + * described by ci and cj + * + * @param ci The #cell containing the particle + * @param cj The #cell containing the center of mass. + */ +static inline void iact_pair_pc(struct cell *ci, struct cell *cj, + struct cell *leaf) { + + struct cell *cp, *cps; + +#ifdef SANITY_CHECKS + + /* Early abort? */ + if (ci->count == 0 || cj->count == 0) error("Empty cell !"); + + /* Sanity check */ + if (ci == cj) + error("The impossible has happened: pair interaction between a cell and " + "itself."); + + /* Sanity check */ + if (!is_inside(leaf, ci)) + error("The impossible has happened: The leaf is not within ci"); + + /* Are the cells direct neighbours? */ + if (!are_neighbours(ci, cj)) error("Cells are not neighours"); + + /* Are both cells split ? */ + if (!ci->split || !cj->split) error("One of the cells is not split !"); +#endif + + /* Let's find in which subcell of ci the leaf is */ + for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) { + + if (is_inside(leaf, cp)) break; + } + + if (are_neighbours_different_size(cp, cj)) { + + /* Now interact this subcell with all subcells of cj */ + for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) { + + /* Check whether we have to recurse or can directly jump to the multipole + * calculation */ + if (are_neighbours(cp, cps)) { + + /* We only recurse if the children are split */ + if (cp->split && cps->split) { + iact_pair_pc(cp, cps, leaf); + } + + } else { + make_interact_pc(leaf, cps); + } + } + } else { + + /* If cp is not a neoghbour of cj, we can directly interact with the + * multipoles */ + for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) { + + make_interact_pc(leaf, cps); + } + } +} + +/** + * @brief Compute the interactions between all particles in a leaf and + * and all the monopoles in the cell c + * + * @param c The #cell containing the monopoles + * @param leaf The #cell containing the particles + */ +static inline void iact_self_pc(struct cell *c, struct cell *leaf) { + + struct cell *cp, *cps; + +#ifdef SANITY_CHECKS + + /* Early abort? */ + if (c->count == 0) error("Empty cell !"); + + if (!c->split) error("Cell is not split !"); + +#endif + + /* Find in which subcell of c the leaf is */ + for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { + + /* Only recurse if the leaf is in this part of the tree */ + if (is_inside(leaf, cp)) break; + } + + if (cp->split) { + + /* Recurse if the cell can be split */ + iact_self_pc(cp, leaf); + + /* Now, interact with every other subcell */ + for (cps = c->firstchild; cps != c->sibling; cps = cps->sibling) { + + /* Since cp and cps will be direct neighbours it is only worth recursing + */ + /* if the cells can both be split */ + if (cp != cps && cps->split) iact_pair_pc(cp, cps, leaf); + } + } +} + +/** + * @brief Starts the recursive tree walk of a given leaf + */ +void init_multipole_walk(struct cell *root, struct cell *leaf) { + + /* Pre-fetch the leaf's particles */ + __builtin_prefetch(leaf->parts, 1, 3); + + /* Start the recursion */ + iact_self_pc(root, leaf); +} + +/** + * @brief Compute the interactions between all particles in a cell + * and all particles in the other cell (N^2 algorithm) + * + * @param ci The #cell containing the particles. + * @param cj The #cell containing the other particles + */ +static inline void iact_pair_direct(struct cell *ci, struct cell *cj) { + + int i, j, k; + int count_i = ci->count, count_j = cj->count; + struct part *parts_i = ci->parts, *parts_j = cj->parts; + double xi[3]; + float dx[3], ai[3], mi, mj, r2, w, ir; + +#ifdef SANITY_CHECKS + + /* Bad stuff will happen if cell sizes are different */ + if (ci->h != cj->h) + error("Non matching cell sizes !! h_i=%f h_j=%f\n", ci->h, cj->h); + +#endif + + /* Loop over all particles in ci... */ + for (i = 0; i < count_i; i++) { + + /* Init the ith particle's data. */ + for (k = 0; k < 3; k++) { + xi[k] = parts_i[i].x[k]; + ai[k] = 0.0f; + } + mi = parts_i[i].mass; + + /* Loop over every particle in the other cell. */ + for (j = 0; j < count_j; j++) { + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = xi[k] - parts_j[j].x[k]; + r2 += dx[k] * dx[k]; + } + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir; + mj = parts_j[j].mass; + for (k = 0; k < 3; k++) { + float wdx = w * dx[k]; + parts_j[j].a[k] += wdx * mi; + ai[k] -= wdx * mj; + } + +#ifdef COUNTERS + ++count_direct_unsorted; +#endif + +#if ICHECK >= 0 && 0 + if (parts_i[i].id == ICHECK) + printf( + "[NEW] Interaction with particle id= %d (pair i) a=( %e %e %e )\n", + parts_j[j].id, -mi * w * dx[0], -mi * w * dx[1], -mi * w * dx[2]); + + if (parts_j[j].id == ICHECK) + printf( + "[NEW] Interaction with particle id= %d (pair j) a=( %e %e %e )\n", + parts_i[i].id, mj * w * dx[0], mj * w * dx[1], mj * w * dx[2]); +#endif + + } /* loop over every other particle. */ + + /* Store the accumulated acceleration on the ith part. */ + for (k = 0; k < 3; k++) parts_i[i].a[k] += ai[k]; + + } /* loop over all particles. */ +} + +/** + * @brief Decides whether two cells use the direct summation interaction or the + * multipole interactions + * + * @param ci The #cell. + * @param cj The other #cell. + */ +void iact_pair(struct cell *ci, struct cell *cj) { + + struct cell *cp, *cps; + +#ifdef SANITY_CHECKS + + /* Early abort? */ + if (ci->count == 0 || cj->count == 0) error("Empty cell !"); + + /* Bad stuff will happen if cell sizes are different */ + if (ci->h != cj->h) + error("Non matching cell sizes !! h_i=%f h_j=%f\n", ci->h, cj->h); + + /* Sanity check */ + if (ci == cj) + error("The impossible has happened: pair interaction between a cell and " + "itself."); + +#endif + + /* Are the cells direct neighbours? */ + if (are_neighbours(ci, cj)) { + + /* Are both cells split ? */ + if (ci->split && cj->split) { + + /* Let's split both cells and build all possible pairs */ + for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) { + for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) { + + /* If the cells are neighbours, recurse. */ + if (are_neighbours(cp, cps)) { + iact_pair(cp, cps); + } + } + } + } else {/* Otherwise, compute the interactions at this level directly. */ + iact_pair_direct(ci, cj); + } + } +} + +/** + * @brief Compute the interactions between all particles in a cell. + * + * @param c The #cell. + */ +void iact_self_direct(struct cell *c) { + int i, j, k, count = c->count; + double xi[3] = {0.0, 0.0, 0.0}; + float ai[3] = {0.0, 0.0, 0.0}, mi, mj, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; + struct part *parts = c->parts; + struct cell *cp, *cps; + +#ifdef SANITY_CHECKS + + /* Early abort? */ + if (count == 0) error("Empty cell !"); + +#endif + + /* If the cell is split, interact each progeny with itself, and with + each of its siblings. */ + if (c->split) { + for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { + iact_self_direct(cp); + for (cps = cp->sibling; cps != c->sibling; cps = cps->sibling) + iact_pair(cp, cps); + } + + /* Otherwise, compute the interactions directly. */ + } else { + + /* Loop over all particles... */ + for (i = 0; i < count; i++) { + + /* Init the ith particle's data. */ + for (k = 0; k < 3; k++) { + xi[k] = parts[i].x[k]; + ai[k] = 0.0; + } + mi = parts[i].mass; + + /* Loop over every following particle. */ + for (j = i + 1; j < count; j++) { + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = xi[k] - parts[j].x[k]; + r2 += dx[k] * dx[k]; + } + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir; + mj = parts[j].mass; + for (k = 0; k < 3; k++) { + float wdx = w * dx[k]; + parts[j].a[k] += wdx * mi; + ai[k] -= wdx * mj; + } + +#if ICHECK >= 0 + if (parts[i].id == ICHECK) + message("[NEW] Interaction with particle id= %d (self i)", + parts[j].id); + + if (parts[j].id == ICHECK) + message("[NEW] Interaction with particle id= %d (self j)", + parts[i].id); +#endif + + } /* loop over every other particle. */ + + /* Store the accumulated acceleration on the ith part. */ + for (k = 0; k < 3; k++) parts[i].a[k] += ai[k]; + + } /* loop over all particles. */ + + } /* otherwise, compute interactions directly. */ +} + +/** + * @brief Create the tasks for the cell pair/self. + * + * @param s The #sched in which to create the tasks. + * @param ci The first #cell. + * @param cj The second #cell. + */ +void create_tasks(struct qsched *s, struct cell *ci, struct cell *cj) { + + qsched_task_t tid; + struct cell *data[2], *cp, *cps; + +#ifdef SANITY_CHECKS + + /* If either cell is empty, stop. */ + if (ci->count == 0 || (cj != NULL && cj->count == 0)) error("Empty cell !"); + +#endif + + /* Single cell? */ + if (cj == NULL) { + + /* Is this cell split and above the task limit ? */ + if (ci->split && ci->count > task_limit / ci->count) { + + /* Loop over each of this cell's progeny. */ + for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) { + + /* Make self-interaction task. */ + create_tasks(s, cp, NULL); + + /* Make all pair-interaction tasks. */ + for (cps = cp->sibling; cps != ci->sibling; cps = cps->sibling) + create_tasks(s, cp, cps); + } + + /* Otherwise, add a self-interaction task. */ + } else { + + /* Set the data. */ + data[0] = ci; + data[1] = NULL; + + /* Create the task. */ + tid = + qsched_addtask(s, task_type_self, task_flag_none, data, + sizeof(struct cell *) * 2, ci->count * ci->count / 2); + + /* Add the resource (i.e. the cell) to the new task. */ + qsched_addlock(s, tid, ci->res); + +/* If this call might recurse, add a dependency on the cell's COM + task. */ +#ifdef COM_AS_TASK + if (ci->split) qsched_addunlock(s, ci->com_tid, tid); +#endif + } + + /* Otherwise, it's a pair. */ + } else { + + /* Are the cells NOT neighbours ? */ + if (!are_neighbours(ci, cj)) { + + } else {/* Cells are direct neighbours */ + + /* Are both cells split ? */ + if (ci->split && cj->split && ci->count > task_limit / cj->count) { + + /* Let's split both cells and build all possible pairs */ + for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) { + for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) { + /* Recurse */ + create_tasks(s, cp, cps); + } + } + /* Otherwise, at least one of the cells is not split, build a direct + * interaction. */ + } else { + + /* Set the data. */ + data[0] = ci; + data[1] = cj; + + /* Create the task. */ + tid = qsched_addtask(s, task_type_pair, task_flag_none, data, + sizeof(struct cell *) * 2, ci->count * cj->count); + + /* Add the resources. */ + qsched_addlock(s, tid, ci->res); + qsched_addlock(s, tid, cj->res); + } + } /* Cells are direct neighbours */ + } /* Otherwise it's a pair */ +} + +/* -------------------------------------------------------------------------- */ +/* Legacy tree walk */ +/* -------------------------------------------------------------------------- */ + +/** + * @brief Compute the center of mass of a given cell recursively. + * + * @param c The #cell. + */ +void legacy_comp_com(struct cell *c, int *countCoMs) { + + int k, count = c->count; + struct cell *cp; + struct part *p, *parts = c->parts; + double com[3] = {0.0, 0.0, 0.0}, mass = 0.0; + + ++(*countCoMs); + + /* Is the cell split? */ + if (c->split) { + + /* Loop over the progeny. */ + for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { + /* Recurse */ + legacy_comp_com(cp, countCoMs); + + /* Collect multipole information */ + float cp_mass = cp->legacy.mass; + com[0] += cp->legacy.com[0] * cp_mass; + com[1] += cp->legacy.com[1] * cp_mass; + com[2] += cp->legacy.com[2] * cp_mass; + mass += cp_mass; + } + + /* Otherwise, collect the multipole from local data. */ + } else { + + for (k = 0; k < count; k++) { + p = &parts[k]; + float p_mass = p->mass; + com[0] += p->x[0] * p_mass; + com[1] += p->x[1] * p_mass; + com[2] += p->x[2] * p_mass; + mass += p_mass; + } + } + + /* Finish multipole calculation */ + if (mass > 0.0) { + float imass = 1.0f / mass; + c->legacy.com[0] = com[0] * imass; + c->legacy.com[1] = com[1] * imass; + c->legacy.com[2] = com[2] * imass; + c->legacy.mass = mass; + } else { + c->legacy.com[0] = 0.0; + c->legacy.com[1] = 0.0; + c->legacy.com[2] = 0.0; + c->legacy.mass = 0.0; + } +} + +/** + * @brief Interacts a particle with a cell recursively using the original B-H + * tree walk procedure + * + * @param parts The array of particles + * @param i The particle of interest + * @param root The root of the tree under which we will search. + * @param monitor If set to @c parts[i].id, will produce debug output when + * ICHECK is set. + * @param cell The cell the particle interacts with + */ +void legacy_interact(struct part *parts, int i, struct cell *root, int monitor, + int *countMultipoles, int *countPairs) { + + int j, k; + float r2, dx[3], ir, w; + float a[3] = {0.0, 0.0, 0.0}; + double pix[3] = {parts[i].x[0], parts[i].x[1], parts[i].x[2]}; + int pid = parts[i].id; + struct cell *cell = root; + + /* Traverse the cells of the tree. */ + while (cell != NULL) { + + /* Are we in a leaf ? */ + if (!cell->split) { + + /* Interact the particle with the particles in the leaf */ + for (j = 0; j < cell->count; ++j) { + if (cell->parts[j].id == pid) continue; + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = cell->parts[j].x[k] - pix[k]; + r2 += dx[k] * dx[k]; + } + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = cell->parts[j].mass * const_G * ir * ir * ir; + for (k = 0; k < 3; k++) a[k] += w * dx[k]; + + (*countPairs)++; + +#if ICHECK >= 0 + if (pid == monitor) + message("[BH_] Interaction with particle id= %d a=( %e %e %e ) h= %f " + "loc=( %e %e %e )\n", + cell->parts[j].id, w * dx[0], w * dx[1], w * dx[2], cell->h, + cell->loc[0], cell->loc[1], cell->loc[2]); +#endif + } + + cell = cell->sibling; + } else { + + /* We are in a node */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = cell->legacy.com[k] - pix[k]; + r2 += dx[k] * dx[k]; + } + +#if ICHECK >= 0 + if (pid == monitor) + message("This is a node with %d particles h= %f. r= %f theta= %f", + cell->count, cell->h, sqrt(r2), dist_min); +#endif + + /* Is the cell far enough ? */ + if (dist_min * dist_min * r2 < cell->h * cell->h) { + +#if ICHECK >= 0 + if (pid == monitor) printf("Recursing...\n"); +#endif + cell = cell->firstchild; + continue; + } + +#if ICHECK >= 0 + if (pid == monitor) + message("[BH_] Can interact with the monopole. x= %f %f %f m= %f h= %f", + cell->legacy.com[0], cell->legacy.com[1], cell->legacy.com[2], + cell->legacy.mass, cell->h); +#endif + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = cell->legacy.mass * const_G * ir * ir * ir; + for (k = 0; k < 3; k++) a[k] += w * dx[k]; + + (*countMultipoles)++; + + /* Move to the next node */ + cell = cell->sibling; + } + } + + /* Store the locally computed acceleration back to the particle. */ + for (k = 0; k < 3; k++) parts[i].a_legacy[k] += a[k]; +} + +/** + * @brief Does a tree walk as in the B-H original work for all particles + * + * @param N The number of particles + * @param parts The array of particles + * @param root The root cell of the tree + * @param monitor ID of the particle to monitor and output interactions to + * stdout + */ +void legacy_tree_walk(int N, struct part *parts, struct cell *root, int monitor, + int *countMultipoles, int *countPairs, int *countCoMs) { + + int i; + + /* Compute multipoles (recursively) */ + legacy_comp_com(root, countCoMs); + + //#pragma omp parallel for + for (i = 0; i < N; ++i) { + if (parts[i].id == monitor) + message("tree walk for particle %d x= %f %f %f", parts[i].id, + parts[i].x[0], parts[i].x[1], parts[i].x[2]); + + legacy_interact(parts, i, root, monitor, countMultipoles, countPairs); + + if (parts[i].id == monitor) + message("[check] legacy acceleration for particle %d a= %.3e %.3e %.3e", + parts[i].id, parts[i].a_legacy[0], parts[i].a_legacy[1], + parts[i].a_legacy[2]); + } +} + +/* -------------------------------------------------------------------------- */ +/* Exact interaction */ +/* -------------------------------------------------------------------------- */ + +/** + * @brief Solve the particle interactions using the stupid N^2 algorithm + * + * @param N The number of particles + * @param parts The array of particles + */ +void interact_exact(int N, struct part *parts, int monitor) { + + int i, j, k; + float ir, w, r2, dx[3]; + + /* Loop over all particles. */ + for (i = 0; i < N; ++i) { + + /* Some things to store locally. */ + double pix[3] = {parts[i].x[0], parts[i].x[1], parts[i].x[2]}; + float mi = parts[i].mass; + + /* Loop over every other particle. */ + for (j = i + 1; j < N; ++j) { + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = parts[j].x[k] - pix[k]; + r2 += dx[k] * dx[k]; + } + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir; + + for (k = 0; k < 3; k++) { + float wdx = w * dx[k]; + parts[j].a_exact[k] -= wdx * mi; + parts[i].a_exact[k] += wdx * parts[j].mass; + } + } + } + +#if ICHECK >= 0 + for (i = 0; i < N; ++i) + if (parts[i].id == monitor) + message("[check] exact acceleration for particle %d a= %.3e %.3e %.3e\n", + parts[i].id, parts[i].a_exact[0], parts[i].a_exact[1], + parts[i].a_exact[2]); +#endif +} + +/** + * @brief Set up and run a task-based Barnes-Hutt N-body solver. + * + * @param N The number of random particles to use. + * @param nr_threads Number of threads to use. + * @param runs Number of force evaluations to use as a benchmark. + * @param fileName Input file name. If @c NULL or an empty string, random + * particle positions will be used. + */ +void test_bh(int N, int nr_threads, int runs, char *fileName) { + + int i, k; + struct cell *root; + struct part *parts; + FILE *file; + struct qsched s; + ticks tic, toc_run, tot_setup = 0, tot_run = 0; + int countMultipoles = 0, countPairs = 0, countCoMs = 0; + + /* Runner function. */ + void runner(int type, void * data) { + + ticks tic = getticks(); + + /* Decode the data. */ + struct cell **d = (struct cell **)data; + + /* Decode and execute the task. */ + switch (type) { + case task_type_self: + iact_self_direct(d[0]); + break; + case task_type_pair: + iact_pair(d[0], d[1]); + break; + case task_type_pair_direct: + iact_pair_direct(d[0], d[1]); + break; + case task_type_self_pc: + init_multipole_walk(d[0], d[1]); + break; + case task_type_com: + comp_com(d[0]); + break; + default: + error("Unknown task type."); + } + + atomic_add(&task_timers[type], getticks() - tic); + } + + /* Initialize the per-task type timers. */ + for (k = 0; k < task_type_count; k++) task_timers[k] = 0; + + /* Initialize the scheduler. */ + qsched_init(&s, nr_threads, qsched_flag_noreown); + + /* Init and fill the particle array. */ + if ((parts = (struct part *)malloc(sizeof(struct part) * N)) == NULL) + error("Failed to allocate particle buffer."); + + /* If no input file was specified, generate random particle positions. */ + if (fileName == NULL || fileName[0] == 0) { + for (k = 0; k < N; k++) { + parts[k].id = k; + parts[k].x[0] = ((double)rand()) / RAND_MAX; + parts[k].x[1] = ((double)rand()) / RAND_MAX; + parts[k].x[2] = ((double)rand()) / RAND_MAX; + parts[k].mass = ((double)rand()) / RAND_MAX; + parts[k].a_legacy[0] = 0.0; + parts[k].a_legacy[1] = 0.0; + parts[k].a_legacy[2] = 0.0; + } + + /* Otherwise, read them from a file. */ + } else { + file = fopen(fileName, "r"); + if (file) { + for (k = 0; k < N; k++) { + if (fscanf(file, "%d", &parts[k].id) != 1) + error("Failed to read ID of part %i.", k); + if (fscanf(file, "%lf%lf%lf", &parts[k].x[0], &parts[k].x[1], + &parts[k].x[2]) != + 3) + error("Failed to read position of part %i.", k); + if (fscanf(file, "%f", &parts[k].mass) != 1) + error("Failed to read mass of part %i.", k); + } + fclose(file); + } + } + + /* Init the cells. */ + root = cell_get(); + root->loc[0] = 0.0; + root->loc[1] = 0.0; + root->loc[2] = 0.0; + root->h = 1.0; + root->count = N; + root->parts = parts; + cell_split(root, &s); + + /* Iterate over the cells and get the average number of particles + per leaf. */ + struct cell *c = root; + int nr_leaves = 0; + while (c != NULL) { + if (!c->split) { + nr_leaves++; + c = c->sibling; + } else { + c = c->firstchild; + } + } + message("Average number of parts per leaf is %f.", ((double)N) / nr_leaves); + +#if ICHECK > 0 + printf("----------------------------------------------------------\n"); + + /* Do a N^2 interactions calculation */ + + ticks tic_exact = getticks(); + interact_exact(N, parts, ICHECK); + ticks toc_exact = getticks(); + + printf("Exact calculation (1 thread) took %lli (= %e) ticks\n", + toc_exact - tic_exact, (float)(toc_exact - tic_exact)); + + printf("----------------------------------------------------------\n"); +#endif + + /* Create the tasks. */ + tic = getticks(); + create_tasks(&s, root, NULL); + tot_setup += getticks() - tic; + + /* Dump the number of tasks. */ + message("total nr of tasks: %i.", s.count); + message("total nr of deps: %i.", s.count_deps); + message("total nr of res: %i.", s.count_res); + message("total nr of locks: %i.", s.count_locks); + message("total nr of uses: %i.", s.count_uses); + int counts[task_type_count]; + for (k = 0; k < task_type_count; k++) counts[k] = 0; + for (k = 0; k < s.count; k++) counts[s.tasks[k].type] += 1; + + // char buffer[200]; + // sprintf(buffer, "timings_legacy_%d_%d.dat", cell_maxparts, nr_threads); + // FILE *fileTime = fopen(buffer, "w"); + + /* Loop over the number of runs. */ + for (k = 0; k < 0 /* runs */; k++) { + + countMultipoles = 0; + countPairs = 0; + countCoMs = 0; + + /* Execute the legacy walk. */ + tic = getticks(); + legacy_tree_walk(N, parts, root, ICHECK, &countMultipoles, &countPairs, + &countCoMs); + toc_run = getticks(); + + /* Dump some timings. */ + message("%ith legacy run took %lli (= %e) ticks...", k, toc_run - tic, + (float)(toc_run - tic)); + tot_run += toc_run - tic; + // fprintf(fileTime, "%lli %e\n", toc_run - tic, (float)(toc_run - tic)); + } + +// fclose(fileTime); + +#if ICHECK >= 0 + message("[check] accel of part %i is [%.3e,%.3e,%.3e]", ICHECK, + root->parts[ICHECK].a[0], root->parts[ICHECK].a[1], + root->parts[ICHECK].a[2]); +#endif + printf("task counts: [ %8s %8s %8s %8s %8s ]\n", "self", "pair", "m-poles", + "direct", "CoMs"); + printf("task counts: [ %8i %8i %8i %8i %8i ] (legacy).\n", 0, 0, + countMultipoles, countPairs, countCoMs); + printf("task counts: [ "); + for (k = 0; k < task_type_count; k++) printf("%8i ", counts[k]); + printf("] (new).\n"); + + /* Loop over the number of runs. */ + for (k = 0; k < runs; k++) { + + for (i = 0; i < N; ++i) { + parts[i].a[0] = 0.0; + parts[i].a[1] = 0.0; + parts[i].a[2] = 0.0; + } + + struct cell *finger = root; + while (finger != NULL) { + finger->sorted = 0; + if (finger->split) + finger = finger->firstchild; + else + finger = finger->sibling; + } + + /* Execute the tasks. */ + tic = getticks(); + qsched_run(&s, nr_threads, runner); + toc_run = getticks(); + message("%ith run took %lli (= %e) ticks...", k, toc_run - tic, + (float)(toc_run - tic)); + tot_run += toc_run - tic; + } + + message("[check] root mass= %f %f", root->legacy.mass, root->new.mass); + message("[check] root CoMx= %f %f", root->legacy.com[0], root->new.com[0]); + message("[check] root CoMy= %f %f", root->legacy.com[1], root->new.com[1]); + message("[check] root CoMz= %f %f", root->legacy.com[2], root->new.com[2]); +#if ICHECK >= 0 + for (i = 0; i < N; ++i) + if (root->parts[i].id == ICHECK) + message("[check] accel of part %i is [%.3e,%.3e,%.3e]", ICHECK, + root->parts[i].a[0], root->parts[i].a[1], root->parts[i].a[2]); +#endif + + /* Dump the tasks. */ + /* for ( k = 0 ; k < s.count ; k++ ) + printf( " %i %i %lli %lli\n" , s.tasks[k].type , s.tasks[k].qid , + s.tasks[k].tic , s.tasks[k].toc ); */ + + /* Dump the costs. */ + message("costs: setup=%lli ticks, run=%lli ticks.", tot_setup, + tot_run / runs); + + /* Dump the timers. */ + for (k = 0; k < qsched_timer_count; k++) + message("timer %s is %lli ticks.", qsched_timer_names[k], + s.timers[k] / runs); + + /* Dump the per-task type timers. */ + printf("task timers: [ "); + for (k = 0; k < task_type_count; k++) printf("%lli ", task_timers[k] / runs); + printf("] ticks.\n"); + + /* Dump the particles to a file */ + file = fopen("particle_dump.dat", "w"); + fprintf(file, + "# ID m x y z a_exact.x a_exact.y a_exact.z a_legacy.x " + "a_legacy.y a_legacy.z a_new.x a_new.y a_new.z\n"); + for (k = 0; k < N; ++k) + fprintf(file, "%d %e %e %e %e %e %e %e %e %e %e %e %e %e\n", parts[k].id, + parts[k].mass, parts[k].x[0], parts[k].x[1], parts[k].x[2], + parts[k].a_exact[0], parts[k].a_exact[1], parts[k].a_exact[2], + parts[k].a_legacy[0], parts[k].a_legacy[1], parts[k].a_legacy[2], + parts[k].a[0], parts[k].a[1], parts[k].a[2]); + fclose(file); + + /* Clean up. */ + qsched_free(&s); + free(parts); +} + +/** + * @brief Main function. + */ + +int main(int argc, char *argv[]) { + + int c, nr_threads; + int N = 1000, runs = 1; + char fileName[100] = {0}; + + /* Die on FP-exceptions. */ + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); + +/* Get the number of threads. */ +#pragma omp parallel shared(nr_threads) + { + if (omp_get_thread_num() == 0) nr_threads = omp_get_num_threads(); + } + + /* Parse the options */ + while ((c = getopt(argc, argv, "n:r:t:f:c:i:")) != -1) switch (c) { + case 'n': + if (sscanf(optarg, "%d", &N) != 1) + error("Error parsing number of particles."); + break; + case 'r': + if (sscanf(optarg, "%d", &runs) != 1) + error("Error parsing number of runs."); + break; + case 't': + if (sscanf(optarg, "%d", &nr_threads) != 1) + error("Error parsing number of threads."); + omp_set_num_threads(nr_threads); + break; + case 'f': + if (sscanf(optarg, "%s", &fileName[0]) != 1) + error("Error parsing file name."); + break; + case '?': + fprintf(stderr, "Usage: %s [-t nr_threads] [-n N] [-r runs] [-f file] " + "[-c Nparts] [-i Niterations] \n", + argv[0]); + fprintf(stderr, "Solves the N-body problem using a Barnes-Hut\n" + "tree code with N random particles read from a file in " + "[0,1]^3 using" + "nr_threads threads.\n" + "A test of the neighbouring cells interaction with " + "Nparts per cell is also run Niterations times.\n"); + exit(EXIT_FAILURE); + } + + /* Tree node information */ + printf("Size of cell: %zu bytes.\n", sizeof(struct cell)); + + /* Part information */ + printf("Size of part: %zu bytes.\n", sizeof(struct part)); + + /* Dump arguments. */ + if (fileName[0] == 0) { + message("Computing the N-body problem over %i random particles using %i " + "threads (%i runs).", + N, nr_threads, runs); + } else { + message("Computing the N-body problem over %i particles read from '%s' " + "using %i threads (%i runs).", + N, fileName, nr_threads, runs); + } + + /* Run the BH test. */ + test_bh(N, nr_threads, runs, fileName); + + return 0; +} diff --git a/examples/test_bh_sorted.c b/examples/test_bh_sorted.c index 1e596813280a5c7265bc9eb3e0f65730d2b29357..8ede5e23d2636e87eb8ff09a2c85747bb294f5d0 100644 --- a/examples/test_bh_sorted.c +++ b/examples/test_bh_sorted.c @@ -899,13 +899,13 @@ static inline void make_interact_pc(struct cell *leaf, struct cell *cj) { * @brief Checks whether the cell leaf is a subvolume of the cell c */ static inline int is_inside(struct cell *leaf, struct cell *c) { - if (leaf->loc[0] >= c->loc[0] && leaf->loc[0] < c->loc[0] + c->h && + return (leaf->parts >= c->parts) && (leaf->parts < c->parts + c->count); + /* if (leaf->loc[0] >= c->loc[0] && leaf->loc[0] < c->loc[0] + c->h && leaf->loc[1] >= c->loc[1] && leaf->loc[1] < c->loc[1] + c->h && leaf->loc[2] >= c->loc[2] && leaf->loc[2] < c->loc[2] + c->h) - /* if ( leaf->parts >= c->parts && leaf->parts < c->parts + c->count ) */ return 1; else - return 0; + return 0; */ } /** @@ -928,10 +928,7 @@ static inline int are_neighbours_different_size(struct cell *ci, dx[k] = fabs(center_i - center_j); } - if ((dx[0] <= min_dist) && (dx[1] <= min_dist) && (dx[2] <= min_dist)) - return 1; - else - return 0; + return (dx[0] <= min_dist) && (dx[1] <= min_dist) && (dx[2] <= min_dist); } /** @@ -958,10 +955,7 @@ static inline int are_neighbours(struct cell *ci, struct cell *cj) { dx[k] = fabs(center_i - center_j); } - if ((dx[0] <= min_dist) && (dx[1] <= min_dist) && (dx[2] <= min_dist)) - return 1; - else - return 0; + return (dx[0] <= min_dist) && (dx[1] <= min_dist) && (dx[2] <= min_dist); } /**