-
Matthieu Schaller authoredMatthieu Schaller authored
test_bh_sorted.c 77.92 KiB
/*******************************************************************************
* This file is part of QuickSched.
* Coypright (c) 2014 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
* Matthieu Schaller (matthieu.schaller@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* *****************************************************************************/
/* Config parameters. */
#include "../config.h"
/* Standard includes. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <math.h>
#include <float.h>
#include <limits.h>
#include <omp.h>
#include <fenv.h>
/* Local includes. */
#include "quicksched.h"
/* Some local constants. */
#define cell_pool_grow 1000
#define cell_maxparts 1
#define task_limit 1e8
#define const_G 1 // 6.6738e-8
#define dist_min 0.5 /* Used for legacy walk only */
#define dist_cutoff_ratio 1.2
#define iact_pair_direct iact_pair_direct_sorted_multipole
#define ICHECK -1
#define NO_SANITY_CHECKS
#define NO_COM_AS_TASK
//#define COUNTERS
/** Data structure for the particles. */
struct part {
double x[3];
// union {
float a[3];
float a_legacy[3];
float a_exact[3];
// };
float mass;
int id;
float d;
float d_min;
float d_max;
}; // __attribute__((aligned(64)));
struct part_local {
float x[3];
float a[3];
float mass;
float d;
} __attribute__((aligned(32)));
/** Data structure for the sorted particle positions. */
struct index {
int ind;
float d;
};
struct monopole {
double com[3];
float mass;
};
/** Data structure for the BH tree cell. */
struct cell {
double loc[3];
double h;
int count;
unsigned short int split, sorted;
struct part *parts;
struct cell *firstchild; /* Next node if opening */
struct cell *sibling; /* Next node */
/* We keep both CoMs and masses to make sure the comp_com calculation is
* correct (use an anonymous union to keep variable names compact). */
union {
/* Information for the legacy walk */
struct monopole legacy;
/* Information for the QuickShed walk */
struct monopole new;
};
int res, com_tid;
struct index *indices;
} __attribute__((aligned(128)));
/** Task types. */
enum task_type {
task_type_self = 0,
task_type_pair,
task_type_self_pc,
task_type_pair_direct,
task_type_com,
task_type_count
};
/** Multipole stuff. */
struct multipole {
float mass;
float mu[3]; /* Centre of Mass */
float sigma_00, sigma_11, sigma_22, sigma_01, sigma_02, sigma_12;
};
static inline void multipole_reset(struct multipole *d) {
bzero(d, sizeof(struct multipole));
}
static inline void multipole_add(struct multipole *d, const float *x, float mass) {
float xm[3] = { x[0] * mass, x[1] * mass, x[2] * mass };
d->mu[0] += xm[0];
d->mu[1] += xm[1];
d->mu[2] += xm[2];
d->sigma_00 += xm[0] * x[0];
d->sigma_11 += xm[1] * x[1];
d->sigma_22 += xm[2] * x[2];
d->sigma_01 += xm[0] * x[1];
d->sigma_02 += xm[0] * x[2];
d->sigma_12 += xm[1] * x[2];
d->mass += mass;
}
static inline void multipole_iact(struct multipole *d, const float *x, float* a) {
int k;
float r2,ir, dx[3];
/* early abort */
if (d->mass == 0.0f) return;
float inv_mass = 1.0f / d->mass;
float mu[3] = {d->mu[0], d->mu[1], d->mu[2]};
/* Temporary quantities. */
dx[0] = d->sigma_00 - inv_mass*mu[0]*mu[0]; /* Abusing a so far unused */
dx[1] = d->sigma_11 - inv_mass*mu[1]*mu[1]; /* variable temporarily... */
dx[2] = d->sigma_22 - inv_mass*mu[2]*mu[2];
/* Construct quadrupole matrix */
float Q_00 = 2.0f*dx[0] - dx[1] - dx[2];
float Q_11 = 2.0f*dx[1] - dx[0] - dx[2];
float Q_22 = -(Q_00 + Q_11); /* Q_ij is traceless */
// Q_22 *= -1.0f;
float Q_01 = d->sigma_01 - inv_mass*mu[0]*mu[1];
float Q_02 = d->sigma_02 - inv_mass*mu[0]*mu[2];
float Q_12 = d->sigma_12 - inv_mass*mu[1]*mu[2];
Q_01 *= 3.0f;
Q_02 *= 3.0f;
Q_12 *= 3.0f;
/* Compute the distance to the CoM. */
for (r2 = 0.0f, k = 0; k < 3; k++) {
dx[k] = x[k] - mu[k] * inv_mass;
r2 += dx[k] * dx[k];
}
ir = 1.0f / sqrtf(r2);
/* Compute the acceleration from the monopole */
float ir2 = ir * ir;
float ir3G = ir * ir2 * const_G;
// float w0 = ir3G * d->mass;
/* Compute some helpful terms */
float w1 = ir3G * ir2;
float xi = 0.0f;
xi += dx[0] * dx[0] * Q_00;
xi += dx[1] * dx[1] * Q_11;
xi += dx[2] * dx[2] * Q_22;
xi += 2.f * dx[0] * dx[1] * Q_01;
xi += 2.f * dx[0] * dx[2] * Q_02;
xi += 2.f * dx[1] * dx[2] * Q_12;
float w2 = ir3G * (-2.5f * ir2 * ir2 * xi - d->mass);
/* Compute the acceleration from the quadrupole */
a[0] += w2 * dx[0] + w1 * (dx[0]*Q_00 + dx[1]*Q_01 + dx[2]*Q_02);
a[1] += w2 * dx[1] + w1 * (dx[0]*Q_01 + dx[1]*Q_11 + dx[2]*Q_12);
a[2] += w2 * dx[2] + w1 * (dx[0]*Q_02 + dx[1]*Q_12 + dx[2]*Q_22);
}
#ifdef COUNTERS
int count_direct_unsorted;
int count_direct_sorted_pp;
int count_direct_sorted_pm_i;
int count_direct_sorted_pm_j;
#endif
/** Per-type timers. */
ticks task_timers[task_type_count];
/** Global variable for the pool of allocated cells. */
struct cell *cell_pool = NULL;
/** Constants for the sorting axes. */
const float axis_shift[13 * 3] = {
5.773502691896258e-01, 5.773502691896258e-01, 5.773502691896258e-01,
7.071067811865475e-01, 7.071067811865475e-01, 0.0,
5.773502691896258e-01, 5.773502691896258e-01, -5.773502691896258e-01,
7.071067811865475e-01, 0.0, 7.071067811865475e-01,
1.0, 0.0, 0.0,
7.071067811865475e-01, 0.0, -7.071067811865475e-01,
5.773502691896258e-01, -5.773502691896258e-01, 5.773502691896258e-01,
7.071067811865475e-01, -7.071067811865475e-01, 0.0,
5.773502691896258e-01, -5.773502691896258e-01, -5.773502691896258e-01,
0.0, 7.071067811865475e-01, 7.071067811865475e-01,
0.0, 1.0, 0.0,
0.0, 7.071067811865475e-01, -7.071067811865475e-01,
0.0, 0.0, 1.0,
};
const char axis_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
/* Map shift vector to sortlist. */
const int axis_sid[27] = {
/* ( -1 , -1 , -1 ) */ 0,
/* ( -1 , -1 , 0 ) */ 1,
/* ( -1 , -1 , 1 ) */ 2,
/* ( -1 , 0 , -1 ) */ 3,
/* ( -1 , 0 , 0 ) */ 4,
/* ( -1 , 0 , 1 ) */ 5,
/* ( -1 , 1 , -1 ) */ 6,
/* ( -1 , 1 , 0 ) */ 7,
/* ( -1 , 1 , 1 ) */ 8,
/* ( 0 , -1 , -1 ) */ 9,
/* ( 0 , -1 , 0 ) */ 10,
/* ( 0 , -1 , 1 ) */ 11,
/* ( 0 , 0 , -1 ) */ 12,
/* ( 0 , 0 , 0 ) */ 0,
/* ( 0 , 0 , 1 ) */ 12,
/* ( 0 , 1 , -1 ) */ 11,
/* ( 0 , 1 , 0 ) */ 10,
/* ( 0 , 1 , 1 ) */ 9,
/* ( 1 , -1 , -1 ) */ 8,
/* ( 1 , -1 , 0 ) */ 7,
/* ( 1 , -1 , 1 ) */ 6,
/* ( 1 , 0 , -1 ) */ 5,
/* ( 1 , 0 , 0 ) */ 4,
/* ( 1 , 0 , 1 ) */ 3,
/* ( 1 , 1 , -1 ) */ 2,
/* ( 1 , 1 , 0 ) */ 1,
/* ( 1 , 1 , 1 ) */ 0
};
/* Some forward declarations. */
void comp_com(struct cell *c);
/**
* @brief Sort the entries in ascending order using QuickSort.
*
* @param sort The indices
* @param N The number of entries.
*/
void indices_sort(struct index *sort, int N) {
struct {
short int lo, hi;
} qstack[10];
int qpos, i, j, lo, hi, imin;
struct index temp;
float pivot;
/* Sort parts in cell_i in decreasing order with quicksort */
qstack[0].lo = 0;
qstack[0].hi = N - 1;
qpos = 0;
while (qpos >= 0) {
lo = qstack[qpos].lo;
hi = qstack[qpos].hi;
qpos -= 1;
if (hi - lo < 15) {
for (i = lo; i < hi; i++) {
imin = i;
for (j = i + 1; j <= hi; j++)
if (sort[j].d < sort[imin].d) imin = j;
if (imin != i) {
temp = sort[imin];
sort[imin] = sort[i];
sort[i] = temp;
}
}
} else {
pivot = sort[(lo + hi) / 2].d;
i = lo;
j = hi;
while (i <= j) {
while (sort[i].d < pivot) i++;
while (sort[j].d > pivot) j--;
if (i <= j) {
if (i < j) {
temp = sort[i];
sort[i] = sort[j];
sort[j] = temp;
}
i += 1;
j -= 1;
}
}
if (j > (lo + hi) / 2) {
if (lo < j) {
qpos += 1;
qstack[qpos].lo = lo;
qstack[qpos].hi = j;
}
if (i < hi) {
qpos += 1;
qstack[qpos].lo = i;
qstack[qpos].hi = hi;
}
} else {
if (i < hi) {
qpos += 1;
qstack[qpos].lo = i;
qstack[qpos].hi = hi;
}
if (lo < j) {
qpos += 1;
qstack[qpos].lo = lo;
qstack[qpos].hi = j;
}
}
}
}
}
/**
* @brief Sort the particles in the given cell along the given axis.
*
* @param c The #cell.
* @param axis The normalized axis along which to sort.
* @param aid The axis ID at which to store the indices.
*/
void cell_sort(struct cell *c, const float *axis, int aid) {
/* Has the indices array even been allocated? */
if (c->indices == NULL) {
if ((c->indices = (struct index *)malloc((c->count + 1) * 13 *
sizeof(struct index))) ==
NULL)
error("Failed to allocate cell sorting indices.");
} else if (c->sorted & (1 << aid)) {
return;
}
/* If this cell has been split, merge from the progeny. */
if (c->split) {
/* Heap of pointers to the progeny's indices. */
struct {
struct index *index;
int offset;
} temp, pindex[8];
int k = 0;
/* First, make sure all the progeny have been sorted, and get the pointers
to their first entries. */
for (struct cell *cp = c->firstchild; cp != c->sibling; cp = cp->sibling) {
if (!(cp->sorted & (1 << aid))) cell_sort(cp, axis, aid);
pindex[k].index = &cp->indices[(cp->count + 1) * aid];
pindex[k].offset = cp->parts - c->parts;
k++;
}
/* Fill any remaining gaps with the sentinel. */
c->indices[(c->count + 1) * aid + c->count].d = FLT_MAX;
for (; k < 8; k++)
pindex[k].index = &c->indices[(c->count + 1) * aid + c->count];
/* Heapify the pindices. */
for (k = 3; k >= 0; k--) {
int j = k;
while (j < 4) {
int jj = 2 * j + 1;
if (jj + 1 < 8 && pindex[jj + 1].index->d < pindex[jj].index->d)
jj = jj + 1;
if (pindex[jj].index->d < pindex[j].index->d) {
temp = pindex[jj];
pindex[jj] = pindex[j];
pindex[j] = temp;
j = jj;
} else
break;
}
}
/* Copy the indices into the local index in increasing order. */
struct index *dest = &c->indices[(c->count + 1) * aid];
for (int k = 0; k < c->count; k++) {
/* Copy the top of the heap to the destination. */
*dest = *pindex[0].index;
dest->ind += pindex[0].offset;
/* Increase the pointers. */
dest++;
pindex[0].index++;
/* Fix the heap. */
int j = 0;
while (j < 4) {
int jj = 2 * j + 1;
if (jj + 1 < 8 && pindex[jj + 1].index->d < pindex[jj].index->d)
jj = jj + 1;
if (pindex[jj].index->d < pindex[j].index->d) {
temp = pindex[jj];
pindex[jj] = pindex[j];
pindex[j] = temp;
j = jj;
} else
break;
}
}
/* Add a sentinel at the end. */
dest->ind = -1;
dest->d = FLT_MAX;
/* Otherwise, just sort the entries. */
} else {
/* Fill the indices. */
struct index *dest = &c->indices[(c->count + 1) * aid];
for (int k = 0; k < c->count; k++) {
dest[k].ind = k;
dest[k].d = c->parts[k].x[0] * axis[0] + c->parts[k].x[1] * axis[1] +
c->parts[k].x[2] * axis[2];
}
/* Sort the indices. */
indices_sort(&c->indices[(c->count + 1) * aid], c->count);
/* Set the sentinel on the last entry. */
dest[c->count].ind = -1;
dest[c->count].d = FLT_MAX;
}
/* Mark this cell as sorted in the given direction. */
c->sorted |= (1 << aid);
/* Verify the sort. */
/* int offset = (c->count + 1) * aid;
for (int k = 0; k < c->count; k++)
if (c->indices[offset + k].d > c->indices[offset + k + 1].d)
error( "Sorting failed." ); */
} /* */
/**
* @brief Get all the data needed for computing a sorted pair.
*
* @param ci Pointer to a pointer to the first cell.
* @param cj Pointer to a pointer to the second cell.
* @param ind_i Sorted indices of the cell @c ci.
* @param ind_j Sorted indices of the cell @c cj.
*/
void get_axis(struct cell **ci, struct cell **cj, struct index **ind_i,
struct index **ind_j, float *min_dist, float* max_dist, float axis[3]) {
float dx[3];
int aid = 0;
/* Get the cell pair separation and the axis index. */
for (int k = 0; k < 3; k++) {
dx[k] = (*cj)->loc[k] - (*ci)->loc[k];
aid = 3 * aid + ((dx[k] < 0) ? 0 : (dx[k] > 0) ? 2 : 1);
}
/* Flip the cells? */
if (axis_flip[aid]) {
struct cell *temp = *ci;
*ci = *cj;
*cj = temp;
}
aid = axis_sid[aid];
/* Copy the shift vector to the output parameter. */
axis[0] = axis_shift[aid * 3 + 0];
axis[1] = axis_shift[aid * 3 + 1];
axis[2] = axis_shift[aid * 3 + 2];
/* Make sure the cells are sorted. */
if (!((*ci)->sorted & (1 << aid))) cell_sort(*ci, axis, aid);
if (!((*cj)->sorted & (1 << aid))) cell_sort(*cj, axis, aid);
/* Set the indices. */
*ind_i = &(*ci)->indices[aid * ((*ci)->count + 1)];
*ind_j = &(*cj)->indices[aid * ((*cj)->count + 1)];
/* Make sure the sorts are ok. */
/* for (int k = 1; k < (*ci)->count; k++)
if ((*ind_i)[k].d < (*ind_i)[k-1].d)
error("Sorting failed.");
for (int k = 1; k < (*cj)->count; k++)
if ((*ind_j)[k].d < (*ind_j)[k-1].d)
error("Sorting failed."); */
/* Compute the minimal and maximal distance along the axis linking the cells */
*min_dist = 0.f;
*max_dist = 0.f;
for ( int k = 0; k < 3; k++) {
if ( axis[k] < 0 )
*min_dist += ( (*ci)->loc[k] + (*ci)->h ) * axis[k];
else
*min_dist += (*ci)->loc[k] * axis[k];
if ( axis[k] > 0 )
*max_dist += ( (*cj)->loc[k] + (*cj)->h ) * axis[k];
else
*max_dist += (*cj)->loc[k] * axis[k];
}
}
/**
* @brief Get a #cell from the pool.
*/
struct cell *cell_get() {
struct cell *res;
int k;
/* Allocate a new batch? */
if (cell_pool == NULL) {
/* Allocate the cell array. */
if ((cell_pool =
(struct cell *)calloc(cell_pool_grow, sizeof(struct cell))) ==
NULL)
error("Failed to allocate fresh batch of cells.");
/* Link them up via their progeny pointers. */
for (k = 1; k < cell_pool_grow; k++)
cell_pool[k - 1].firstchild = &cell_pool[k];
}
/* Pick a cell off the pool. */
res = cell_pool;
cell_pool = cell_pool->firstchild;
/* Clean up a few things. */
res->res = qsched_res_none;
res->firstchild = 0;
/* Return the cell. */
return res;
}
/**
* @brief Sort the parts into eight bins along the given pivots and
* fill the multipoles. Also adds the hierarchical resources
* to the sched.
*
* @param c The #cell to be split.
* @param N The total number of parts.
* @param s The #sched to store the resources.
*/
void cell_split(struct cell *c, struct qsched *s) {
int i, j, k, kk, count = c->count;
struct part temp, *parts = c->parts;
struct cell *cp;
int left[8], right[8];
double pivot[3];
static struct cell *root = NULL;
struct cell *progenitors[8];
/* Set the root cell. */
if (root == NULL) {
root = c;
c->sibling = 0;
}
/* Add a resource for this cell if it doesn't have one yet. */
if (c->res == qsched_res_none)
c->res = qsched_addres(s, qsched_owner_none, qsched_res_none);
/* Attach a center-of-mass task to the cell. */
#ifdef COM_AS_TASK
if (count > 0)
c->com_tid = qsched_addtask(s, task_type_com, task_flag_none, &c,
sizeof(struct cell *), 1);
#endif
/* Does this cell need to be split? */
if (count > cell_maxparts) {
/* Mark this cell as split. */
c->split = 1;
/* Create the progeny. */
for (k = 0; k < 8; k++) {
progenitors[k] = cp = cell_get();
cp->loc[0] = c->loc[0];
cp->loc[1] = c->loc[1];
cp->loc[2] = c->loc[2];
cp->h = c->h * 0.5;
cp->res = qsched_addres(s, qsched_owner_none, c->res);
if (k & 4) cp->loc[0] += cp->h;
if (k & 2) cp->loc[1] += cp->h;
if (k & 1) cp->loc[2] += cp->h;
}
/* Init the pivots. */
for (k = 0; k < 3; k++) pivot[k] = c->loc[k] + c->h * 0.5;
/* Split along the x-axis. */
i = 0;
j = count - 1;
while (i <= j) {
while (i <= count - 1 && parts[i].x[0] < pivot[0]) i += 1;
while (j >= 0 && parts[j].x[0] >= pivot[0]) j -= 1;
if (i < j) {
temp = parts[i];
parts[i] = parts[j];
parts[j] = temp;
}
}
left[1] = i;
right[1] = count - 1;
left[0] = 0;
right[0] = j;
/* Split along the y axis, twice. */
for (k = 1; k >= 0; k--) {
i = left[k];
j = right[k];
while (i <= j) {
while (i <= right[k] && parts[i].x[1] < pivot[1]) i += 1;
while (j >= left[k] && parts[j].x[1] >= pivot[1]) j -= 1;
if (i < j) {
temp = parts[i];
parts[i] = parts[j];
parts[j] = temp;
}
}
left[2 * k + 1] = i;
right[2 * k + 1] = right[k];
left[2 * k] = left[k];
right[2 * k] = j;
}
/* Split along the z axis, four times. */
for (k = 3; k >= 0; k--) {
i = left[k];
j = right[k];
while (i <= j) {
while (i <= right[k] && parts[i].x[2] < pivot[2]) i += 1;
while (j >= left[k] && parts[j].x[2] >= pivot[2]) j -= 1;
if (i < j) {
temp = parts[i];
parts[i] = parts[j];
parts[j] = temp;
}
}
left[2 * k + 1] = i;
right[2 * k + 1] = right[k];
left[2 * k] = left[k];
right[2 * k] = j;
}
/* Store the counts and offsets. */
for (k = 0; k < 8; k++) {
progenitors[k]->count = right[k] - left[k] + 1;
progenitors[k]->parts = &c->parts[left[k]];
}
/* Find the first non-empty progenitor */
for (k = 0; k < 8; k++)
if (progenitors[k]->count > 0) {
c->firstchild = progenitors[k];
break;
}
#ifdef SANITY_CHECKS
if (c->firstchild == NULL)
error("Cell has been split but all progenitors have 0 particles");
#endif
/* Prepare the pointers. */
for (k = 0; k < 8; k++) {
/* Find the next non-empty sibling */
for (kk = k + 1; kk < 8; ++kk) {
if (progenitors[kk]->count > 0) {
progenitors[k]->sibling = progenitors[kk];
break;
}
}
/* No non-empty sibling ? Go back a level */
if (kk == 8) progenitors[k]->sibling = c->sibling;
}
/* Recurse. */
for (k = 0; k < 8; k++)
if (progenitors[k]->count > 0) cell_split(progenitors[k], s);
/* Link the COM tasks. */
#ifdef COM_AS_TASK
for (k = 0; k < 8; k++)
if (progenitors[k]->count > 0)
qsched_addunlock(s, progenitors[k]->com_tid, c->com_tid);
#endif
/* Otherwise, we're at a leaf, so create the cell's particle-cell task. */
} else {
struct cell *data[2] = {root, c};
int tid = qsched_addtask(s, task_type_self_pc, task_flag_none, data,
2 * sizeof(struct cell *), 1);
qsched_addlock(s, tid, c->res);
#ifdef COM_AS_TASK
qsched_addunlock(s, root->com_tid, tid);
#endif
} /* does the cell need to be split? */
/* Compute the cell's center of mass. */
#ifndef COM_AS_TASK
comp_com(c);
#endif
/* Set this cell's resources ownership. */
qsched_res_own(s, c->res,
s->nr_queues * (c->parts - root->parts) / root->count);
}
/* -------------------------------------------------------------------------- */
/* New tree walk */
/* -------------------------------------------------------------------------- */
/**
* @brief Compute the center of mass of a given cell.
*
* @param c The #cell.
*/
void comp_com(struct cell *c) {
int k, count = c->count;
struct cell *cp;
struct part *parts = c->parts;
double com[3] = {0.0, 0.0, 0.0}, mass = 0.0;
if (c->split) {
/* Loop over the projenitors and collect the multipole information. */
for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) {
float cp_mass = cp->new.mass;
com[0] += cp->new.com[0] * cp_mass;
com[1] += cp->new.com[1] * cp_mass;
com[2] += cp->new.com[2] * cp_mass;
mass += cp_mass;
}
/* Otherwise, collect the multipole from the particles. */
} else {
for (k = 0; k < count; k++) {
float p_mass = parts[k].mass;
com[0] += parts[k].x[0] * p_mass;
com[1] += parts[k].x[1] * p_mass;
com[2] += parts[k].x[2] * p_mass;
mass += p_mass;
}
}
/* Store the COM data, if any was collected. */
if (mass > 0.0) {
float imass = 1.0f / mass;
c->new.com[0] = com[0] * imass;
c->new.com[1] = com[1] * imass;
c->new.com[2] = com[2] * imass;
c->new.mass = mass;
} else {
c->new.com[0] = 0.0;
c->new.com[1] = 0.0;
c->new.com[2] = 0.0;
c->new.mass = 0.0;
}
}
/**
* @brief Interacts all particles in ci with the monopole in cj
*/
static inline void make_interact_pc(struct part_local *parts, int count,
double *loc, struct cell *cj) {
int j, k;
float com[3] = {0.0, 0.0, 0.0};
float mcom, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w;
#ifdef SANITY_CHECKS
/* Sanity checks */
if (leaf->count == 0) error("Empty cell!");
/* Sanity check. */
if (cj->new.mass == 0.0) {
message("%e %e %e %d %p %d %p", cj->new.com[0], cj->new.com[1],
cj->new.com[2], cj->count, cj, cj->split, cj->sibling);
for (j = 0; j < cj->count; ++j)
message("part %d mass=%e id=%d", j, cj->parts[j].mass, cj->parts[j].id);
error("com does not seem to have been set.");
}
#endif
/* Init the com's data. */
for (k = 0; k < 3; k++) com[k] = cj->new.com[k] - loc[k];
mcom = cj->new.mass;
/* Loop over every particle in leaf. */
for (j = 0; j < count; j++) {
/* Compute the pairwise distance. */
for (r2 = 0.0, k = 0; k < 3; k++) {
dx[k] = com[k] - parts[j].x[k];
r2 += dx[k] * dx[k];
}
/* Apply the gravitational acceleration. */
ir = 1.0f / sqrtf(r2);
w = mcom * const_G * ir * ir * ir;
for (k = 0; k < 3; k++) parts[j].a[k] += w * dx[k];
#if ICHECK >= 0
if (leaf->parts[j].id == ICHECK)
printf("[DEBUG] cell [%f,%f,%f] interacting with cj->loc=[%f,%f,%f] "
"m=%f h=%f\n",
leaf->loc[0], leaf->loc[1], leaf->loc[2], cj->loc[0], cj->loc[1],
cj->loc[2], mcom, cj->h);
if (leaf->parts[j].id == ICHECK)
printf("[NEW] Interaction with monopole a=( %e %e %e ) h= %f Nj= %d m_j= "
"%f\n",
w * dx[0], w * dx[1], w * dx[2], cj->h, cj->count, mcom);
#endif
} /* loop over every particle. */
}
/**
* @brief Checks whether the cell leaf is a subvolume of the cell c
*/
static inline int is_inside(struct cell *leaf, struct cell *c) {
return (leaf->parts >= c->parts) && (leaf->parts < c->parts + c->count);
}
/**
* @brief Checks whether the cells are direct neighbours ot not
*/
static inline int are_neighbours_different_size(struct cell *ci,
struct cell *cj) {
double cih = ci->h, cjh = cj->h;
/* Maximum allowed distance */
float min_dist = 0.5 * (cih + cjh);
/* (Manhattan) Distance between the cells */
for (int k = 0; k < 3; k++) {
float center_i = ci->loc[k] + 0.5 * cih;
float center_j = cj->loc[k] + 0.5 * cjh;
if (fabsf(center_i - center_j) > min_dist) return 0;
}
return 1;
}
/**
* @brief Checks whether the cells are direct neighbours ot not. Both cells have
* to be of the same size
*/
static inline int are_neighbours(struct cell *ci, struct cell *cj) {
#ifdef SANITY_CHECKS
if (ci->h != cj->h)
error(" Cells of different size in distance calculation.");
#endif
/* Maximum allowed distance */
float min_dist = ci->h;
/* (Manhattan) Distance between the cells */
for (int k = 0; k < 3; k++) {
float center_i = ci->loc[k];
float center_j = cj->loc[k];
if (fabsf(center_i - center_j) > min_dist) return 0;
}
return 1;
}
/**
* @brief Compute the interactions between all particles in a cell leaf
* and the center of mass of all the cells in a part of the tree
* described by ci and cj
*
* @param ci The #cell containing the particle
* @param cj The #cell containing the center of mass.
*/
static inline void iact_pair_pc(struct cell *ci, struct cell *cj,
struct cell *leaf, struct part_local *parts,
int count, double *loc) {
struct cell *cp, *cps;
#ifdef SANITY_CHECKS
/* Early abort? */
if (ci->count == 0 || cj->count == 0) error("Empty cell !");
/* Sanity check */
if (ci == cj)
error("The impossible has happened: pair interaction between a cell and "
"itself.");
/* Sanity check */
if (!is_inside(leaf, ci))
error("The impossible has happened: The leaf is not within ci");
/* Are the cells direct neighbours? */
if (!are_neighbours(ci, cj)) error("Cells are not neighours");
/* Are both cells split ? */
if (!ci->split || !cj->split) error("One of the cells is not split !");
#endif
/* Let's find in which subcell of ci the leaf is */
for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) {
if (is_inside(leaf, cp)) break;
}
if (are_neighbours_different_size(cp, cj)) {
/* Now interact this subcell with all subcells of cj */
for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) {
/* Check whether we have to recurse or can directly jump to the multipole
* calculation */
if (are_neighbours(cp, cps)) {
/* We only recurse if the children are split */
if (cp->split && cps->split) {
iact_pair_pc(cp, cps, leaf, parts, count, loc);
}
} else {
make_interact_pc(parts, count, loc, cps);
}
}
} else {
/* If cp is not a neoghbour of cj, we can directly interact with the
* multipoles */
for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) {
make_interact_pc(parts, count, loc, cps);
}
}
}
/**
* @brief Compute the interactions between all particles in a leaf and
* and all the monopoles in the cell c
*
* @param c The #cell containing the monopoles
* @param leaf The #cell containing the particles
*/
static inline void iact_self_pc(struct cell *c, struct cell *leaf,
struct part_local *parts) {
struct cell *cp, *cps;
int collect_part_data = 0;
#ifdef SANITY_CHECKS
/* Early abort? */
if (c->count == 0) error("Empty cell !");
if (!c->split) error("Cell is not split !");
#endif
/* Get local copies of the particle data. */
if (parts == NULL) {
int count = leaf->count;
if ((parts =
(struct part_local *)malloc(sizeof(struct part_local) * count)) ==
NULL)
error("Failed to allocate local parts.");
for (int k = 0; k < count; k++) {
for (int j = 0; j < 3; j++) {
parts[k].x[j] = leaf->parts[k].x[j] - leaf->loc[j];
parts[k].a[j] = 0.0f;
}
parts[k].mass = leaf->parts[k].mass;
}
collect_part_data = 1;
}
/* Find in which subcell of c the leaf is */
for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) {
/* Only recurse if the leaf is in this part of the tree */
if (is_inside(leaf, cp)) break;
}
if (cp->split) {
/* Recurse if the cell can be split */
iact_self_pc(cp, leaf, parts);
/* Now, interact with every other subcell */
for (cps = c->firstchild; cps != c->sibling; cps = cps->sibling) {
/* Since cp and cps will be direct neighbours it is only worth recursing
*/
/* if the cells can both be split */
if (cp != cps && cps->split)
iact_pair_pc(cp, cps, leaf, parts, leaf->count, leaf->loc);
}
}
/* Clean up local parts? */
if (collect_part_data) {
for (int k = 0; k < leaf->count; k++) {
for (int j = 0; j < 3; j++) leaf->parts[k].a[j] += parts[k].a[j];
}
free(parts);
}
}
/**
* @brief Starts the recursive tree walk of a given leaf
*/
void init_multipole_walk(struct cell *root, struct cell *leaf) {
/* Pre-fetch the leaf's particles */
__builtin_prefetch(leaf->parts, 1, 3);
/* Start the recursion */
iact_self_pc(root, leaf, NULL);
}
/**
* @brief Compute the interactions between all particles in a cell
* and all particles in the other cell (N^2 algorithm)
*
* @param ci The #cell containing the particles.
* @param cj The #cell containing the other particles
*/
static inline void iact_pair_direct_unsorted(struct cell *ci, struct cell *cj) {
int i, j, k;
int count_i = ci->count, count_j = cj->count;
struct part *parts_i = ci->parts, *parts_j = cj->parts;
double xi[3];
float dx[3], ai[3], mi, mj, r2, w, ir;
#ifdef SANITY_CHECKS
/* Bad stuff will happen if cell sizes are different */
if (ci->h != cj->h)
error("Non matching cell sizes !! h_i=%f h_j=%f\n", ci->h, cj->h);
#endif
/* Loop over all particles in ci... */
for (i = 0; i < count_i; i++) {
/* Init the ith particle's data. */
for (k = 0; k < 3; k++) {
xi[k] = parts_i[i].x[k];
ai[k] = 0.0f;
}
mi = parts_i[i].mass;
/* Loop over every particle in the other cell. */
for (j = 0; j < count_j; j++) {
/* Compute the pairwise distance. */
for (r2 = 0.0, k = 0; k < 3; k++) {
dx[k] = xi[k] - parts_j[j].x[k];
r2 += dx[k] * dx[k];
}
/* Apply the gravitational acceleration. */
ir = 1.0f / sqrtf(r2);
w = const_G * ir * ir * ir;
mj = parts_j[j].mass;
for (k = 0; k < 3; k++) {
float wdx = w * dx[k];
parts_j[j].a[k] += wdx * mi;
ai[k] -= wdx * mj;
}
#ifdef COUNTERS
++count_direct_unsorted;
#endif
#if ICHECK >= 0 && 0
if (parts_i[i].id == ICHECK)
printf(
"[NEW] Interaction with particle id= %d (pair i) a=( %e %e %e )\n",
parts_j[j].id, -mi * w * dx[0], -mi * w * dx[1], -mi * w * dx[2]);
if (parts_j[j].id == ICHECK)
printf(
"[NEW] Interaction with particle id= %d (pair j) a=( %e %e %e )\n",
parts_i[i].id, mj * w * dx[0], mj * w * dx[1], mj * w * dx[2]);
#endif
} /* loop over every other particle. */
/* Store the accumulated acceleration on the ith part. */
for (k = 0; k < 3; k++) parts_i[i].a[k] += ai[k];
} /* loop over all particles. */
}
float correction_coefs[6*4] =
{
0.04730587, -0.13379902, 0.10447393, 0.982606,
-0.04397043, -0.12480554, -0.09791498, 0.98388736,
0.00536261, -0.04095222, 0.05153867, 0.98824618,
-0.00677177, -0.04110387, -0.04859363, 0.98916257,
0.00425108, 0.01519535, -0.02025942, 1.0035729,
-0.00711434, 0.01114372, 0.01865343, 1.00334988
};
static inline float cubic(float x, float a, float b, float c, float d)
{
return a*x*x*x + b*x*x + c*x + d;
}
/**
* @brief Compute the best-fitting bias correction factor for the acceleration of a particle
*
* @param d The normalized position of the particle along the axis
* @param axis The axis along which the two cells are interacting
* @param The correction to apply in each direction
*/
static inline void gravity_bias_correction( float d, float axis[3], float corr[3] ) {
int k, orientation;
int is_zero[3];
/* Start with no applied correction */
corr[0] = corr[1] = corr[2] = 1.f;
/* Find which case we are dealing with */
orientation = 0 ;
for (k = 0 ; k < 3; ++k) {
is_zero[k] = ( fabs(axis[k]) > 0 ? 1 : 0 );
orientation += is_zero[k];
}
//message("%f %f", d, limit_exact);
#if 1
float limit_exact = ( dist_cutoff_ratio - 1. );
/* Apply the correction corresponding to this axis */
switch(orientation)
{
case 1: /* side */
{
if ( d < -limit_exact )
for( k = 0; k < 3; k++ )
corr[k] = 1./cubic(d, correction_coefs[5*4+0], correction_coefs[5*4+1], correction_coefs[5*4+2], correction_coefs[5*4+3]);
else if (d > limit_exact)
for( k = 0; k < 3; k++ )
corr[k] = 1./cubic(d, correction_coefs[4*4+0], correction_coefs[4*4+1], correction_coefs[4*4+2], correction_coefs[4*4+3]);
}
break;
case 2: /* edge */
{
if ( d < -limit_exact )
for( k = 0; k < 3; k++ )
corr[k] = 1./cubic(d, correction_coefs[3*4+0], correction_coefs[3*4+1], correction_coefs[3*4+2], correction_coefs[3*4+3]);
else if (d > limit_exact)
for( k = 0; k < 3; k++ )
corr[k] = 1./cubic(d, correction_coefs[2*4+0], correction_coefs[2*4+1], correction_coefs[2*4+2], correction_coefs[2*4+3]);
}
break;
case 3: /* corner */
{
if ( d < -limit_exact )
for( k = 0; k < 3; k++ )
corr[k] = 1./cubic(d, correction_coefs[1*4+0], correction_coefs[1*4+1], correction_coefs[1*4+2], correction_coefs[1*4+3]);
else if (d > limit_exact)
for( k = 0; k < 3; k++ )
corr[k] = 1./cubic(d, correction_coefs[0*4+0], correction_coefs[0*4+1], correction_coefs[0*4+2], correction_coefs[0*4+3]);
}
break;
default:
error( "Invalid orientation axis=(%f %f %f) orientation= %d", axis[0], axis[1], axis[2], orientation);
}
/* Correction only applies in the direction along the axis. Rest is untouched */
for ( k = 0; k < 3; ++k )
corr[k] = (is_zero[k] ? corr[k] : 1 );
#endif
}
static inline void iact_pair_direct_sorted_multipole(struct cell *ci,
struct cell *cj) {
int i, j, k;
int count_i, count_j;
struct part_local *parts_i, *parts_j;
float xi[3];
float dx[3], ai[3], mi, mj, r2, w, ir;
float min_dist, max_dist, length_dist, mid_dist, d_norm, axis[3], corr[3];
#ifdef SANITY_CHECKS
/* Bad stuff will happen if cell sizes are different */
if (ci->h != cj->h)
error("Non matching cell sizes !! h_i=%f h_j=%f\n", ci->h, cj->h);
#endif
/* Get the sorted indices and geometry. */
struct index *ind_i, *ind_j;
get_axis(&ci, &cj, &ind_i, &ind_j, &min_dist, &max_dist, axis);
length_dist = max_dist - min_dist;
mid_dist = min_dist + 0.5f * length_dist;
/* Initialise the multipole */
struct multipole multi;
multipole_reset(&multi);
/* Allocate and fill-in the local parts. */
count_i = ci->count;
count_j = cj->count;
if ((parts_i =
(struct part_local *)alloca(sizeof(struct part_local) *count_i)) ==
NULL ||
(parts_j =
(struct part_local *)alloca(sizeof(struct part_local) * count_j)) ==
NULL)
error("Failed to allocate local part arrays.");
/* Store only the particle position relative to a point close to the cells */
/* This point does not have to be in the middle cj->loc is good enough */
float midpoint[3] = { cj->loc[0] , cj->loc[1], cj->loc[2] };
for (i = 0; i < count_i; i++) {
int pid = ind_i[i].ind;
parts_i[i].d = ind_i[i].d;
for (k = 0; k < 3; k++) {
parts_i[i].x[k] = ci->parts[pid].x[k] - midpoint[k];
parts_i[i].a[k] = 0.0f;
}
parts_i[i].mass = ci->parts[pid].mass;
}
for (j = 0; j < count_j; j++) {
int pjd = ind_j[j].ind;
parts_j[j].d = ind_j[j].d;
for (k = 0; k < 3; k++) {
parts_j[j].x[k] = cj->parts[pjd].x[k] - midpoint[k];
parts_j[j].a[k] = 0.0f;
}
parts_j[j].mass = cj->parts[pjd].mass;
}
#if ICHECK >= 0
for (k = 0; k < count_i; k++)
if (parts_i[k].id == ICHECK)
message("[DEBUG] interacting cells loc=[%f,%f,%f], h=%f and "
"loc=[%f,%f,%f], h=%f.",
ci->loc[0], ci->loc[1], ci->loc[2], ci->h, cj->loc[0], cj->loc[1],
cj->loc[2], cj->h);
for (k = 0; k < count_j; k++)
if (parts_j[k].id == ICHECK)
message("[DEBUG] interacting cells loc=[%f,%f,%f], h=%f and "
"loc=[%f,%f,%f], h=%f.",
cj->loc[0], cj->loc[1], cj->loc[2], cj->h, ci->loc[0], ci->loc[1],
ci->loc[2], ci->h);
#endif
/* Distance along the axis as of which we will use a multipole. */
float d_max = dist_cutoff_ratio * cj->h;
/* Loop over all particles in ci... */
for (i = count_i - 1; i >= 0; i--) {
/* Get a local copy of the maximal distance along the axis. */
float di = parts_i[i].d + d_max;
/* Init the ith particle's data. */
for (k = 0; k < 3; k++) {
xi[k] = parts_i[i].x[k];
ai[k] = 0.0;
}
mi = parts_i[i].mass;
/* Loop over every following particle within d_max along the axis. */
for (j = 0; j < count_j && parts_j[j].d < di; j++) {
/* Compute the pairwise distance. */
for (r2 = 0.0, k = 0; k < 3; k++) {
dx[k] = xi[k] - parts_j[j].x[k];
r2 += dx[k] * dx[k];
}
/* Apply the gravitational acceleration. */
ir = 1.0f / sqrtf(r2);
w = const_G * ir * ir * ir;
mj = parts_j[j].mass;
for (k = 0; k < 3; k++) {
float wdx = w * dx[k];
parts_j[j].a[k] += wdx * mi;
ai[k] -= wdx * mj;
}
#if ICHECK >= 0
if (parts_i[i].id == ICHECK)
message("Interaction with part %d - d= %f", parts_j[j].id, sqrt(r2));
#endif
#ifdef COUNTERS
++count_direct_sorted_pp;
#endif
#if ICHECK >= 0 && 0
if (parts_i[i].id == ICHECK)
printf("[NEW] Interaction with particle id= %d (pair i)\n",
parts_j[pjd].id);
if (parts_j[j].id == ICHECK)
printf("[NEW] Interaction with particle id= %d (pair j) h_i= %f h_j= "
"%f ci= %p cj= %p count_i= %d count_j= %d d_i= %d d_j= %d\n",
parts_i[pid].id, ci->h, cj->h, ci, cj, count_i, count_j, ci->res,
cj->res);
#endif
} /* loop over every other particle. */
/* Add any remaining particles to the multipole. */
for (int jj = j; jj < count_j; jj++) {
multipole_add(&multi, parts_j[jj].x, parts_j[jj].mass);
}
/* Shrink count_j to the latest valid particle. */
count_j = j;
/* Interact part_i with the multipole. */
multipole_iact(&multi, xi, ai);
#if ICHECK >= 0
if (parts_i[i].id == ICHECK)
message("Interaction with multipole");
#endif
#ifdef COUNTERS
++count_direct_sorted_pm_i;
#endif
/* Store the accumulated acceleration on the ith part. */
for (k = 0; k < 3; k++) parts_i[i].a[k] += ai[k];
} /* loop over all particles in ci. */
/* Re-init some values. */
count_j = cj->count;
int last_i = 0;
multipole_reset(&multi);
/* Loop over the particles in cj, catch the COM interactions. */
for (j = 0; j < count_j; j++) {
/* Get the sorted index. */
float dj = parts_j[j].d - d_max;
/* Fill the COM with any new particles. */
for (i = last_i; i < count_i && dj > parts_i[i].d; i++) {
multipole_add(&multi, parts_i[i].x, parts_i[i].mass);
}
/* Set the new last_i to the last particle checked. */
last_i = i;
/* Interact part_j with the COM. */
multipole_iact(&multi, parts_j[j].x, parts_j[j].a);
#if ICHECK >= 0
if (parts_j[j].id == ICHECK)
message("Interaction with multipole");
#endif
#ifdef COUNTERS
++count_direct_sorted_pm_j;
#endif
}
/* Apply the correction to cancel the bias */
for (i = 0; i < count_i; i++) {
/* Get the normalized position along the axis */
d_norm = parts_i[i].d - mid_dist;
d_norm /= ci->h;
/* Get the correction */
gravity_bias_correction( d_norm, axis, corr );
//corr[0] = 1.;
//corr[1] = 1.;
//corr[2] = 1.;
/* And apply it */
parts_i[i].a[0] *= corr[0];
parts_i[i].a[1] *= corr[1];
parts_i[i].a[2] *= corr[2];
parts_i[i].d = d_norm;
}
/* Do the same on the other particles */
for (j = 0; j < count_i; j++) {
/* Get the normalized position along the axis */
d_norm = parts_j[j].d - mid_dist;
d_norm /= cj->h;
/* Get the correction */
gravity_bias_correction( d_norm, axis, corr );
//corr[0] = 1.;
//corr[1] = 1.;
//corr[2] = 1.;
/* And apply it */
parts_j[j].a[0] *= corr[0];
parts_j[j].a[1] *= corr[1];
parts_j[j].a[2] *= corr[2];
parts_j[j].d = d_norm;
}
/* Copy the accelerations back to the original particles. */
for (i = 0; i < count_i; i++) {
int pid = ind_i[i].ind;
for (k = 0; k < 3; k++) ci->parts[pid].a[k] += parts_i[i].a[k];
ci->parts[pid].d = parts_i[i].d;
ci->parts[pid].d_min = min_dist;
ci->parts[pid].d_max = max_dist;
}
for (j = 0; j < count_j; j++) {
int pjd = ind_j[j].ind;
for (k = 0; k < 3; k++) cj->parts[pjd].a[k] += parts_j[j].a[k];
cj->parts[pjd].d = parts_j[j].d;
cj->parts[pjd].d_min = min_dist;
cj->parts[pjd].d_max = max_dist;
}
}
/**
* @brief Decides whether two cells use the direct summation interaction or the
* multipole interactions
*
* @param ci The #cell.
* @param cj The other #cell.
*/
void iact_pair(struct cell *ci, struct cell *cj) {
struct cell *cp, *cps;
#ifdef SANITY_CHECKS
/* Early abort? */
if (ci->count == 0 || cj->count == 0) error("Empty cell !");
/* Bad stuff will happen if cell sizes are different */
if (ci->h != cj->h)
error("Non matching cell sizes !! h_i=%f h_j=%f\n", ci->h, cj->h);
/* Sanity check */
if (ci == cj)
error("The impossible has happened: pair interaction between a cell and "
"itself.");
#endif
/* Are the cells direct neighbours? */
if (are_neighbours(ci, cj)) {
/* Are both cells split ? */
if (ci->split && cj->split) {
/* Let's split both cells and build all possible pairs */
for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) {
for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) {
/* If the cells are neighbours, recurse. */
if (are_neighbours(cp, cps)) {
iact_pair(cp, cps);
}
}
}
} else {/* Otherwise, compute the interactions at this level directly. */
iact_pair_direct(ci, cj);
}
}
}
/**
* @brief Compute the interactions between all particles in a cell.
*
* @param c The #cell.
*/
void iact_self_direct(struct cell *c) {
int i, j, k, count = c->count;
float xi[3] = {0.0, 0.0, 0.0};
float ai[3] = {0.0, 0.0, 0.0}, mi, mj, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w;
struct cell *cp, *cps;
#ifdef SANITY_CHECKS
/* Early abort? */
if (count == 0) error("Empty cell !");
#endif
/* If the cell is split, interact each progeny with itself, and with
each of its siblings. */
if (c->split) {
for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) {
iact_self_direct(cp);
for (cps = cp->sibling; cps != c->sibling; cps = cps->sibling)
iact_pair(cp, cps);
}
/* Otherwise, compute the interactions directly. */
} else {
/* Init the local copies of the particles. */
double loc[3];
struct part_local parts[count];
loc[0] = c->loc[0];
loc[1] = c->loc[1];
loc[2] = c->loc[2];
for (k = 0; k < count; k++) {
for (j = 0; j < 3; j++) {
parts[k].x[j] = c->parts[k].x[j] - loc[j];
parts[k].a[j] = 0.0f;
}
parts[k].mass = c->parts[k].mass;
}
/* Loop over all particles... */
for (i = 0; i < count; i++) {
/* Init the ith particle's data. */
for (k = 0; k < 3; k++) {
xi[k] = parts[i].x[k];
ai[k] = 0.0;
}
mi = parts[i].mass;
/* Loop over every following particle. */
for (j = i + 1; j < count; j++) {
/* Compute the pairwise distance. */
for (r2 = 0.0, k = 0; k < 3; k++) {
dx[k] = xi[k] - parts[j].x[k];
r2 += dx[k] * dx[k];
}
/* Apply the gravitational acceleration. */
ir = 1.0f / sqrtf(r2);
w = const_G * ir * ir * ir;
mj = parts[j].mass;
for (k = 0; k < 3; k++) {
float wdx = w * dx[k];
parts[j].a[k] += wdx * mi;
ai[k] -= wdx * mj;
}
#if ICHECK >= 0
if (parts[i].id == ICHECK)
message("[NEW] Interaction with particle id= %d (self i)",
parts[j].id);
if (parts[j].id == ICHECK)
message("[NEW] Interaction with particle id= %d (self j)",
parts[i].id);
#endif
} /* loop over every other particle. */
/* Store the accumulated acceleration on the ith part. */
for (k = 0; k < 3; k++) parts[i].a[k] += ai[k];
} /* loop over all particles. */
/* Copy the local particle data back. */
for (k = 0; k < count; k++) {
for (j = 0; j < 3; j++) c->parts[k].a[j] += parts[k].a[j];
}
} /* otherwise, compute interactions directly. */
}
/**
* @brief Create the tasks for the cell pair/self.
*
* @param s The #sched in which to create the tasks.
* @param ci The first #cell.
* @param cj The second #cell.
*/
void create_tasks(struct qsched *s, struct cell *ci, struct cell *cj) {
qsched_task_t tid;
struct cell *data[2], *cp, *cps;
#ifdef SANITY_CHECKS
/* If either cell is empty, stop. */
if (ci->count == 0 || (cj != NULL && cj->count == 0)) error("Empty cell !");
#endif
/* Single cell? */
if (cj == NULL) {
/* Is this cell split and above the task limit ? */
if (ci->split && ci->count > task_limit / ci->count) {
/* Loop over each of this cell's progeny. */
for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) {
/* Make self-interaction task. */
create_tasks(s, cp, NULL);
/* Make all pair-interaction tasks. */
for (cps = cp->sibling; cps != ci->sibling; cps = cps->sibling)
create_tasks(s, cp, cps);
}
/* Otherwise, add a self-interaction task. */
} else {
/* Set the data. */
data[0] = ci;
data[1] = NULL;
/* Create the task. */
tid =
qsched_addtask(s, task_type_self, task_flag_none, data,
sizeof(struct cell *) * 2, ci->count * ci->count / 2);
/* Add the resource (i.e. the cell) to the new task. */
qsched_addlock(s, tid, ci->res);
/* If this call might recurse, add a dependency on the cell's COM
task. */
#ifdef COM_AS_TASK
if (ci->split) qsched_addunlock(s, ci->com_tid, tid);
#endif
}
/* Otherwise, it's a pair. */
} else {
/* Are the cells NOT neighbours ? */
if (!are_neighbours(ci, cj)) {
} else {/* Cells are direct neighbours */
/* Are both cells split ? */
if (ci->split && cj->split && ci->count > task_limit / cj->count) {
/* Let's split both cells and build all possible pairs */
for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) {
for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) {
/* Recurse */
create_tasks(s, cp, cps);
}
}
/* Otherwise, at least one of the cells is not split, build a direct
* interaction. */
} else {
/* Set the data. */
data[0] = ci;
data[1] = cj;
/* Create the task. */
tid = qsched_addtask(s, task_type_pair, task_flag_none, data,
sizeof(struct cell *) * 2, ci->count * cj->count);
/* Add the resources. */
qsched_addlock(s, tid, ci->res);
qsched_addlock(s, tid, cj->res);
}
} /* Cells are direct neighbours */
} /* Otherwise it's a pair */
}
/* -------------------------------------------------------------------------- */
/* Legacy tree walk */
/* -------------------------------------------------------------------------- */
/**
* @brief Compute the center of mass of a given cell recursively.
*
* @param c The #cell.
*/
void legacy_comp_com(struct cell *c, int *countCoMs) {
int k, count = c->count;
struct cell *cp;
struct part *p, *parts = c->parts;
double com[3] = {0.0, 0.0, 0.0}, mass = 0.0;
++(*countCoMs);
/* Is the cell split? */
if (c->split) {
/* Loop over the progeny. */
for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) {
/* Recurse */
legacy_comp_com(cp, countCoMs);
/* Collect multipole information */
float cp_mass = cp->legacy.mass;
com[0] += cp->legacy.com[0] * cp_mass;
com[1] += cp->legacy.com[1] * cp_mass;
com[2] += cp->legacy.com[2] * cp_mass;
mass += cp_mass;
}
/* Otherwise, collect the multipole from local data. */
} else {
for (k = 0; k < count; k++) {
p = &parts[k];
float p_mass = p->mass;
com[0] += p->x[0] * p_mass;
com[1] += p->x[1] * p_mass;
com[2] += p->x[2] * p_mass;
mass += p_mass;
}
}
/* Finish multipole calculation */
if (mass > 0.0) {
float imass = 1.0f / mass;
c->legacy.com[0] = com[0] * imass;
c->legacy.com[1] = com[1] * imass;
c->legacy.com[2] = com[2] * imass;
c->legacy.mass = mass;
} else {
c->legacy.com[0] = 0.0;
c->legacy.com[1] = 0.0;
c->legacy.com[2] = 0.0;
c->legacy.mass = 0.0;
}
}
/**
* @brief Interacts a particle with a cell recursively using the original B-H
* tree walk procedure
*
* @param parts The array of particles
* @param i The particle of interest
* @param root The root of the tree under which we will search.
* @param monitor If set to @c parts[i].id, will produce debug output when
* ICHECK is set.
* @param cell The cell the particle interacts with
*/
void legacy_interact(struct part *parts, int i, struct cell *root, int monitor,
int *countMultipoles, int *countPairs) {
int j, k;
float r2, dx[3], ir, w;
float a[3] = {0.0, 0.0, 0.0};
double pix[3] = {parts[i].x[0], parts[i].x[1], parts[i].x[2]};
int pid = parts[i].id;
struct cell *cell = root;
/* Traverse the cells of the tree. */
while (cell != NULL) {
/* Are we in a leaf ? */
if (!cell->split) {
/* Interact the particle with the particles in the leaf */
for (j = 0; j < cell->count; ++j) {
if (cell->parts[j].id == pid) continue;
/* Compute the pairwise distance. */
for (r2 = 0.0, k = 0; k < 3; k++) {
dx[k] = cell->parts[j].x[k] - pix[k];
r2 += dx[k] * dx[k];
}
/* Apply the gravitational acceleration. */
ir = 1.0f / sqrtf(r2);
w = cell->parts[j].mass * const_G * ir * ir * ir;
for (k = 0; k < 3; k++) a[k] += w * dx[k];
(*countPairs)++;
#if ICHECK >= 0
if (pid == monitor)
message("[BH_] Interaction with particle id= %d a=( %e %e %e ) h= %f "
"loc=( %e %e %e )\n",
cell->parts[j].id, w * dx[0], w * dx[1], w * dx[2], cell->h,
cell->loc[0], cell->loc[1], cell->loc[2]);
#endif
}
cell = cell->sibling;
} else {
/* We are in a node */
for (r2 = 0.0, k = 0; k < 3; k++) {
dx[k] = cell->legacy.com[k] - pix[k];
r2 += dx[k] * dx[k];
}
#if ICHECK >= 0
if (pid == monitor)
message("This is a node with %d particles h= %f. r= %f theta= %f",
cell->count, cell->h, sqrt(r2), dist_min);
#endif
/* Is the cell far enough ? */
if (dist_min * dist_min * r2 < cell->h * cell->h) {
#if ICHECK >= 0
if (pid == monitor) printf("Recursing...\n");
#endif
cell = cell->firstchild;
continue;
}
#if ICHECK >= 0
if (pid == monitor)
message("[BH_] Can interact with the monopole. x= %f %f %f m= %f h= %f",
cell->legacy.com[0], cell->legacy.com[1], cell->legacy.com[2],
cell->legacy.mass, cell->h);
#endif
/* Apply the gravitational acceleration. */
ir = 1.0f / sqrtf(r2);
w = cell->legacy.mass * const_G * ir * ir * ir;
for (k = 0; k < 3; k++) a[k] += w * dx[k];
(*countMultipoles)++;
/* Move to the next node */
cell = cell->sibling;
}
}
/* Store the locally computed acceleration back to the particle. */
for (k = 0; k < 3; k++) parts[i].a_legacy[k] += a[k];
}
/**
* @brief Does a tree walk as in the B-H original work for all particles
*
* @param N The number of particles
* @param parts The array of particles
* @param root The root cell of the tree
* @param monitor ID of the particle to monitor and output interactions to
* stdout
*/
void legacy_tree_walk(int N, struct part *parts, struct cell *root, int monitor,
int *countMultipoles, int *countPairs, int *countCoMs) {
int i;
/* Compute multipoles (recursively) */
legacy_comp_com(root, countCoMs);
//#pragma omp parallel for
for (i = 0; i < N; ++i) {
if (parts[i].id == monitor)
message("tree walk for particle %d x= %f %f %f", parts[i].id,
parts[i].x[0], parts[i].x[1], parts[i].x[2]);
legacy_interact(parts, i, root, monitor, countMultipoles, countPairs);
if (parts[i].id == monitor)
message("[check] legacy acceleration for particle %d a= %.3e %.3e %.3e",
parts[i].id, parts[i].a_legacy[0], parts[i].a_legacy[1],
parts[i].a_legacy[2]);
}
}
/* -------------------------------------------------------------------------- */
/* Exact interaction */
/* -------------------------------------------------------------------------- */
/**
* @brief Solve the particle interactions using the stupid N^2 algorithm
*
* @param N The number of particles
* @param parts The array of particles
*/
void interact_exact(int N, struct part *parts, int monitor) {
int i, j, k;
float ir, w, r2, dx[3];
/* Loop over all particles. */
for (i = 0; i < N; ++i) {
/* Some things to store locally. */
double pix[3] = {parts[i].x[0], parts[i].x[1], parts[i].x[2]};
float mi = parts[i].mass;
/* Loop over every other particle. */
for (j = i + 1; j < N; ++j) {
/* Compute the pairwise distance. */
for (r2 = 0.0, k = 0; k < 3; k++) {
dx[k] = parts[j].x[k] - pix[k];
r2 += dx[k] * dx[k];
}
/* Apply the gravitational acceleration. */
ir = 1.0f / sqrtf(r2);
w = const_G * ir * ir * ir;
for (k = 0; k < 3; k++) {
float wdx = w * dx[k];
parts[j].a_exact[k] -= wdx * mi;
parts[i].a_exact[k] += wdx * parts[j].mass;
}
}
}
#if ICHECK >= 0
for (i = 0; i < N; ++i)
if (parts[i].id == monitor)
message("[check] exact acceleration for particle %d a= %.3e %.3e %.3e\n",
parts[i].id, parts[i].a_exact[0], parts[i].a_exact[1],
parts[i].a_exact[2]);
#endif
}
/**
* @brief Set up and run a task-based Barnes-Hutt N-body solver.
*
* @param N The number of random particles to use.
* @param nr_threads Number of threads to use.
* @param runs Number of force evaluations to use as a benchmark.
* @param fileName Input file name. If @c NULL or an empty string, random
* particle positions will be used.
*/
void test_bh(int N, int nr_threads, int runs, char *fileName) {
int i, k;
struct cell *root;
struct part *parts;
FILE *file;
struct qsched s;
ticks tic, toc_run, tot_setup = 0, tot_run = 0;
int countMultipoles = 0, countPairs = 0, countCoMs = 0;
/* Runner function. */
void runner(int type, void * data) {
ticks tic = getticks();
/* Decode the data. */
struct cell **d = (struct cell **)data;
/* Decode and execute the task. */
switch (type) {
case task_type_self:
iact_self_direct(d[0]);
break;
case task_type_pair:
iact_pair(d[0], d[1]);
break;
case task_type_pair_direct:
iact_pair_direct(d[0], d[1]);
break;
case task_type_self_pc:
init_multipole_walk(d[0], d[1]);
break;
case task_type_com:
comp_com(d[0]);
break;
default:
error("Unknown task type.");
}
atomic_add(&task_timers[type], getticks() - tic);
}
/* Initialize the per-task type timers. */
for (k = 0; k < task_type_count; k++) task_timers[k] = 0;
/* Initialize the scheduler. */
qsched_init(&s, nr_threads, qsched_flag_noreown);
/* Init and fill the particle array. */
if ((parts = (struct part *)malloc(sizeof(struct part) * N)) == NULL)
error("Failed to allocate particle buffer.");
/* If no input file was specified, generate random particle positions. */
if (fileName == NULL || fileName[0] == 0) {
for (k = 0; k < N; k++) {
parts[k].id = k;
parts[k].x[0] = ((double)rand()) / RAND_MAX;
parts[k].x[1] = ((double)rand()) / RAND_MAX;
parts[k].x[2] = ((double)rand()) / RAND_MAX;
parts[k].mass = ((double)rand()) / RAND_MAX;
parts[k].a_legacy[0] = 0.0;
parts[k].a_legacy[1] = 0.0;
parts[k].a_legacy[2] = 0.0;
}
/* Otherwise, read them from a file. */
} else {
file = fopen(fileName, "r");
if (file) {
for (k = 0; k < N; k++) {
if (fscanf(file, "%d", &parts[k].id) != 1)
error("Failed to read ID of part %i.", k);
if (fscanf(file, "%lf%lf%lf", &parts[k].x[0], &parts[k].x[1],
&parts[k].x[2]) !=
3)
error("Failed to read position of part %i.", k);
if (fscanf(file, "%f", &parts[k].mass) != 1)
error("Failed to read mass of part %i.", k);
}
fclose(file);
}
}
/* Init the cells. */
root = cell_get();
root->loc[0] = 0.0;
root->loc[1] = 0.0;
root->loc[2] = 0.0;
root->h = 1.0;
root->count = N;
root->parts = parts;
cell_split(root, &s);
/* Iterate over the cells and get the average number of particles
per leaf. */
struct cell *c = root;
int nr_leaves = 0;
while (c != NULL) {
if (!c->split) {
nr_leaves++;
c = c->sibling;
} else {
c = c->firstchild;
}
}
message("Average number of parts per leaf is %f.", ((double)N) / nr_leaves);
#if ICHECK > 0
printf("----------------------------------------------------------\n");
/* Do a N^2 interactions calculation */
ticks tic_exact = getticks();
interact_exact(N, parts, ICHECK);
ticks toc_exact = getticks();
printf("Exact calculation (1 thread) took %lli (= %e) ticks\n",
toc_exact - tic_exact, (float)(toc_exact - tic_exact));
printf("----------------------------------------------------------\n");
#endif
/* Create the tasks. */
tic = getticks();
create_tasks(&s, root, NULL);
tot_setup += getticks() - tic;
/* Dump the number of tasks. */
message("total nr of tasks: %i.", s.count);
message("total nr of deps: %i.", s.count_deps);
message("total nr of res: %i.", s.count_res);
message("total nr of locks: %i.", s.count_locks);
message("total nr of uses: %i.", s.count_uses);
int counts[task_type_count];
for (k = 0; k < task_type_count; k++) counts[k] = 0;
for (k = 0; k < s.count; k++) counts[s.tasks[k].type] += 1;
// char buffer[200];
// sprintf(buffer, "timings_legacy_%d_%d.dat", cell_maxparts, nr_threads);
// FILE *fileTime = fopen(buffer, "w");
/* Loop over the number of runs. */
for (k = 0; k < 0 /* runs */; k++) {
countMultipoles = 0;
countPairs = 0;
countCoMs = 0;
/* Execute the legacy walk. */
tic = getticks();
legacy_tree_walk(N, parts, root, ICHECK, &countMultipoles, &countPairs,
&countCoMs);
toc_run = getticks();
/* Dump some timings. */
message("%ith legacy run took %lli (= %e) ticks...", k, toc_run - tic,
(float)(toc_run - tic));
tot_run += toc_run - tic;
// fprintf(fileTime, "%lli %e\n", toc_run - tic, (float)(toc_run - tic));
}
// fclose(fileTime);
#if ICHECK >= 0
message("[check] accel of part %i is [%.3e,%.3e,%.3e]", ICHECK,
root->parts[ICHECK].a[0], root->parts[ICHECK].a[1],
root->parts[ICHECK].a[2]);
#endif
printf("task counts: [ %8s %8s %8s %8s %8s ]\n", "self", "pair", "m-poles",
"direct", "CoMs");
printf("task counts: [ %8i %8i %8i %8i %8i ] (legacy).\n", 0, 0,
countMultipoles, countPairs, countCoMs);
printf("task counts: [ ");
for (k = 0; k < task_type_count; k++) printf("%8i ", counts[k]);
printf("] (new).\n");
/* Loop over the number of runs. */
for (k = 0; k < runs; k++) {
for (i = 0; i < N; ++i) {
parts[i].a[0] = 0.0;
parts[i].a[1] = 0.0;
parts[i].a[2] = 0.0;
}
/* struct cell *finger = root;
while (finger != NULL) {
finger->sorted = 0;
if (finger->split)
finger = finger->firstchild;
else
finger = finger->sibling;
} */
/* Execute the tasks. */
tic = getticks();
qsched_run(&s, nr_threads, runner);
toc_run = getticks();
message("%ith run took %lli (= %e) ticks...", k, toc_run - tic,
(float)(toc_run - tic));
tot_run += toc_run - tic;
}
message("[check] root mass= %f %f", root->legacy.mass, root->new.mass);
message("[check] root CoMx= %f %f", root->legacy.com[0], root->new.com[0]);
message("[check] root CoMy= %f %f", root->legacy.com[1], root->new.com[1]);
message("[check] root CoMz= %f %f", root->legacy.com[2], root->new.com[2]);
#if ICHECK >= 0
for (i = 0; i < N; ++i)
if (root->parts[i].id == ICHECK)
message("[check] accel of part %i is [%.3e,%.3e,%.3e]", ICHECK,
root->parts[i].a[0], root->parts[i].a[1], root->parts[i].a[2]);
#endif
/* Dump the tasks. */
/* for ( k = 0 ; k < s.count ; k++ )
printf( " %i %i %lli %lli\n" , s.tasks[k].type , s.tasks[k].qid ,
s.tasks[k].tic , s.tasks[k].toc ); */
/* Dump the costs. */
message("costs: setup=%lli ticks, run=%lli ticks.", tot_setup,
tot_run / runs);
/* Dump the timers. */
for (k = 0; k < qsched_timer_count; k++)
message("timer %s is %lli ticks.", qsched_timer_names[k],
s.timers[k] / runs);
/* Dump the per-task type timers. */
printf("task timers: [ ");
for (k = 0; k < task_type_count; k++) printf("%lli ", task_timers[k] / runs);
printf("] ticks.\n");
/* Dump the particles to a file */
file = fopen("particle_dump.dat", "w");
fprintf(file,
"# ID m x y z a_exact.x a_exact.y a_exact.z a_legacy.x "
"a_legacy.y a_legacy.z a_new.x a_new.y a_new.z d\n");
for (k = 0; k < N; ++k)
fprintf(file, "%d %e %e %e %e %e %e %e %e %e %e %e %e %e %e\n", parts[k].id,
parts[k].mass, parts[k].x[0], parts[k].x[1], parts[k].x[2],
parts[k].a_exact[0], parts[k].a_exact[1], parts[k].a_exact[2],
parts[k].a_legacy[0], parts[k].a_legacy[1], parts[k].a_legacy[2],
parts[k].a[0], parts[k].a[1], parts[k].a[2], parts[k].d);
fclose(file);
/* Clean up. */
qsched_free(&s);
free(parts);
}
/* All 26 configurations for the cell tests */
const float cell_shift[27 * 3] = {1.0, 1.0, 1.0, /* 0 */
1.0, 1.0, 0.0, /* 1 */
1.0, 1.0, -1.0, /* 2 */
1.0, 0.0, 1.0, /* 3 */
1.0, 0.0, 0.0, /* 4 */
1.0, 0.0, -1.0, /* 5 */
1.0, -1.0, 1.0, /* 6 */
1.0, -1.0, 0.0, /* 7 */
1.0, -1.0, -1.0, /* 8 */
0.0, 1.0, 1.0, /* 9 */
0.0, 1.0, 0.0, /* 10 */
0.0, 1.0, -1.0, /* 11 */
0.0, 0.0, 1.0, /* 12 */
-1.0, -1.0, -1.0, /* 13 */
-1.0, -1.0, 0.0, /* 14 */
-1.0, -1.0, 1.0, /* 15 */
-1.0, 0.0, -1.0, /* 16 */
-1.0, 0.0, 0.0, /* 17 */
-1.0, 0.0, 1.0, /* 18 */
-1.0, 1.0, -1.0, /* 19 */
-1.0, 1.0, 0.0, /* 20 */
-1.0, 1.0, 1.0, /* 21 */
0.0, -1.0, -1.0, /* 22 */
0.0, -1.0, 0.0, /* 23 */
0.0, -1.0, 1.0, /* 24 */
0.0, 0.0, -1.0, /* 25 */
0.0, 0.0,
0.0 /* 26 */ /* <-- The cell itself */
};
/**
* @brief Creates two neighbouring cells wiht N_parts per celland makes them
* interact using both the
* sorted and unsortde interactions. Outputs then the tow sets of accelerations
* for accuracy tests.
* @param N_parts Number of particles in each cell
* @param orientation Orientation of the cells ( 0 <= orientation < 26 )
*/
void test_direct_neighbour(int N_parts, int orientation) {
int k;
struct part *parts;
struct cell left, right;
if (orientation >= 26) error("Wrong orientation !");
/* Select configuration */
float shift[3];
for (k = 0; k < 3; ++k) shift[k] = cell_shift[3 * orientation + k];
/* Init and fill the particle array. */
if ((parts = (struct part *)malloc(sizeof(struct part) * N_parts * 2)) ==
NULL)
error("Failed to allocate particle buffer.");
/* Create random set of particles in both cells */
for (k = 0; k < 2 * N_parts; k++) {
parts[k].id = k;
parts[k].x[0] = 1*((double)rand()) / RAND_MAX;
parts[k].x[1] = 1*((double)rand()) / RAND_MAX;
parts[k].x[2] = 1*((double)rand()) / RAND_MAX;
/* Shift the second cell */
if (k >= N_parts) {
parts[k].x[0] += shift[0];
parts[k].x[1] += shift[1];
parts[k].x[2] += shift[2];
}
parts[k].mass = 1.;//((double)rand()) / RAND_MAX;
parts[k].a[0] = 0.0;
parts[k].a[1] = 0.0;
parts[k].a[2] = 0.0;
}
/* Get the cell geometry right */
left.loc[0] = 0.;
left.loc[1] = 0.;
left.loc[2] = 0.;
left.h = 1.;
right.loc[0] = 1*shift[0];
right.loc[1] = 1*shift[1];
right.loc[2] = 1*shift[2];
right.h = 1.;
/* Put the particles in the cell */
left.parts = parts;
left.count = N_parts;
right.parts = parts + N_parts;
right.count = N_parts;
/* Get the linked list right (functions should not recurse but hey...) */
left.firstchild = NULL;
left.sibling = NULL;
left.split = 0;
right.firstchild = NULL;
right.sibling = NULL;
right.split = 0;
/* Get the multipoles right (should also be useless) */
comp_com(&left);
comp_com(&right);
/* Nothing has been sorted yet */
left.indices = NULL;
left.sorted = 0;
right.indices = NULL;
right.sorted = 0;
#ifdef COUNTERS
count_direct_unsorted = 0;
count_direct_sorted_pp = 0;
count_direct_sorted_pm_i = 0;
count_direct_sorted_pm_j = 0;
#endif
/* Do the interactions without sorting */
iact_pair_direct_unsorted(&left, &right);
message("Unsorted interactions done ");
/* Store accelerations */
for (k = 0; k < 2 * N_parts; k++) {
parts[k].a_exact[0] = parts[k].a[0];
parts[k].a_exact[1] = parts[k].a[1];
parts[k].a_exact[2] = parts[k].a[2];
parts[k].a[0] = 0.0;
parts[k].a[1] = 0.0;
parts[k].a[2] = 0.0;
}
/* Do the interactions with sorting */
iact_pair_direct(&left, &right);
message("Sorted interactions done ");
for (k = 0; k < 2 * N_parts; ++k) {
if (parts[k].id == ICHECK) {
message("Accel exact : [ %e %e %e ]", parts[k].a_exact[0],
parts[k].a_exact[1], parts[k].a_exact[2]);
message("Accel sorted: [ %e %e %e ]", parts[k].a[0], parts[k].a[1],
parts[k].a[2]);
}
}
/* Position along the axis */
double *position, *ortho_dist;
if ((position = (double *)malloc(sizeof(double) * N_parts * 2)) == NULL)
error("Failed to allocate position buffer.");
if ((ortho_dist = (double *)malloc(sizeof(double) * N_parts * 2)) == NULL)
error("Failed to allocate orthogonal distance buffer.");
float w = 1.0f / sqrtf(shift[0]*shift[0] + shift[1]*shift[1] + shift[2]*shift[2]);
shift[0] *= w;
shift[1] *= w;
shift[2] *= w;
for (k = 0; k < 2 * N_parts; ++k) {
float dx = parts[k].x[0] * shift[0];
float dy = parts[k].x[1] * shift[1];
float dz = parts[k].x[2] * shift[2];
position[k] = dx + dy + dz;
dx = (parts[k].x[1] - 0.5f)*shift[2] - (parts[k].x[2] - 0.5f)*shift[1];
dy = (parts[k].x[2] - 0.5f)*shift[0] - (parts[k].x[0] - 0.5f)*shift[2];
dz = (parts[k].x[0] - 0.5f)*shift[1] - (parts[k].x[1] - 0.5f)*shift[0];
ortho_dist[k] = sqrtf(dx*dx + dy*dy + dz*dz);
}
/* Now, output everything */
char fileName[100];
sprintf(fileName, "interaction_dump_%d.dat", orientation);
message("Writing file '%s'", fileName);
FILE *file = fopen(fileName, "w");
fprintf(file,
"# ID m r x y z a_u.x a_u.y a_u.z a_s.x a_s.y a_s.z ortho d\n");
for (k = 0; k < 2 * N_parts; k++) {
fprintf(file, "%d %e %e %e %e %e %e %e %e %e %e %e %e %e %e %e\n", parts[k].id,
parts[k].mass, position[k], parts[k].x[0], parts[k].x[1],
parts[k].x[2], parts[k].a_exact[0], parts[k].a_exact[1],
parts[k].a_exact[2], parts[k].a[0], parts[k].a[1], parts[k].a[2],
ortho_dist[k], parts[k].d, parts[k].d_min, parts[k].d_max);
}
fclose(file);
#ifdef COUNTERS
message("Unsorted intereactions: %d", count_direct_unsorted);
message("Sorted intereactions PP: %d", count_direct_sorted_pp);
message("Sorted intereactions PM (part i): %d", count_direct_sorted_pm_i);
message("Sorted intereactions PM (part j): %d", count_direct_sorted_pm_j);
message("Sorted intereactions total: %d",
count_direct_sorted_pm_j + count_direct_sorted_pm_i +
count_direct_sorted_pp);
// message( "%f %d %d %d %d\n", dist_cutoff_ratio, count_direct_unsorted,
// count_direct_sorted_pp, count_direct_sorted_pm_i, count_direct_sorted_pm_j );
#endif
/* Clean up */
free(parts);
}
/**
* @brief Creates 27 cells and makes the particles in the central one
* interact with the other cells
* using both the sorted and unsorted interactions
* Outputs then the two sets of accelerations for accuracy tests.
* @param N_parts Number of particles in each cell
* @param N_test Number of times the test has to be run
*/
void test_all_direct_neighbours(int N_parts, int N_test) {
int i, k, j;
struct part *parts;
struct cell cells[27];
/* Init and fill the particle array. */
if ((parts = (struct part *)malloc(sizeof(struct part) * N_parts * 27)) ==
NULL)
error("Failed to allocate particle buffer.");
#ifdef COUNTERS
count_direct_unsorted = 0;
count_direct_sorted_pp = 0;
count_direct_sorted_pm_i = 0;
count_direct_sorted_pm_j = 0;
#endif
int countMultipoles = 0;
int countPairs = 0;
int countCoMs = 0;
/* Loop over the number of tests */
for (i = 0; i < N_test; ++i) {
/* Loop over the 27 cells */
for (j = 0; j < 27; ++j) {
float shift[3];
for (k = 0; k < 3; ++k) shift[k] = cell_shift[3 * j + k];
/* Create random set of particles in all cells */
for (k = 0; k < N_parts; k++) {
parts[j * N_parts + k].id = j * N_parts + k;
parts[j * N_parts + k].x[0] = ((double)rand()) / RAND_MAX + shift[0];
parts[j * N_parts + k].x[1] = ((double)rand()) / RAND_MAX + shift[1];
parts[j * N_parts + k].x[2] = ((double)rand()) / RAND_MAX + shift[2];
parts[j * N_parts + k].mass = 1.;//((double)rand()) / RAND_MAX;
parts[j * N_parts + k].a[0] = 0.0;
parts[j * N_parts + k].a[1] = 0.0;
parts[j * N_parts + k].a[2] = 0.0;
parts[j * N_parts + k].a_exact[0] = 0.0;
parts[j * N_parts + k].a_exact[1] = 0.0;
parts[j * N_parts + k].a_exact[2] = 0.0;
parts[j * N_parts + k].a_legacy[0] = 0.0;
parts[j * N_parts + k].a_legacy[1] = 0.0;
parts[j * N_parts + k].a_legacy[2] = 0.0;
}
/* Get the cell geometry right */
cells[j].loc[0] = shift[0];
cells[j].loc[1] = shift[1];
cells[j].loc[2] = shift[2];
cells[j].h = 1.;
/* Put the particles in the cell */
cells[j].parts = parts + N_parts * j;
cells[j].count = N_parts;
/* Get the linked list right (functions should not recurse but hey...) */
cells[j].firstchild = NULL;
cells[j].sibling = NULL;
cells[j].split = 0;
/* Get the multipoles right (should also be useless) */
comp_com(&cells[j]);
/* Nothing has been sorted yet */
cells[j].indices = NULL;
cells[j].sorted = 0;
}
/* Do the interactions without sorting */
for (j = 0; j < 26; ++j) iact_pair_direct_unsorted(&cells[26], &cells[j]);
iact_self_direct(&cells[26]);
// message("Unsorted interactions done ");
/* Store accelerations */
for (k = 0; k < 27 * N_parts; k++) {
parts[k].a_exact[0] = parts[k].a[0];
parts[k].a_exact[1] = parts[k].a[1];
parts[k].a_exact[2] = parts[k].a[2];
parts[k].a[0] = 0.0;
parts[k].a[1] = 0.0;
parts[k].a[2] = 0.0;
}
/* Do the interactions with sorting */
for (j = 0; j < 26; ++j) iact_pair_direct(&cells[26], &cells[j]);
iact_self_direct(&cells[26]);
// message("Sorted interactions done ");
/* Now, do a B-H calculation on the same set of particles */
struct cell *root = cell_get();
root->loc[0] = -1.0;
root->loc[1] = -1.0;
root->loc[2] = -1.0;
root->h = 3.0;
root->count = 27 * N_parts;
root->parts = parts;
struct qsched s;
qsched_init(&s, 1, qsched_flag_noreown);
cell_split(root, &s);
legacy_tree_walk(27 * N_parts, parts, root, ICHECK, &countMultipoles,
&countPairs, &countCoMs);
// free(root);
//++countCoMs;
/* Now, output everything */
char fileName[100];
sprintf(fileName, "interaction_dump_all.dat");
// message("Writing file '%s'", fileName);
FILE *file = fopen(fileName, (i == 0 ? "w" : "a"));
if (i == 0)
fprintf(file,
"# ID m x y z a_u.x a_u.y a_u.z a_s.x a_s.y a_s.z"
" a_bh.x a_bh.y a_bh.z\n");
for (k = 0; k < 27 * N_parts; k++) {
if (parts[k].id >= 26 * N_parts)
fprintf(file, "%d %e %e %e %e %e %e %e %e %e %e %e %e %e\n",
parts[k].id, parts[k].mass, parts[k].x[0], parts[k].x[1],
parts[k].x[2], parts[k].a_exact[0], parts[k].a_exact[1],
parts[k].a_exact[2], parts[k].a[0], parts[k].a[1],
parts[k].a[2], parts[k].a_legacy[0], parts[k].a_legacy[1],
parts[k].a_legacy[2]);
}
fclose(file);
}
#ifdef COUNTERS
message("Unsorted intereactions: %d", count_direct_unsorted);
message("B-H intereactions: PP %d", countPairs);
message("B-H intereactions: PM %d", countMultipoles);
message("B-H intereactions: total %d", countPairs + countMultipoles);
message("Sorted intereactions PP: %d", count_direct_sorted_pp);
message("Sorted intereactions PM (part i): %d", count_direct_sorted_pm_i);
message("Sorted intereactions PM (part j): %d", count_direct_sorted_pm_j);
message("Sorted intereactions total: %d",
count_direct_sorted_pm_j + count_direct_sorted_pm_i +
count_direct_sorted_pp);
// message( "%f %d %d %d %d\n", dist_cutoff_ratio, count_direct_unsorted,
// count_direct_sorted_pp, count_direct_sorted_pm_i, count_direct_sorted_pm_j );
#endif
/* Clean up */
free(parts);
}
/**
* @brief Main function.
*/
int main(int argc, char *argv[]) {
int c, k, nr_threads;
int N = 1000, runs = 1;
char fileName[100] = {0};
int N_parts = 0;
int N_iterations = 0;
/* Die on FP-exceptions. */
feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
srand(0);
/* Get the number of threads. */
#pragma omp parallel shared(nr_threads)
{
if (omp_get_thread_num() == 0) nr_threads = omp_get_num_threads();
}
/* Parse the options */
while ((c = getopt(argc, argv, "n:r:t:f:c:i:")) != -1) switch (c) {
case 'n':
if (sscanf(optarg, "%d", &N) != 1)
error("Error parsing number of particles.");
break;
case 'r':
if (sscanf(optarg, "%d", &runs) != 1)
error("Error parsing number of runs.");
break;
case 't':
if (sscanf(optarg, "%d", &nr_threads) != 1)
error("Error parsing number of threads.");
omp_set_num_threads(nr_threads);
break;
case 'f':
if (sscanf(optarg, "%s", &fileName[0]) != 1)
error("Error parsing file name.");
break;
case 'c':
if (sscanf(optarg, "%d", &N_parts) != 1)
error(
"Error parsing number of particles in neighbouring cells test.");
break;
case 'i':
if (sscanf(optarg, "%d", &N_iterations) != 1)
error(
"Error parsing number of particles in neighbouring cells test.");
break;
case '?':
fprintf(stderr, "Usage: %s [-t nr_threads] [-n N] [-r runs] [-f file] "
"[-c Nparts] [-i Niterations] \n",
argv[0]);
fprintf(stderr, "Solves the N-body problem using a Barnes-Hut\n"
"tree code with N random particles read from a file in "
"[0,1]^3 using"
"nr_threads threads.\n"
"A test of the neighbouring cells interaction with "
"Nparts per cell is also run Niterations times.\n");
exit(EXIT_FAILURE);
}
/* Tree node information */
printf("Size of cell: %zu bytes.\n", sizeof(struct cell));
/* Part information */
printf("Size of part: %zu bytes.\n", sizeof(struct part));
/* Run the neighbour direct integration test */
if (N_parts > 0) {
/* Dump arguments */
message("Interacting 2 neighbouring cells with %d particles per cell",
N_parts);
/* Run the test */
for (k = 0; k < 26; ++k) test_direct_neighbour(N_parts, k);
//test_direct_neighbour(N_parts, 0);
//test_direct_neighbour(N_parts, 1);
//test_direct_neighbour(N_parts, 4);
k++;
/* Dump arguments */
message("Interacting one cell with %d particles with its 27 neighbours"
" %d times to get the statistics.",
N_parts, N_iterations);
test_all_direct_neighbours(N_parts, N_iterations);
} else {
/* Dump arguments. */
if (fileName[0] == 0) {
message("Computing the N-body problem over %i random particles using %i "
"threads (%i runs).",
N, nr_threads, runs);
} else {
message("Computing the N-body problem over %i particles read from '%s' "
"using %i threads (%i runs).",
N, fileName, nr_threads, runs);
}
/* Run the BH test. */
test_bh(N, nr_threads, runs, fileName);
}
return 0;
}