diff --git a/examples/test_fmm_sorted.c b/examples/test_fmm.c similarity index 51% rename from examples/test_fmm_sorted.c rename to examples/test_fmm.c index ec33c0d53f412d723f93c24563d7cd0af07afc27..906fcfd4b883399677b8a7e81c59815a2bd9e734 100644 --- a/examples/test_fmm_sorted.c +++ b/examples/test_fmm.c @@ -37,37 +37,36 @@ /* Some local constants. */ #define cell_pool_grow 1000 -#define cell_maxparts 1 +#define cell_maxparts 100 #define task_limit 1e8 #define const_G 1 // 6.6738e-8 #define dist_min 0.5 /* Used for legacy walk only */ #define dist_cutoff_ratio 1.5 -#define iact_pair_direct iact_pair_direct_sorted_multipole + #define ICHECK -1 #define NO_SANITY_CHECKS #define NO_COM_AS_TASK -//#define COUNTERS - +#define NO_COUNTERS /** Data structure for the particles. */ struct part { double x[3]; - // union { - float a[3]; - float a_legacy[3]; - float a_exact[3]; - // }; + union { + float a[3]; + float a_legacy[3]; + float a_exact[3]; + }; float mass; int id; -}; // __attribute__((aligned(64))); +}; // __attribute__((aligned(32))); -/** Data structure for the sorted particle positions. */ -struct index { - int ind; - float d; -}; +struct part_local { + float x[3]; + float a[3]; + float mass; +} __attribute__((aligned(32))); -struct monopole { +struct multipole { double com[3]; float mass; }; @@ -89,10 +88,10 @@ struct cell { union { /* Information for the legacy walk */ - struct monopole legacy; + struct multipole legacy; /* Information for the QuickShed walk */ - struct monopole new; + struct multipole new; }; int res, com_tid; @@ -104,116 +103,11 @@ struct cell { enum task_type { task_type_self = 0, task_type_pair, - //task_type_self_pc, - task_type_pair_direct, + task_type_self_pc, task_type_com, task_type_count }; - - - -/** Multipole stuff. */ -struct multipole { - float mass; - float mu[3]; /* Centre of Mass */ - float sigma_00, sigma_11, sigma_22, sigma_01, sigma_02, sigma_12; -}; - -static inline void multipole_reset(struct multipole *d) { - d->mu[0] = 0.0; - d->mu[1] = 0.0; - d->mu[2] = 0.0; - - d->sigma_00 = 0.0; - d->sigma_11 = 0.0; - d->sigma_22 = 0.0; - d->sigma_01 = 0.0; - d->sigma_02 = 0.0; - d->sigma_12 = 0.0; - - d->mass = 0.0; -} - -static inline void multipole_add(struct multipole *d, const float *x, float mass) { - d->mu[0] += x[0] * mass; - d->mu[1] += x[1] * mass; - d->mu[2] += x[2] * mass; - - d->sigma_00 += mass * x[0] * x[0]; - d->sigma_11 += mass * x[1] * x[1]; - d->sigma_22 += mass * x[2] * x[2]; - d->sigma_01 += mass * x[0] * x[1]; - d->sigma_02 += mass * x[0] * x[2]; - d->sigma_12 += mass * x[1] * x[2]; - - d->mass += mass; -} - - -static inline void multipole_iact(struct multipole *d, const float *x, float* a) { - - int k; - float r2,ir,w, dx[3]; - - /* early abort */ - if (d->mass == 0.) return; - - float inv_mass = 1. / d->mass; - - /* Temporary quantity */ - dx[0] = d->sigma_00 - inv_mass*d->mu[0]*d->mu[0]; /* Abusing a so far unused */ - dx[1] = d->sigma_11 - inv_mass*d->mu[1]*d->mu[1]; /* variable temporarily... */ - dx[2] = d->sigma_22 - inv_mass*d->mu[2]*d->mu[2]; - - /* Construct quadrupole matrix */ - float Q_00 = 2.0f*dx[0] - dx[1] - dx[2]; - float Q_11 = 2.0f*dx[1] - dx[0] - dx[2]; - float Q_22 = Q_00 + Q_11; /* Q_ij is traceless */ - Q_22 *= -1.0f; - - float Q_01 = d->sigma_01 - inv_mass*d->mu[0]*d->mu[1]; - float Q_02 = d->sigma_02 - inv_mass*d->mu[0]*d->mu[2]; - float Q_12 = d->sigma_12 - inv_mass*d->mu[1]*d->mu[2]; - Q_01 *= 3.0f; - Q_02 *= 3.0f; - Q_12 *= 3.0f; - - /* Compute the distance to the CoM. */ - for (r2 = 0.0f, k = 0; k < 3; k++) { - dx[k] = x[k] - d->mu[k] * inv_mass; - r2 += dx[k] * dx[k]; - } - ir = 1.0f / sqrtf(r2); - - /* Compute the acceleration from the monopole */ - w = const_G * ir * ir * ir * d->mass; - for (k = 0; k < 3; k++) a[k] -= w * dx[k]; - - /* Compute some helpful terms */ - float w1 = const_G * ir * ir * ir * ir * ir; - float w2 = -2.5f * const_G * ir * ir * ir * ir * ir * ir * ir; - float xi = 0.; - - xi += dx[0] * dx[0] * Q_00; - xi += dx[1] * dx[1] * Q_11; - xi += dx[2] * dx[2] * Q_22; - xi += 2.f * dx[0] * dx[1] * Q_01; - xi += 2.f * dx[0] * dx[2] * Q_02; - xi += 2.f * dx[1] * dx[2] * Q_12; - - /* Compute the acceleration from the quadrupole */ - a[0] += w2 * xi * dx[0] + w1 * (dx[0]*Q_00 + dx[1]*Q_01 + dx[2]*Q_02); - a[1] += w2 * xi * dx[1] + w1 * (dx[0]*Q_01 + dx[1]*Q_11 + dx[2]*Q_12); - a[2] += w2 * xi * dx[2] + w1 * (dx[0]*Q_02 + dx[1]*Q_12 + dx[2]*Q_22); - -} - - - - - - #ifdef COUNTERS int count_direct_unsorted; int count_direct_sorted_pp; @@ -227,311 +121,6 @@ ticks task_timers[task_type_count]; /** Global variable for the pool of allocated cells. */ struct cell *cell_pool = NULL; -/** Constants for the sorting axes. */ -const float axis_shift[13 * 3] = { - 5.773502691896258e-01, 5.773502691896258e-01, 5.773502691896258e-01, - 7.071067811865475e-01, 7.071067811865475e-01, 0.0, - 5.773502691896258e-01, 5.773502691896258e-01, -5.773502691896258e-01, - 7.071067811865475e-01, 0.0, 7.071067811865475e-01, - 1.0, 0.0, 0.0, - 7.071067811865475e-01, 0.0, -7.071067811865475e-01, - 5.773502691896258e-01, -5.773502691896258e-01, 5.773502691896258e-01, - 7.071067811865475e-01, -7.071067811865475e-01, 0.0, - 5.773502691896258e-01, -5.773502691896258e-01, -5.773502691896258e-01, - 0.0, 7.071067811865475e-01, 7.071067811865475e-01, - 0.0, 1.0, 0.0, - 0.0, 7.071067811865475e-01, -7.071067811865475e-01, - 0.0, 0.0, 1.0, -}; - -const char axis_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - -/* Map shift vector to sortlist. */ -const int axis_sid[27] = { - /* ( -1 , -1 , -1 ) */ 0, - /* ( -1 , -1 , 0 ) */ 1, - /* ( -1 , -1 , 1 ) */ 2, - /* ( -1 , 0 , -1 ) */ 3, - /* ( -1 , 0 , 0 ) */ 4, - /* ( -1 , 0 , 1 ) */ 5, - /* ( -1 , 1 , -1 ) */ 6, - /* ( -1 , 1 , 0 ) */ 7, - /* ( -1 , 1 , 1 ) */ 8, - /* ( 0 , -1 , -1 ) */ 9, - /* ( 0 , -1 , 0 ) */ 10, - /* ( 0 , -1 , 1 ) */ 11, - /* ( 0 , 0 , -1 ) */ 12, - /* ( 0 , 0 , 0 ) */ 0, - /* ( 0 , 0 , 1 ) */ 12, - /* ( 0 , 1 , -1 ) */ 11, - /* ( 0 , 1 , 0 ) */ 10, - /* ( 0 , 1 , 1 ) */ 9, - /* ( 1 , -1 , -1 ) */ 8, - /* ( 1 , -1 , 0 ) */ 7, - /* ( 1 , -1 , 1 ) */ 6, - /* ( 1 , 0 , -1 ) */ 5, - /* ( 1 , 0 , 0 ) */ 4, - /* ( 1 , 0 , 1 ) */ 3, - /* ( 1 , 1 , -1 ) */ 2, - /* ( 1 , 1 , 0 ) */ 1, - /* ( 1 , 1 , 1 ) */ 0 -}; - -/* Some forward declarations. */ -void comp_com(struct cell *c); - -/** - * @brief Sort the entries in ascending order using QuickSort. - * - * @param sort The indices - * @param N The number of entries. - */ -void indices_sort(struct index *sort, int N) { - - struct { - short int lo, hi; - } qstack[10]; - int qpos, i, j, lo, hi, imin; - struct index temp; - float pivot; - - /* Sort parts in cell_i in decreasing order with quicksort */ - qstack[0].lo = 0; - qstack[0].hi = N - 1; - qpos = 0; - while (qpos >= 0) { - lo = qstack[qpos].lo; - hi = qstack[qpos].hi; - qpos -= 1; - if (hi - lo < 15) { - for (i = lo; i < hi; i++) { - imin = i; - for (j = i + 1; j <= hi; j++) - if (sort[j].d < sort[imin].d) imin = j; - if (imin != i) { - temp = sort[imin]; - sort[imin] = sort[i]; - sort[i] = temp; - } - } - } else { - pivot = sort[(lo + hi) / 2].d; - i = lo; - j = hi; - while (i <= j) { - while (sort[i].d < pivot) i++; - while (sort[j].d > pivot) j--; - if (i <= j) { - if (i < j) { - temp = sort[i]; - sort[i] = sort[j]; - sort[j] = temp; - } - i += 1; - j -= 1; - } - } - if (j > (lo + hi) / 2) { - if (lo < j) { - qpos += 1; - qstack[qpos].lo = lo; - qstack[qpos].hi = j; - } - if (i < hi) { - qpos += 1; - qstack[qpos].lo = i; - qstack[qpos].hi = hi; - } - } else { - if (i < hi) { - qpos += 1; - qstack[qpos].lo = i; - qstack[qpos].hi = hi; - } - if (lo < j) { - qpos += 1; - qstack[qpos].lo = lo; - qstack[qpos].hi = j; - } - } - } - } -} - -/** - * @brief Sort the particles in the given cell along the given axis. - * - * @param c The #cell. - * @param axis The normalized axis along which to sort. - * @param aid The axis ID at which to store the indices. - */ -void cell_sort(struct cell *c, const float *axis, int aid) { - - /* Has the indices array even been allocated? */ - if (c->indices == NULL) { - if ((c->indices = (struct index *)malloc((c->count + 1) * 13 * - sizeof(struct index))) == - NULL) - error("Failed to allocate cell sorting indices."); - } else if (c->sorted & (1 << aid)) { - return; - } - - /* If this cell has been split, merge from the progeny. */ - if (c->split) { - - /* Heap of pointers to the progeny's indices. */ - struct { - struct index *index; - int offset; - } temp, pindex[8]; - int k = 0; - - /* First, make sure all the progeny have been sorted, and get the pointers - to their first entries. */ - for (struct cell *cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { - if (!(cp->sorted & (1 << aid))) cell_sort(cp, axis, aid); - pindex[k].index = &cp->indices[(cp->count + 1) * aid]; - pindex[k].offset = cp->parts - c->parts; - k++; - } - - /* Fill any remaining gaps with the sentinel. */ - c->indices[(c->count + 1) * aid + c->count].d = FLT_MAX; - for (; k < 8; k++) - pindex[k].index = &c->indices[(c->count + 1) * aid + c->count]; - - /* Heapify the pindices. */ - for (k = 3; k >= 0; k--) { - int j = k; - while (j < 4) { - int jj = 2 * j + 1; - if (jj + 1 < 8 && pindex[jj + 1].index->d < pindex[jj].index->d) - jj = jj + 1; - if (pindex[jj].index->d < pindex[j].index->d) { - temp = pindex[jj]; - pindex[jj] = pindex[j]; - pindex[j] = temp; - j = jj; - } else - break; - } - } - - /* Copy the indices into the local index in increasing order. */ - struct index *dest = &c->indices[(c->count + 1) * aid]; - for (int k = 0; k < c->count; k++) { - - /* Copy the top of the heap to the destination. */ - *dest = *pindex[0].index; - dest->ind += pindex[0].offset; - - /* Increase the pointers. */ - dest++; - pindex[0].index++; - - /* Fix the heap. */ - int j = 0; - while (j < 4) { - int jj = 2 * j + 1; - if (jj + 1 < 8 && pindex[jj + 1].index->d < pindex[jj].index->d) - jj = jj + 1; - if (pindex[jj].index->d < pindex[j].index->d) { - temp = pindex[jj]; - pindex[jj] = pindex[j]; - pindex[j] = temp; - j = jj; - } else - break; - } - } - - /* Add a sentinel at the end. */ - dest->ind = -1; - dest->d = FLT_MAX; - - /* Otherwise, just sort the entries. */ - } else { - - /* Fill the indices. */ - struct index *dest = &c->indices[(c->count + 1) * aid]; - for (int k = 0; k < c->count; k++) { - dest[k].ind = k; - dest[k].d = c->parts[k].x[0] * axis[0] + c->parts[k].x[1] * axis[1] + - c->parts[k].x[2] * axis[2]; - } - - /* Sort the indices. */ - indices_sort(&c->indices[(c->count + 1) * aid], c->count); - - /* Set the sentinel on the last entry. */ - dest[c->count].ind = -1; - dest[c->count].d = FLT_MAX; - } - - /* Mark this cell as sorted in the given direction. */ - c->sorted |= (1 << aid); - - /* Verify the sort. */ - /* int offset = (c->count + 1) * aid; - for (int k = 0; k < c->count; k++) - if (c->indices[offset + k].d > c->indices[offset + k + 1].d) - error( "Sorting failed." ); */ -} /* */ - -/** - * @brief Get all the data needed for computing a sorted pair. - * - * @param ci Pointer to a pointer to the first cell. - * @param cj Pointer to a pointer to the second cell. - * @param ind_i Sorted indices of the cell @c ci. - * @param ind_j Sorted indices of the cell @c cj. - */ -void get_axis(struct cell **ci, struct cell **cj, struct index **ind_i, - struct index **ind_j) { - - float axis[3]; - float dx[3]; - int aid = 0; - - /* Get the cell pair separation and the axis index. */ - for (int k = 0; k < 3; k++) { - dx[k] = (*cj)->loc[k] - (*ci)->loc[k]; - aid = 3 * aid + ((dx[k] < 0) ? 0 : (dx[k] > 0) ? 2 : 1); - } - - /* Flip the cells? */ - if (axis_flip[aid]) { - struct cell *temp = *ci; - *ci = *cj; - *cj = temp; - } - aid = axis_sid[aid]; - - /* Copy the shift vector to the output parameter. */ - axis[0] = axis_shift[aid * 3 + 0]; - axis[1] = axis_shift[aid * 3 + 1]; - axis[2] = axis_shift[aid * 3 + 2]; - - /* Make sure the cells are sorted. */ - if (!((*ci)->sorted & (1 << aid))) cell_sort(*ci, axis, aid); - if (!((*cj)->sorted & (1 << aid))) cell_sort(*cj, axis, aid); - - /* Set the indices. */ - *ind_i = &(*ci)->indices[aid * ((*ci)->count + 1)]; - *ind_j = &(*cj)->indices[aid * ((*cj)->count + 1)]; - - - /* Make sure the sorts are ok. */ - /* for (int k = 1; k < (*ci)->count; k++) - if ((*ind_i)[k].d < (*ind_i)[k-1].d) - error("Sorting failed."); - for (int k = 1; k < (*cj)->count; k++) - if ((*ind_j)[k].d < (*ind_j)[k-1].d) - error("Sorting failed."); */ -} - /** * @brief Get a #cell from the pool. */ @@ -566,6 +155,56 @@ struct cell *cell_get() { return res; } +/** + * @brief Compute the center of mass of a given cell. + * + * @param c The #cell. + */ +void comp_com(struct cell *c) { + + int k, count = c->count; + struct cell *cp; + struct part *parts = c->parts; + double com[3] = {0.0, 0.0, 0.0}, mass = 0.0; + + if (c->split) { + + /* Loop over the projenitors and collect the multipole information. */ + for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { + float cp_mass = cp->new.mass; + com[0] += cp->new.com[0] * cp_mass; + com[1] += cp->new.com[1] * cp_mass; + com[2] += cp->new.com[2] * cp_mass; + mass += cp_mass; + } + + /* Otherwise, collect the multipole from the particles. */ + } else { + + for (k = 0; k < count; k++) { + float p_mass = parts[k].mass; + com[0] += parts[k].x[0] * p_mass; + com[1] += parts[k].x[1] * p_mass; + com[2] += parts[k].x[2] * p_mass; + mass += p_mass; + } + } + + /* Store the COM data, if any was collected. */ + if (mass > 0.0) { + float imass = 1.0f / mass; + c->new.com[0] = com[0] * imass; + c->new.com[1] = com[1] * imass; + c->new.com[2] = com[2] * imass; + c->new.mass = mass; + } else { + c->new.com[0] = 0.0; + c->new.com[1] = 0.0; + c->new.com[2] = 0.0; + c->new.mass = 0.0; + } +} + /** * @brief Sort the parts into eight bins along the given pivots and * fill the multipoles. Also adds the hierarchical resources @@ -724,8 +363,15 @@ void cell_split(struct cell *c, struct qsched *s) { #endif /* Otherwise, we're at a leaf, so create the cell's particle-cell task. */ - } - + } else { + struct cell *data[2] = {root, c}; + int tid = qsched_addtask(s, task_type_self_pc, task_flag_none, data, + 2 * sizeof(struct cell *), 1); + qsched_addlock(s, tid, c->res); +#ifdef COM_AS_TASK + qsched_addunlock(s, root->com_tid, tid); +#endif + } /* does the cell need to be split? */ /* Compute the cell's center of mass. */ #ifndef COM_AS_TASK @@ -741,63 +387,14 @@ void cell_split(struct cell *c, struct qsched *s) { /* New tree walk */ /* -------------------------------------------------------------------------- */ -/** - * @brief Compute the center of mass of a given cell. - * - * @param c The #cell. - */ -void comp_com(struct cell *c) { - - int k, count = c->count; - struct cell *cp; - struct part *parts = c->parts; - double com[3] = {0.0, 0.0, 0.0}, mass = 0.0; - - if (c->split) { - - /* Loop over the projenitors and collect the multipole information. */ - for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { - float cp_mass = cp->new.mass; - com[0] += cp->new.com[0] * cp_mass; - com[1] += cp->new.com[1] * cp_mass; - com[2] += cp->new.com[2] * cp_mass; - mass += cp_mass; - } - - /* Otherwise, collect the multipole from the particles. */ - } else { - - for (k = 0; k < count; k++) { - float p_mass = parts[k].mass; - com[0] += parts[k].x[0] * p_mass; - com[1] += parts[k].x[1] * p_mass; - com[2] += parts[k].x[2] * p_mass; - mass += p_mass; - } - } - - /* Store the COM data, if any was collected. */ - if (mass > 0.0) { - float imass = 1.0f / mass; - c->new.com[0] = com[0] * imass; - c->new.com[1] = com[1] * imass; - c->new.com[2] = com[2] * imass; - c->new.mass = mass; - } else { - c->new.com[0] = 0.0; - c->new.com[1] = 0.0; - c->new.com[2] = 0.0; - c->new.mass = 0.0; - } -} - /** * @brief Interacts all particles in ci with the monopole in cj */ -static inline void make_interact_pc(struct cell *leaf, struct cell *cj) { +static inline void make_interact_pc(struct part_local *parts, int count, + double *loc, struct cell *cj) { int j, k; - double com[3] = {0.0, 0.0, 0.0}; + float com[3] = {0.0, 0.0, 0.0}; float mcom, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; #ifdef SANITY_CHECKS @@ -819,35 +416,22 @@ static inline void make_interact_pc(struct cell *leaf, struct cell *cj) { #endif /* Init the com's data. */ - for (k = 0; k < 3; k++) com[k] = cj->new.com[k]; + for (k = 0; k < 3; k++) com[k] = cj->new.com[k] - loc[k]; mcom = cj->new.mass; /* Loop over every particle in leaf. */ - for (j = 0; j < leaf->count; j++) { + for (j = 0; j < count; j++) { /* Compute the pairwise distance. */ for (r2 = 0.0, k = 0; k < 3; k++) { - dx[k] = com[k] - leaf->parts[j].x[k]; + dx[k] = com[k] - parts[j].x[k]; r2 += dx[k] * dx[k]; } /* Apply the gravitational acceleration. */ ir = 1.0f / sqrtf(r2); w = mcom * const_G * ir * ir * ir; - for (k = 0; k < 3; k++) leaf->parts[j].a[k] += w * dx[k]; - -#if ICHECK >= 0 - if (leaf->parts[j].id == ICHECK) - printf("[DEBUG] cell [%f,%f,%f] interacting with cj->loc=[%f,%f,%f] " - "m=%f h=%f\n", - leaf->loc[0], leaf->loc[1], leaf->loc[2], cj->loc[0], cj->loc[1], - cj->loc[2], mcom, cj->h); - - if (leaf->parts[j].id == ICHECK) - printf("[NEW] Interaction with monopole a=( %e %e %e ) h= %f Nj= %d m_j= " - "%f\n", - w * dx[0], w * dx[1], w * dx[2], cj->h, cj->count, mcom); -#endif + for (k = 0; k < 3; k++) parts[j].a[k] += w * dx[k]; } /* loop over every particle. */ } @@ -865,21 +449,19 @@ static inline int is_inside(struct cell *leaf, struct cell *c) { static inline int are_neighbours_different_size(struct cell *ci, struct cell *cj) { - int k; - float dx[3]; double cih = ci->h, cjh = cj->h; /* Maximum allowed distance */ float min_dist = 0.5 * (cih + cjh); /* (Manhattan) Distance between the cells */ - for (k = 0; k < 3; k++) { + for (int k = 0; k < 3; k++) { float center_i = ci->loc[k] + 0.5 * cih; float center_j = cj->loc[k] + 0.5 * cjh; - dx[k] = fabs(center_i - center_j); + if (fabsf(center_i - center_j) > min_dist) return 0; } - return (dx[0] <= min_dist) && (dx[1] <= min_dist) && (dx[2] <= min_dist); + return 1; } /** @@ -888,9 +470,6 @@ static inline int are_neighbours_different_size(struct cell *ci, */ static inline int are_neighbours(struct cell *ci, struct cell *cj) { - int k; - float dx[3]; - #ifdef SANITY_CHECKS if (ci->h != cj->h) error(" Cells of different size in distance calculation."); @@ -900,138 +479,155 @@ static inline int are_neighbours(struct cell *ci, struct cell *cj) { float min_dist = ci->h; /* (Manhattan) Distance between the cells */ - for (k = 0; k < 3; k++) { + for (int k = 0; k < 3; k++) { float center_i = ci->loc[k]; float center_j = cj->loc[k]; - dx[k] = fabs(center_i - center_j); + if (fabsf(center_i - center_j) > min_dist) return 0; } - return (dx[0] <= min_dist) && (dx[1] <= min_dist) && (dx[2] <= min_dist); + return 1; } -/* /\** */ -/* * @brief Compute the interactions between all particles in a cell leaf */ -/* * and the center of mass of all the cells in a part of the tree */ -/* * described by ci and cj */ -/* * */ -/* * @param ci The #cell containing the particle */ -/* * @param cj The #cell containing the center of mass. */ -/* *\/ */ -/* static inline void iact_pair_pc(struct cell *ci, struct cell *cj, */ -/* struct cell *leaf) { */ - -/* struct cell *cp, *cps; */ +/** + * @brief Compute the interactions between all particles in a cell leaf + * and the center of mass of all the cells in a part of the tree + * described by ci and cj + * + * @param ci The #cell containing the particle + * @param cj The #cell containing the center of mass. + */ +static inline void iact_pair_pc(struct cell *ci, struct cell *cj, + struct cell *leaf, struct part_local *parts, + int count, double *loc) { -/* #ifdef SANITY_CHECKS */ + struct cell *cp, *cps; -/* /\* Early abort? *\/ */ -/* if (ci->count == 0 || cj->count == 0) error("Empty cell !"); */ +#ifdef SANITY_CHECKS -/* /\* Sanity check *\/ */ -/* if (ci == cj) */ -/* error("The impossible has happened: pair interaction between a cell and " */ -/* "itself."); */ + /* Early abort? */ + if (ci->count == 0 || cj->count == 0) error("Empty cell !"); -/* /\* Sanity check *\/ */ -/* if (!is_inside(leaf, ci)) */ -/* error("The impossible has happened: The leaf is not within ci"); */ + /* Sanity check */ + if (ci == cj) + error("The impossible has happened: pair interaction between a cell and " + "itself."); -/* /\* Are the cells direct neighbours? *\/ */ -/* if (!are_neighbours(ci, cj)) error("Cells are not neighours"); */ + /* Sanity check */ + if (!is_inside(leaf, ci)) + error("The impossible has happened: The leaf is not within ci"); -/* /\* Are both cells split ? *\/ */ -/* if (!ci->split || !cj->split) error("One of the cells is not split !"); */ -/* #endif */ + /* Are the cells direct neighbours? */ + if (!are_neighbours(ci, cj)) error("Cells are not neighours"); -/* /\* Let's find in which subcell of ci the leaf is *\/ */ -/* for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) { */ + /* Are both cells split ? */ + if (!ci->split || !cj->split) error("One of the cells is not split !"); +#endif -/* if (is_inside(leaf, cp)) break; */ -/* } */ + /* Let's find in which subcell of ci the leaf is */ + for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) { -/* if (are_neighbours_different_size(cp, cj)) { */ + if (is_inside(leaf, cp)) break; + } -/* /\* Now interact this subcell with all subcells of cj *\/ */ -/* for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) { */ + if (are_neighbours_different_size(cp, cj)) { -/* /\* Check whether we have to recurse or can directly jump to the multipole */ -/* * calculation *\/ */ -/* if (are_neighbours(cp, cps)) { */ + /* Now interact this subcell with all subcells of cj */ + for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) { -/* /\* We only recurse if the children are split *\/ */ -/* if (cp->split && cps->split) { */ -/* iact_pair_pc(cp, cps, leaf); */ -/* } */ + /* Check whether we have to recurse or can directly jump to the multipole + * calculation */ + if (are_neighbours(cp, cps)) { -/* } else { */ -/* make_interact_pc(leaf, cps); */ -/* } */ -/* } */ -/* } else { */ + /* We only recurse if the children are split */ + if (cp->split && cps->split) { + iact_pair_pc(cp, cps, leaf, parts, count, loc); + } -/* /\* If cp is not a neoghbour of cj, we can directly interact with the */ -/* * multipoles *\/ */ -/* for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) { */ + } else { + make_interact_pc(parts, count, loc, cps); + } + } + } else { -/* make_interact_pc(leaf, cps); */ -/* } */ -/* } */ -/* } */ + /* If cp is not a neoghbour of cj, we can directly interact with the + * multipoles */ + for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) { -/* /\** */ -/* * @brief Compute the interactions between all particles in a leaf and */ -/* * and all the monopoles in the cell c */ -/* * */ -/* * @param c The #cell containing the monopoles */ -/* * @param leaf The #cell containing the particles */ -/* *\/ */ -/* static inline void iact_self_pc(struct cell *c, struct cell *leaf) { */ + make_interact_pc(parts, count, loc, cps); + } + } +} -/* struct cell *cp, *cps; */ +/** + * @brief Compute the interactions between all particles in a leaf and + * and all the monopoles in the cell c + * + * @param c The #cell containing the monopoles + * @param leaf The #cell containing the particles + */ +static inline void iact_self_pc(struct cell *c, struct cell *leaf, + struct part_local *parts) { -/* #ifdef SANITY_CHECKS */ + struct cell *cp, *cps; + int collect_part_data = 0; -/* /\* Early abort? *\/ */ -/* if (c->count == 0) error("Empty cell !"); */ +#ifdef SANITY_CHECKS -/* if (!c->split) error("Cell is not split !"); */ + /* Early abort? */ + if (c->count == 0) error("Empty cell !"); -/* #endif */ + if (!c->split) error("Cell is not split !"); -/* /\* Find in which subcell of c the leaf is *\/ */ -/* for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { */ +#endif -/* /\* Only recurse if the leaf is in this part of the tree *\/ */ -/* if (is_inside(leaf, cp)) break; */ -/* } */ + /* Get local copies of the particle data. */ + if (parts == NULL) { + int count = leaf->count; + if ((parts = + (struct part_local *)malloc(sizeof(struct part_local) * count)) == + NULL) + error("Failed to allocate local parts."); + for (int k = 0; k < count; k++) { + for (int j = 0; j < 3; j++) { + parts[k].x[j] = leaf->parts[k].x[j] - leaf->loc[j]; + parts[k].a[j] = 0.0f; + } + parts[k].mass = leaf->parts[k].mass; + } + collect_part_data = 1; + } -/* if (cp->split) { */ + /* Find in which subcell of c the leaf is */ + for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { -/* /\* Recurse if the cell can be split *\/ */ -/* iact_self_pc(cp, leaf); */ + /* Only recurse if the leaf is in this part of the tree */ + if (is_inside(leaf, cp)) break; + } -/* /\* Now, interact with every other subcell *\/ */ -/* for (cps = c->firstchild; cps != c->sibling; cps = cps->sibling) { */ + if (cp->split) { -/* /\* Since cp and cps will be direct neighbours it is only worth recursing */ -/* *\/ */ -/* /\* if the cells can both be split *\/ */ -/* if (cp != cps && cps->split) iact_pair_pc(cp, cps, leaf); */ -/* } */ -/* } */ -/* } */ + /* Recurse if the cell can be split */ + iact_self_pc(cp, leaf, parts); -/* /\** */ -/* * @brief Starts the recursive tree walk of a given leaf */ -/* *\/ */ -/* void init_multipole_walk(struct cell *root, struct cell *leaf) { */ + /* Now, interact with every other subcell */ + for (cps = c->firstchild; cps != c->sibling; cps = cps->sibling) { -/* /\* Pre-fetch the leaf's particles *\/ */ -/* __builtin_prefetch(leaf->parts, 1, 3); */ + /* Since cp and cps will be direct neighbours it is only worth recursing + */ + /* if the cells can both be split */ + if (cp != cps && cps->split) + iact_pair_pc(cp, cps, leaf, parts, leaf->count, leaf->loc); + } + } -/* /\* Start the recursion *\/ */ -/* iact_self_pc(root, leaf); */ -/* } */ + /* Clean up local parts? */ + if (collect_part_data) { + for (int k = 0; k < leaf->count; k++) { + for (int j = 0; j < 3; j++) leaf->parts[k].a[j] += parts[k].a[j]; + } + free(parts); + } +} /** * @brief Compute the interactions between all particles in a cell @@ -1040,13 +636,13 @@ static inline int are_neighbours(struct cell *ci, struct cell *cj) { * @param ci The #cell containing the particles. * @param cj The #cell containing the other particles */ -static inline void iact_pair_direct_unsorted(struct cell *ci, struct cell *cj) { +static inline void iact_pair_direct_dp(struct cell *ci, struct cell *cj) { int i, j, k; int count_i = ci->count, count_j = cj->count; - struct part *parts_i = ci->parts, *parts_j = cj->parts; double xi[3]; float dx[3], ai[3], mi, mj, r2, w, ir; + struct part *parts_i = ci->parts, *parts_j = cj->parts; #ifdef SANITY_CHECKS @@ -1109,26 +705,15 @@ static inline void iact_pair_direct_unsorted(struct cell *ci, struct cell *cj) { } /* loop over all particles. */ } - - - - - -static inline void iact_pair_direct_sorted_multipole(struct cell *ci, - struct cell *cj) { - - struct part_local { - float x[3]; - float a[3]; - float mass, d; - }; +static inline void iact_pair_direct(struct cell *ci, struct cell *cj) { int i, j, k; - int count_i, count_j; - struct part_local *parts_i, *parts_j; - float cjh = cj->h; + int count_i = ci->count, count_j = cj->count; float xi[3]; float dx[3], ai[3], mi, mj, r2, w, ir; + double com[3]; + struct part_local parts_j[count_j]; + struct part *parts_i = ci->parts; #ifdef SANITY_CHECKS @@ -1138,75 +723,32 @@ static inline void iact_pair_direct_sorted_multipole(struct cell *ci, #endif - /* Get the sorted indices and stuff. */ - struct index *ind_i, *ind_j; - struct multipole multi; - get_axis(&ci, &cj, &ind_i, &ind_j); - multipole_reset(&multi); - cjh = cj->h; - - /* Allocate and fill-in the local parts. */ - count_i = ci->count; - count_j = cj->count; - if ((parts_i = - (struct part_local *)alloca(sizeof(struct part_local) *count_i)) == - NULL || - (parts_j = - (struct part_local *)alloca(sizeof(struct part_local) * count_j)) == - NULL) - error("Failed to allocate local part arrays."); - for (i = 0; i < count_i; i++) { - int pid = ind_i[i].ind; - parts_i[i].d = ind_i[i].d; - for (k = 0; k < 3; k++) { - parts_i[i].x[k] = ci->parts[pid].x[k] - cj->loc[k]; - parts_i[i].a[k] = 0.0f; - } - parts_i[i].mass = ci->parts[pid].mass; + /* Find the center point of the interaction. */ + for (k = 0; k < 3; k++) { + com[k] = 0.5 * (ci->loc[k] + cj->loc[k]); } - for (j = 0; j < count_j; j++) { - int pjd = ind_j[j].ind; - parts_j[j].d = ind_j[j].d; - for (k = 0; k < 3; k++) { - parts_j[j].x[k] = cj->parts[pjd].x[k] - cj->loc[k]; - parts_j[j].a[k] = 0.0f; + + /* Init the local copies of the particles. */ + for (k = 0; k < count_j; k++) { + for (j = 0; j < 3; j++) { + parts_j[k].x[j] = cj->parts[k].x[j] - com[j]; + parts_j[k].a[j] = 0.0f; } - parts_j[j].mass = cj->parts[pjd].mass; + parts_j[k].mass = cj->parts[k].mass; } -#if ICHECK >= 0 - for (k = 0; k < count_i; k++) - if (parts_i[k].id == ICHECK) - message("[DEBUG] interacting cells loc=[%f,%f,%f], h=%f and " - "loc=[%f,%f,%f], h=%f.", - ci->loc[0], ci->loc[1], ci->loc[2], ci->h, cj->loc[0], cj->loc[1], - cj->loc[2], cj->h); - for (k = 0; k < count_j; k++) - if (parts_j[k].id == ICHECK) - message("[DEBUG] interacting cells loc=[%f,%f,%f], h=%f and " - "loc=[%f,%f,%f], h=%f.", - cj->loc[0], cj->loc[1], cj->loc[2], cj->h, ci->loc[0], ci->loc[1], - ci->loc[2], ci->h); -#endif - - /* Distance along the axis as of which we will use a multipole. */ - float d_max = dist_cutoff_ratio * cjh; - /* Loop over all particles in ci... */ - for (i = count_i - 1; i >= 0; i--) { - - /* Get a local copy of the distance along the axis. */ - float di = parts_i[i].d; + for (i = 0; i < count_i; i++) { /* Init the ith particle's data. */ for (k = 0; k < 3; k++) { - xi[k] = parts_i[i].x[k]; - ai[k] = 0.0; + xi[k] = parts_i[i].x[k] - com[k]; + ai[k] = 0.0f; } mi = parts_i[i].mass; - /* Loop over every following particle within d_max along the axis. */ - for (j = 0; j < count_j && (parts_j[j].d - di) < d_max; j++) { + /* Loop over every particle in the other cell. */ + for (j = 0; j < count_j; j++) { /* Compute the pairwise distance. */ for (r2 = 0.0, k = 0; k < 3; k++) { @@ -1224,103 +766,38 @@ static inline void iact_pair_direct_sorted_multipole(struct cell *ci, ai[k] -= wdx * mj; } -#if ICHECK >= 0 - if (parts_i[i].id == ICHECK) - message("Interaction with part %d - d= %f", parts_j[j].id, sqrt(r2)); -#endif - #ifdef COUNTERS - ++count_direct_sorted_pp; + ++count_direct_unsorted; #endif #if ICHECK >= 0 && 0 if (parts_i[i].id == ICHECK) - printf("[NEW] Interaction with particle id= %d (pair i)\n", - parts_j[pjd].id); + printf( + "[NEW] Interaction with particle id= %d (pair i) a=( %e %e %e )\n", + parts_j[j].id, -mi * w * dx[0], -mi * w * dx[1], -mi * w * dx[2]); if (parts_j[j].id == ICHECK) - printf("[NEW] Interaction with particle id= %d (pair j) h_i= %f h_j= " - "%f ci= %p cj= %p count_i= %d count_j= %d d_i= %d d_j= %d\n", - parts_i[pid].id, ci->h, cj->h, ci, cj, count_i, count_j, ci->res, - cj->res); + printf( + "[NEW] Interaction with particle id= %d (pair j) a=( %e %e %e )\n", + parts_i[i].id, mj * w * dx[0], mj * w * dx[1], mj * w * dx[2]); #endif } /* loop over every other particle. */ - /* Add any remaining particles to the COM. */ - for (int jj = j; jj < count_j; jj++) { - multipole_add(&multi, parts_j[jj].x, parts_j[jj].mass); - } - - /* Shrink count_j to the latest valid particle. */ - count_j = j; - - /* Interact part_i with the center of mass. */ - multipole_iact(&multi, xi, ai); - -#if ICHECK >= 0 - if (parts_i[i].id == ICHECK) - message("Interaction with multipole"); //, parts_j[j].id ); -#endif - -#ifdef COUNTERS - ++count_direct_sorted_pm_i; -#endif - - /* Store the accumulated acceleration on the ith part. */ for (k = 0; k < 3; k++) parts_i[i].a[k] += ai[k]; - } /* loop over all particles in ci. */ - - - - /* Re-init some values. */ - count_j = cj->count; - int last_i = 0; - multipole_reset(&multi); - - /* Loop over the particles in cj, catch the COM interactions. */ - for (j = 0; j < count_j; j++) { - - /* Get the sorted index. */ - float dj = parts_j[j].d; - - /* Fill the COM with any new particles. */ - for (i = last_i; i < count_i && (dj - parts_i[i].d) > d_max; i++) { - multipole_add(&multi, parts_i[i].x, parts_i[i].mass); - } - - /* Set the new last_i to the last particle checked. */ - last_i = i; - - /* Interact part_j with the COM. */ - multipole_iact(&multi, parts_j[j].x, parts_j[j].a); - -#ifdef COUNTERS - ++count_direct_sorted_pm_j; -#endif - - } + } /* loop over all particles. */ - /* Copy the accelerations back to the original particles. */ - for (i = 0; i < count_i; i++) { - int pid = ind_i[i].ind; - for (k = 0; k < 3; k++) ci->parts[pid].a[k] += parts_i[i].a[k]; - } - for (j = 0; j < count_j; j++) { - int pjd = ind_j[j].ind; - for (k = 0; k < 3; k++) cj->parts[pjd].a[k] += parts_j[j].a[k]; + /* Copy the local particle data back. */ + for (k = 0; k < count_j; k++) { + for (j = 0; j < 3; j++) cj->parts[k].a[j] = parts_j[k].a[j]; } } - - - - /** * @brief Decides whether two cells use the direct summation interaction or the -* multipole interactions + * multipole interactions * * @param ci The #cell. * @param cj The other #cell. @@ -1372,12 +849,12 @@ void iact_pair(struct cell *ci, struct cell *cj) { * * @param c The #cell. */ -void iact_self_direct(struct cell *c) { +void iact_self_direct_dp(struct cell *c) { int i, j, k, count = c->count; double xi[3] = {0.0, 0.0, 0.0}; float ai[3] = {0.0, 0.0, 0.0}, mi, mj, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; - struct part *parts = c->parts; struct cell *cp, *cps; + struct part *parts = c->parts; #ifdef SANITY_CHECKS @@ -1390,7 +867,7 @@ void iact_self_direct(struct cell *c) { each of its siblings. */ if (c->split) { for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { - iact_self_direct(cp); + iact_self_direct_dp(cp); for (cps = cp->sibling; cps != c->sibling; cps = cps->sibling) iact_pair(cp, cps); } @@ -1447,6 +924,98 @@ void iact_self_direct(struct cell *c) { } /* otherwise, compute interactions directly. */ } +void iact_self_direct(struct cell *c) { + int i, j, k, count = c->count; + float xi[3] = {0.0, 0.0, 0.0}; + float ai[3] = {0.0, 0.0, 0.0}, mi, mj, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; + struct cell *cp, *cps; + +#ifdef SANITY_CHECKS + + /* Early abort? */ + if (count == 0) error("Empty cell !"); + +#endif + + /* If the cell is split, interact each progeny with itself, and with + each of its siblings. */ + if (c->split) { + for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { + iact_self_direct(cp); + for (cps = cp->sibling; cps != c->sibling; cps = cps->sibling) + iact_pair(cp, cps); + } + + /* Otherwise, compute the interactions directly. */ + } else { + + /* Init the local copies of the particles. */ + double loc[3]; + struct part_local parts[count]; + loc[0] = c->loc[0]; + loc[1] = c->loc[1]; + loc[2] = c->loc[2]; + for (k = 0; k < count; k++) { + for (j = 0; j < 3; j++) { + parts[k].x[j] = c->parts[k].x[j] - loc[j]; + parts[k].a[j] = 0.0f; + } + parts[k].mass = c->parts[k].mass; + } + + /* Loop over all particles... */ + for (i = 0; i < count; i++) { + + /* Init the ith particle's data. */ + for (k = 0; k < 3; k++) { + xi[k] = parts[i].x[k]; + ai[k] = 0.0; + } + mi = parts[i].mass; + + /* Loop over every following particle. */ + for (j = i + 1; j < count; j++) { + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = xi[k] - parts[j].x[k]; + r2 += dx[k] * dx[k]; + } + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir; + mj = parts[j].mass; + for (k = 0; k < 3; k++) { + float wdx = w * dx[k]; + parts[j].a[k] += wdx * mi; + ai[k] -= wdx * mj; + } + +#if ICHECK >= 0 + if (c->parts[i].id == ICHECK) + message("[NEW] Interaction with particle id= %d (self i)", + c->parts[j].id); + + if (c->parts[j].id == ICHECK) + message("[NEW] Interaction with particle id= %d (self j)", + c->parts[i].id); +#endif + + } /* loop over every other particle. */ + + /* Store the accumulated acceleration on the ith part. */ + for (k = 0; k < 3; k++) parts[i].a[k] += ai[k]; + + } /* loop over all particles. */ + + /* Copy the local particle data back. */ + for (k = 0; k < count; k++) { + for (j = 0; j < 3; j++) c->parts[k].a[j] += parts[k].a[j]; + } + } /* otherwise, compute interactions directly. */ +} + /** * @brief Create the tasks for the cell pair/self. * @@ -1543,8 +1112,6 @@ void create_tasks(struct qsched *s, struct cell *ci, struct cell *cj) { } /* Otherwise it's a pair */ } - - /* -------------------------------------------------------------------------- */ /* Legacy tree walk */ /* -------------------------------------------------------------------------- */ @@ -1827,12 +1394,9 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { case task_type_pair: iact_pair(d[0], d[1]); break; - case task_type_pair_direct: - iact_pair_direct(d[0], d[1]); + case task_type_self_pc: + iact_self_pc(d[0], d[1], NULL); break; - /* case task_type_self_pc: */ - /* init_multipole_walk(d[0], d[1]); */ - /* break; */ case task_type_com: comp_com(d[0]); break; @@ -1847,7 +1411,7 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { for (k = 0; k < task_type_count; k++) task_timers[k] = 0; /* Initialize the scheduler. */ - qsched_init(&s, nr_threads, qsched_flag_noreown); + qsched_init(&s, nr_threads, qsched_flag_noreown | qsched_flag_pthread); /* Init and fill the particle array. */ if ((parts = (struct part *)malloc(sizeof(struct part) * N)) == NULL) @@ -1966,8 +1530,8 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { #if ICHECK >= 0 message("[check] accel of part %i is [%.3e,%.3e,%.3e]", ICHECK, - root->parts[ICHECK].a[0], root->parts[ICHECK].a[1], - root->parts[ICHECK].a[2]); + root->parts[ICHECK].a_legacy[0], root->parts[ICHECK].a_legacy[1], + root->parts[ICHECK].a_legacy[2]); #endif printf("task counts: [ %8s %8s %8s %8s %8s ]\n", "self", "pair", "m-poles", "direct", "CoMs"); @@ -1986,15 +1550,6 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { parts[i].a[2] = 0.0; } - struct cell *finger = root; - while (finger != NULL) { - finger->sorted = 0; - if (finger->split) - finger = finger->firstchild; - else - finger = finger->sibling; - } - /* Execute the tasks. */ tic = getticks(); qsched_run(&s, nr_threads, runner); @@ -2052,390 +1607,19 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { free(parts); } -/* All 26 configurations for the cell tests */ -const float cell_shift[27 * 3] = {1.0, 1.0, 1.0, /* 0 */ - 1.0, 1.0, 0.0, /* 1 */ - 1.0, 1.0, -1.0, /* 2 */ - 1.0, 0.0, 1.0, /* 3 */ - 1.0, 0.0, 0.0, /* 4 */ - 1.0, 0.0, -1.0, /* 5 */ - 1.0, -1.0, 1.0, /* 6 */ - 1.0, -1.0, 0.0, /* 7 */ - 1.0, -1.0, -1.0, /* 8 */ - 0.0, 1.0, 1.0, /* 9 */ - 0.0, 1.0, 0.0, /* 10 */ - 0.0, 1.0, -1.0, /* 11 */ - 0.0, 0.0, 1.0, /* 12 */ - -1.0, -1.0, -1.0, /* 13 */ - -1.0, -1.0, 0.0, /* 14 */ - -1.0, -1.0, 1.0, /* 15 */ - -1.0, 0.0, -1.0, /* 16 */ - -1.0, 0.0, 0.0, /* 17 */ - -1.0, 0.0, 1.0, /* 18 */ - -1.0, 1.0, -1.0, /* 19 */ - -1.0, 1.0, 0.0, /* 20 */ - -1.0, 1.0, 1.0, /* 21 */ - 0.0, -1.0, -1.0, /* 22 */ - 0.0, -1.0, 0.0, /* 23 */ - 0.0, -1.0, 1.0, /* 24 */ - 0.0, 0.0, -1.0, /* 25 */ - 0.0, 0.0, - 0.0 /* 26 */ /* <-- The cell itself */ -}; - -/** - * @brief Creates two neighbouring cells wiht N_parts per celland makes them - * interact using both the - * sorted and unsortde interactions. Outputs then the tow sets of accelerations - * for accuracy tests. - * @param N_parts Number of particles in each cell - * @param orientation Orientation of the cells ( 0 <= orientation < 26 ) - */ -void test_direct_neighbour(int N_parts, int orientation) { - - int k; - struct part *parts; - struct cell left, right; - - if (orientation >= 26) error("Wrong orientation !"); - - /* Select configuration */ - float shift[3]; - for (k = 0; k < 3; ++k) shift[k] = cell_shift[3 * orientation + k]; - - /* Init and fill the particle array. */ - if ((parts = (struct part *)malloc(sizeof(struct part) * N_parts * 2)) == - NULL) - error("Failed to allocate particle buffer."); - - /* Create random set of particles in both cells */ - for (k = 0; k < 2 * N_parts; k++) { - parts[k].id = k; - - parts[k].x[0] = ((double)rand()) / RAND_MAX; - parts[k].x[1] = ((double)rand()) / RAND_MAX; - parts[k].x[2] = ((double)rand()) / RAND_MAX; - - /* Shift the second cell */ - if (k >= N_parts) { - parts[k].x[0] += shift[0]; - parts[k].x[1] += shift[1]; - parts[k].x[2] += shift[2]; - } - - parts[k].mass = ((double)rand()) / RAND_MAX; - parts[k].a[0] = 0.0; - parts[k].a[1] = 0.0; - parts[k].a[2] = 0.0; - } - - /* Get the cell geometry right */ - left.loc[0] = 0.; - left.loc[1] = 0.; - left.loc[2] = 0.; - left.h = 1.; - - right.loc[0] = shift[0]; - right.loc[1] = shift[1]; - right.loc[2] = shift[2]; - right.h = 1.; - - /* Put the particles in the cell */ - left.parts = parts; - left.count = N_parts; - right.parts = parts + N_parts; - right.count = N_parts; - - /* Get the linked list right (functions should not recurse but hey...) */ - left.firstchild = NULL; - left.sibling = NULL; - left.split = 0; - right.firstchild = NULL; - right.sibling = NULL; - right.split = 0; - - /* Get the multipoles right (should also be useless) */ - comp_com(&left); - comp_com(&right); - - /* Nothing has been sorted yet */ - left.indices = NULL; - left.sorted = 0; - right.indices = NULL; - right.sorted = 0; - -#ifdef COUNTERS - count_direct_unsorted = 0; - count_direct_sorted_pp = 0; - count_direct_sorted_pm_i = 0; - count_direct_sorted_pm_j = 0; -#endif - - /* Do the interactions without sorting */ - iact_pair_direct_unsorted(&left, &right); - - message("Unsorted interactions done "); - - /* Store accelerations */ - for (k = 0; k < 2 * N_parts; k++) { - parts[k].a_exact[0] = parts[k].a[0]; - parts[k].a_exact[1] = parts[k].a[1]; - parts[k].a_exact[2] = parts[k].a[2]; - parts[k].a[0] = 0.0; - parts[k].a[1] = 0.0; - parts[k].a[2] = 0.0; - } - - /* Do the interactions with sorting */ - iact_pair_direct(&left, &right); - - message("Sorted interactions done "); - - for (k = 0; k < 2 * N_parts; ++k) { - if (parts[k].id == ICHECK) { - - message("Accel exact : [ %e %e %e ]", parts[k].a_exact[0], - parts[k].a_exact[1], parts[k].a_exact[2]); - message("Accel sorted: [ %e %e %e ]", parts[k].a[0], parts[k].a[1], - parts[k].a[2]); - } - } - - /* Position along the axis */ - double *position, *ortho_dist; - if ((position = (double *)malloc(sizeof(double) * N_parts * 2)) == NULL) - error("Failed to allocate position buffer."); - if ((ortho_dist = (double *)malloc(sizeof(double) * N_parts * 2)) == NULL) - error("Failed to allocate orthogonal distance buffer."); - - float w = 1.0f / sqrtf(shift[0]*shift[0] + shift[1]*shift[1] + shift[2]*shift[2]); - shift[0] *= w; - shift[1] *= w; - shift[2] *= w; - - for (k = 0; k < 2 * N_parts; ++k) { - float dx = parts[k].x[0] * shift[0]; - float dy = parts[k].x[1] * shift[1]; - float dz = parts[k].x[2] * shift[2]; - - position[k] = dx + dy + dz; - - dx = (parts[k].x[1] - 0.5f)*shift[2] - (parts[k].x[2] - 0.5f)*shift[1]; - dy = (parts[k].x[2] - 0.5f)*shift[0] - (parts[k].x[0] - 0.5f)*shift[2]; - dz = (parts[k].x[0] - 0.5f)*shift[1] - (parts[k].x[1] - 0.5f)*shift[0]; - - ortho_dist[k] = sqrtf(dx*dx + dy*dy + dz*dz); - } - - /* Now, output everything */ - char fileName[100]; - sprintf(fileName, "interaction_dump_%d.dat", orientation); - message("Writing file '%s'", fileName); - FILE *file = fopen(fileName, "w"); - fprintf(file, - "# ID m r x y z a_u.x a_u.y a_u.z a_s.x a_s.y a_s.z ortho\n"); - for (k = 0; k < 2 * N_parts; k++) { - fprintf(file, "%d %e %e %e %e %e %e %e %e %e %e %e %e\n", parts[k].id, - parts[k].mass, position[k], parts[k].x[0], parts[k].x[1], - parts[k].x[2], parts[k].a_exact[0], parts[k].a_exact[1], - parts[k].a_exact[2], parts[k].a[0], parts[k].a[1], parts[k].a[2], - ortho_dist[k]); - } - fclose(file); - -#ifdef COUNTERS - message("Unsorted intereactions: %d", count_direct_unsorted); - message("Sorted intereactions PP: %d", count_direct_sorted_pp); - message("Sorted intereactions PM (part i): %d", count_direct_sorted_pm_i); - message("Sorted intereactions PM (part j): %d", count_direct_sorted_pm_j); - message("Sorted intereactions total: %d", - count_direct_sorted_pm_j + count_direct_sorted_pm_i + - count_direct_sorted_pp); -// message( "%f %d %d %d %d\n", dist_cutoff_ratio, count_direct_unsorted, -// count_direct_sorted_pp, count_direct_sorted_pm_i, count_direct_sorted_pm_j ); -#endif - - /* Clean up */ - free(parts); -} - -/** - * @brief Creates 27 cells and makes the particles in the central one - * interact with the other cells - * using both the sorted and unsorted interactions - * Outputs then the two sets of accelerations for accuracy tests. - * @param N_parts Number of particles in each cell - * @param N_test Number of times the test has to be run - */ -void test_all_direct_neighbours(int N_parts, int N_test) { - - int i, k, j; - struct part *parts; - struct cell cells[27]; - - /* Init and fill the particle array. */ - if ((parts = (struct part *)malloc(sizeof(struct part) * N_parts * 27)) == - NULL) - error("Failed to allocate particle buffer."); - -#ifdef COUNTERS - count_direct_unsorted = 0; - count_direct_sorted_pp = 0; - count_direct_sorted_pm_i = 0; - count_direct_sorted_pm_j = 0; -#endif - int countMultipoles = 0; - int countPairs = 0; - int countCoMs = 0; - - /* Loop over the number of tests */ - for (i = 0; i < N_test; ++i) { - - /* Loop over the 27 cells */ - for (j = 0; j < 27; ++j) { - - float shift[3]; - for (k = 0; k < 3; ++k) shift[k] = cell_shift[3 * j + k]; - - /* Create random set of particles in all cells */ - for (k = 0; k < N_parts; k++) { - parts[j * N_parts + k].id = j * N_parts + k; - - parts[j * N_parts + k].x[0] = ((double)rand()) / RAND_MAX + shift[0]; - parts[j * N_parts + k].x[1] = ((double)rand()) / RAND_MAX + shift[1]; - parts[j * N_parts + k].x[2] = ((double)rand()) / RAND_MAX + shift[2]; - - parts[j * N_parts + k].mass = ((double)rand()) / RAND_MAX; - parts[j * N_parts + k].a[0] = 0.0; - parts[j * N_parts + k].a[1] = 0.0; - parts[j * N_parts + k].a[2] = 0.0; - parts[j * N_parts + k].a_exact[0] = 0.0; - parts[j * N_parts + k].a_exact[1] = 0.0; - parts[j * N_parts + k].a_exact[2] = 0.0; - parts[j * N_parts + k].a_legacy[0] = 0.0; - parts[j * N_parts + k].a_legacy[1] = 0.0; - parts[j * N_parts + k].a_legacy[2] = 0.0; - } - - /* Get the cell geometry right */ - cells[j].loc[0] = shift[0]; - cells[j].loc[1] = shift[1]; - cells[j].loc[2] = shift[2]; - cells[j].h = 1.; - - /* Put the particles in the cell */ - cells[j].parts = parts + N_parts * j; - cells[j].count = N_parts; - - /* Get the linked list right (functions should not recurse but hey...) */ - cells[j].firstchild = NULL; - cells[j].sibling = NULL; - cells[j].split = 0; - - /* Get the multipoles right (should also be useless) */ - comp_com(&cells[j]); - - /* Nothing has been sorted yet */ - cells[j].indices = NULL; - cells[j].sorted = 0; - } - - /* Do the interactions without sorting */ - for (j = 0; j < 26; ++j) iact_pair_direct_unsorted(&cells[26], &cells[j]); - iact_self_direct(&cells[26]); - - // message("Unsorted interactions done "); - - /* Store accelerations */ - for (k = 0; k < 27 * N_parts; k++) { - parts[k].a_exact[0] = parts[k].a[0]; - parts[k].a_exact[1] = parts[k].a[1]; - parts[k].a_exact[2] = parts[k].a[2]; - parts[k].a[0] = 0.0; - parts[k].a[1] = 0.0; - parts[k].a[2] = 0.0; - } - - /* Do the interactions with sorting */ - for (j = 0; j < 26; ++j) iact_pair_direct(&cells[26], &cells[j]); - iact_self_direct(&cells[26]); - - // message("Sorted interactions done "); - - /* Now, do a B-H calculation on the same set of particles */ - struct cell *root = cell_get(); - root->loc[0] = -1.0; - root->loc[1] = -1.0; - root->loc[2] = -1.0; - root->h = 3.0; - root->count = 27 * N_parts; - root->parts = parts; - struct qsched s; - qsched_init(&s, 1, qsched_flag_noreown); - cell_split(root, &s); - legacy_tree_walk(27 * N_parts, parts, root, ICHECK, &countMultipoles, - &countPairs, &countCoMs); - - // free(root); - //++countCoMs; - - /* Now, output everything */ - char fileName[100]; - sprintf(fileName, "interaction_dump_all.dat"); - // message("Writing file '%s'", fileName); - FILE *file = fopen(fileName, (i == 0 ? "w" : "a")); - if (i == 0) - fprintf(file, - "# ID m x y z a_u.x a_u.y a_u.z a_s.x a_s.y a_s.z" - " a_bh.x a_bh.y a_bh.z\n"); - for (k = 0; k < 27 * N_parts; k++) { - if (parts[k].id >= 26 * N_parts) - fprintf(file, "%d %e %e %e %e %e %e %e %e %e %e %e %e %e\n", - parts[k].id, parts[k].mass, parts[k].x[0], parts[k].x[1], - parts[k].x[2], parts[k].a_exact[0], parts[k].a_exact[1], - parts[k].a_exact[2], parts[k].a[0], parts[k].a[1], - parts[k].a[2], parts[k].a_legacy[0], parts[k].a_legacy[1], - parts[k].a_legacy[2]); - } - fclose(file); - } - -#ifdef COUNTERS - message("Unsorted intereactions: %d", count_direct_unsorted); - message("B-H intereactions: PP %d", countPairs); - message("B-H intereactions: PM %d", countMultipoles); - message("B-H intereactions: total %d", countPairs + countMultipoles); - message("Sorted intereactions PP: %d", count_direct_sorted_pp); - message("Sorted intereactions PM (part i): %d", count_direct_sorted_pm_i); - message("Sorted intereactions PM (part j): %d", count_direct_sorted_pm_j); - message("Sorted intereactions total: %d", - count_direct_sorted_pm_j + count_direct_sorted_pm_i + - count_direct_sorted_pp); -// message( "%f %d %d %d %d\n", dist_cutoff_ratio, count_direct_unsorted, -// count_direct_sorted_pp, count_direct_sorted_pm_i, count_direct_sorted_pm_j ); -#endif - - /* Clean up */ - free(parts); -} - /** * @brief Main function. */ int main(int argc, char *argv[]) { - int c, k, nr_threads; + int c, nr_threads; int N = 1000, runs = 1; char fileName[100] = {0}; - int N_parts = 0; - int N_iterations = 0; /* Die on FP-exceptions. */ feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); - srand(0); - /* Get the number of threads. */ #pragma omp parallel shared(nr_threads) { @@ -2461,16 +1645,6 @@ int main(int argc, char *argv[]) { if (sscanf(optarg, "%s", &fileName[0]) != 1) error("Error parsing file name."); break; - case 'c': - if (sscanf(optarg, "%d", &N_parts) != 1) - error( - "Error parsing number of particles in neighbouring cells test."); - break; - case 'i': - if (sscanf(optarg, "%d", &N_iterations) != 1) - error( - "Error parsing number of particles in neighbouring cells test."); - break; case '?': fprintf(stderr, "Usage: %s [-t nr_threads] [-n N] [-r runs] [-f file] " "[-c Nparts] [-i Niterations] \n", @@ -2490,40 +1664,19 @@ int main(int argc, char *argv[]) { /* Part information */ printf("Size of part: %zu bytes.\n", sizeof(struct part)); - /* Run the neighbour direct integration test */ - if (N_parts > 0) { - - /* Dump arguments */ - message("Interacting 2 neighbouring cells with %d particles per cell", - N_parts); - - /* Run the test */ - for (k = 0; k < 26; ++k) test_direct_neighbour(N_parts, k); - - /* Dump arguments */ - message("Interacting one cell with %d particles with its 27 neighbours" - " %d times to get the statistics.", - N_parts, N_iterations); - - test_all_direct_neighbours(N_parts, N_iterations); - + /* Dump arguments. */ + if (fileName[0] == 0) { + message("Computing the N-body problem over %i random particles using %i " + "threads (%i runs).", + N, nr_threads, runs); } else { - - /* Dump arguments. */ - if (fileName[0] == 0) { - message("Computing the N-body problem over %i random particles using %i " - "threads (%i runs).", - N, nr_threads, runs); - } else { - message("Computing the N-body problem over %i particles read from '%s' " - "using %i threads (%i runs).", - N, fileName, nr_threads, runs); - } - - /* Run the BH test. */ - test_bh(N, nr_threads, runs, fileName); + message("Computing the N-body problem over %i particles read from '%s' " + "using %i threads (%i runs).", + N, fileName, nr_threads, runs); } + /* Run the BH test. */ + test_bh(N, nr_threads, runs, fileName); + return 0; } -