diff --git a/autom4te.cache/requests b/autom4te.cache/requests index 5f6702525b223f35d76288fa655f62b87988382b..7675eba62587b513c5752f30ea1140a2981e22c3 100644 --- a/autom4te.cache/requests +++ b/autom4te.cache/requests @@ -58,9 +58,9 @@ '_LT_AC_LANG_CXX_CONFIG' => 1, 'AM_PROG_MKDIR_P' => 1, 'DX_IF_FEATURE' => 1, + 'DX_FEATURE_doc' => 1, 'AM_AUTOMAKE_VERSION' => 1, 'DX_CHM_FEATURE' => 1, - 'DX_FEATURE_doc' => 1, 'DX_PDF_FEATURE' => 1, 'AM_SUBST_NOTMAKE' => 1, 'AM_MISSING_PROG' => 1, diff --git a/examples/test_bh.c b/examples/test_bh.c index a7e2a9cb6912597369b633bcf85d6eed84ae6388..4c410a518c64d43e948fc4acbe6d6a3bfa95c062 100644 --- a/examples/test_bh.c +++ b/examples/test_bh.c @@ -1,22 +1,22 @@ /******************************************************************************* * This file is part of QuickSched. - * Coypright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk) - * + * Coypright (c) 2014 Pedro Gonnet (pedro.gonnet@durham.ac.uk), + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - ******************************************************************************/ - + * +* *****************************************************************************/ /* Config parameters. */ #include "../config.h" @@ -27,98 +27,178 @@ #include <string.h> #include <unistd.h> #include <math.h> +#include <float.h> +#include <limits.h> #include <omp.h> +#include <fenv.h> /* Local includes. */ #include "quicksched.h" - /* Some local constants. */ -#define cell_pool_grow 100 +#define cell_pool_grow 1000 #define cell_maxparts 100 -#define task_limit 5000 -#define const_G 6.6738e-11 -#define dist_min 0.5 +#define task_limit 1e8 +#define const_G 1 // 6.6738e-8 +#define dist_min 0.5 /* Used for legacy walk only */ +#define dist_cutoff_ratio 1.5 +#define ICHECK -1 +#define NO_SANITY_CHECKS +#define NO_COM_AS_TASK +#define NO_COUNTERS /** Data structure for the particles. */ struct part { - double x[3]; - double a[3]; - double mass; - int id; - }; - - + double x[3]; + // union { + float a[3]; + float a_legacy[3]; + float a_exact[3]; + // }; + float mass; + int id; +}; // __attribute__((aligned(64))); + +struct multipole { + double com[3]; + float mass; +}; + /** Data structure for the BH tree cell. */ struct cell { - double loc[3]; - double h[3]; - int split, count; - struct part *parts; - struct cell *progeny[8]; - struct cell *parent; - double com[3]; - double mass; - int res, com_tid; - }; - - + double loc[3]; + double h; + + int count; + unsigned short int split, sorted; + struct part *parts; + + struct cell *firstchild; /* Next node if opening */ + struct cell *sibling; /* Next node */ + + /* We keep both CoMs and masses to make sure the comp_com calculation is + * correct (use an anonymous union to keep variable names compact). */ + union { + + /* Information for the legacy walk */ + struct multipole legacy; + + /* Information for the QuickShed walk */ + struct multipole new; + }; + + int res, com_tid; + struct index *indices; + +} __attribute__((aligned(128))); + /** Task types. */ enum task_type { - task_type_self = 0, - task_type_pair, - task_type_pair_pc, - task_type_com, - task_type_count - }; - + task_type_self = 0, + task_type_pair, + task_type_self_pc, + task_type_com, + task_type_count +}; + +#ifdef COUNTERS +int count_direct_unsorted; +int count_direct_sorted_pp; +int count_direct_sorted_pm_i; +int count_direct_sorted_pm_j; +#endif + /** Per-type timers. */ -ticks task_timers[ task_type_count ]; - - +ticks task_timers[task_type_count]; + /** Global variable for the pool of allocated cells. */ struct cell *cell_pool = NULL; - /** * @brief Get a #cell from the pool. */ - -struct cell *cell_get ( ) { +struct cell *cell_get() { - struct cell *res; - int k; + struct cell *res; + int k; - /* Allocate a new batch? */ - if ( cell_pool == NULL ) { - - /* Allocate the cell array. */ - if ( ( cell_pool = (struct cell *)malloc( sizeof(struct cell) * cell_pool_grow ) ) == NULL ) - error( "Failed to allocate fresh batch of cells." ); - - /* Clear the cells. */ - bzero( cell_pool , sizeof(struct cell) * cell_pool_grow ); - - /* Link them up via their progeny pointers. */ - for ( k = 1 ; k < 100 ; k++ ) - cell_pool[k-1].progeny[0] = &cell_pool[k]; - - } - - /* Pick a cell off the pool. */ - res = cell_pool; - cell_pool = cell_pool->progeny[0]; - - /* Clean up a few things. */ - res->res = qsched_res_none; - - /* Return the cell. */ - return res; + /* Allocate a new batch? */ + if (cell_pool == NULL) { + + /* Allocate the cell array. */ + if ((cell_pool = + (struct cell *)calloc(cell_pool_grow, sizeof(struct cell))) == + NULL) + error("Failed to allocate fresh batch of cells."); + + /* Link them up via their progeny pointers. */ + for (k = 1; k < cell_pool_grow; k++) + cell_pool[k - 1].firstchild = &cell_pool[k]; + } + + /* Pick a cell off the pool. */ + res = cell_pool; + cell_pool = cell_pool->firstchild; + + /* Clean up a few things. */ + res->res = qsched_res_none; + res->firstchild = 0; + + /* Return the cell. */ + return res; +} +/** + * @brief Compute the center of mass of a given cell. + * + * @param c The #cell. + */ +void comp_com(struct cell *c) { + + int k, count = c->count; + struct cell *cp; + struct part *parts = c->parts; + double com[3] = {0.0, 0.0, 0.0}, mass = 0.0; + + if (c->split) { + + /* Loop over the projenitors and collect the multipole information. */ + for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { + float cp_mass = cp->new.mass; + com[0] += cp->new.com[0] * cp_mass; + com[1] += cp->new.com[1] * cp_mass; + com[2] += cp->new.com[2] * cp_mass; + mass += cp_mass; } - - + + /* Otherwise, collect the multipole from the particles. */ + } else { + + for (k = 0; k < count; k++) { + float p_mass = parts[k].mass; + com[0] += parts[k].x[0] * p_mass; + com[1] += parts[k].x[1] * p_mass; + com[2] += parts[k].x[2] * p_mass; + mass += p_mass; + } + } + + /* Store the COM data, if any was collected. */ + if (mass > 0.0) { + float imass = 1.0f / mass; + c->new.com[0] = com[0] * imass; + c->new.com[1] = com[1] * imass; + c->new.com[2] = com[2] * imass; + c->new.mass = mass; + } else { + c->new.com[0] = 0.0; + c->new.com[1] = 0.0; + c->new.com[2] = 0.0; + c->new.mass = 0.0; + } +} + /** * @brief Sort the parts into eight bins along the given pivots and * fill the multipoles. Also adds the hierarchical resources @@ -128,369 +208,614 @@ struct cell *cell_get ( ) { * @param N The total number of parts. * @param s The #sched to store the resources. */ - -void cell_split ( struct cell *c , struct qsched *s ) { - - int i, j, k, count = c->count; - struct part temp, *parts = c->parts; - struct cell *cp; - int left[8], right[8]; - double pivot[3]; - static struct cell *root = NULL; - - /* Set the root cell. */ - if ( root == NULL ) - root = c; - - /* Add a resource for this cell if it doesn't have one yet. */ - if ( c->res == qsched_res_none ) - c->res = qsched_addres( s , qsched_owner_none , qsched_res_none ); - - /* Attach a center-of-mass task to the cell. */ - c->com_tid = qsched_addtask( s , task_type_com , task_flag_none , &c , sizeof(struct cell *) , 1 ); - - /* Does this cell need to be split? */ - if ( count > cell_maxparts ) { - - /* Mark this cell as split. */ - c->split = 1; - - /* Create the progeny. */ - for ( k = 0 ; k < 8 ; k++ ) { - c->progeny[k] = cp = cell_get(); - cp->parent = c; - cp->loc[0] = c->loc[0]; - cp->loc[1] = c->loc[1]; - cp->loc[2] = c->loc[2]; - cp->h[0] = c->h[0]/2; - cp->h[1] = c->h[1]/2; - cp->h[2] = c->h[2]/2; - cp->res = qsched_addres( s , qsched_owner_none , c->res ); - if ( k & 4 ) - cp->loc[0] += cp->h[0]; - if ( k & 2 ) - cp->loc[1] += cp->h[1]; - if ( k & 1 ) - cp->loc[2] += cp->h[2]; - } - - /* Init the pivots. */ - for ( k = 0 ; k < 3 ; k++ ) - pivot[k] = c->loc[k] + c->h[k]/2; - - /* Split along the x-axis. */ - i = 0; j = count - 1; - while ( i <= j ) { - while ( i <= count-1 && parts[i].x[0] <= pivot[0] ) - i += 1; - while ( j >= 0 && parts[j].x[0] > pivot[0] ) - j -= 1; - if ( i < j ) { - temp = parts[i]; parts[i] = parts[j]; parts[j] = temp; - } - } - left[1] = i; right[1] = count - 1; - left[0] = 0; right[0] = j; - - /* Split along the y axis, twice. */ - for ( k = 1 ; k >= 0 ; k-- ) { - i = left[k]; j = right[k]; - while ( i <= j ) { - while ( i <= right[k] && parts[i].x[1] <= pivot[1] ) - i += 1; - while ( j >= left[k] && parts[j].x[1] > pivot[1] ) - j -= 1; - if ( i < j ) { - temp = parts[i]; parts[i] = parts[j]; parts[j] = temp; - } - } - left[2*k+1] = i; right[2*k+1] = right[k]; - left[2*k] = left[k]; right[2*k] = j; - } - - /* Split along the z axis, four times. */ - for ( k = 3 ; k >= 0 ; k-- ) { - i = left[k]; j = right[k]; - while ( i <= j ) { - while ( i <= right[k] && parts[i].x[2] <= pivot[2] ) - i += 1; - while ( j >= left[k] && parts[j].x[2] > pivot[2] ) - j -= 1; - if ( i < j ) { - temp = parts[i]; parts[i] = parts[j]; parts[j] = temp; - } - } - left[2*k+1] = i; right[2*k+1] = right[k]; - left[2*k] = left[k]; right[2*k] = j; - } - - /* Store the counts and offsets. */ - for ( k = 0 ; k < 8 ; k++ ) { - c->progeny[k]->count = right[k] - left[k] + 1; - c->progeny[k]->parts = &c->parts[ left[k] ]; - } - - /* Recurse. */ - for ( k = 0 ; k < 8 ; k++ ) - cell_split( c->progeny[k] , s ); - - /* Link the COM tasks. */ - for ( k = 0 ; k < 8 ; k++ ) - qsched_addunlock( s , c->progeny[k]->com_tid , c->com_tid ); - - } /* does the cell need to be split? */ - - /* Set this cell's resources ownership. */ - qsched_res_own( s , c->res , s->nr_queues * ( c->parts - root->parts ) / root->count ); - +void cell_split(struct cell *c, struct qsched *s) { + + int i, j, k, kk, count = c->count; + struct part temp, *parts = c->parts; + struct cell *cp; + int left[8], right[8]; + double pivot[3]; + static struct cell *root = NULL; + struct cell *progenitors[8]; + + /* Set the root cell. */ + if (root == NULL) { + root = c; + c->sibling = 0; + } + + /* Add a resource for this cell if it doesn't have one yet. */ + if (c->res == qsched_res_none) + c->res = qsched_addres(s, qsched_owner_none, qsched_res_none); + +/* Attach a center-of-mass task to the cell. */ +#ifdef COM_AS_TASK + if (count > 0) + c->com_tid = qsched_addtask(s, task_type_com, task_flag_none, &c, + sizeof(struct cell *), 1); +#endif + + /* Does this cell need to be split? */ + if (count > cell_maxparts) { + + /* Mark this cell as split. */ + c->split = 1; + + /* Create the progeny. */ + for (k = 0; k < 8; k++) { + progenitors[k] = cp = cell_get(); + cp->loc[0] = c->loc[0]; + cp->loc[1] = c->loc[1]; + cp->loc[2] = c->loc[2]; + cp->h = c->h * 0.5; + cp->res = qsched_addres(s, qsched_owner_none, c->res); + if (k & 4) cp->loc[0] += cp->h; + if (k & 2) cp->loc[1] += cp->h; + if (k & 1) cp->loc[2] += cp->h; } - - + + /* Init the pivots. */ + for (k = 0; k < 3; k++) pivot[k] = c->loc[k] + c->h * 0.5; + + /* Split along the x-axis. */ + i = 0; + j = count - 1; + while (i <= j) { + while (i <= count - 1 && parts[i].x[0] < pivot[0]) i += 1; + while (j >= 0 && parts[j].x[0] >= pivot[0]) j -= 1; + if (i < j) { + temp = parts[i]; + parts[i] = parts[j]; + parts[j] = temp; + } + } + left[1] = i; + right[1] = count - 1; + left[0] = 0; + right[0] = j; + + /* Split along the y axis, twice. */ + for (k = 1; k >= 0; k--) { + i = left[k]; + j = right[k]; + while (i <= j) { + while (i <= right[k] && parts[i].x[1] < pivot[1]) i += 1; + while (j >= left[k] && parts[j].x[1] >= pivot[1]) j -= 1; + if (i < j) { + temp = parts[i]; + parts[i] = parts[j]; + parts[j] = temp; + } + } + left[2 * k + 1] = i; + right[2 * k + 1] = right[k]; + left[2 * k] = left[k]; + right[2 * k] = j; + } + + /* Split along the z axis, four times. */ + for (k = 3; k >= 0; k--) { + i = left[k]; + j = right[k]; + while (i <= j) { + while (i <= right[k] && parts[i].x[2] < pivot[2]) i += 1; + while (j >= left[k] && parts[j].x[2] >= pivot[2]) j -= 1; + if (i < j) { + temp = parts[i]; + parts[i] = parts[j]; + parts[j] = temp; + } + } + left[2 * k + 1] = i; + right[2 * k + 1] = right[k]; + left[2 * k] = left[k]; + right[2 * k] = j; + } + + /* Store the counts and offsets. */ + for (k = 0; k < 8; k++) { + progenitors[k]->count = right[k] - left[k] + 1; + progenitors[k]->parts = &c->parts[left[k]]; + } + + /* Find the first non-empty progenitor */ + for (k = 0; k < 8; k++) + if (progenitors[k]->count > 0) { + c->firstchild = progenitors[k]; + break; + } + +#ifdef SANITY_CHECKS + if (c->firstchild == NULL) + error("Cell has been split but all progenitors have 0 particles"); +#endif + + /* Prepare the pointers. */ + for (k = 0; k < 8; k++) { + + /* Find the next non-empty sibling */ + for (kk = k + 1; kk < 8; ++kk) { + if (progenitors[kk]->count > 0) { + progenitors[k]->sibling = progenitors[kk]; + break; + } + } + + /* No non-empty sibling ? Go back a level */ + if (kk == 8) progenitors[k]->sibling = c->sibling; + } + + /* Recurse. */ + for (k = 0; k < 8; k++) + if (progenitors[k]->count > 0) cell_split(progenitors[k], s); + +/* Link the COM tasks. */ +#ifdef COM_AS_TASK + for (k = 0; k < 8; k++) + if (progenitors[k]->count > 0) + qsched_addunlock(s, progenitors[k]->com_tid, c->com_tid); +#endif + + /* Otherwise, we're at a leaf, so create the cell's particle-cell task. */ + } else { + struct cell *data[2] = {root, c}; + int tid = qsched_addtask(s, task_type_self_pc, task_flag_none, data, + 2 * sizeof(struct cell *), 1); + qsched_addlock(s, tid, c->res); +#ifdef COM_AS_TASK + qsched_addunlock(s, root->com_tid, tid); +#endif + } /* does the cell need to be split? */ + +/* Compute the cell's center of mass. */ +#ifndef COM_AS_TASK + comp_com(c); +#endif + + /* Set this cell's resources ownership. */ + qsched_res_own(s, c->res, + s->nr_queues * (c->parts - root->parts) / root->count); +} + +/* -------------------------------------------------------------------------- */ +/* New tree walk */ +/* -------------------------------------------------------------------------- */ + /** - * @brief Compute the center of mass of a given cell. + * @brief Interacts all particles in ci with the monopole in cj + */ +static inline void make_interact_pc(struct cell *leaf, struct cell *cj) { + + int j, k, count = leaf->count; + double com[3] = {0.0, 0.0, 0.0}; + float mcom, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; + struct part *parts = leaf->parts; + +#ifdef SANITY_CHECKS + + /* Sanity checks */ + if (leaf->count == 0) error("Empty cell!"); + + /* Sanity check. */ + if (cj->new.mass == 0.0) { + message("%e %e %e %d %p %d %p", cj->new.com[0], cj->new.com[1], + cj->new.com[2], cj->count, cj, cj->split, cj->sibling); + + for (j = 0; j < cj->count; ++j) + message("part %d mass=%e id=%d", j, cj->parts[j].mass, cj->parts[j].id); + + error("com does not seem to have been set."); + } + +#endif + + /* Init the com's data. */ + for (k = 0; k < 3; k++) com[k] = cj->new.com[k]; + mcom = cj->new.mass; + + /* Loop over every particle in leaf. */ + for (j = 0; j < count; j++) { + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = com[k] - parts[j].x[k]; + r2 += dx[k] * dx[k]; + } + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = mcom * const_G * ir * ir * ir; + for (k = 0; k < 3; k++) parts[j].a[k] += w * dx[k]; + +#if ICHECK >= 0 + if (leaf->parts[j].id == ICHECK) + printf("[DEBUG] cell [%f,%f,%f] interacting with cj->loc=[%f,%f,%f] " + "m=%f h=%f\n", + leaf->loc[0], leaf->loc[1], leaf->loc[2], cj->loc[0], cj->loc[1], + cj->loc[2], mcom, cj->h); + + if (leaf->parts[j].id == ICHECK) + printf("[NEW] Interaction with monopole a=( %e %e %e ) h= %f Nj= %d m_j= " + "%f\n", + w * dx[0], w * dx[1], w * dx[2], cj->h, cj->count, mcom); +#endif + + } /* loop over every particle. */ +} + +/** + * @brief Checks whether the cell leaf is a subvolume of the cell c + */ +static inline int is_inside(struct cell *leaf, struct cell *c) { + return (leaf->parts >= c->parts) && (leaf->parts < c->parts + c->count); +} + +/** + * @brief Checks whether the cells are direct neighbours ot not + */ +static inline int are_neighbours_different_size(struct cell *ci, + struct cell *cj) { + + int k; + float dx[3]; + double cih = ci->h, cjh = cj->h; + + /* Maximum allowed distance */ + float min_dist = 0.5 * (cih + cjh); + + /* (Manhattan) Distance between the cells */ + for (k = 0; k < 3; k++) { + float center_i = ci->loc[k] + 0.5 * cih; + float center_j = cj->loc[k] + 0.5 * cjh; + dx[k] = fabs(center_i - center_j); + } + + return (dx[0] <= min_dist) && (dx[1] <= min_dist) && (dx[2] <= min_dist); +} + +/** + * @brief Checks whether the cells are direct neighbours ot not. Both cells have + * to be of the same size + */ +static inline int are_neighbours(struct cell *ci, struct cell *cj) { + + int k; + float dx[3]; + +#ifdef SANITY_CHECKS + if (ci->h != cj->h) + error(" Cells of different size in distance calculation."); +#endif + + /* Maximum allowed distance */ + float min_dist = ci->h; + + /* (Manhattan) Distance between the cells */ + for (k = 0; k < 3; k++) { + float center_i = ci->loc[k]; + float center_j = cj->loc[k]; + dx[k] = fabs(center_i - center_j); + } + + return (dx[0] <= min_dist) && (dx[1] <= min_dist) && (dx[2] <= min_dist); +} + +/** + * @brief Compute the interactions between all particles in a cell leaf + * and the center of mass of all the cells in a part of the tree + * described by ci and cj * - * @param c The #cell. + * @param ci The #cell containing the particle + * @param cj The #cell containing the center of mass. */ - -void comp_com ( struct cell *c ) { +static inline void iact_pair_pc(struct cell *ci, struct cell *cj, + struct cell *leaf) { - int k, count = c->count; - struct cell *cp; - struct part *p, *parts = c->parts; + struct cell *cp, *cps; - /* Is the cell split? */ - if ( c->split ) { - - /* Collect the centre of gravity and mass from the progeny. */ - for ( k = 0 ; k < 8 ; k++ ) { - cp = c->progeny[k]; - c->com[0] += cp->com[0]*cp->mass; - c->com[1] += cp->com[1]*cp->mass; - c->com[2] += cp->com[2]*cp->mass; - c->mass += cp->mass; - } - c->com[0] /= c->mass; c->com[1] /= c->mass; c->com[2] /= c->mass; - - } - - /* Otherwise, collect the multipole from local data. */ - else { - - for ( k = 0 ; k < count ; k++ ) { - p = &parts[k]; - c->com[0] += p->x[0]*p->mass; - c->com[1] += p->x[1]*p->mass; - c->com[2] += p->x[2]*p->mass; - c->mass += p->mass; - } - c->com[0] /= c->mass; c->com[1] /= c->mass; c->com[2] /= c->mass; - +#ifdef SANITY_CHECKS + + /* Early abort? */ + if (ci->count == 0 || cj->count == 0) error("Empty cell !"); + + /* Sanity check */ + if (ci == cj) + error("The impossible has happened: pair interaction between a cell and " + "itself."); + + /* Sanity check */ + if (!is_inside(leaf, ci)) + error("The impossible has happened: The leaf is not within ci"); + + /* Are the cells direct neighbours? */ + if (!are_neighbours(ci, cj)) error("Cells are not neighours"); + + /* Are both cells split ? */ + if (!ci->split || !cj->split) error("One of the cells is not split !"); +#endif + + /* Let's find in which subcell of ci the leaf is */ + for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) { + + if (is_inside(leaf, cp)) break; + } + + if (are_neighbours_different_size(cp, cj)) { + + /* Now interact this subcell with all subcells of cj */ + for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) { + + /* Check whether we have to recurse or can directly jump to the multipole + * calculation */ + if (are_neighbours(cp, cps)) { + + /* We only recurse if the children are split */ + if (cp->split && cps->split) { + iact_pair_pc(cp, cps, leaf); } - + + } else { + make_interact_pc(leaf, cps); + } } - - + } else { + + /* If cp is not a neoghbour of cj, we can directly interact with the + * multipoles */ + for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) { + + make_interact_pc(leaf, cps); + } + } +} + +/** + * @brief Compute the interactions between all particles in a leaf and + * and all the monopoles in the cell c + * + * @param c The #cell containing the monopoles + * @param leaf The #cell containing the particles + */ +static inline void iact_self_pc(struct cell *c, struct cell *leaf) { + + struct cell *cp, *cps; + +#ifdef SANITY_CHECKS + + /* Early abort? */ + if (c->count == 0) error("Empty cell !"); + + if (!c->split) error("Cell is not split !"); + +#endif + + /* Find in which subcell of c the leaf is */ + for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { + + /* Only recurse if the leaf is in this part of the tree */ + if (is_inside(leaf, cp)) break; + } + + if (cp->split) { + + /* Recurse if the cell can be split */ + iact_self_pc(cp, leaf); + + /* Now, interact with every other subcell */ + for (cps = c->firstchild; cps != c->sibling; cps = cps->sibling) { + + /* Since cp and cps will be direct neighbours it is only worth recursing + */ + /* if the cells can both be split */ + if (cp != cps && cps->split) iact_pair_pc(cp, cps, leaf); + } + } +} + /** * @brief Compute the interactions between all particles in a cell - * and the center of mass of another cell. + * and all particles in the other cell (N^2 algorithm) * * @param ci The #cell containing the particles. - * @param cj The #cell containing the center of mass. + * @param cj The #cell containing the other particles */ - -void iact_pair_pc ( struct cell *ci , struct cell *cj ) { +static inline void iact_pair_direct(struct cell *ci, struct cell *cj) { - int j, k, count = ci->count; - double com[3], mcom, dx[3], r2, ir, w; - struct part *parts = ci->parts; - - /* Early abort? */ - if ( count == 0 ) - return; - - /* Init the com's data. */ - for ( k = 0 ; k < 3 ; k++ ) - com[k] = cj->com[k]; - mcom = cj->mass; + int i, j, k; + int count_i = ci->count, count_j = cj->count; + struct part *parts_i = ci->parts, *parts_j = cj->parts; + double xi[3]; + float dx[3], ai[3], mi, mj, r2, w, ir; - /* Loop over every following particle. */ - for ( j = 0 ; j < count ; j++ ) { +#ifdef SANITY_CHECKS - /* Compute the pairwise distance. */ - for ( r2 = 0.0 , k = 0 ; k < 3 ; k++ ) { - dx[k] = com[k] - parts[j].x[k]; - r2 += dx[k]*dx[k]; - } + /* Bad stuff will happen if cell sizes are different */ + if (ci->h != cj->h) + error("Non matching cell sizes !! h_i=%f h_j=%f\n", ci->h, cj->h); - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt( r2 ); - w = mcom * const_G * ir * ir * ir; - for ( k = 0 ; k < 3 ; k++ ) - parts[j].a[k] += w * dx[k]; +#endif - } /* loop over every other particle. */ - + /* Loop over all particles in ci... */ + for (i = 0; i < count_i; i++) { + + /* Init the ith particle's data. */ + for (k = 0; k < 3; k++) { + xi[k] = parts_i[i].x[k]; + ai[k] = 0.0f; } - + mi = parts_i[i].mass; + + /* Loop over every particle in the other cell. */ + for (j = 0; j < count_j; j++) { + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = xi[k] - parts_j[j].x[k]; + r2 += dx[k] * dx[k]; + } + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir; + mj = parts_j[j].mass; + for (k = 0; k < 3; k++) { + float wdx = w * dx[k]; + parts_j[j].a[k] += wdx * mi; + ai[k] -= wdx * mj; + } + +#ifdef COUNTERS + ++count_direct_unsorted; +#endif + +#if ICHECK >= 0 && 0 + if (parts_i[i].id == ICHECK) + printf( + "[NEW] Interaction with particle id= %d (pair i) a=( %e %e %e )\n", + parts_j[j].id, -mi * w * dx[0], -mi * w * dx[1], -mi * w * dx[2]); + + if (parts_j[j].id == ICHECK) + printf( + "[NEW] Interaction with particle id= %d (pair j) a=( %e %e %e )\n", + parts_i[i].id, mj * w * dx[0], mj * w * dx[1], mj * w * dx[2]); +#endif + + } /* loop over every other particle. */ + + /* Store the accumulated acceleration on the ith part. */ + for (k = 0; k < 3; k++) parts_i[i].a[k] += ai[k]; + + } /* loop over all particles. */ +} /** - * @brief Compute the interactions between all particles in a cell. + * @brief Decides whether two cells use the direct summation interaction or the + * multipole interactions * * @param ci The #cell. * @param cj The other #cell. */ - -void iact_pair ( struct cell *ci , struct cell *cj ) { +void iact_pair(struct cell *ci, struct cell *cj) { - int i, j, k; - int count_i = ci->count, count_j = cj->count; - double xi[3], ai[3], mi, mj, dx[3], r2, ir, w; - struct part *parts_i = ci->parts, *parts_j = cj->parts; - - /* Early abort? */ - if ( count_i == 0 || count_j == 0 ) - return; - - /* Recurse? */ - if ( ci->split && cj->split ) - for ( j = 0 ; j < 8 ; j++ ) - for ( k = j+1 ; k < 8 ; k++ ) - iact_pair( ci->progeny[j] , cj->progeny[k] ); - - /* Get the minimum distance between both cells. */ - for ( r2 = 0.0, k = 0 ; k < 3 ; k++ ) { - dx[k] = fabs( ci->loc[k] - cj->loc[k] ); - if ( dx[k] > 0 ) - dx[k] -= ci->h[k]; - r2 += dx[k]*dx[k]; - } + struct cell *cp, *cps; - /* Sufficiently well-separated? */ - if ( r2/ci->h[0] > dist_min*dist_min ) { +#ifdef SANITY_CHECKS - /* Compute the center of mass interactions. */ - iact_pair_pc( ci , cj ); - iact_pair_pc( cj , ci ); + /* Early abort? */ + if (ci->count == 0 || cj->count == 0) error("Empty cell !"); - } + /* Bad stuff will happen if cell sizes are different */ + if (ci->h != cj->h) + error("Non matching cell sizes !! h_i=%f h_j=%f\n", ci->h, cj->h); + + /* Sanity check */ + if (ci == cj) + error("The impossible has happened: pair interaction between a cell and " + "itself."); + +#endif - /* Recurse? */ - else if ( ci->split && cj->split ) - for ( j = 0 ; j < 8 ; j++ ) - for ( k = j+1 ; k < 8 ; k++ ) - iact_pair( ci->progeny[j] , cj->progeny[k] ); - - /* Otherwise, do direct interactions. */ - else { - - /* Loop over all particles... */ - for ( i = 0 ; i < count_i ; i++ ) { - - /* Init the ith particle's data. */ - for ( k = 0 ; k < 3 ; k++ ) { - xi[k] = parts_i[i].x[k]; - ai[k] = 0.0; - } - mi = parts_i[i].mass; - - /* Loop over every following particle. */ - for ( j = 0 ; j < count_j ; j++ ) { - - /* Compute the pairwise distance. */ - for ( r2 = 0.0 , k = 0 ; k < 3 ; k++ ) { - dx[k] = xi[k] - parts_j[j].x[k]; - r2 += dx[k]*dx[k]; - } - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt( r2 ); - w = const_G * ir * ir * ir; - mj = parts_j[j].mass; - for ( k = 0 ; k < 3 ; k++ ) { - parts_j[j].a[k] += w * dx[k] * mi; - ai[k] -= w * dx[k] * mj; - } - - } /* loop over every other particle. */ - - /* Store the accumulated acceleration on the ith part. */ - for ( k = 0 ; k < 3 ; k++ ) - parts_i[i].a[k] += ai[k]; - - } /* loop over all particles. */ - - } /* otherwise, compute interactions directly. */ + /* Are the cells direct neighbours? */ + if (are_neighbours(ci, cj)) { + /* Are both cells split ? */ + if (ci->split && cj->split) { + + /* Let's split both cells and build all possible pairs */ + for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) { + for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) { + + /* If the cells are neighbours, recurse. */ + if (are_neighbours(cp, cps)) { + iact_pair(cp, cps); + } + } + } + } else {/* Otherwise, compute the interactions at this level directly. */ + iact_pair_direct(ci, cj); } - + } +} /** * @brief Compute the interactions between all particles in a cell. * * @param c The #cell. */ - -void iact_self ( struct cell *c ) { +void iact_self_direct(struct cell *c) { + int i, j, k, count = c->count; + double xi[3] = {0.0, 0.0, 0.0}; + float ai[3] = {0.0, 0.0, 0.0}, mi, mj, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; + struct part *parts = c->parts; + struct cell *cp, *cps; - int i, j, k, count = c->count; - double xi[3], ai[3], mi, mj, dx[3], r2, ir, w; - struct part *parts = c->parts; - - /* Early abort? */ - if ( count == 0 ) - return; - - /* Recurse? */ - if ( c->split ) - for ( j = 0 ; j < 8 ; j++ ) { - iact_self( c->progeny[j] ); - for ( k = j+1 ; k < 8 ; k++ ) - iact_pair( c->progeny[j] , c->progeny[k] ); - } - - /* Otherwise, compute interactions directly. */ - else { - - /* Loop over all particles... */ - for ( i = 0 ; i < count ; i++ ) { - - /* Init the ith particle's data. */ - for ( k = 0 ; k < 3 ; k++ ) { - xi[k] = parts[i].x[k]; - ai[k] = 0.0; - } - mi = parts[i].mass; - - /* Loop over every following particle. */ - for ( j = i+1 ; j < count ; j++ ) { - - /* Compute the pairwise distance. */ - for ( r2 = 0.0 , k = 0 ; k < 3 ; k++ ) { - dx[k] = xi[k] - parts[j].x[k]; - r2 += dx[k]*dx[k]; - } - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt( r2 ); - w = const_G * ir * ir * ir; - mj = parts[j].mass; - for ( k = 0 ; k < 3 ; k++ ) { - parts[j].a[k] += w * dx[k] * mi; - ai[k] -= w * dx[k] * mj; - } - - } /* loop over every other particle. */ - - /* Store the accumulated acceleration on the ith part. */ - for ( k = 0 ; k < 3 ; k++ ) - parts[i].a[k] += ai[k]; - - } /* loop over all particles. */ - - } /* otherwise, compute interactions directly. */ +#ifdef SANITY_CHECKS + + /* Early abort? */ + if (count == 0) error("Empty cell !"); +#endif + + /* If the cell is split, interact each progeny with itself, and with + each of its siblings. */ + if (c->split) { + for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { + iact_self_direct(cp); + for (cps = cp->sibling; cps != c->sibling; cps = cps->sibling) + iact_pair(cp, cps); } - + + /* Otherwise, compute the interactions directly. */ + } else { + + /* Loop over all particles... */ + for (i = 0; i < count; i++) { + + /* Init the ith particle's data. */ + for (k = 0; k < 3; k++) { + xi[k] = parts[i].x[k]; + ai[k] = 0.0; + } + mi = parts[i].mass; + + /* Loop over every following particle. */ + for (j = i + 1; j < count; j++) { + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = xi[k] - parts[j].x[k]; + r2 += dx[k] * dx[k]; + } + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir; + mj = parts[j].mass; + for (k = 0; k < 3; k++) { + float wdx = w * dx[k]; + parts[j].a[k] += wdx * mi; + ai[k] -= wdx * mj; + } + +#if ICHECK >= 0 + if (parts[i].id == ICHECK) + message("[NEW] Interaction with particle id= %d (self i)", + parts[j].id); + + if (parts[j].id == ICHECK) + message("[NEW] Interaction with particle id= %d (self j)", + parts[i].id); +#endif + + } /* loop over every other particle. */ + + /* Store the accumulated acceleration on the ith part. */ + for (k = 0; k < 3; k++) parts[i].a[k] += ai[k]; + + } /* loop over all particles. */ + + } /* otherwise, compute interactions directly. */ +} /** * @brief Create the tasks for the cell pair/self. @@ -499,298 +824,661 @@ void iact_self ( struct cell *c ) { * @param ci The first #cell. * @param cj The second #cell. */ - -void create_tasks ( struct qsched *s , struct cell *ci , struct cell *cj ) { +void create_tasks(struct qsched *s, struct cell *ci, struct cell *cj) { - int j, k; - qsched_task_t tid; - struct cell *data[2]; - double dx, r2; - - /* If either cell is empty, stop. */ - if ( ci->count == 0 || ( cj != NULL && cj->count == 0 ) ) - return; + qsched_task_t tid; + struct cell *data[2], *cp, *cps; + +#ifdef SANITY_CHECKS + + /* If either cell is empty, stop. */ + if (ci->count == 0 || (cj != NULL && cj->count == 0)) error("Empty cell !"); + +#endif + + /* Single cell? */ + if (cj == NULL) { + + /* Is this cell split and above the task limit ? */ + if (ci->split && ci->count > task_limit / ci->count) { + + /* Loop over each of this cell's progeny. */ + for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) { + + /* Make self-interaction task. */ + create_tasks(s, cp, NULL); + + /* Make all pair-interaction tasks. */ + for (cps = cp->sibling; cps != ci->sibling; cps = cps->sibling) + create_tasks(s, cp, cps); + } + + /* Otherwise, add a self-interaction task. */ + } else { + + /* Set the data. */ + data[0] = ci; + data[1] = NULL; + + /* Create the task. */ + tid = + qsched_addtask(s, task_type_self, task_flag_none, data, + sizeof(struct cell *) * 2, ci->count * ci->count / 2); + + /* Add the resource (i.e. the cell) to the new task. */ + qsched_addlock(s, tid, ci->res); + +/* If this call might recurse, add a dependency on the cell's COM + task. */ +#ifdef COM_AS_TASK + if (ci->split) qsched_addunlock(s, ci->com_tid, tid); +#endif + } - /* Single cell? */ - if ( cj == NULL ) { - - /* Is this cell split? */ - if ( ci->split && ci->count > task_limit ) { - - /* Recurse over the progeny. */ - for ( j = 0 ; j < 8 ; j++ ) { - - /* Make self-task. */ - create_tasks( s , ci->progeny[j] , NULL ); - - /* Make all pair tasks. */ - for ( k = j+1 ; k < 8 ; k++ ) - create_tasks( s , ci->progeny[j] , ci->progeny[k] ); - } - - } - - /* Otherwise, add a self-interaction task. */ - else { - - /* Set the data. */ - data[0] = ci; data[1] = NULL; - - /* Create the task. */ - tid = qsched_addtask( s , task_type_self , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count*ci->count/2 ); - - /* Add the resource. */ - qsched_addlock( s , tid , ci->res ); - - } - - } - /* Otherwise, it's a pair. */ - else { - - /* Get the minimum distance between both cells. */ - for ( r2 = 0.0, k = 0 ; k < 3 ; k++ ) { - dx = fabs( ci->loc[k] - cj->loc[k] ); - if ( dx > 0 ) - dx -= ci->h[k]; - r2 += dx*dx; - } - - /* Are the cells sufficiently well separated? */ - if ( r2/ci->h[0] > dist_min*dist_min ) { - - /* Interact ci's parts with cj as a cell. */ - data[0] = ci; data[1] = cj; - tid = qsched_addtask( s , task_type_pair_pc , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count ); - qsched_addlock( s , tid , ci->res ); - qsched_addunlock( s , cj->com_tid , tid ); - - /* Interact cj's parts with ci as a cell. */ - data[0] = cj; data[1] = ci; - tid = qsched_addtask( s , task_type_pair_pc , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count ); - qsched_addlock( s , tid , cj->res ); - qsched_addunlock( s , ci->com_tid , tid ); - - } - - /* Does this task need to be broken-down further? */ - else if ( ci->split && cj->split && - ci->count > task_limit && cj->count > task_limit ) { - - /* Loop over all pairs between ci and cj's progeny. */ - for ( j = 0 ; j < 8 ; j++ ) - for ( k = 0 ; k < 8 ; k++ ) - create_tasks( s , ci->progeny[j] , cj->progeny[k] ); - - } - - /* Otherwise, generate a part-part task. */ - else { - - /* Set the data. */ - data[0] = ci; data[1] = cj; - - /* Create the task. */ - tid = qsched_addtask( s , task_type_pair , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count*cj->count ); - - /* Add the resources. */ - qsched_addlock( s , tid , ci->res ); - qsched_addlock( s , tid , cj->res ); - - /* Depend on the COMs in case this task recurses. */ - if ( ci->split || cj->split ) { - qsched_addunlock( s , ci->com_tid , tid ); - qsched_addunlock( s , cj->com_tid , tid ); - } - - } - - } /* otherwise, it's a pair. */ + } else { + + /* Are the cells NOT neighbours ? */ + if (!are_neighbours(ci, cj)) { + + } else {/* Cells are direct neighbours */ + /* Are both cells split ? */ + if (ci->split && cj->split && ci->count > task_limit / cj->count) { + + /* Let's split both cells and build all possible pairs */ + for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) { + for (cps = cj->firstchild; cps != cj->sibling; cps = cps->sibling) { + /* Recurse */ + create_tasks(s, cp, cps); + } + } + /* Otherwise, at least one of the cells is not split, build a direct + * interaction. */ + } else { + + /* Set the data. */ + data[0] = ci; + data[1] = cj; + + /* Create the task. */ + tid = qsched_addtask(s, task_type_pair, task_flag_none, data, + sizeof(struct cell *) * 2, ci->count * cj->count); + + /* Add the resources. */ + qsched_addlock(s, tid, ci->res); + qsched_addlock(s, tid, cj->res); + } + } /* Cells are direct neighbours */ + } /* Otherwise it's a pair */ +} + +/* -------------------------------------------------------------------------- */ +/* Legacy tree walk */ +/* -------------------------------------------------------------------------- */ + +/** + * @brief Compute the center of mass of a given cell recursively. + * + * @param c The #cell. + */ +void legacy_comp_com(struct cell *c, int *countCoMs) { + + int k, count = c->count; + struct cell *cp; + struct part *p, *parts = c->parts; + double com[3] = {0.0, 0.0, 0.0}, mass = 0.0; + + ++(*countCoMs); + + /* Is the cell split? */ + if (c->split) { + + /* Loop over the progeny. */ + for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { + /* Recurse */ + legacy_comp_com(cp, countCoMs); + + /* Collect multipole information */ + float cp_mass = cp->legacy.mass; + com[0] += cp->legacy.com[0] * cp_mass; + com[1] += cp->legacy.com[1] * cp_mass; + com[2] += cp->legacy.com[2] * cp_mass; + mass += cp_mass; } - - + + /* Otherwise, collect the multipole from local data. */ + } else { + + for (k = 0; k < count; k++) { + p = &parts[k]; + float p_mass = p->mass; + com[0] += p->x[0] * p_mass; + com[1] += p->x[1] * p_mass; + com[2] += p->x[2] * p_mass; + mass += p_mass; + } + } + + /* Finish multipole calculation */ + if (mass > 0.0) { + float imass = 1.0f / mass; + c->legacy.com[0] = com[0] * imass; + c->legacy.com[1] = com[1] * imass; + c->legacy.com[2] = com[2] * imass; + c->legacy.mass = mass; + } else { + c->legacy.com[0] = 0.0; + c->legacy.com[1] = 0.0; + c->legacy.com[2] = 0.0; + c->legacy.mass = 0.0; + } +} + +/** + * @brief Interacts a particle with a cell recursively using the original B-H + * tree walk procedure + * + * @param parts The array of particles + * @param i The particle of interest + * @param root The root of the tree under which we will search. + * @param monitor If set to @c parts[i].id, will produce debug output when + * ICHECK is set. + * @param cell The cell the particle interacts with + */ +void legacy_interact(struct part *parts, int i, struct cell *root, int monitor, + int *countMultipoles, int *countPairs) { + + int j, k; + float r2, dx[3], ir, w; + float a[3] = {0.0, 0.0, 0.0}; + double pix[3] = {parts[i].x[0], parts[i].x[1], parts[i].x[2]}; + int pid = parts[i].id; + struct cell *cell = root; + + /* Traverse the cells of the tree. */ + while (cell != NULL) { + + /* Are we in a leaf ? */ + if (!cell->split) { + + /* Interact the particle with the particles in the leaf */ + for (j = 0; j < cell->count; ++j) { + if (cell->parts[j].id == pid) continue; + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = cell->parts[j].x[k] - pix[k]; + r2 += dx[k] * dx[k]; + } + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = cell->parts[j].mass * const_G * ir * ir * ir; + for (k = 0; k < 3; k++) a[k] += w * dx[k]; + + (*countPairs)++; + +#if ICHECK >= 0 + if (pid == monitor) + message("[BH_] Interaction with particle id= %d a=( %e %e %e ) h= %f " + "loc=( %e %e %e )\n", + cell->parts[j].id, w * dx[0], w * dx[1], w * dx[2], cell->h, + cell->loc[0], cell->loc[1], cell->loc[2]); +#endif + } + + cell = cell->sibling; + } else { + + /* We are in a node */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = cell->legacy.com[k] - pix[k]; + r2 += dx[k] * dx[k]; + } + +#if ICHECK >= 0 + if (pid == monitor) + message("This is a node with %d particles h= %f. r= %f theta= %f", + cell->count, cell->h, sqrt(r2), dist_min); +#endif + + /* Is the cell far enough ? */ + if (dist_min * dist_min * r2 < cell->h * cell->h) { + +#if ICHECK >= 0 + if (pid == monitor) printf("Recursing...\n"); +#endif + cell = cell->firstchild; + continue; + } + +#if ICHECK >= 0 + if (pid == monitor) + message("[BH_] Can interact with the monopole. x= %f %f %f m= %f h= %f", + cell->legacy.com[0], cell->legacy.com[1], cell->legacy.com[2], + cell->legacy.mass, cell->h); +#endif + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = cell->legacy.mass * const_G * ir * ir * ir; + for (k = 0; k < 3; k++) a[k] += w * dx[k]; + + (*countMultipoles)++; + + /* Move to the next node */ + cell = cell->sibling; + } + } + + /* Store the locally computed acceleration back to the particle. */ + for (k = 0; k < 3; k++) parts[i].a_legacy[k] += a[k]; +} + +/** + * @brief Does a tree walk as in the B-H original work for all particles + * + * @param N The number of particles + * @param parts The array of particles + * @param root The root cell of the tree + * @param monitor ID of the particle to monitor and output interactions to + * stdout + */ +void legacy_tree_walk(int N, struct part *parts, struct cell *root, int monitor, + int *countMultipoles, int *countPairs, int *countCoMs) { + + int i; + + /* Compute multipoles (recursively) */ + legacy_comp_com(root, countCoMs); + + //#pragma omp parallel for + for (i = 0; i < N; ++i) { + if (parts[i].id == monitor) + message("tree walk for particle %d x= %f %f %f", parts[i].id, + parts[i].x[0], parts[i].x[1], parts[i].x[2]); + + legacy_interact(parts, i, root, monitor, countMultipoles, countPairs); + + if (parts[i].id == monitor) + message("[check] legacy acceleration for particle %d a= %.3e %.3e %.3e", + parts[i].id, parts[i].a_legacy[0], parts[i].a_legacy[1], + parts[i].a_legacy[2]); + } +} + +/* -------------------------------------------------------------------------- */ +/* Exact interaction */ +/* -------------------------------------------------------------------------- */ + +/** + * @brief Solve the particle interactions using the stupid N^2 algorithm + * + * @param N The number of particles + * @param parts The array of particles + */ +void interact_exact(int N, struct part *parts, int monitor) { + + int i, j, k; + float ir, w, r2, dx[3]; + + /* Loop over all particles. */ + for (i = 0; i < N; ++i) { + + /* Some things to store locally. */ + double pix[3] = {parts[i].x[0], parts[i].x[1], parts[i].x[2]}; + float mi = parts[i].mass; + + /* Loop over every other particle. */ + for (j = i + 1; j < N; ++j) { + + /* Compute the pairwise distance. */ + for (r2 = 0.0, k = 0; k < 3; k++) { + dx[k] = parts[j].x[k] - pix[k]; + r2 += dx[k] * dx[k]; + } + + /* Apply the gravitational acceleration. */ + ir = 1.0f / sqrtf(r2); + w = const_G * ir * ir * ir; + + for (k = 0; k < 3; k++) { + float wdx = w * dx[k]; + parts[j].a_exact[k] -= wdx * mi; + parts[i].a_exact[k] += wdx * parts[j].mass; + } + } + } + +#if ICHECK >= 0 + for (i = 0; i < N; ++i) + if (parts[i].id == monitor) + message("[check] exact acceleration for particle %d a= %.3e %.3e %.3e\n", + parts[i].id, parts[i].a_exact[0], parts[i].a_exact[1], + parts[i].a_exact[2]); +#endif +} + /** * @brief Set up and run a task-based Barnes-Hutt N-body solver. * * @param N The number of random particles to use. * @param nr_threads Number of threads to use. + * @param runs Number of force evaluations to use as a benchmark. + * @param fileName Input file name. If @c NULL or an empty string, random + * particle positions will be used. */ - -void test_bh ( int N , int nr_threads , int runs ) { - - int k; - struct cell *root; - struct part *parts; - struct qsched s; - ticks tic, toc_run, tot_setup = 0, tot_run = 0; - - /* Runner function. */ - void runner ( int type , void *data ) { - - ticks tic = getticks(); - - /* Decode the data. */ - struct cell **d = (struct cell **)data; - - /* Decode and execute the task. */ - switch ( type ) { - case task_type_self: - iact_self( d[0] ); - break; - case task_type_pair: - iact_pair( d[0] , d[1] ); - break; - case task_type_pair_pc: - iact_pair_pc( d[0] , d[1] ); - break; - case task_type_com: - comp_com( d[0] ); - break; - default: - error( "Unknown task type." ); - } - - atomic_add( &task_timers[type] , getticks() - tic ); - - } - - /* Initialize the per-task type timers. */ - for ( k = 0 ; k < task_type_count ; k++ ) - task_timers[k] = 0; - - /* Initialize the scheduler. */ - qsched_init( &s , nr_threads , qsched_flag_noreown ); - - /* Init and fill the particle array. */ - if ( ( parts = (struct part *)malloc( sizeof(struct part) * N ) ) == NULL ) - error( "Failed to allocate particle buffer." ); - for ( k = 0 ; k < N ; k++ ) { - parts[k].id = k; - parts[k].x[0] = ((double)rand())/RAND_MAX; - parts[k].x[1] = ((double)rand())/RAND_MAX; - parts[k].x[2] = ((double)rand())/RAND_MAX; - parts[k].mass = ((double)rand())/RAND_MAX; - parts[k].a[0] = 0.0; - parts[k].a[1] = 0.0; - parts[k].a[2] = 0.0; - } - - /* Init the cells. */ - root = cell_get(); - root->loc[0] = 0.0; root->loc[1] = 0.0; root->loc[2] = 0.0; - root->h[0] = 1.0; root->h[1] = 1.0; root->h[2] = 1.0; - root->count = N; - root->parts = parts; - cell_split( root , &s ); - - /* Create the tasks. */ +void test_bh(int N, int nr_threads, int runs, char *fileName) { + + int i, k; + struct cell *root; + struct part *parts; + FILE *file; + struct qsched s; + ticks tic, toc_run, tot_setup = 0, tot_run = 0; + int countMultipoles = 0, countPairs = 0, countCoMs = 0; + + /* Runner function. */ + void runner(int type, void * data) { + + ticks tic = getticks(); + + /* Decode the data. */ + struct cell **d = (struct cell **)data; + + /* Decode and execute the task. */ + switch (type) { + case task_type_self: + iact_self_direct(d[0]); + break; + case task_type_pair: + iact_pair(d[0], d[1]); + break; + case task_type_self_pc: + iact_self_pc(d[0], d[1]); + break; + case task_type_com: + comp_com(d[0]); + break; + default: + error("Unknown task type."); + } + + atomic_add(&task_timers[type], getticks() - tic); + } + + /* Initialize the per-task type timers. */ + for (k = 0; k < task_type_count; k++) task_timers[k] = 0; + + /* Initialize the scheduler. */ + qsched_init(&s, nr_threads, qsched_flag_noreown | + qsched_flag_pthread); + + /* Init and fill the particle array. */ + if ((parts = (struct part *)malloc(sizeof(struct part) * N)) == NULL) + error("Failed to allocate particle buffer."); + + /* If no input file was specified, generate random particle positions. */ + if (fileName == NULL || fileName[0] == 0) { + for (k = 0; k < N; k++) { + parts[k].id = k; + parts[k].x[0] = ((double)rand()) / RAND_MAX; + parts[k].x[1] = ((double)rand()) / RAND_MAX; + parts[k].x[2] = ((double)rand()) / RAND_MAX; + parts[k].mass = ((double)rand()) / RAND_MAX; + parts[k].a_legacy[0] = 0.0; + parts[k].a_legacy[1] = 0.0; + parts[k].a_legacy[2] = 0.0; + } + + /* Otherwise, read them from a file. */ + } else { + file = fopen(fileName, "r"); + if (file) { + for (k = 0; k < N; k++) { + if (fscanf(file, "%d", &parts[k].id) != 1) + error("Failed to read ID of part %i.", k); + if (fscanf(file, "%lf%lf%lf", &parts[k].x[0], &parts[k].x[1], + &parts[k].x[2]) != + 3) + error("Failed to read position of part %i.", k); + if (fscanf(file, "%f", &parts[k].mass) != 1) + error("Failed to read mass of part %i.", k); + } + fclose(file); + } + } + + /* Init the cells. */ + root = cell_get(); + root->loc[0] = 0.0; + root->loc[1] = 0.0; + root->loc[2] = 0.0; + root->h = 1.0; + root->count = N; + root->parts = parts; + cell_split(root, &s); + + /* Iterate over the cells and get the average number of particles + per leaf. */ + struct cell *c = root; + int nr_leaves = 0; + while (c != NULL) { + if (!c->split) { + nr_leaves++; + c = c->sibling; + } else { + c = c->firstchild; + } + } + message("Average number of parts per leaf is %f.", ((double)N) / nr_leaves); + +#if ICHECK > 0 + printf("----------------------------------------------------------\n"); + + /* Do a N^2 interactions calculation */ + + ticks tic_exact = getticks(); + interact_exact(N, parts, ICHECK); + ticks toc_exact = getticks(); + + printf("Exact calculation (1 thread) took %lli (= %e) ticks\n", + toc_exact - tic_exact, (float)(toc_exact - tic_exact)); + + printf("----------------------------------------------------------\n"); +#endif + + /* Create the tasks. */ + tic = getticks(); + create_tasks(&s, root, NULL); + tot_setup += getticks() - tic; + + /* Dump the number of tasks. */ + message("total nr of tasks: %i.", s.count); + message("total nr of deps: %i.", s.count_deps); + message("total nr of res: %i.", s.count_res); + message("total nr of locks: %i.", s.count_locks); + message("total nr of uses: %i.", s.count_uses); + int counts[task_type_count]; + for (k = 0; k < task_type_count; k++) counts[k] = 0; + for (k = 0; k < s.count; k++) counts[s.tasks[k].type] += 1; + + // char buffer[200]; + // sprintf(buffer, "timings_legacy_%d_%d.dat", cell_maxparts, nr_threads); + // FILE *fileTime = fopen(buffer, "w"); + + /* Loop over the number of runs. */ + for (k = 0; k < 0 /* runs */; k++) { + + countMultipoles = 0; + countPairs = 0; + countCoMs = 0; + + /* Execute the legacy walk. */ tic = getticks(); - create_tasks( &s , root , NULL ); - tot_setup += getticks() - tic; - - /* Dump the number of tasks. */ - message( "total nr of tasks: %i." , s.count ); - message( "total nr of deps: %i." , s.count_deps ); - message( "total nr of res: %i." , s.count_res ); - message( "total nr of locks: %i." , s.count_locks ); - message( "total nr of uses: %i." , s.count_uses ); - int counts[ task_type_count ]; - for ( k = 0 ; k < task_type_count ; k++ ) - counts[k] = 0; - for ( k = 0 ; k < s.count ; k++ ) - counts[ s.tasks[k].type ] += 1; - printf( "task counts: [ " ); - for ( k = 0 ; k < task_type_count ; k++ ) - printf( "%i " , counts[k] ); - printf( "].\n" ); - - /* Loop over the number of runs. */ - for ( k = 0 ; k < runs ; k++ ) { - - /* Execute the tasks. */ - tic = getticks(); - qsched_run( &s , nr_threads , runner ); - toc_run = getticks(); - message( "%ith run took %lli ticks..." , k , toc_run - tic ); - tot_run += toc_run - tic; - - } - - /* Dump the tasks. */ - /* for ( k = 0 ; k < s.count ; k++ ) - printf( " %i %i %lli %lli\n" , s.tasks[k].type , s.tasks[k].qid , s.tasks[k].tic , s.tasks[k].toc ); */ - - /* Dump the costs. */ - message( "costs: setup=%lli ticks, run=%lli ticks." , - tot_setup , tot_run/runs ); - - /* Dump the timers. */ - for ( k = 0 ; k < qsched_timer_count ; k++ ) - message( "timer %s is %lli ticks." , qsched_timer_names[k] , s.timers[k]/runs ); - - /* Dump the per-task type timers. */ - printf( "task timers: [ " ); - for ( k = 0 ; k < task_type_count ; k++ ) - printf( "%lli " , task_timers[k]/runs ); - printf( "] ticks.\n" ); - - /* Clean up. */ - qsched_free( &s ); - + legacy_tree_walk(N, parts, root, ICHECK, &countMultipoles, &countPairs, + &countCoMs); + toc_run = getticks(); + + /* Dump some timings. */ + message("%ith legacy run took %lli (= %e) ticks...", k, toc_run - tic, + (float)(toc_run - tic)); + tot_run += toc_run - tic; + // fprintf(fileTime, "%lli %e\n", toc_run - tic, (float)(toc_run - tic)); + } + +// fclose(fileTime); + +#if ICHECK >= 0 + message("[check] accel of part %i is [%.3e,%.3e,%.3e]", ICHECK, + root->parts[ICHECK].a[0], root->parts[ICHECK].a[1], + root->parts[ICHECK].a[2]); +#endif + printf("task counts: [ %8s %8s %8s %8s %8s ]\n", "self", "pair", "m-poles", + "direct", "CoMs"); + printf("task counts: [ %8i %8i %8i %8i %8i ] (legacy).\n", 0, 0, + countMultipoles, countPairs, countCoMs); + printf("task counts: [ "); + for (k = 0; k < task_type_count; k++) printf("%8i ", counts[k]); + printf("] (new).\n"); + + /* Loop over the number of runs. */ + for (k = 0; k < runs; k++) { + + for (i = 0; i < N; ++i) { + parts[i].a[0] = 0.0; + parts[i].a[1] = 0.0; + parts[i].a[2] = 0.0; } + /* Execute the tasks. */ + tic = getticks(); + qsched_run(&s, nr_threads, runner); + toc_run = getticks(); + message("%ith run took %lli (= %e) ticks...", k, toc_run - tic, + (float)(toc_run - tic)); + tot_run += toc_run - tic; + } + + message("[check] root mass= %f %f", root->legacy.mass, root->new.mass); + message("[check] root CoMx= %f %f", root->legacy.com[0], root->new.com[0]); + message("[check] root CoMy= %f %f", root->legacy.com[1], root->new.com[1]); + message("[check] root CoMz= %f %f", root->legacy.com[2], root->new.com[2]); +#if ICHECK >= 0 + for (i = 0; i < N; ++i) + if (root->parts[i].id == ICHECK) + message("[check] accel of part %i is [%.3e,%.3e,%.3e]", ICHECK, + root->parts[i].a[0], root->parts[i].a[1], root->parts[i].a[2]); +#endif + + /* Dump the tasks. */ + /* for ( k = 0 ; k < s.count ; k++ ) + printf( " %i %i %lli %lli\n" , s.tasks[k].type , s.tasks[k].qid , + s.tasks[k].tic , s.tasks[k].toc ); */ + + /* Dump the costs. */ + message("costs: setup=%lli ticks, run=%lli ticks.", tot_setup, + tot_run / runs); + + /* Dump the timers. */ + for (k = 0; k < qsched_timer_count; k++) + message("timer %s is %lli ticks.", qsched_timer_names[k], + s.timers[k] / runs); + + /* Dump the per-task type timers. */ + printf("task timers: [ "); + for (k = 0; k < task_type_count; k++) printf("%lli ", task_timers[k] / runs); + printf("] ticks.\n"); + + /* Dump the particles to a file */ + file = fopen("particle_dump.dat", "w"); + fprintf(file, + "# ID m x y z a_exact.x a_exact.y a_exact.z a_legacy.x " + "a_legacy.y a_legacy.z a_new.x a_new.y a_new.z\n"); + for (k = 0; k < N; ++k) + fprintf(file, "%d %e %e %e %e %e %e %e %e %e %e %e %e %e\n", parts[k].id, + parts[k].mass, parts[k].x[0], parts[k].x[1], parts[k].x[2], + parts[k].a_exact[0], parts[k].a_exact[1], parts[k].a_exact[2], + parts[k].a_legacy[0], parts[k].a_legacy[1], parts[k].a_legacy[2], + parts[k].a[0], parts[k].a[1], parts[k].a[2]); + fclose(file); + + /* Clean up. */ + qsched_free(&s); + free(parts); +} /** * @brief Main function. */ - -int main ( int argc , char *argv[] ) { - int c, nr_threads; - int N = 1000, runs = 1; - - /* Get the number of threads. */ - #pragma omp parallel shared(nr_threads) - { - if ( omp_get_thread_num() == 0 ) - nr_threads = omp_get_num_threads(); - } - - /* Parse the options */ - while ( ( c = getopt( argc , argv , "n:r:t:" ) ) != -1 ) - switch( c ) { - case 'n': - if ( sscanf( optarg , "%d" , &N ) != 1 ) - error( "Error parsing number of particles." ); - break; - case 'r': - if ( sscanf( optarg , "%d" , &runs ) != 1 ) - error( "Error parsing number of runs." ); - break; - case 't': - if ( sscanf( optarg , "%d" , &nr_threads ) != 1 ) - error( "Error parsing number of threads." ); - omp_set_num_threads( nr_threads ); - break; - case '?': - fprintf( stderr , "Usage: %s [-t nr_threads] [-n N] [-r runs]\n" , argv[0] ); - fprintf( stderr , "Solves the N-body problem using a Barnes-Hutt\n" - "tree code with N random particles in [0,1]^3 using\n" - "nr_threads threads.\n" ); - exit( EXIT_FAILURE ); - } - - /* Dump arguments. */ - message( "Computing the N-body problem over %i particles using %i threads (%i runs)." , - N , nr_threads , runs ); - - /* Run the test. */ - test_bh( N , nr_threads, runs ); - - return 0; - +int main(int argc, char *argv[]) { + + int c, nr_threads; + int N = 1000, runs = 1; + char fileName[100] = {0}; + + /* Die on FP-exceptions. */ + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); + +/* Get the number of threads. */ +#pragma omp parallel shared(nr_threads) + { + if (omp_get_thread_num() == 0) nr_threads = omp_get_num_threads(); + } + + /* Parse the options */ + while ((c = getopt(argc, argv, "n:r:t:f:c:i:")) != -1) switch (c) { + case 'n': + if (sscanf(optarg, "%d", &N) != 1) + error("Error parsing number of particles."); + break; + case 'r': + if (sscanf(optarg, "%d", &runs) != 1) + error("Error parsing number of runs."); + break; + case 't': + if (sscanf(optarg, "%d", &nr_threads) != 1) + error("Error parsing number of threads."); + omp_set_num_threads(nr_threads); + break; + case 'f': + if (sscanf(optarg, "%s", &fileName[0]) != 1) + error("Error parsing file name."); + break; + case '?': + fprintf(stderr, "Usage: %s [-t nr_threads] [-n N] [-r runs] [-f file] " + "[-c Nparts] [-i Niterations] \n", + argv[0]); + fprintf(stderr, "Solves the N-body problem using a Barnes-Hut\n" + "tree code with N random particles read from a file in " + "[0,1]^3 using" + "nr_threads threads.\n" + "A test of the neighbouring cells interaction with " + "Nparts per cell is also run Niterations times.\n"); + exit(EXIT_FAILURE); } - - + + /* Tree node information */ + printf("Size of cell: %zu bytes.\n", sizeof(struct cell)); + + /* Part information */ + printf("Size of part: %zu bytes.\n", sizeof(struct part)); + + /* Dump arguments. */ + if (fileName[0] == 0) { + message("Computing the N-body problem over %i random particles using %i " + "threads (%i runs).", + N, nr_threads, runs); + } else { + message("Computing the N-body problem over %i particles read from '%s' " + "using %i threads (%i runs).", + N, fileName, nr_threads, runs); + } + + /* Run the BH test. */ + test_bh(N, nr_threads, runs, fileName); + + return 0; +} diff --git a/examples/test_bh.cu b/examples/test_bh.cu index f269cd47bf8de54327dbac53a1acef6fbef05821..cfc5af5e9802a1a85e1431b793c78eee7d4b5a73 100644 --- a/examples/test_bh.cu +++ b/examples/test_bh.cu @@ -128,12 +128,12 @@ __device__ struct part *parts; #ifdef TODO +__device__ double *part_x0; __device__ double *part_x1; __device__ double *part_x2; -__device__ double *part_x3; +__device__ float *part_a0; __device__ float *part_a1; __device__ float *part_a2; -__device__ float *part_a3; __device__ float *part_mass; #endif @@ -243,10 +243,10 @@ __device__ void comp_com_CUDA(struct cell *c) { } else { for (k = threadIdx.x; k < count; k+= blockDim.x) { - float p_mass = parts[k].mass; - com[0] += parts[k].x[0] * p_mass; - com[1] += parts[k].x[1] * p_mass; - com[2] += parts[k].x[2] * p_mass; + float p_mass = part_mass[k]; + com[0] += part_x0[k] * p_mass; + com[1] += part_x1[k] * p_mass; + com[2] += part_x2[k] * p_mass; mass += p_mass; } reduceSumDouble(com); @@ -308,11 +308,12 @@ __device__ __forceinline__ int are_neighbours_CUDA(struct cell *ci, struct cell * @param ci The #cell containing the particles. * @param cj The #cell containing the other particles */ -__device__ __forceinline__ void iact_pair_direct_CUDA(struct cell *ci, struct cell *cj) +__device__ __forceinline__ void iact_pair_direct_CUDA(struct cell_device *ci, struct cell_device *cj) { int i, j, k; int count_i = ci->count, count_j = cj->count; - struct part *parts_i = ci->parts, *parts_j = cj->parts; + //struct part *parts_i = ci->parts, *parts_j = cj->parts; + int parts_it = ci->part_loc, parts_jt = cj->part_loc; double xi[3]; float dx[3], ai[3], r2, w, ir; @@ -328,8 +329,10 @@ __device__ __forceinline__ void iact_pair_direct_CUDA(struct cell *ci, struct ce for(i=0; i < count_i; i+= blockDim.x) { /* Init the ith particle's data. */ + xi[0] = part_x0[parts_it+i]; + xi[1] = part_x1[parts_it+i]; + xi[2] = part_x2[parts_it+i]; for (k = 0; k < 3; k++) { - xi[k] = parts_i[i].x[k]; ai[k] = 0.0f; } // mi = parts_i[i].mass; @@ -337,19 +340,24 @@ __device__ __forceinline__ void iact_pair_direct_CUDA(struct cell *ci, struct ce for(j = 0; j < count_j; j++) { /* Compute the pairwise distance. */ - for (r2 = 0.0, k = 0; k < 3; k++) { - dx[k] = xi[k] - parts_j[j].x[k]; - r2 += dx[k] * dx[k]; - } +// for (r2 = 0.0, k = 0; k < 3; k++) { + dx[0] = xi[0] - part_x0[parts_jt+j]; + dx[1] = xi[1] - part_x1[parts_jt+j]; + dx[2] = xi[2] - part_x2[parts_jt+j]; + r2 = dx[0] * dx[0]; + r2 += dx[1] * dx[1]; + r2 += dx[2] * dx[2]; + // } /* Apply the gravitational acceleration. */ ir = 1.0f / sqrtf(r2); w = const_G * ir * ir * ir; for(k = 0; k < 3; k++) { - ai[k] = w * dx[k] * parts_j[j].mass; + ai[k] += w * dx[k] * part_mass[parts_jt+j];; } } - - for (k = 0; k < 3; k++) atomicAdd( &parts[i].a[k] , ai[k]); + atomicAdd(&parts_a0[parts_it+i], ai[0]); + atomicAdd(&parts_a1[parts_it+i], ai[1]); + atomicAdd(&parts_a2[parts_it+i], ai[2]); }/* Loop over particles in cell i.*/ } @@ -403,12 +411,13 @@ __device__ void iact_pair_CUDA(struct cell *ci, struct cell *cj) { } } -__device__ void iact_self_direct_CUDA(struct cell *c) +__device__ void iact_self_direct_CUDA(struct cell_device *c) { int i, j, k, count = c->count; double xi[3] = {0.0, 0.0, 0.0}; - float ai[3] = {0.0, 0.0, 0.0}, mj, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; + // float ai[3] = {0.0, 0.0, 0.0}, mj, dx[3] = {0.0, 0.0, 0.0}, r2, ir, w; struct part *parts = c->parts; + int partst = c->part_loc; struct cell *cp, *cps; #ifdef SANITY_CHECKS @@ -442,7 +451,7 @@ __device__ void iact_self_direct_CUDA(struct cell *c) } // mi = parts[i].mass; - /* Loop all particles - note this was an improvement in quicksched so we use it here. */ + /* Loop all particles - note this was an improvement in mdcore so we use it here. */ for(j = 0; j < count; j++) { /* Compute the pairwise distance. */ @@ -1045,13 +1054,6 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { parts[k].a_legacy[0] = 0.0; parts[k].a_legacy[1] = 0.0; parts[k].a_legacy[2] = 0.0; - partsx0[k] = parts[k].x[0]; - partsx1[k] = parts[k].x[1]; - partsx2[k] = parts[k].x[2]; - partsmass[k] = parts[k].mass; - partsa0[k] = 0.0f; - partsa1[k] = 0.0f; - partsa2[k] = 0.0f; } /* Otherwise, read them from a file. */ @@ -1104,6 +1106,17 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { } message("Average number of parts per leaf is %f.", ((double)N) / nr_leaves); + for(k = 0; k < N; k++) + { + partsx0[k] = parts[k].x[0]; + partsx1[k] = parts[k].x[1]; + partsx2[k] = parts[k].x[2]; + partsmass[k] = parts[k].mass; + partsa0[k] = 0.0f; + partsa1[k] = 0.0f; + partsa2[k] = 0.0f; + } + #if ICHECK > 0 printf("----------------------------------------------------------\n"); @@ -1160,6 +1173,9 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { parts[i].a[0] = 0.0; parts[i].a[1] = 0.0; parts[i].a[2] = 0.0; + partsa0[k] = 0.0f; + partsa1[k] = 0.0f; + partsa2[k] = 0.0f; } /* Execute the tasks. */ diff --git a/examples/test_bh_2.cu b/examples/test_bh_2.cu index 534b9b38fa3ff18c2b1e1f33617585622a28fbb1..be5dcd4ba73a655bd6d43cb27befa0cdc06aaaf8 100644 --- a/examples/test_bh_2.cu +++ b/examples/test_bh_2.cu @@ -479,5 +479,113 @@ void test_bh(int N, int runs, char *fileName) { //Create host particle arrays. if( cudaMallocHost(&parts_pos_xy_host, sizeof(double2) * N) != cudaSuccess) - error("Failed to allocated parts array"); + error("Failed to allocate parts array"); + if( cudaMallocHost(&parts_pos_z_host, sizeof(double) * N) != cudaSuccess) + error("Failed to allocate parts array"); + if( cudaMallocHost( &parts_a_m_host, sizeof(float4) * N) != cudaSuccess ) + error("Failed to allocate parts array"); + + if(fileName == NULL || fileName[0] == 0) { + for(k = 0; k < N; k++) { + parts_pos_xy_host.x = ((double)rand())/ RAND_MAX; + parts_pos_xy_host.y = ((double)rand())/ RAND_MAX; + parts_pos_z_host = ((double)rand())/ RAND_MAX; + parts_a_m_host.w = ((double)rand()) / RAND_MAX; + } + + } else { + file = fopen(fileName, "r"); + if(file) { + for(k = 0; k < N; k++) { + if(fscanf(file, "%lf%lf%lf", &parts_pos_xy_host.x, &parts_pos_xy_host.y, &parts_pos_z_host) != 3) + error("Failed to read position of part %i.", k); + if(fscanf(file, "%f", &parts[k].mass != 1) + error("Failed to read mass of part %i.", k); + } + fclose(file); + } + } + + /* Init the cells. */ + root = cell_get(); + root->loc_xy.x = 0.0; + root->loc_xy.y = 0.0; + root->loc_z = 0.0; + root->h = 1.0; + root->count = N; + root->parts = 0; + cell_split(root, &s); + + int c = root - cell_pool; + int nr_leaves = 0; + while(c >= 0) { + if(!cell_pool[c].split) { + nr_leaves++; + c = c->sibling; + } else { + c = c->firstchild; + } + } + + message("Average number of parts per leaf is %lf.", ((double)N) / ((double)nr_leaves)); + +} + + +int main(int argc, char *argv[]) { + int c, nr_threads; + int N = 1000, runs = 1; + char fileName[100] = {0}; + + /* Parse the options */ + while ((c = getopt(argc, argv, "n:r:t:f:c:i:")) != -1) switch (c) { + case 'n': + if (sscanf(optarg, "%d", &N) != 1) + error("Error parsing number of particles."); + break; + case 'r': + if (sscanf(optarg, "%d", &runs) != 1) + error("Error parsing number of runs."); + break; + case 't': + if (sscanf(optarg, "%d", &nr_threads) != 1) + error("Error parsing number of threads."); +// omp_set_num_threads(nr_threads); + break; + case 'f': + if (sscanf(optarg, "%s", &fileName[0]) != 1) + error("Error parsing file name."); + break; + case '?': + fprintf(stderr, "Usage: %s [-t nr_threads] [-n N] [-r runs] [-f file] " + "[-c Nparts] [-i Niterations] \n", + argv[0]); + fprintf(stderr, "Solves the N-body problem using a Barnes-Hut\n" + "tree code with N random particles read from a file in " + "[0,1]^3 using" + "nr_threads threads.\n" + "A test of the neighbouring cells interaction with " + "Nparts per cell is also run Niterations times.\n"); + exit(EXIT_FAILURE); + } + + /* Tree node information */ + printf("Size of cell: %zu bytes.\n", sizeof(struct cell)); + + /* Part information */ + printf("Size of part: %zu bytes.\n", sizeof(struct part)); + + /* Dump arguments. */ + if (fileName[0] == 0) { + message("Computing the N-body problem over %i random particles using %i " + "threads (%i runs).", + N, nr_threads, runs); + } else { + message("Computing the N-body problem over %i particles read from '%s' " + "using %i threads (%i runs).", + N, fileName, nr_threads, runs); + } + + /* Run the BH test. */ + test_bh(N, nr_threads, runs, fileName); } diff --git a/src/Makefile.in b/src/Makefile.in deleted file mode 100644 index 4a3e029bb2bceef56d7fcdaf5167ff8d03c78f52..0000000000000000000000000000000000000000 --- a/src/Makefile.in +++ /dev/null @@ -1,659 +0,0 @@ -# Makefile.in generated by automake 1.11.1 from Makefile.am. -# @configure_input@ - -# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, -# Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - -@SET_MAKE@ - -# This file is part of Quickqsched. -# Coypright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk), -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - - -VPATH = @srcdir@ -pkgdatadir = $(datadir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkglibexecdir = $(libexecdir)/@PACKAGE@ -am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd -install_sh_DATA = $(install_sh) -c -m 644 -install_sh_PROGRAM = $(install_sh) -c -install_sh_SCRIPT = $(install_sh) -c -INSTALL_HEADER = $(INSTALL_DATA) -transform = $(program_transform_name) -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -build_triplet = @build@ -host_triplet = @host@ - -# Build a CUDA-enabled version too? -@HAVE_CUDA_TRUE@am__append_1 = libquicksched_cuda.la -subdir = src -DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \ - $(srcdir)/Makefile.in -ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \ - $(top_srcdir)/m4/ax_check_compile_flag.m4 \ - $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ - $(top_srcdir)/m4/ax_ext.m4 \ - $(top_srcdir)/m4/ax_func_posix_memalign.m4 \ - $(top_srcdir)/m4/ax_gcc_archflag.m4 \ - $(top_srcdir)/m4/ax_gcc_x86_cpuid.m4 \ - $(top_srcdir)/m4/ax_openmp.m4 \ - $(top_srcdir)/m4/ax_prog_doxygen.m4 \ - $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \ - $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ - $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.in -am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ - $(ACLOCAL_M4) -mkinstalldirs = $(install_sh) -d -CONFIG_HEADER = $(top_builddir)/config.h -CONFIG_CLEAN_FILES = -CONFIG_CLEAN_VPATH_FILES = -am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; -am__vpath_adj = case $$p in \ - $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ - *) f=$$p;; \ - esac; -am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; -am__install_max = 40 -am__nobase_strip_setup = \ - srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` -am__nobase_strip = \ - for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" -am__nobase_list = $(am__nobase_strip_setup); \ - for p in $$list; do echo "$$p $$p"; done | \ - sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ - $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ - if (++n[$$2] == $(am__install_max)) \ - { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ - END { for (dir in files) print dir, files[dir] }' -am__base_list = \ - sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ - sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' -am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" -LTLIBRARIES = $(lib_LTLIBRARIES) -libquicksched_la_LIBADD = -am_libquicksched_la_OBJECTS = qsched.lo queue.lo -libquicksched_la_OBJECTS = $(am_libquicksched_la_OBJECTS) -libquicksched_cuda_la_LIBADD = -am__libquicksched_cuda_la_SOURCES_DIST = qsched.c queue.c \ - cuda_queue.cu -@HAVE_CUDA_TRUE@am__objects_1 = libquicksched_cuda_la-qsched.lo \ -@HAVE_CUDA_TRUE@ libquicksched_cuda_la-queue.lo cuda_queue.lo -@HAVE_CUDA_TRUE@am_libquicksched_cuda_la_OBJECTS = $(am__objects_1) -libquicksched_cuda_la_OBJECTS = $(am_libquicksched_cuda_la_OBJECTS) -libquicksched_cuda_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ - $(LIBTOOLFLAGS) --mode=link $(CCLD) \ - $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ - $(LDFLAGS) -o $@ -@HAVE_CUDA_TRUE@am_libquicksched_cuda_la_rpath = -rpath $(libdir) -DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -depcomp = $(SHELL) $(top_srcdir)/depcomp -am__depfiles_maybe = depfiles -am__mv = mv -f -COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ - $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ - --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -CCLD = $(CC) -LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ - --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ - $(LDFLAGS) -o $@ -SOURCES = $(libquicksched_la_SOURCES) $(libquicksched_cuda_la_SOURCES) -DIST_SOURCES = $(libquicksched_la_SOURCES) \ - $(am__libquicksched_cuda_la_SOURCES_DIST) -HEADERS = $(include_HEADERS) -ETAGS = etags -CTAGS = ctags -DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) -ACLOCAL = @ACLOCAL@ -AMTAR = @AMTAR@ -AR = @AR@ -AUTOCONF = @AUTOCONF@ -AUTOHEADER = @AUTOHEADER@ -AUTOMAKE = @AUTOMAKE@ -AWK = @AWK@ -CC = @CC@ -CCDEPMODE = @CCDEPMODE@ -CFLAGS = @CFLAGS@ -CPP = @CPP@ -CPPFLAGS = @CPPFLAGS@ -CUDA_CFLAGS = @CUDA_CFLAGS@ -CUDA_LIBS = @CUDA_LIBS@ -CYGPATH_W = @CYGPATH_W@ -DEFS = @DEFS@ -DEPDIR = @DEPDIR@ -DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ -DSYMUTIL = @DSYMUTIL@ -DUMPBIN = @DUMPBIN@ -DX_CONFIG = @DX_CONFIG@ -DX_DOCDIR = @DX_DOCDIR@ -DX_DOT = @DX_DOT@ -DX_DOXYGEN = @DX_DOXYGEN@ -DX_DVIPS = @DX_DVIPS@ -DX_EGREP = @DX_EGREP@ -DX_ENV = @DX_ENV@ -DX_FLAG_chi = @DX_FLAG_chi@ -DX_FLAG_chm = @DX_FLAG_chm@ -DX_FLAG_doc = @DX_FLAG_doc@ -DX_FLAG_dot = @DX_FLAG_dot@ -DX_FLAG_html = @DX_FLAG_html@ -DX_FLAG_man = @DX_FLAG_man@ -DX_FLAG_pdf = @DX_FLAG_pdf@ -DX_FLAG_ps = @DX_FLAG_ps@ -DX_FLAG_rtf = @DX_FLAG_rtf@ -DX_FLAG_xml = @DX_FLAG_xml@ -DX_HHC = @DX_HHC@ -DX_LATEX = @DX_LATEX@ -DX_MAKEINDEX = @DX_MAKEINDEX@ -DX_PDFLATEX = @DX_PDFLATEX@ -DX_PERL = @DX_PERL@ -DX_PROJECT = @DX_PROJECT@ -ECHO_C = @ECHO_C@ -ECHO_N = @ECHO_N@ -ECHO_T = @ECHO_T@ -EGREP = @EGREP@ -EXEEXT = @EXEEXT@ -FGREP = @FGREP@ -GREP = @GREP@ -INSTALL = @INSTALL@ -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ -LD = @LD@ -LDFLAGS = @LDFLAGS@ -LIBOBJS = @LIBOBJS@ -LIBS = @LIBS@ -LIBTOOL = @LIBTOOL@ -LIPO = @LIPO@ -LN_S = @LN_S@ -LTLIBOBJS = @LTLIBOBJS@ -MAKEINFO = @MAKEINFO@ -MKDIR_P = @MKDIR_P@ -NM = @NM@ -NMEDIT = @NMEDIT@ -NVCC = @NVCC@ -OBJDUMP = @OBJDUMP@ -OBJEXT = @OBJEXT@ -OPENMP_CFLAGS = @OPENMP_CFLAGS@ -OTOOL = @OTOOL@ -OTOOL64 = @OTOOL64@ -PACKAGE = @PACKAGE@ -PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ -PACKAGE_NAME = @PACKAGE_NAME@ -PACKAGE_STRING = @PACKAGE_STRING@ -PACKAGE_TARNAME = @PACKAGE_TARNAME@ -PACKAGE_VERSION = @PACKAGE_VERSION@ -PATH_SEPARATOR = @PATH_SEPARATOR@ -PRTDIAG = @PRTDIAG@ -PTHREAD_CC = @PTHREAD_CC@ -PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ -PTHREAD_LIBS = @PTHREAD_LIBS@ -RANLIB = @RANLIB@ -SED = @SED@ -SET_MAKE = @SET_MAKE@ -SHELL = @SHELL@ -SIMD_FLAGS = @SIMD_FLAGS@ -STRIP = @STRIP@ -VERSION = @VERSION@ -abs_builddir = @abs_builddir@ -abs_srcdir = @abs_srcdir@ -abs_top_builddir = @abs_top_builddir@ -abs_top_srcdir = @abs_top_srcdir@ -ac_ct_CC = @ac_ct_CC@ -ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ -acx_pthread_config = @acx_pthread_config@ -am__include = @am__include@ -am__leading_dot = @am__leading_dot@ -am__quote = @am__quote@ -am__tar = @am__tar@ -am__untar = @am__untar@ -bindir = @bindir@ -build = @build@ -build_alias = @build_alias@ -build_cpu = @build_cpu@ -build_os = @build_os@ -build_vendor = @build_vendor@ -builddir = @builddir@ -datadir = @datadir@ -datarootdir = @datarootdir@ -docdir = @docdir@ -dvidir = @dvidir@ -exec_prefix = @exec_prefix@ -host = @host@ -host_alias = @host_alias@ -host_cpu = @host_cpu@ -host_os = @host_os@ -host_vendor = @host_vendor@ -htmldir = @htmldir@ -includedir = @includedir@ -infodir = @infodir@ -install_sh = @install_sh@ -libdir = @libdir@ -libexecdir = @libexecdir@ -localedir = @localedir@ -localstatedir = @localstatedir@ -lt_ECHO = @lt_ECHO@ -mandir = @mandir@ -mkdir_p = @mkdir_p@ -oldincludedir = @oldincludedir@ -pdfdir = @pdfdir@ -prefix = @prefix@ -program_transform_name = @program_transform_name@ -psdir = @psdir@ -sbindir = @sbindir@ -sharedstatedir = @sharedstatedir@ -srcdir = @srcdir@ -sysconfdir = @sysconfdir@ -target_alias = @target_alias@ -top_build_prefix = @top_build_prefix@ -top_builddir = @top_builddir@ -top_srcdir = @top_srcdir@ - -# Automake stuff -AUTOMAKE_OPTIONS = gnu - -# Add the debug flag to the whole thing -AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \ - -funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) -DTIMERS \ - #-fsanitize=address -fno-omit-frame-pointer - - -# Assign a "safe" version number -AM_LDFLAGS = -version-info 0:0:0 - -# Build the libquicksched library -lib_LTLIBRARIES = libquicksched.la $(am__append_1) -libquicksched_la_SOURCES = qsched.c queue.c - -# List required headers -include_HEADERS = atomic.h lock.h queue.h qsched.h task.h res.h error.h qsched.h -@HAVE_CUDA_FALSE@SOURCES_CUDA = - -# CUDA sources -@HAVE_CUDA_TRUE@SOURCES_CUDA = qsched.c queue.c cuda_queue.cu -@HAVE_CUDA_TRUE@CUDA_MYFLAGS = -O3 -g -DCPU_TPS=3.1e9 -Xnvlink -rdc=true -lineinfo -src-in-ptx --maxrregcount=32 -Xptxas="-v" -Xptxas -dlcm=cg -gencode arch=compute_30,code=sm_30 -ftz=true -fmad=true -DFPTYPE_SINGLE -DWITH_CUDA #-fsanitize=address -fno-omit-frame-pointer -@HAVE_CUDA_TRUE@libquicksched_cuda_la_SOURCES = $(SOURCES_CUDA) -@HAVE_CUDA_TRUE@libquicksched_cuda_la_CFLAGS = -DFPTYPE_SINGLE $(AM_CFLAGS) -DWITH_CUDA $(CUDA_CFLAGS) -all: all-am - -.SUFFIXES: -.SUFFIXES: .c .cu .lo .o .obj -$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) - @for dep in $?; do \ - case '$(am__configure_deps)' in \ - *$$dep*) \ - ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ - && { if test -f $@; then exit 0; else break; fi; }; \ - exit 1;; \ - esac; \ - done; \ - echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \ - $(am__cd) $(top_srcdir) && \ - $(AUTOMAKE) --gnu src/Makefile -.PRECIOUS: Makefile -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - @case '$?' in \ - *config.status*) \ - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ - *) \ - echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ - cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ - esac; - -$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh - -$(top_srcdir)/configure: $(am__configure_deps) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): $(am__aclocal_m4_deps) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(am__aclocal_m4_deps): -install-libLTLIBRARIES: $(lib_LTLIBRARIES) - @$(NORMAL_INSTALL) - test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)" - @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ - list2=; for p in $$list; do \ - if test -f $$p; then \ - list2="$$list2 $$p"; \ - else :; fi; \ - done; \ - test -z "$$list2" || { \ - echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ - $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ - } - -uninstall-libLTLIBRARIES: - @$(NORMAL_UNINSTALL) - @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ - for p in $$list; do \ - $(am__strip_dir) \ - echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ - $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ - done - -clean-libLTLIBRARIES: - -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) - @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ - dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ - test "$$dir" != "$$p" || dir=.; \ - echo "rm -f \"$${dir}/so_locations\""; \ - rm -f "$${dir}/so_locations"; \ - done -libquicksched.la: $(libquicksched_la_OBJECTS) $(libquicksched_la_DEPENDENCIES) - $(LINK) -rpath $(libdir) $(libquicksched_la_OBJECTS) $(libquicksched_la_LIBADD) $(LIBS) -libquicksched_cuda.la: $(libquicksched_cuda_la_OBJECTS) $(libquicksched_cuda_la_DEPENDENCIES) - $(libquicksched_cuda_la_LINK) $(am_libquicksched_cuda_la_rpath) $(libquicksched_cuda_la_OBJECTS) $(libquicksched_cuda_la_LIBADD) $(LIBS) - -mostlyclean-compile: - -rm -f *.$(OBJEXT) - -distclean-compile: - -rm -f *.tab.c - -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libquicksched_cuda_la-qsched.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libquicksched_cuda_la-queue.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/qsched.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/queue.Plo@am__quote@ - -.c.o: -@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< -@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(COMPILE) -c $< - -.c.obj: -@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` -@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` - -.c.lo: -@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< -@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< - -libquicksched_cuda_la-qsched.lo: qsched.c -@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) -MT libquicksched_cuda_la-qsched.lo -MD -MP -MF $(DEPDIR)/libquicksched_cuda_la-qsched.Tpo -c -o libquicksched_cuda_la-qsched.lo `test -f 'qsched.c' || echo '$(srcdir)/'`qsched.c -@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libquicksched_cuda_la-qsched.Tpo $(DEPDIR)/libquicksched_cuda_la-qsched.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='qsched.c' object='libquicksched_cuda_la-qsched.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) -c -o libquicksched_cuda_la-qsched.lo `test -f 'qsched.c' || echo '$(srcdir)/'`qsched.c - -libquicksched_cuda_la-queue.lo: queue.c -@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) -MT libquicksched_cuda_la-queue.lo -MD -MP -MF $(DEPDIR)/libquicksched_cuda_la-queue.Tpo -c -o libquicksched_cuda_la-queue.lo `test -f 'queue.c' || echo '$(srcdir)/'`queue.c -@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libquicksched_cuda_la-queue.Tpo $(DEPDIR)/libquicksched_cuda_la-queue.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='queue.c' object='libquicksched_cuda_la-queue.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libquicksched_cuda_la_CFLAGS) $(CFLAGS) -c -o libquicksched_cuda_la-queue.lo `test -f 'queue.c' || echo '$(srcdir)/'`queue.c - -mostlyclean-libtool: - -rm -f *.lo - -clean-libtool: - -rm -rf .libs _libs -install-includeHEADERS: $(include_HEADERS) - @$(NORMAL_INSTALL) - test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)" - @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ - for p in $$list; do \ - if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ - echo "$$d$$p"; \ - done | $(am__base_list) | \ - while read files; do \ - echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \ - $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ - done - -uninstall-includeHEADERS: - @$(NORMAL_UNINSTALL) - @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ - files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ - test -n "$$files" || exit 0; \ - echo " ( cd '$(DESTDIR)$(includedir)' && rm -f" $$files ")"; \ - cd "$(DESTDIR)$(includedir)" && rm -f $$files - -ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - mkid -fID $$unique -tags: TAGS - -TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - set x; \ - here=`pwd`; \ - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - shift; \ - if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ - test -n "$$unique" || unique=$$empty_fix; \ - if test $$# -gt 0; then \ - $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ - "$$@" $$unique; \ - else \ - $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ - $$unique; \ - fi; \ - fi -ctags: CTAGS -CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - test -z "$(CTAGS_ARGS)$$unique" \ - || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ - $$unique - -GTAGS: - here=`$(am__cd) $(top_builddir) && pwd` \ - && $(am__cd) $(top_srcdir) \ - && gtags -i $(GTAGS_ARGS) "$$here" - -distclean-tags: - -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags - -distdir: $(DISTFILES) - @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - list='$(DISTFILES)'; \ - dist_files=`for file in $$list; do echo $$file; done | \ - sed -e "s|^$$srcdirstrip/||;t" \ - -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ - case $$dist_files in \ - */*) $(MKDIR_P) `echo "$$dist_files" | \ - sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ - sort -u` ;; \ - esac; \ - for file in $$dist_files; do \ - if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ - if test -d $$d/$$file; then \ - dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ - if test -d "$(distdir)/$$file"; then \ - find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ - fi; \ - if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ - cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ - find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ - fi; \ - cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ - else \ - test -f "$(distdir)/$$file" \ - || cp -p $$d/$$file "$(distdir)/$$file" \ - || exit 1; \ - fi; \ - done -check-am: all-am -check: check-am -all-am: Makefile $(LTLIBRARIES) $(HEADERS) -installdirs: - for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"; do \ - test -z "$$dir" || $(MKDIR_P) "$$dir"; \ - done -install: install-am -install-exec: install-exec-am -install-data: install-data-am -uninstall: uninstall-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am - -installcheck: installcheck-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - `test -z '$(STRIP)' || \ - echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) - -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) - -maintainer-clean-generic: - @echo "This command is intended for maintainers to use" - @echo "it deletes files that may require special tools to rebuild." -clean: clean-am - -clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ - mostlyclean-am - -distclean: distclean-am - -rm -rf ./$(DEPDIR) - -rm -f Makefile -distclean-am: clean-am distclean-compile distclean-generic \ - distclean-tags - -dvi: dvi-am - -dvi-am: - -html: html-am - -html-am: - -info: info-am - -info-am: - -install-data-am: install-includeHEADERS - -install-dvi: install-dvi-am - -install-dvi-am: - -install-exec-am: install-libLTLIBRARIES - -install-html: install-html-am - -install-html-am: - -install-info: install-info-am - -install-info-am: - -install-man: - -install-pdf: install-pdf-am - -install-pdf-am: - -install-ps: install-ps-am - -install-ps-am: - -installcheck-am: - -maintainer-clean: maintainer-clean-am - -rm -rf ./$(DEPDIR) - -rm -f Makefile -maintainer-clean-am: distclean-am maintainer-clean-generic - -mostlyclean: mostlyclean-am - -mostlyclean-am: mostlyclean-compile mostlyclean-generic \ - mostlyclean-libtool - -pdf: pdf-am - -pdf-am: - -ps: ps-am - -ps-am: - -uninstall-am: uninstall-includeHEADERS uninstall-libLTLIBRARIES - -.MAKE: install-am install-strip - -.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ - clean-libLTLIBRARIES clean-libtool ctags distclean \ - distclean-compile distclean-generic distclean-libtool \ - distclean-tags distdir dvi dvi-am html html-am info info-am \ - install install-am install-data install-data-am install-dvi \ - install-dvi-am install-exec install-exec-am install-html \ - install-html-am install-includeHEADERS install-info \ - install-info-am install-libLTLIBRARIES install-man install-pdf \ - install-pdf-am install-ps install-ps-am install-strip \ - installcheck installcheck-am installdirs maintainer-clean \ - maintainer-clean-generic mostlyclean mostlyclean-compile \ - mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ - tags uninstall uninstall-am uninstall-includeHEADERS \ - uninstall-libLTLIBRARIES - -@HAVE_CUDA_TRUE@.cu: qsched.c queue.c -@HAVE_CUDA_TRUE@.cu.o: -@HAVE_CUDA_TRUE@ $(NVCC) -c $(NVCCFLAGS) $(CUDA_CFLAGS) $(CUDA_MYFLAGS) $< -o $@ -@HAVE_CUDA_TRUE@.cu.lo: -@HAVE_CUDA_TRUE@ $(top_srcdir)/cudalt.py $@ $(NVCC) -c $(NVCCFLAGS) $(CUDA_CFLAGS) $(CUDA_MYFLAGS) $< - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: