diff --git a/examples/Makefile.am b/examples/Makefile.am index 9708f27ed737f34bc9bb425d4b6bb0529b8feb8a..11013c9414642beb4672e8cec435854ef39507ec 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -24,6 +24,9 @@ AM_CFLAGS = -g -O3 -Wall -Werror -I../src -ffast-math -fstrict-aliasing \ -ftree-vectorize -funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) \ -DCPU_TPS=2.67e9 -DTIMERS \ # -fsanitize=address -fno-omit-frame-pointer +# AM_CFLAGS = -g -O0 -Wall -Werror -I../src \ +# -DCPU_TPS=2.67e9 -DTIMERS $(OPENMP_CFLAGS) \ +# -fsanitize=address -fno-omit-frame-pointer AM_LDFLAGS = -lm # -fsanitize=address diff --git a/examples/mkplots_tasks.m b/examples/mkplots_tasks.m index 9812bfa5a82adc111866e0065e0c9f868524e175..ba585eb8d4c61435a652a28e190b895f3fd914b7 100644 --- a/examples/mkplots_tasks.m +++ b/examples/mkplots_tasks.m @@ -72,9 +72,9 @@ end hold off; xlabel('time (ms)'); ylabel('core ID'); -set(gca,'YTick',0:(max(tasks(:,2)))) +set(gca,'YTick',0:8:(max(tasks(:,2)))) title('QuickSched tiled QR decomposition tasks'); -axis([ 0 , 110 , -0.5 , nr_cores-0.5 ]); +axis([ 0 , 250 , -0.5 , nr_cores-0.5 ]); % Print this plot set( gcf , 'PaperSize' , 2.3*[ 16 4 ] ); @@ -108,9 +108,9 @@ end hold off; xlabel('time (ms)'); ylabel('core ID'); -set(gca,'YTick',0:(max(tasks(:,1)))) +set(gca,'YTick',0:8:(max(tasks(:,1)))) title('OmpSs tiled QR decomposition tasks'); -axis([ 0 , 110 , -0.5 , nr_cores-0.5 ]); +axis([ 0 , 250 , -0.5 , nr_cores-0.5 ]); % Print this plot set( gcf , 'PaperSize' , 2.3*[ 16 4 ] ); @@ -123,7 +123,7 @@ print -depsc2 figures/tasks_qr_ompss.eps %% Plot the task timelines for tasks allocation % Load the data -tasks = importdata( 'test.dump' ); +tasks = dlmread( 'test_bh_sorted.tasks' ); tasks(:,4) = ( tasks(:,4) - tasks(:,3) ) * tpms; start = min( tasks(:,3) ); tasks(:,3) = ( tasks(:,3) - start ) * tpms; @@ -132,7 +132,11 @@ nr_cores = max( tasks(:,2) ) + 1; % Init the plot clf; subplot('position',[ 0.05 , 0.1 , 0.9 , 0.8 ]); -colours = [ 1 0 0 ; 0 1 0 ; 0 0 1 ; 1 1 0 ]; +colours = [ 1 0 0 ; 0 1 0 ; 0 0 1 ; 1 1 0 ; 0 1 1 ]; +xlabel('time (ms)'); +ylabel('core ID'); +title('Barnes-Hut tasks'); +axis([ 0 , max( tasks(:,3) + tasks(:,4) ) , -0.5 , nr_cores-0.5 ]); hold on; % Plot the tasks @@ -144,11 +148,6 @@ end % Set the axes and stuff. hold off; -xlabel('time (ms)'); -ylabel('core ID'); -set(gca,'YTick',0:(max(tasks(:,1)))) -title('Barnes-Hut tasks'); -axis([ 0 , max( tasks(:,3) + tasks(:,4) ) , -0.5 , nr_cores-0.5 ]); % Print this plot set( gcf , 'PaperSize' , 2.3*[ 16 4 ] ); diff --git a/examples/test_bh.c b/examples/test_bh.c deleted file mode 100644 index 7c563a74a38046038e6ba5d5b865642d935ab8a2..0000000000000000000000000000000000000000 --- a/examples/test_bh.c +++ /dev/null @@ -1,847 +0,0 @@ -/******************************************************************************* - * This file is part of QuickSched. - * Coypright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - ******************************************************************************/ - - -/* Config parameters. */ -#include "../config.h" - -/* Standard includes. */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <math.h> -#include <omp.h> - -/* Local includes. */ -#include "quicksched.h" - - -/* Some local constants. */ -#define cell_pool_grow 100 -#define cell_maxparts 1 -#define task_limit 5000 -#define const_G 6.6738e-11 -#define dist_min 0.5 - - -/** Data structure for the particles. */ -struct part { - double x[3]; - double a[3]; - double mass; - int id; - }; - - -/** Data structure for the BH tree cell. */ -struct cell { - double loc[3]; - double h[3]; - int split, count; - struct part *parts; - struct cell *progeny[8]; - struct cell *parent; - double com[3]; - double mass; - int res, com_tid, depth; - }; - - -/** Task types. */ -enum task_type { - task_type_self = 0, - task_type_pair, - task_type_pair_pc, - task_type_com, - task_type_count - }; - -/** Per-type timers. */ -ticks task_timers[ task_type_count ]; - - -/** Global variable for the pool of allocated cells. */ -struct cell *cell_pool = NULL; - - -/** - * @brief Get a #cell from the pool. - */ - -struct cell *cell_get ( ) { - - struct cell *res; - int k; - - /* Allocate a new batch? */ - if ( cell_pool == NULL ) { - - /* Allocate the cell array. */ - if ( ( cell_pool = (struct cell *)malloc( sizeof(struct cell) * cell_pool_grow ) ) == NULL ) - error( "Failed to allocate fresh batch of cells." ); - - /* Clear the cells. */ - bzero( cell_pool , sizeof(struct cell) * cell_pool_grow ); - - /* Link them up via their progeny pointers. */ - for ( k = 1 ; k < cell_pool_grow ; k++ ) - cell_pool[k-1].progeny[0] = &cell_pool[k]; - - } - - /* Pick a cell off the pool. */ - res = cell_pool; - cell_pool = cell_pool->progeny[0]; - - /* Clean up a few things. */ - res->res = qsched_res_none; - - /* Return the cell. */ - return res; - - } - - -/** - * @brief Sort the parts into eight bins along the given pivots and - * fill the multipoles. Also adds the hierarchical resources - * to the sched. - * - * @param c The #cell to be split. - * @param N The total number of parts. - * @param s The #sched to store the resources. - */ - -void cell_split ( struct cell *c , struct qsched *s ) { - - int i, j, k, count = c->count; - struct part temp, *parts = c->parts; - struct cell *cp; - int left[8], right[8]; - double pivot[3]; - static struct cell *root = NULL; - - /* Set the root cell. */ - if ( root == NULL ) { - root = c; - c->depth = 0; - } - - /* Add a resource for this cell if it doesn't have one yet. */ - if ( c->res == qsched_res_none ) - c->res = qsched_addres( s , qsched_owner_none , qsched_res_none ); - - /* Attach a center-of-mass task to the cell. */ - c->com_tid = qsched_addtask( s , task_type_com , task_flag_none , &c , sizeof(struct cell *) , 1 ); - - /* Does this cell need to be split? */ - if ( count > cell_maxparts ) { - - /* Mark this cell as split. */ - c->split = 1; - - /* Create the progeny. */ - for ( k = 0 ; k < 8 ; k++ ) { - c->progeny[k] = cp = cell_get(); - cp->parent = c; - cp->depth = c->depth + 1; - cp->loc[0] = c->loc[0]; - cp->loc[1] = c->loc[1]; - cp->loc[2] = c->loc[2]; - cp->h[0] = c->h[0]/2; - cp->h[1] = c->h[1]/2; - cp->h[2] = c->h[2]/2; - cp->res = qsched_addres( s , qsched_owner_none , c->res ); - if ( k & 4 ) - cp->loc[0] += cp->h[0]; - if ( k & 2 ) - cp->loc[1] += cp->h[1]; - if ( k & 1 ) - cp->loc[2] += cp->h[2]; - } - - /* Init the pivots. */ - for ( k = 0 ; k < 3 ; k++ ) - pivot[k] = c->loc[k] + c->h[k]/2; - - /* Split along the x-axis. */ - i = 0; j = count - 1; - while ( i <= j ) { - while ( i <= count-1 && parts[i].x[0] < pivot[0] ) - i += 1; - while ( j >= 0 && parts[j].x[0] >= pivot[0] ) - j -= 1; - if ( i < j ) { - temp = parts[i]; parts[i] = parts[j]; parts[j] = temp; - } - } - left[1] = i; right[1] = count - 1; - left[0] = 0; right[0] = j; - - /* Split along the y axis, twice. */ - for ( k = 1 ; k >= 0 ; k-- ) { - i = left[k]; j = right[k]; - while ( i <= j ) { - while ( i <= right[k] && parts[i].x[1] < pivot[1] ) - i += 1; - while ( j >= left[k] && parts[j].x[1] >= pivot[1] ) - j -= 1; - if ( i < j ) { - temp = parts[i]; parts[i] = parts[j]; parts[j] = temp; - } - } - left[2*k+1] = i; right[2*k+1] = right[k]; - left[2*k] = left[k]; right[2*k] = j; - } - - /* Split along the z axis, four times. */ - for ( k = 3 ; k >= 0 ; k-- ) { - i = left[k]; j = right[k]; - while ( i <= j ) { - while ( i <= right[k] && parts[i].x[2] < pivot[2] ) - i += 1; - while ( j >= left[k] && parts[j].x[2] >= pivot[2] ) - j -= 1; - if ( i < j ) { - temp = parts[i]; parts[i] = parts[j]; parts[j] = temp; - } - } - left[2*k+1] = i; right[2*k+1] = right[k]; - left[2*k] = left[k]; right[2*k] = j; - } - - /* Store the counts and offsets. */ - for ( k = 0 ; k < 8 ; k++ ) { - c->progeny[k]->count = right[k] - left[k] + 1; - c->progeny[k]->parts = &c->parts[ left[k] ]; - } - - /* Recurse. */ - for ( k = 0 ; k < 8 ; k++ ) - cell_split( c->progeny[k] , s ); - - /* Link the COM tasks. */ - for ( k = 0 ; k < 8 ; k++ ) - qsched_addunlock( s , c->progeny[k]->com_tid , c->com_tid ); - - } /* does the cell need to be split? */ - - /* Set this cell's resources ownership. */ - qsched_res_own( s , c->res , s->nr_queues * ( c->parts - root->parts ) / root->count ); - - } - - -/** - * @brief Compute the center of mass of a given cell. - * - * @param c The #cell. - */ - -void comp_com ( struct cell *c ) { - - int k, count = c->count; - struct cell *cp; - struct part *p, *parts = c->parts; - - /* Is the cell split? */ - if ( c->split ) { - - /* Collect the centre of gravity and mass from the progeny. */ - for ( k = 0 ; k < 8 ; k++ ) { - cp = c->progeny[k]; - c->com[0] += cp->com[0]*cp->mass; - c->com[1] += cp->com[1]*cp->mass; - c->com[2] += cp->com[2]*cp->mass; - c->mass += cp->mass; - } - c->com[0] /= c->mass; c->com[1] /= c->mass; c->com[2] /= c->mass; - - } - - /* Otherwise, collect the multipole from local data. */ - else { - - for ( k = 0 ; k < count ; k++ ) { - p = &parts[k]; - c->com[0] += p->x[0]*p->mass; - c->com[1] += p->x[1]*p->mass; - c->com[2] += p->x[2]*p->mass; - c->mass += p->mass; - } - c->com[0] /= c->mass; c->com[1] /= c->mass; c->com[2] /= c->mass; - - } - - } - - -/** - * @brief Compute the interactions between all particles in a cell - * and the center of mass of another cell. - * - * @param ci The #cell containing the particles. - * @param cj The #cell containing the center of mass. - */ - -void iact_pair_pc ( struct cell *ci , struct cell *cj ) { - - int j, k, count = ci->count; - double com[3], mcom, dx[3], r2, ir, w; - struct part *parts = ci->parts; - - /* Early abort? */ - if ( count == 0 || cj->count == 0 ) - return; - - /* message( "ci=[%.3e,%.3e,%.3e], cj=[%.3e,%.3e,%.3e], h=%.3e/%.3e.", - ci->loc[0], ci->loc[1], ci->loc[2], - cj->loc[0], cj->loc[1], cj->loc[2], - ci->h[0], cj->h[0] ); */ - - /* Sanity check. */ - if ( cj->mass == 0.0 ) - error( "com does not seem to have been set." ); - - /* Init the com's data. */ - for ( k = 0 ; k < 3 ; k++ ) - com[k] = cj->com[k]; - mcom = cj->mass; - - /* Loop over every following particle. */ - for ( j = 0 ; j < count ; j++ ) { - - /* Compute the pairwise distance. */ - for ( r2 = 0.0 , k = 0 ; k < 3 ; k++ ) { - dx[k] = com[k] - parts[j].x[k]; - r2 += dx[k]*dx[k]; - } - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt( r2 ); - w = mcom * const_G * ir * ir * ir; - for ( k = 0 ; k < 3 ; k++ ) - parts[j].a[k] += w * dx[k]; - - } /* loop over every other particle. */ - - } - - -/** - * @brief Compute the interactions between all particles in a cell. - * - * @param ci The #cell. - * @param cj The other #cell. - */ - -void iact_pair ( struct cell *ci , struct cell *cj ) { - - int i, j, k; - int count_i = ci->count, count_j = cj->count; - double xi[3], ai[3], mi, mj, dx[3], r2, ir, w; - struct part *parts_i = ci->parts, *parts_j = cj->parts; - - /* Early abort? */ - if ( count_i == 0 || count_j == 0 ) - return; - - /* Get the minimum distance between both cells. */ - for ( r2 = 0.0, k = 0 ; k < 3 ; k++ ) { - dx[k] = ci->loc[k] - cj->loc[k]; - if ( dx[k] < 0 ) - dx[k] = -dx[k] - ci->h[k]; - else if ( dx[k] > 0 ) - dx[k] -= cj->h[k]; - r2 += dx[k]*dx[k]; - } - - // Cells at different level? - if ( ci->depth > cj->depth ) { - - // If well separated, go ahead! - if ( r2 > dist_min*dist_min*ci->h[0]*cj->h[0] ) - iact_pair_pc( ci , cj ); - - // Otherwise, split cj as well. - else - for ( k = 0 ; k < 8 ; k++ ) - iact_pair( ci , cj->progeny[k] ); - - } - - /* Sufficiently well-separated? */ - else if ( r2 > dist_min*dist_min*ci->h[0]*cj->h[0] ) { - - /* Compute the center of mass interactions. */ - iact_pair_pc( ci , cj ); - - } - - /* Recurse? */ - else if ( ci->split && cj->split ) - for ( k = 0 ; k < 8 ; k++ ) { - iact_pair( cj->progeny[k] , ci ); - iact_pair( ci->progeny[k] , cj ); - } - - /* Otherwise, do direct interactions. */ - else if ( ci < cj ) { - - /* message( "ci=[%.3e,%.3e,%.3e], cj=[%.3e,%.3e,%.3e], h=%.3e/%.3e.", - ci->loc[0], ci->loc[1], ci->loc[2], - cj->loc[0], cj->loc[1], cj->loc[2], - ci->h[0], cj->h[0] ); */ - - /* Loop over all particles... */ - for ( i = 0 ; i < count_i ; i++ ) { - - /* Init the ith particle's data. */ - for ( k = 0 ; k < 3 ; k++ ) { - xi[k] = parts_i[i].x[k]; - ai[k] = 0.0; - } - mi = parts_i[i].mass; - - /* Loop over every following particle. */ - for ( j = 0 ; j < count_j ; j++ ) { - - /* Compute the pairwise distance. */ - for ( r2 = 0.0 , k = 0 ; k < 3 ; k++ ) { - dx[k] = xi[k] - parts_j[j].x[k]; - r2 += dx[k]*dx[k]; - } - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt( r2 ); - w = const_G * ir * ir * ir; - mj = parts_j[j].mass; - for ( k = 0 ; k < 3 ; k++ ) { - parts_j[j].a[k] += w * dx[k] * mi; - ai[k] -= w * dx[k] * mj; - } - - } /* loop over every other particle. */ - - /* Store the accumulated acceleration on the ith part. */ - for ( k = 0 ; k < 3 ; k++ ) - parts_i[i].a[k] += ai[k]; - - } /* loop over all particles. */ - - } /* otherwise, compute interactions directly. */ - - } - - -/** - * @brief Compute the interactions between all particles in a cell. - * - * @param c The #cell. - */ - -void iact_self ( struct cell *c ) { - - int i, j, k, count = c->count; - double xi[3], ai[3], mi, mj, dx[3], r2, ir, w; - struct part *parts = c->parts; - - /* Early abort? */ - if ( count == 0 ) - return; - - /* message( "cell=[%.3e,%.3e,%.3e], h=%.3e.", - c->loc[0], c->loc[1], c->loc[2], c->h[0] ); */ - - /* Recurse? */ - if ( c->split ) - for ( j = 0 ; j < 8 ; j++ ) { - iact_self( c->progeny[j] ); - for ( k = j+1 ; k < 8 ; k++ ) - iact_pair( c->progeny[j] , c->progeny[k] ); - } - - /* Otherwise, compute interactions directly. */ - else { - - /* Loop over all particles... */ - for ( i = 0 ; i < count ; i++ ) { - - /* Init the ith particle's data. */ - for ( k = 0 ; k < 3 ; k++ ) { - xi[k] = parts[i].x[k]; - ai[k] = 0.0; - } - mi = parts[i].mass; - - /* Loop over every following particle. */ - for ( j = i+1 ; j < count ; j++ ) { - - /* Compute the pairwise distance. */ - for ( r2 = 0.0 , k = 0 ; k < 3 ; k++ ) { - dx[k] = xi[k] - parts[j].x[k]; - r2 += dx[k]*dx[k]; - } - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt( r2 ); - w = const_G * ir * ir * ir; - mj = parts[j].mass; - for ( k = 0 ; k < 3 ; k++ ) { - parts[j].a[k] += w * dx[k] * mi; - ai[k] -= w * dx[k] * mj; - } - - } /* loop over every other particle. */ - - /* Store the accumulated acceleration on the ith part. */ - for ( k = 0 ; k < 3 ; k++ ) - parts[i].a[k] += ai[k]; - - } /* loop over all particles. */ - - } /* otherwise, compute interactions directly. */ - - } - - -/** - * @brief Create the tasks for the cell pair/self. - * - * @param s The #sched in which to create the tasks. - * @param ci The first #cell. - * @param cj The second #cell. - */ - -void create_tasks ( struct qsched *s , struct cell *ci , struct cell *cj ) { - - int j, k; - qsched_task_t tid; - struct cell *data[2]; - double dx, r2; - - /* If either cell is empty, stop. */ - if ( ci->count == 0 || ( cj != NULL && cj->count == 0 ) ) - return; - - /* Single cell? */ - if ( cj == NULL ) { - - /* Is this cell split? */ - if ( ci->split && ci->count > task_limit ) { - - /* Recurse over the progeny. */ - for ( j = 0 ; j < 8 ; j++ ) { - - /* Make self-task. */ - create_tasks( s , ci->progeny[j] , NULL ); - - /* Make all pair tasks. */ - for ( k = j+1 ; k < 8 ; k++ ) - create_tasks( s , ci->progeny[j] , ci->progeny[k] ); - } - - } - - /* Otherwise, add a self-interaction task. */ - else { - - /* Set the data. */ - data[0] = ci; data[1] = NULL; - - /* Create the task. */ - tid = qsched_addtask( s , task_type_self , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count*ci->count/2 ); - - /* Add the resource. */ - qsched_addlock( s , tid , ci->res ); - - /* If this call might recurse, add a dependency on the cell's COM. */ - if ( ci->split ) - qsched_addunlock( s , ci->com_tid , tid ); - - } - - } - - /* Otherwise, it's a pair. */ - else { - - /* Get the minimum distance between both cells. */ - for ( r2 = 0.0, k = 0 ; k < 3 ; k++ ) { - dx = ci->loc[k] - cj->loc[k]; - if ( dx < 0 ) - dx = -dx - ci->h[k]; - else if ( dx > 0 ) - dx -= cj->h[k]; - r2 += dx*dx; - } - - // Cells at different level? - if ( cj->depth < ci->depth ) { - - // If well separated, go ahead! - if ( r2 > dist_min*dist_min*ci->h[0]*cj->h[0] ) { - data[0] = ci; data[1] = cj; - tid = qsched_addtask( s , task_type_pair_pc , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count ); - qsched_addlock( s , tid , ci->res ); - qsched_addunlock( s , cj->com_tid , tid ); - } - - // Otherwise, split cj as well. - else - for ( k = 0 ; k < 8 ; k++ ) - create_tasks( s , ci , cj->progeny[k] ); - - } - - /* Are the cells sufficiently well separated? */ - else if ( r2 > dist_min*dist_min*ci->h[0]*ci->h[0] ) { - - /* Interact ci's parts with cj as a cell. */ - data[0] = ci; data[1] = cj; - tid = qsched_addtask( s , task_type_pair_pc , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count ); - qsched_addlock( s , tid , ci->res ); - qsched_addunlock( s , cj->com_tid , tid ); - - } - - /* Does this task need to be broken-down further? */ - else if ( ci->split && cj->split && - ci->count > task_limit && cj->count > task_limit ) { - - /* Loop over all pairs between ci and cj's progeny. */ - for ( k = 0 ; k < 8 ; k++ ) { - create_tasks( s , ci->progeny[k] , cj ); - create_tasks( s , cj->progeny[k] , ci ); - } - - } - - /* Otherwise, generate a part-part task. */ - else if ( ci < cj ) { - - /* Set the data. */ - data[0] = ci; data[1] = cj; - - /* Create the task. */ - tid = qsched_addtask( s , task_type_pair , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count*cj->count ); - - /* Add the resources. */ - qsched_addlock( s , tid , ci->res ); - qsched_addlock( s , tid , cj->res ); - - /* Depend on the COMs in case this task recurses. */ - if ( ci->split || cj->split ) { - qsched_addunlock( s , ci->com_tid , tid ); - qsched_addunlock( s , cj->com_tid , tid ); - } - - } - - } /* otherwise, it's a pair. */ - - } - - -/** - * @brief Set up and run a task-based Barnes-Hutt N-body solver. - * - * @param N The number of random particles to use. - * @param nr_threads Number of threads to use. - */ - -void test_bh ( int N , int nr_threads , int runs ) { - - int k; - struct cell *root; - struct part *parts; - struct qsched s; - ticks tic, toc_run, tot_setup = 0, tot_run = 0; - - /* Runner function. */ - void runner ( int type , void *data ) { - - ticks tic = getticks(); - - /* Decode the data. */ - struct cell **d = (struct cell **)data; - - /* Decode and execute the task. */ - switch ( type ) { - case task_type_self: - iact_self( d[0] ); - break; - case task_type_pair: - iact_pair( d[0] , d[1] ); - break; - case task_type_pair_pc: - iact_pair_pc( d[0] , d[1] ); - break; - case task_type_com: - comp_com( d[0] ); - break; - default: - error( "Unknown task type." ); - } - - atomic_add( &task_timers[type] , getticks() - tic ); - - } - - /* Initialize the per-task type timers. */ - for ( k = 0 ; k < task_type_count ; k++ ) - task_timers[k] = 0; - - /* Initialize the scheduler. */ - qsched_init( &s , nr_threads , qsched_flag_noreown ); - - /* Init and fill the particle array. */ - if ( ( parts = (struct part *)malloc( sizeof(struct part) * N ) ) == NULL ) - error( "Failed to allocate particle buffer." ); - // int hN = ceil( cbrt( N ) ); - // double h = 1.0 / hN; - for ( k = 0 ; k < N ; k++ ) { - parts[k].id = k; - parts[k].x[0] = ((double)rand())/RAND_MAX; // h*(k % hN); - parts[k].x[1] = ((double)rand())/RAND_MAX; // h*((k / hN) % hN); - parts[k].x[2] = ((double)rand())/RAND_MAX; // h*(k / hN / hN); - parts[k].mass = ((double)rand())/RAND_MAX; // 1.0; - parts[k].a[0] = 0.0; - parts[k].a[1] = 0.0; - parts[k].a[2] = 0.0; - } - - /* Init the cells. */ - root = cell_get(); - root->loc[0] = 0.0; root->loc[1] = 0.0; root->loc[2] = 0.0; - root->h[0] = 1.0; root->h[1] = 1.0; root->h[2] = 1.0; - root->count = N; - root->parts = parts; - cell_split( root , &s ); - - /* Create the tasks. */ - tic = getticks(); - create_tasks( &s , root , NULL ); - tot_setup += getticks() - tic; - - /* Dump the number of tasks. */ - message( "total nr of tasks: %i." , s.count ); - message( "total nr of deps: %i." , s.count_deps ); - message( "total nr of res: %i." , s.count_res ); - message( "total nr of locks: %i." , s.count_locks ); - message( "total nr of uses: %i." , s.count_uses ); - int counts[ task_type_count ]; - for ( k = 0 ; k < task_type_count ; k++ ) - counts[k] = 0; - for ( k = 0 ; k < s.count ; k++ ) - counts[ s.tasks[k].type ] += 1; - printf( "task counts: [ " ); - for ( k = 0 ; k < task_type_count ; k++ ) - printf( "%i " , counts[k] ); - printf( "].\n" ); - - /* Loop over the number of runs. */ - for ( k = 0 ; k < runs ; k++ ) { - - /* Execute the tasks. */ - tic = getticks(); - qsched_run( &s , nr_threads , runner ); - toc_run = getticks(); - message( "%ith run took %lli ticks..." , k , toc_run - tic ); - tot_run += toc_run - tic; - - } - - /* Dump the tasks. */ - /* for ( k = 0 ; k < s.count ; k++ ) - printf( " %i %i %lli %lli\n" , s.tasks[k].type , s.tasks[k].qid , s.tasks[k].tic , s.tasks[k].toc ); */ - - /* Dump the costs. */ - message( "costs: setup=%lli ticks, run=%lli ticks." , - tot_setup , tot_run/runs ); - - /* Dump the timers. */ - for ( k = 0 ; k < qsched_timer_count ; k++ ) - message( "timer %s is %lli ticks." , qsched_timer_names[k] , s.timers[k]/runs ); - - /* Dump the per-task type timers. */ - printf( "task timers: [ " ); - for ( k = 0 ; k < task_type_count ; k++ ) - printf( "%lli " , task_timers[k]/runs ); - printf( "] ticks.\n" ); - - /* Clean up. */ - qsched_free( &s ); - - } - - -/** - * @brief Main function. - */ - -int main ( int argc , char *argv[] ) { - - int c, nr_threads; - int N = 1000, runs = 1; - - /* Get the number of threads. */ - #pragma omp parallel shared(nr_threads) - { - if ( omp_get_thread_num() == 0 ) - nr_threads = omp_get_num_threads(); - } - - /* Parse the options */ - while ( ( c = getopt( argc , argv , "n:r:t:" ) ) != -1 ) - switch( c ) { - case 'n': - if ( sscanf( optarg , "%d" , &N ) != 1 ) - error( "Error parsing number of particles." ); - break; - case 'r': - if ( sscanf( optarg , "%d" , &runs ) != 1 ) - error( "Error parsing number of runs." ); - break; - case 't': - if ( sscanf( optarg , "%d" , &nr_threads ) != 1 ) - error( "Error parsing number of threads." ); - omp_set_num_threads( nr_threads ); - break; - case '?': - fprintf( stderr , "Usage: %s [-t nr_threads] [-n N] [-r runs]\n" , argv[0] ); - fprintf( stderr , "Solves the N-body problem using a Barnes-Hutt\n" - "tree code with N random particles in [0,1]^3 using\n" - "nr_threads threads.\n" ); - exit( EXIT_FAILURE ); - } - - /* Dump arguments. */ - message( "Computing the N-body problem over %i particles using %i threads (%i runs)." , - N , nr_threads , runs ); - - /* Run the test. */ - test_bh( N , nr_threads, runs ); - - return 0; - - } - - diff --git a/examples/test_bh_2.c b/examples/test_bh_2.c deleted file mode 100644 index b4c06898b9f00cde39a81c46e6b4df1a2aa1e05f..0000000000000000000000000000000000000000 --- a/examples/test_bh_2.c +++ /dev/null @@ -1,1252 +0,0 @@ -/******************************************************************************* - * This file is part of QuickSched. - * Coypright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - ******************************************************************************/ - - -/* Config parameters. */ -#include "../config.h" - -/* Standard includes. */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <math.h> -#include <omp.h> - -/* Local includes. */ -#include "quicksched.h" - - -/* Some local constants. */ -#define cell_pool_grow 100 -#define cell_maxparts 20 -#define task_limit 5000 -#define const_G 6.6738e-11 -#define dist_min 0.5 // 0.5 - - -#define ICHECK -1 - -/** Data structure for the particles. */ -struct part { - double x[3]; - double a[3]; - double a_legacy[3]; - double a_exact[3]; - double mass; - int id; - }; - - -/** Data structure for the BH tree cell. */ -struct cell { - double loc[3]; - double h[3]; - int split, count; - struct part *parts; - struct cell *progeny[8]; - struct cell *parent; - double com[3]; - double mass; - double com_legacy[3]; - double mass_legacy; - int res, com_tid, depth; - }; - - -/** Task types. */ -enum task_type { - task_type_self = 0, - task_type_pair, - task_type_pair_pc, - task_type_com, - task_type_count - }; - -/** Per-type timers. */ -ticks task_timers[ task_type_count ]; - - -/** Global variable for the pool of allocated cells. */ -struct cell *cell_pool = NULL; - - -/** - * @brief Get a #cell from the pool. - */ - -struct cell *cell_get ( ) { - - struct cell *res; - int k; - - /* Allocate a new batch? */ - if ( cell_pool == NULL ) { - - /* Allocate the cell array. */ - if ( ( cell_pool = (struct cell *)malloc( sizeof(struct cell) * cell_pool_grow ) ) == NULL ) - error( "Failed to allocate fresh batch of cells." ); - - /* Clear the cells. */ - bzero( cell_pool , sizeof(struct cell) * cell_pool_grow ); - - /* Link them up via their progeny pointers. */ - for ( k = 1 ; k < cell_pool_grow ; k++ ) - cell_pool[k-1].progeny[0] = &cell_pool[k]; - - } - - /* Pick a cell off the pool. */ - res = cell_pool; - cell_pool = cell_pool->progeny[0]; - - /* Clean up a few things. */ - res->res = qsched_res_none; - - /* Return the cell. */ - return res; - - } - - -/** - * @brief Sort the parts into eight bins along the given pivots and - * fill the multipoles. Also adds the hierarchical resources - * to the sched. - * - * @param c The #cell to be split. - * @param N The total number of parts. - * @param s The #sched to store the resources. - */ - -void cell_split ( struct cell *c , struct qsched *s ) { - - int i, j, k, count = c->count; - struct part temp, *parts = c->parts; - struct cell *cp; - int left[8], right[8]; - double pivot[3]; - static struct cell *root = NULL; - - /* Set the root cell. */ - if ( root == NULL ) { - root = c; - c->depth = 0; - } - - /* Add a resource for this cell if it doesn't have one yet. */ - if ( c->res == qsched_res_none ) - c->res = qsched_addres( s , qsched_owner_none , qsched_res_none ); - - /* Attach a center-of-mass task to the cell. */ - c->com_tid = qsched_addtask( s , task_type_com , task_flag_none , &c , sizeof(struct cell *) , 1 ); - - /* Does this cell need to be split? */ - if ( count > cell_maxparts ) { - - /* Mark this cell as split. */ - c->split = 1; - - /* Create the progeny. */ - for ( k = 0 ; k < 8 ; k++ ) { - c->progeny[k] = cp = cell_get(); - cp->parent = c; - cp->depth = c->depth + 1; - cp->loc[0] = c->loc[0]; - cp->loc[1] = c->loc[1]; - cp->loc[2] = c->loc[2]; - cp->h[0] = c->h[0]/2; - cp->h[1] = c->h[1]/2; - cp->h[2] = c->h[2]/2; - cp->res = qsched_addres( s , qsched_owner_none , c->res ); - if ( k & 4 ) - cp->loc[0] += cp->h[0]; - if ( k & 2 ) - cp->loc[1] += cp->h[1]; - if ( k & 1 ) - cp->loc[2] += cp->h[2]; - } - - /* Init the pivots. */ - for ( k = 0 ; k < 3 ; k++ ) - pivot[k] = c->loc[k] + c->h[k]/2; - - /* Split along the x-axis. */ - i = 0; j = count - 1; - while ( i <= j ) { - while ( i <= count-1 && parts[i].x[0] < pivot[0] ) - i += 1; - while ( j >= 0 && parts[j].x[0] >= pivot[0] ) - j -= 1; - if ( i < j ) { - temp = parts[i]; parts[i] = parts[j]; parts[j] = temp; - } - } - left[1] = i; right[1] = count - 1; - left[0] = 0; right[0] = j; - - /* Split along the y axis, twice. */ - for ( k = 1 ; k >= 0 ; k-- ) { - i = left[k]; j = right[k]; - while ( i <= j ) { - while ( i <= right[k] && parts[i].x[1] < pivot[1] ) - i += 1; - while ( j >= left[k] && parts[j].x[1] >= pivot[1] ) - j -= 1; - if ( i < j ) { - temp = parts[i]; parts[i] = parts[j]; parts[j] = temp; - } - } - left[2*k+1] = i; right[2*k+1] = right[k]; - left[2*k] = left[k]; right[2*k] = j; - } - - /* Split along the z axis, four times. */ - for ( k = 3 ; k >= 0 ; k-- ) { - i = left[k]; j = right[k]; - while ( i <= j ) { - while ( i <= right[k] && parts[i].x[2] < pivot[2] ) - i += 1; - while ( j >= left[k] && parts[j].x[2] >= pivot[2] ) - j -= 1; - if ( i < j ) { - temp = parts[i]; parts[i] = parts[j]; parts[j] = temp; - } - } - left[2*k+1] = i; right[2*k+1] = right[k]; - left[2*k] = left[k]; right[2*k] = j; - } - - /* Store the counts and offsets. */ - for ( k = 0 ; k < 8 ; k++ ) { - c->progeny[k]->count = right[k] - left[k] + 1; - c->progeny[k]->parts = &c->parts[ left[k] ]; - } - - /* Recurse. */ - for ( k = 0 ; k < 8 ; k++ ) - cell_split( c->progeny[k] , s ); - - /* Link the COM tasks. */ - for ( k = 0 ; k < 8 ; k++ ) - qsched_addunlock( s , c->progeny[k]->com_tid , c->com_tid ); - - } /* does the cell need to be split? */ - - /* Set this cell's resources ownership. */ - qsched_res_own( s , c->res , s->nr_queues * ( c->parts - root->parts ) / root->count ); - - } - - -/** - * @brief Compute the center of mass of a given cell. - * - * @param c The #cell. - */ - -void comp_com ( struct cell *c ) { - - int k, count = c->count; - struct cell *cp; - struct part *p, *parts = c->parts; - - c->com[0] = c->com[1] = c->com[2] = c->mass = 0.; - - /* Is the cell split? */ - if ( c->split ) { - - /* Collect the centre of gravity and mass from the progeny. */ - for ( k = 0 ; k < 8 ; k++ ) { - cp = c->progeny[k]; - c->com[0] += cp->com[0]*cp->mass; - c->com[1] += cp->com[1]*cp->mass; - c->com[2] += cp->com[2]*cp->mass; - c->mass += cp->mass; - } - - if ( c-> mass > 0. ) - { - c->com[0] /= c->mass; - c->com[1] /= c->mass; - c->com[2] /= c->mass; - } - else - { - c->com[0] = 0.; - c->com[1] = 0.; - c->com[2] = 0.; - } - - } - - /* Otherwise, collect the multipole from local data. */ - else { - - for ( k = 0 ; k < count ; k++ ) { - p = &parts[k]; - c->com[0] += p->x[0]*p->mass; - c->com[1] += p->x[1]*p->mass; - c->com[2] += p->x[2]*p->mass; - c->mass += p->mass; - } - - if ( c-> mass > 0. ) - { - c->com[0] /= c->mass; - c->com[1] /= c->mass; - c->com[2] /= c->mass; - } - else - { - c->com[0] = 0.; - c->com[1] = 0.; - c->com[2] = 0.; - } - } - - } - - -/** - * @brief Compute the interactions between all particles in a cell - * and the center of mass of another cell. - * - * @param ci The #cell containing the particles. - * @param cj The #cell containing the center of mass. - */ - -void iact_pair_pc ( struct cell *ci , struct cell *cj ) { - - int j, k, count = ci->count; - double com[3], mcom, dx[3], r2, ir, w; - struct part *parts = ci->parts; - - /* Early abort? */ - if ( count == 0 || cj->count == 0 ) - return; - - /* message( "ci=[%.3e,%.3e,%.3e], cj=[%.3e,%.3e,%.3e], h=%.3e/%.3e.", - ci->loc[0], ci->loc[1], ci->loc[2], - cj->loc[0], cj->loc[1], cj->loc[2], - ci->h[0], cj->h[0] ); */ - - /* Sanity check. */ - if ( cj->mass == 0.0 ){ - printf("%e %e %e %d %p\n", cj->com[0], cj->com[1], cj->com[2], cj->count, cj); - - for ( j = 0 ; j < cj->count ; ++j ) - printf("part %d mass= %e\n", j, cj->parts[j].mass ); - - error( "com does not seem to have been set." ); - } - /* Init the com's data. */ - for ( k = 0 ; k < 3 ; k++ ) - com[k] = cj->com[k]; - mcom = cj->mass; - - /* Loop over every particle in ci. */ - for ( j = 0 ; j < count ; j++ ) { - - /* Compute the pairwise distance. */ - for ( r2 = 0.0 , k = 0 ; k < 3 ; k++ ) { - dx[k] = com[k] - parts[j].x[k]; - r2 += dx[k]*dx[k]; - } - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt( r2 ); - w = mcom * const_G * ir * ir * ir; - for ( k = 0 ; k < 3 ; k++ ) - parts[j].a[k] += w * dx[k]; - -#if ICHECK >= 0 - if ( parts[j].id == ICHECK ) - printf("[NEW] Can interact with the monopole. x= %f %f %f m= %f h= %f\n", com[0], com[1], com[2], mcom, cj->h[0]); -#endif - - } /* loop over every particle. */ - - } - - -/** - * @brief Compute the interactions between all particles in a cell. - * - * @param ci The #cell. - * @param cj The other #cell. - */ - -void iact_pair ( struct cell *ci , struct cell *cj ) { - - int i, j, k; - int count_i = ci->count, count_j = cj->count; - double dx[3], xi[3], ai[3], mi, mj, r2, w, ir; - struct part *parts_i = ci->parts, *parts_j = cj->parts; - - /* Early abort? */ - if ( count_i == 0 || count_j == 0 ) - return; - - /* Distance between the CoMs */ - for ( r2 = 0.0, k = 0 ; k < 3 ; k++ ) { - dx[k] = fabs( ci->com[k] - cj->com[k] ); - r2 += dx[k]*dx[k]; - } - - double s_max_i = ci->h[0]; - double s_max_j = cj->h[0]; - - if ( ( dist_min * dist_min * r2 > s_max_i * s_max_i ) && ( dist_min * dist_min * r2 > s_max_j * s_max_j ) ) - { - iact_pair_pc( ci, cj ); - iact_pair_pc( cj, ci ); - } - else if ( ci->split == 0 && cj->split == 0 ) - { - /* Do direct summation */ - - /* Loop over all particles... */ - for ( i = 0 ; i < count_i ; i++ ) { - - /* Init the ith particle's data. */ - for ( k = 0 ; k < 3 ; k++ ) { - xi[k] = parts_i[i].x[k]; - ai[k] = 0.0; - } - mi = parts_i[i].mass; - - /* Loop over every following particle. */ - for ( j = 0 ; j < count_j ; j++ ) { - - /* Compute the pairwise distance. */ - for ( r2 = 0.0 , k = 0 ; k < 3 ; k++ ) { - dx[k] = xi[k] - parts_j[j].x[k]; - r2 += dx[k]*dx[k]; - } - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt( r2 ); - w = const_G * ir * ir * ir; - mj = parts_j[j].mass; - for ( k = 0 ; k < 3 ; k++ ) { - parts_j[j].a[k] += w * dx[k] * mi; - ai[k] -= w * dx[k] * mj; - } - -#if ICHECK >= 0 - if ( parts_i[i].id == ICHECK ) - printf("[NEW] Interaction with particle id= %d (pair i)\n", parts_j[j].id); - - if ( parts_j[j].id == ICHECK ) - printf("[NEW] Interaction with particle id= %d (pair j) h_i= %f h_j= %f ci= %p cj= %p count_i= %d count_j= %d d_i= %d d_j= %d\n", parts_i[i].id, ci->h[0], cj->h[0], ci, cj, count_i, count_j, - ci->res, cj->res ) ; -#endif - } /* loop over every other particle. */ - - /* Store the accumulated acceleration on the ith part. */ - for ( k = 0 ; k < 3 ; k++ ) - parts_i[i].a[k] += ai[k]; - - } /* loop over all particles. */ - - - } - else { - - if ( ci == cj ) - printf("The impossible has happened\n"); //debug - - - /* We can split one of the two cells. Let's try the biggest one */ - if ( ci->h[0] > cj->h[0] ) { - - if ( ci->split ) - for ( k = 0 ; k < 8 ; k++ ) - iact_pair ( ci->progeny[k], cj ); - - else if ( cj->split ) - for ( k = 0 ; k < 8 ; k++ ) - iact_pair ( ci, cj->progeny[k] ); - - else - printf("oO\n"); - - } - else { - - if ( cj->split ) - for ( k = 0 ; k < 8 ; k++ ) - iact_pair ( ci, cj->progeny[k] ); - - else if ( ci->split ) - for ( k = 0 ; k < 8 ; k++ ) - iact_pair ( ci->progeny[k], cj ); - - else - printf("Oo\n"); - - } - } - } - - -/** - * @brief Compute the interactions between all particles in a cell. - * - * @param c The #cell. - */ - -void iact_self ( struct cell *c ) { - - int i, j, k, count = c->count; - double xi[3], ai[3], mi, mj, dx[3], r2, ir, w; - struct part *parts = c->parts; - - /* Early abort? */ - if ( count == 0 ) - return; - - // printf("entering cell at x= %f %f %f h= %f\n", c->loc[0], c->loc[1], c->loc[2], c->h[0] ); - - /* message( "cell=[%.3e,%.3e,%.3e], h=%.3e.", - c->loc[0], c->loc[1], c->loc[2], c->h[0] ); */ - - /* Recurse? */ - if ( c->split ) - { - for ( j = 0 ; j < 8 ; j++ ) { - iact_self( c->progeny[j] ); - for ( k = j+1 ; k < 8 ; k++ ) - iact_pair( c->progeny[j] , c->progeny[k] ); - } - } - /* Otherwise, compute interactions directly. */ - else { - - /* Loop over all particles... */ - for ( i = 0 ; i < count ; i++ ) { - - /* Init the ith particle's data. */ - for ( k = 0 ; k < 3 ; k++ ) { - xi[k] = parts[i].x[k]; - ai[k] = 0.0; - } - mi = parts[i].mass; - - /* Loop over every following particle. */ - for ( j = i+1 ; j < count ; j++ ) { - - /* Compute the pairwise distance. */ - for ( r2 = 0.0 , k = 0 ; k < 3 ; k++ ) { - dx[k] = xi[k] - parts[j].x[k]; - r2 += dx[k]*dx[k]; - } - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt( r2 ); - w = const_G * ir * ir * ir; - mj = parts[j].mass; - for ( k = 0 ; k < 3 ; k++ ) { - parts[j].a[k] += w * dx[k] * mi; - ai[k] -= w * dx[k] * mj; - } - -#if ICHECK >= 0 - if ( parts[i].id == ICHECK ) - printf("[NEW] Interaction with particle id= %d (self i)\n", parts[j].id); - - if ( parts[j].id == ICHECK ) - printf("[NEW] Interaction with particle id= %d (self j)\n", parts[i].id); -#endif - - } /* loop over every other particle. */ - - /* Store the accumulated acceleration on the ith part. */ - for ( k = 0 ; k < 3 ; k++ ) - parts[i].a[k] += ai[k]; - - } /* loop over all particles. */ - - } /* otherwise, compute interactions directly. */ - - } - - -/** - * @brief Create the tasks for the cell pair/self. - * - * @param s The #sched in which to create the tasks. - * @param ci The first #cell. - * @param cj The second #cell. - */ - -void create_tasks ( struct qsched *s , struct cell *ci , struct cell *cj ) { - - int j, k; - qsched_task_t tid; - struct cell *data[2]; - double dx, r2; - - /* If either cell is empty, stop. */ - if ( ci->count == 0 || ( cj != NULL && cj->count == 0 ) ) - return; - - /* Single cell? */ - if ( cj == NULL ) { - - /* Is this cell split? */ - if ( ci->split && ci->count > task_limit ) { - - /* Recurse over the progeny. */ - for ( j = 0 ; j < 8 ; j++ ) { - - /* Make self-task. */ - create_tasks( s , ci->progeny[j] , NULL ); - - /* Make all pair tasks. */ - for ( k = j+1 ; k < 8 ; k++ ) - create_tasks( s , ci->progeny[j] , ci->progeny[k] ); - } - - } - - /* Otherwise, add a self-interaction task. */ - else { - - /* Set the data. */ - data[0] = ci; data[1] = NULL; - - /* Create the task. */ - tid = qsched_addtask( s , task_type_self , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count*ci->count/2 ); - - /* Add the resource. */ - qsched_addlock( s , tid , ci->res ); - - /* If this call might recurse, add a dependency on the cell's COM. */ - if ( ci->split ) - qsched_addunlock( s , ci->com_tid , tid ); - - } - - } - - /* Otherwise, it's a pair. */ - else { - - - // printf("Pair !\n"); - - /* Distance between the CoMs */ - for ( r2 = 0.0, k = 0 ; k < 3 ; k++ ) { - dx = fabs( ci->loc[k] - cj->loc[k] ); - r2 += dx*dx; - } - - const double s_max_i = ci->h[0]; - const double s_max_j = cj->h[0]; - - /* Check whether we can use the multipoles. */ - if ( ( dist_min * dist_min * r2 > s_max_i * s_max_i ) && ( dist_min * dist_min * r2 > s_max_j * s_max_j ) ) - { - data[0] = ci; data[1] = cj; - tid = qsched_addtask( s , task_type_pair_pc , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count ); - qsched_addlock( s , tid , ci->res ); - qsched_addunlock( s , cj->com_tid , tid ); - - data[0] = cj; data[1] = ci; - tid = qsched_addtask( s , task_type_pair_pc , task_flag_none , data , sizeof(struct cell *) * 2 , cj->count ); - qsched_addlock( s , tid , cj->res ); - qsched_addunlock( s , ci->com_tid , tid ); - } - - - /* Otherwise, generate a part-part task. */ - else if ( ci->split == 0 && cj->split == 0 ) - { - - /* Set the data. */ - data[0] = ci; data[1] = cj; - - /* Create the task. */ - tid = qsched_addtask( s , task_type_pair , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count*cj->count ); - - /* Add the resources. */ - qsched_addlock( s , tid , ci->res ); - qsched_addlock( s , tid , cj->res ); - - /* qsched_addunlock( s , ci->com_tid , tid ); */ - /* qsched_addunlock( s , cj->com_tid , tid ); */ - - - } - - - else if ( ci->count > task_limit && cj->count > task_limit ) - { - - /* if ( ci == cj ) */ - /* printf("The impossible has happened\n"); //debug */ - - /* We can split one of the two cells. Let's try the biggest one */ - if ( ci->h[0] > cj->h[0] ) { - - if ( ci->split ) - for ( k = 0 ; k < 8 ; k++ ) - create_tasks ( s , ci->progeny[k], cj ); - - else if ( cj->split ) - for ( k = 0 ; k < 8 ; k++ ) - create_tasks ( s , ci, cj->progeny[k] ); - - /* else */ - /* printf("oO\n"); */ - - } - else { - - if ( cj->split ) - for ( k = 0 ; k < 8 ; k++ ) - create_tasks ( s , ci, cj->progeny[k] ); - - else if ( ci->split ) - for ( k = 0 ; k < 8 ; k++ ) - create_tasks ( s , ci->progeny[k], cj ); - - /* else */ - /* printf("Oo\n"); */ - - } - } - - /* /\* Create a task if too few particles *\/ */ - else - { - /* Set the data. */ - data[0] = ci; data[1] = cj; - - /* Create the task. */ - tid = qsched_addtask( s , task_type_pair , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count*cj->count ); - - /* Add the resources. */ - qsched_addlock( s , tid , ci->res ); - qsched_addlock( s , tid , cj->res ); - - /* Depend on the COMs in case this task recurses. */ - if ( ci->split || cj->split ) { - qsched_addunlock( s , ci->com_tid , tid ); - qsched_addunlock( s , cj->com_tid , tid ); - } - } - - - - } /* otherwise, it's a pair. */ - - } - - - - - - - - - - -/** - * @brief Compute the center of mass of a given cell recursively. - * - * @param c The #cell. - */ - -void legacy_comp_com ( struct cell *c , int* countCoMs ) { - - int k, count = c->count; - struct cell *cp; - struct part *p, *parts = c->parts; - - ++(*countCoMs); - - /* Is the cell split? */ - if ( c->split ) { - - - for ( k = 0 ; k < 8 ; k++ ) { - legacy_comp_com( c->progeny[k] , countCoMs ); - } - - /* Collect the centre of gravity and mass from the progeny. */ - for ( k = 0 ; k < 8 ; k++ ) { - cp = c->progeny[k]; - c->com_legacy[0] += cp->com_legacy[0]*cp->mass_legacy; - c->com_legacy[1] += cp->com_legacy[1]*cp->mass_legacy; - c->com_legacy[2] += cp->com_legacy[2]*cp->mass_legacy; - c->mass_legacy += cp->mass_legacy; - } - - if ( c-> mass_legacy != 0. ) - { - c->com_legacy[0] /= c->mass_legacy; - c->com_legacy[1] /= c->mass_legacy; - c->com_legacy[2] /= c->mass_legacy; - } - else - { - c->com_legacy[0] = 0.; - c->com_legacy[1] = 0.; - c->com_legacy[2] = 0.; - } - - } - - /* Otherwise, collect the multipole from local data. */ - else { - - for ( k = 0 ; k < count ; k++ ) { - p = &parts[k]; - c->com_legacy[0] += p->x[0]*p->mass; - c->com_legacy[1] += p->x[1]*p->mass; - c->com_legacy[2] += p->x[2]*p->mass; - c->mass_legacy += p->mass; - } - - if ( c-> mass_legacy != 0. ) - { - c->com_legacy[0] /= c->mass_legacy; - c->com_legacy[1] /= c->mass_legacy; - c->com_legacy[2] /= c->mass_legacy; - } - else - { - c->com_legacy[0] = 0.; - c->com_legacy[1] = 0.; - c->com_legacy[2] = 0.; - } - } - - } - - - - -/** - * @brief Interacts a particle with a cell recursively using the original B-H tree walk procedure - * - * @param parts The array of particles - * @param i The particle of interest - * @param cell The cell the particle interacts with - */ -void legacy_interact( struct part* parts, int i , struct cell* cell , int monitor, int* countMultipoles, int* countPairs) { - - int j, k; - double r2, ir, w, dx[3]; - - /* Early abort */ - if( cell->count == 0 ) - return; - - if( parts[i].id == monitor ) - printf( "Particle %d entering cell at x= %f %f %f h= %f\n", parts[i].id , cell->loc[0], cell->loc[1], cell->loc[2], cell->h[0] ); - - /* Are we in a leaf ? */ - if ( ! cell->split ) - { - if( parts[i].id == monitor ) - printf( "This is a leaf with %d particles.\n", cell->count); - - /* Interact the particle with the particles in the leaf */ - - for( j = 0 ; j < cell->count ; ++ j ) - { - if( cell->parts[j].id == parts[i].id ) - continue; - - if( parts[i].id == monitor ) - printf( "[BH_] Interaction with particle id= %d\n", cell->parts[j].id ); - - /* Compute the pairwise distance. */ - for ( r2 = 0.0 , k = 0 ; k < 3 ; k++ ) { - dx[k] = cell->parts[j].x[k] - parts[i].x[k]; - r2 += dx[k]*dx[k]; - } - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt( r2 ); - w = cell->parts[j].mass * const_G * ir * ir * ir; - for ( k = 0 ; k < 3 ; k++ ) - parts[i].a_legacy[k] += w * dx[k]; - - (*countPairs)++; - } - } - else - { - /* We are in a node */ - for ( r2 = 0.0, k = 0 ; k < 3 ; k++ ) { - dx[k] = cell->loc[k] - parts[i].x[k]; - r2 += dx[k]*dx[k]; - } - - if( parts[i].id == monitor ) - printf( "This is a node with %d particles h= %f. r= %f theta= %f\n", cell->count, cell->h[0], sqrt( r2 ), dist_min ); - - - /* Is the cell far enough ? */ - if( dist_min*dist_min*r2 > cell->h[0]*cell->h[0] ) - { - /* Interact with the monopole */ - if( parts[i].id == monitor ) - printf( "[BH_] Can interact with the monopole. x= %f %f %f m= %f h= %f\n", cell->com_legacy[0] , cell->com_legacy[1] , cell->com_legacy[2] , cell->mass_legacy , cell->h[0]); - - for ( r2 = 0.0, k = 0 ; k < 3 ; k++ ) { - dx[k] = cell->com_legacy[k] - parts[i].x[k]; - r2 += dx[k]*dx[k]; - } - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt( r2 ); - w = cell->mass_legacy * const_G * ir * ir * ir; - for ( k = 0 ; k < 3 ; k++ ) - parts[i].a_legacy[k] += w * dx[k]; - - (*countMultipoles)++; - - } - else - { - if( parts[i].id == monitor ) - printf( "Recursing...\n" ); - - /* OK, we need to recurse */ - for( k = 0 ; k < 8 ; k++ ) - legacy_interact( parts, i, cell->progeny[k] , monitor , countMultipoles , countPairs ); - } - } -} - - - -/** - * @brief Does a tree walk as in the B-H original work for all particles - * - * @param N The number of particles - * @param parts The array of particles - * @param root The root cell of the tree - * @param monitor ID of the particle to monitor and output interactions to stdout - */ -void legacy_tree_walk( int N , struct part* parts , struct cell* root , int monitor , int* countMultipoles, int* countPairs , int* countCoMs ) { - - int i; - - legacy_comp_com( root , countCoMs ); - - for( i = 0 ; i < N ; ++i ) - { - if ( parts[i].id == monitor ) - printf( "tree walk for particle %d x= %f %f %f \n", parts[i].id , parts[i].x[0] , parts[i].x[1], parts[i].x[2] ); - - legacy_interact( parts , i , root , monitor , countMultipoles , countPairs ); - - if ( parts[i].id == monitor ) - printf( "\n[LEGACY] acceleration for particle %d a= %f %f %f \n", parts[i].id , parts[i].a_legacy[0] , parts[i].a_legacy[1], parts[i].a_legacy[2] ); - - } - - -} - - - - - - - - - -/** - * @brief Solve the particle interactions using the stupid N^2 algorithm - * - * @param N The number of particles - * @param parts The array of particles - */ -void interact_exact( int N , struct part* parts , int monitor) { - - int i, j, k; - double ir, w, r2, dx[3] = {0., 0., 0.}; - - for ( i = 0 ; i < N ; ++i ) { - - for ( j = i+1 ; j < N ; ++j ) { - - /* Compute the pairwise distance. */ - for ( r2 = 0.0 , k = 0 ; k < 3 ; k++ ) { - dx[k] = parts[i].x[k] - parts[j].x[k]; - r2 += dx[k]*dx[k]; - } - - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt( r2 ); - w = const_G * ir * ir * ir; - - for ( k = 0 ; k < 3 ; k++ ) { - parts[j].a_exact[k] += w * dx[k] * parts[i].mass; - parts[i].a_exact[k] -= w * dx[k] * parts[j].mass; - } - - - } - } - - for( i = 0 ; i < N ; ++i ) - if( parts[i].id == monitor ) - printf( "[EXACT ] acceleration for particle %d a= %f %f %f \n\n", parts[i].id , parts[i].a_exact[0] , parts[i].a_exact[1], parts[i].a_exact[2] ); - -} - - - - -/** - * @brief Set up and run a task-based Barnes-Hutt N-body solver. - * - * @param N The number of random particles to use. - * @param nr_threads Number of threads to use. - */ - -void test_bh ( int N , int nr_threads , int runs ) { - - int i , k; - struct cell *root; - struct part *parts; - struct qsched s; - ticks tic, toc_run, tot_setup = 0, tot_run = 0, tic_legacy; - int countMultipoles, countPairs, countCoMs; - - - /* Runner function. */ - void runner ( int type , void *data ) { - - ticks tic = getticks(); - - /* Decode the data. */ - struct cell **d = (struct cell **)data; - - /* Decode and execute the task. */ - switch ( type ) { - case task_type_self: - iact_self( d[0] ); - break; - case task_type_pair: - iact_pair( d[0] , d[1] ); - break; - case task_type_pair_pc: - iact_pair_pc( d[0] , d[1] ); - break; - case task_type_com: - comp_com( d[0] ); - break; - default: - error( "Unknown task type." ); - } - - atomic_add( &task_timers[type] , getticks() - tic ); - - } - - /* Initialize the per-task type timers. */ - for ( k = 0 ; k < task_type_count ; k++ ) - task_timers[k] = 0; - - /* Initialize the scheduler. */ - qsched_init( &s , nr_threads , qsched_flag_noreown ); - - /* Init and fill the particle array. */ - if ( ( parts = (struct part *)malloc( sizeof(struct part) * N ) ) == NULL ) - error( "Failed to allocate particle buffer." ); - - // int hN = ceil( cbrt( N ) ); - // double h = 1.0 / hN; - for ( k = 0 ; k < N ; k++ ) { - parts[k].id = k; - parts[k].x[0] = ((double)rand())/RAND_MAX; // h*(k % hN); - parts[k].x[1] = ((double)rand())/RAND_MAX; // h*((k / hN) % hN); - parts[k].x[2] = ((double)rand())/RAND_MAX; // h*(k / hN / hN); - parts[k].mass = ((double)rand())/RAND_MAX; // 1.0; - parts[k].a[0] = 0.0; - parts[k].a[1] = 0.0; - parts[k].a[2] = 0.0; - } - - /* Init the cells. */ - root = cell_get(); - root->loc[0] = 0.0; root->loc[1] = 0.0; root->loc[2] = 0.0; - root->h[0] = 1.0; root->h[1] = 1.0; root->h[2] = 1.0; - root->count = N; - root->parts = parts; - cell_split( root , &s ); - - printf("----------------------------------------------------------\n"); - - /* Do a traditional (serial) B-H tree walk */ - tic_legacy = getticks(); - countMultipoles = 0; - countPairs = 0; - countCoMs = 0; - legacy_tree_walk( N , parts , root , ICHECK , &countMultipoles , &countPairs , &countCoMs ); - printf( "Legacy B-H walk (1 thread) took %lli (= %e) ticks\n", getticks() - tic_legacy , (float)(getticks() - tic_legacy) ); - - - - /* Do a N^2 interactions calculation */ - interact_exact( N , parts , ICHECK ); - - printf("----------------------------------------------------------\n"); - - /* Create the tasks. */ - tic = getticks(); - create_tasks( &s , root , NULL ); - tot_setup += getticks() - tic; - - /* Dump the number of tasks. */ - message( "total nr of tasks: %i." , s.count ); - message( "total nr of deps: %i." , s.count_deps ); - message( "total nr of res: %i." , s.count_res ); - message( "total nr of locks: %i." , s.count_locks ); - message( "total nr of uses: %i." , s.count_uses ); - int counts[ task_type_count ]; - for ( k = 0 ; k < task_type_count ; k++ ) - counts[k] = 0; - for ( k = 0 ; k < s.count ; k++ ) - counts[ s.tasks[k].type ] += 1; - printf( "task counts: [ %8s %8s %8s %8s ]\n" , "self", "direct" , "m-poles" , "CoMs" ); - printf( "task counts: [ " ); - for ( k = 0 ; k < task_type_count ; k++ ) - printf( "%8i " , counts[k] ); - printf( "].\n" ); - - printf( "task counts: [ %8i %8i %8i %8i ].\n" , 0 , countPairs , countMultipoles , countCoMs ); - - /* Loop over the number of runs. */ - for ( k = 0 ; k < runs ; k++ ) { - - for ( i = 0 ; i < N ; ++i ) { - parts[i].a[0] = 0.; - parts[i].a[1] = 0.; - parts[i].a[2] = 0.; - } - - /* Execute the tasks. */ - tic = getticks(); - qsched_run( &s , nr_threads , runner ); - toc_run = getticks(); - message( "%ith run took %lli (= %e) ticks..." , k , toc_run - tic , (float)(toc_run - tic) ); - tot_run += toc_run - tic; - - } - - /* Dump the tasks. */ - /* for ( k = 0 ; k < s.count ; k++ ) - printf( " %i %i %lli %lli\n" , s.tasks[k].type , s.tasks[k].qid , s.tasks[k].tic , s.tasks[k].toc ); */ - - /* Dump the costs. */ - message( "costs: setup=%lli ticks, run=%lli ticks." , - tot_setup , tot_run/runs ); - - /* Dump the timers. */ - for ( k = 0 ; k < qsched_timer_count ; k++ ) - message( "timer %s is %lli ticks." , qsched_timer_names[k] , s.timers[k]/runs ); - - /* Dump the per-task type timers. */ - printf( "task timers: [ " ); - for ( k = 0 ; k < task_type_count ; k++ ) - printf( "%lli " , task_timers[k]/runs ); - printf( "] ticks.\n" ); - - - /* Dump the particles to a file */ - FILE* file = fopen( "particle_dump.dat" , "w" ); - fprintf(file, "# a_exact.x a_exact.y a_exact.z a_legacy.x a_legacy.y a_legacy.z a_new.x a_new.y a_new.z\n"); - for ( k = 0 ; k < N ; ++k ) - fprintf ( file , "%d %e %e %e %e %e %e %e %e %e\n" , - parts[k].id, - parts[k].a_exact[0] , parts[k].a_exact[1] , parts[k].a_exact[2] , - parts[k].a_legacy[0] , parts[k].a_legacy[1] , parts[k].a_legacy[2] , - parts[k].a[0] , parts[k].a[1] , parts[k].a[2] ); - fclose( file ); - - /* Clean up. */ - qsched_free( &s ); - - } - - -/** - * @brief Main function. - */ - -int main ( int argc , char *argv[] ) { - - int c, nr_threads; - int N = 1000, runs = 1; - - /* Get the number of threads. */ - #pragma omp parallel shared(nr_threads) - { - if ( omp_get_thread_num() == 0 ) - nr_threads = omp_get_num_threads(); - } - - /* Parse the options */ - while ( ( c = getopt( argc , argv , "n:r:t:" ) ) != -1 ) - switch( c ) { - case 'n': - if ( sscanf( optarg , "%d" , &N ) != 1 ) - error( "Error parsing number of particles." ); - break; - case 'r': - if ( sscanf( optarg , "%d" , &runs ) != 1 ) - error( "Error parsing number of runs." ); - break; - case 't': - if ( sscanf( optarg , "%d" , &nr_threads ) != 1 ) - error( "Error parsing number of threads." ); - omp_set_num_threads( nr_threads ); - break; - case '?': - fprintf( stderr , "Usage: %s [-t nr_threads] [-n N] [-r runs]\n" , argv[0] ); - fprintf( stderr , "Solves the N-body problem using a Barnes-Hutt\n" - "tree code with N random particles in [0,1]^3 using\n" - "nr_threads threads.\n" ); - exit( EXIT_FAILURE ); - } - - /* Dump arguments. */ - message( "Computing the N-body problem over %i particles using %i threads (%i runs)." , - N , nr_threads , runs ); - - /* Run the test. */ - test_bh( N , nr_threads, runs ); - - return 0; - - } - - diff --git a/examples/test_bh_3.c b/examples/test_bh_3.c deleted file mode 100644 index 9f17a852c12c112d9600757a8390610424cf817d..0000000000000000000000000000000000000000 --- a/examples/test_bh_3.c +++ /dev/null @@ -1,1285 +0,0 @@ -/******************************************************************************* - * This file is part of QuickSched. - * Coypright (c) 2014 Pedro Gonnet (pedro.gonnet@durham.ac.uk), - * Matthieu Schaller (matthieu.schaller@durham.ac.uk) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * -* *****************************************************************************/ - -/* Config parameters. */ -#include "../config.h" - -/* Standard includes. */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <math.h> -#include <omp.h> - -/* Local includes. */ -#include "quicksched.h" - -/* Some local constants. */ -#define cell_pool_grow 1000 -#define cell_maxparts 16 -#define task_limit 5000 -#define const_G 6.6738e-8 -#define dist_min 0.5 // 0.5 - -#define ICHECK -1 - -/** Data structure for the particles. */ -struct part { - double x[3]; - double a[3]; - double a_legacy[3]; - double a_exact[3]; - double mass; - int id; -}; - -/** Data structure for the BH tree cell. */ -struct cell { - double loc[3]; - double h; - - int split, count; - struct part *parts; - - struct cell *firstchild; /* Next node if opening */ - struct cell *sibling; /* Next node */ - - /* We keep both CoMs and masses to make sure the comp_com calculation is - * correct (use an anonymous union to keep variable names compact). */ - union { - - /* Information for the legacy walk */ - struct { - double com[3]; - double mass; - } legacy; - - /* Information for the QuickShed walk */ - struct { - double com[3]; - double mass; - } new; - }; - - int res, com_tid; - -} __attribute__((aligned(128))); - -/** Task types. */ -enum task_type { - task_type_self = 0, - task_type_pair, - task_type_pair_pc, - task_type_com, - task_type_count -}; - -/** Per-type timers. */ -ticks task_timers[task_type_count]; - -/** Global variable for the pool of allocated cells. */ -struct cell *cell_pool = NULL; - -/** - * @brief Get a #cell from the pool. - */ -struct cell *cell_get() { - - struct cell *res; - int k; - - /* Allocate a new batch? */ - if (cell_pool == NULL) { - - /* Allocate the cell array. */ - if ((cell_pool = - (struct cell *)calloc(cell_pool_grow, sizeof(struct cell))) == - NULL) - error("Failed to allocate fresh batch of cells."); - - /* Link them up via their progeny pointers. */ - for (k = 1; k < cell_pool_grow; k++) - cell_pool[k - 1].firstchild = &cell_pool[k]; - } - - /* Pick a cell off the pool. */ - res = cell_pool; - cell_pool = cell_pool->firstchild; - - /* Clean up a few things. */ - res->res = qsched_res_none; - res->firstchild = 0; - - /* Return the cell. */ - return res; -} - -/** - * @brief Sort the parts into eight bins along the given pivots and - * fill the multipoles. Also adds the hierarchical resources - * to the sched. - * - * @param c The #cell to be split. - * @param N The total number of parts. - * @param s The #sched to store the resources. - */ -void cell_split(struct cell *c, struct qsched *s) { - - int i, j, k, kk, count = c->count; - struct part temp, *parts = c->parts; - struct cell *cp; - int left[8], right[8]; - double pivot[3]; - static struct cell *root = NULL; - struct cell *progenitors[8]; - - /* Set the root cell. */ - if (root == NULL) { - root = c; - // c->depth = 0; - // c->parent = 0; - c->sibling = 0; - } - - /* Add a resource for this cell if it doesn't have one yet. */ - if (c->res == qsched_res_none) - c->res = qsched_addres(s, qsched_owner_none, qsched_res_none); - - /* Attach a center-of-mass task to the cell. */ - if (count > 0) - c->com_tid = qsched_addtask(s, task_type_com, task_flag_none, &c, - sizeof(struct cell *), 1); - - /* Does this cell need to be split? */ - if (count > cell_maxparts) { - - /* Mark this cell as split. */ - c->split = 1; - - /* Create the progeny. */ - for (k = 0; k < 8; k++) { - progenitors[k] = cp = cell_get(); - // cp->parent = c; - // cp->depth = c->depth + 1; - cp->loc[0] = c->loc[0]; - cp->loc[1] = c->loc[1]; - cp->loc[2] = c->loc[2]; - cp->h = c->h / 2; - cp->h = c->h / 2; - cp->h = c->h / 2; - cp->res = qsched_addres(s, qsched_owner_none, c->res); - if (k & 4) cp->loc[0] += cp->h; - if (k & 2) cp->loc[1] += cp->h; - if (k & 1) cp->loc[2] += cp->h; - } - - /* Init the pivots. */ - for (k = 0; k < 3; k++) pivot[k] = c->loc[k] + c->h / 2; - - /* Split along the x-axis. */ - i = 0; - j = count - 1; - while (i <= j) { - while (i <= count - 1 && parts[i].x[0] < pivot[0]) i += 1; - while (j >= 0 && parts[j].x[0] >= pivot[0]) j -= 1; - if (i < j) { - temp = parts[i]; - parts[i] = parts[j]; - parts[j] = temp; - } - } - left[1] = i; - right[1] = count - 1; - left[0] = 0; - right[0] = j; - - /* Split along the y axis, twice. */ - for (k = 1; k >= 0; k--) { - i = left[k]; - j = right[k]; - while (i <= j) { - while (i <= right[k] && parts[i].x[1] < pivot[1]) i += 1; - while (j >= left[k] && parts[j].x[1] >= pivot[1]) j -= 1; - if (i < j) { - temp = parts[i]; - parts[i] = parts[j]; - parts[j] = temp; - } - } - left[2 * k + 1] = i; - right[2 * k + 1] = right[k]; - left[2 * k] = left[k]; - right[2 * k] = j; - } - - /* Split along the z axis, four times. */ - for (k = 3; k >= 0; k--) { - i = left[k]; - j = right[k]; - while (i <= j) { - while (i <= right[k] && parts[i].x[2] < pivot[2]) i += 1; - while (j >= left[k] && parts[j].x[2] >= pivot[2]) j -= 1; - if (i < j) { - temp = parts[i]; - parts[i] = parts[j]; - parts[j] = temp; - } - } - left[2 * k + 1] = i; - right[2 * k + 1] = right[k]; - left[2 * k] = left[k]; - right[2 * k] = j; - } - - /* Store the counts and offsets. */ - for (k = 0; k < 8; k++) { - progenitors[k]->count = right[k] - left[k] + 1; - progenitors[k]->parts = &c->parts[left[k]]; - } - - /* Prepare the pointers. */ - for (k = 0; k < 7; k++) { - - /* Find the next non-empty sibling */ - for (kk = k + 1; kk < 8; ++kk) { - if (progenitors[kk]->count > 0) { - progenitors[k]->sibling = progenitors[kk]; - break; - } - } - - /* No non-empty sibling ? Go back a level */ - if (kk == 8) progenitors[k]->sibling = c->sibling; - } - - /* Last progenitor links to the next sibling */ - progenitors[7]->sibling = c->sibling; - c->firstchild = progenitors[0]; - - /* Recurse. */ - for (k = 0; k < 8; k++) cell_split(progenitors[k], s); - - /* Link the COM tasks. */ - for (k = 0; k < 8; k++) - if (progenitors[k]->count > 0) - qsched_addunlock(s, progenitors[k]->com_tid, c->com_tid); - - } /* does the cell need to be split? */ - - /* Set this cell's resources ownership. */ - qsched_res_own(s, c->res, - s->nr_queues * (c->parts - root->parts) / root->count); -} - -/* -------------------------------------------------------------------------- */ -/* New tree walk */ -/* -------------------------------------------------------------------------- */ - -/** - * @brief Compute the center of mass of a given cell. - * - * @param c The #cell. - */ -void comp_com(struct cell *c) { - - int k, count = c->count; - struct cell *cp; - struct part *p, *parts = c->parts; - double com[3] = {0.0, 0.0, 0.0}, mass = 0.0; - - if (c->split) { - - /* Loop over the projenitors and collect the multipole information. */ - for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { - double cp_mass = cp->new.mass; - com[0] += cp->new.com[0] * cp_mass; - com[1] += cp->new.com[1] * cp_mass; - com[2] += cp->new.com[2] * cp_mass; - mass += cp_mass; - } - - /* Otherwise, collect the multipole from the particles. */ - } else { - - for (k = 0; k < count; k++) { - p = &parts[k]; - double p_mass = p->mass; - com[0] += p->x[0] * p_mass; - com[1] += p->x[1] * p_mass; - com[2] += p->x[2] * p_mass; - mass += p_mass; - } - } - - /* Store the COM data, if any was collected. */ - if (mass > 0.0) { - double imass = 1.0 / mass; - c->new.com[0] = com[0] * imass; - c->new.com[1] = com[1] * imass; - c->new.com[2] = com[2] * imass; - c->new.mass = mass; - } else { - c->new.com[0] = 0.0; - c->new.com[1] = 0.0; - c->new.com[2] = 0.0; - c->new.mass = 0.0; - } -} - -/** - * @brief Compute the interactions between all particles in a cell - * and the center of mass of another cell. - * - * @param ci The #cell containing the particles. - * @param cj The #cell containing the center of mass. - */ -void iact_pair_pc(struct cell *ci, struct cell *cj) { - int j, k, count = ci->count; - double com[3], mcom, dx[3], r2, ir, w; - struct part *parts = ci->parts; - - /* Early abort? */ - if (count == 0 || cj->count == 0) return; - - /* message( "ci=[%.3e,%.3e,%.3e], cj=[%.3e,%.3e,%.3e], h=%.3e/%.3e.", - ci->loc[0], ci->loc[1], ci->loc[2], - cj->loc[0], cj->loc[1], cj->loc[2], - ci->h, cj->h ); */ - - /* Sanity check. */ - if (cj->new.mass == 0.0) { - message("%e %e %e %d %p %d %p", cj->new.com[0], cj->new.com[1], - cj->new.com[2], cj->count, cj, cj->split, cj->sibling); - - for (j = 0; j < cj->count; ++j) - message("part %d mass=%e id=%d", j, cj->parts[j].mass, cj->parts[j].id); - - error("com does not seem to have been set."); - } - - /* Corectness check */ - if (cj->new.mass != cj->legacy.mass) - error("Calculation of the CoM is wrong! m_new=%e m_legacy=%e", cj->new.mass, - cj->legacy.mass); - - /* Init the com's data. */ - for (k = 0; k < 3; k++) com[k] = cj->new.com[k]; - mcom = cj->new.mass; - - /* Loop over every particle in ci. */ - for (j = 0; j < count; j++) { - - /* Compute the pairwise distance. */ - for (r2 = 0.0, k = 0; k < 3; k++) { - dx[k] = com[k] - parts[j].x[k]; - r2 += dx[k] * dx[k]; - } - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt(r2); - w = mcom * const_G * ir * ir * ir; - for (k = 0; k < 3; k++) parts[j].a[k] += w * dx[k]; - -#if ICHECK >= 0 - if (parts[j].id == ICHECK) - printf("[NEW] Can interact with the monopole. x= %f %f %f m= %f h= %f\n", - com[0], com[1], com[2], mcom, cj->h); -#endif - - } /* loop over every particle. */ -} - -/** - * @brief Compute the interactions between all particles in a cell. - * - * @param ci The #cell. - * @param cj The other #cell. - */ -void iact_pair(struct cell *ci, struct cell *cj) { - - int i, j, k; - int count_i = ci->count, count_j = cj->count; - double dx[3], xi[3], ai[3], mi, mj, r2, r2_i, r2_j, w, ir; - double cih = ci->h, cjh = cj->h; - struct part *parts_i = ci->parts, *parts_j = cj->parts; - struct cell *cp; - - /* Early abort? */ - if (count_i == 0 || count_j == 0) return; - - /* Sanity check */ - if (ci == cj) - error("The impossible has happened: pair interaction between a cell and " - "itself."); // debug - - /* Distance between the CoMs */ - r2 = 0.0; - r2_i = 0.0; - r2_j = 0.0; - for (k = 0; k < 3; k++) { - // dx[k] = fabs( ci->new.com[k] - cj->new.com[k] ); - dx[k] = fabs(ci->loc[k] - cj->loc[k]); - - r2 += dx[k] * dx[k]; - r2_i += (dx[k] - 0.5 * cih) * (dx[k] - 0.5 * cih); - r2_j += (dx[k] - 0.5 * cjh) * (dx[k] - 0.5 * cjh); - } - - /* If ci and cj are sufficiently well separated, split this interaction - into a pair of particle-cell interactions. */ - if ((dist_min * dist_min * r2_j > ci->h * ci->h) && - (dist_min * dist_min * r2_i > cj->h * cj->h)) { - iact_pair_pc(ci, cj); - iact_pair_pc(cj, ci); - - /* Otherwise, if neither cell is split, compute the particle-particle - interactions directly. */ - } else if (!ci->split && !cj->split) { - - /* Loop over all particles in ci... */ - for (i = 0; i < count_i; i++) { - - /* Init the ith particle's data. */ - for (k = 0; k < 3; k++) { - xi[k] = parts_i[i].x[k]; - ai[k] = 0.0; - } - mi = parts_i[i].mass; - - /* Loop over every following particle. */ - for (j = 0; j < count_j; j++) { - - /* Compute the pairwise distance. */ - for (r2 = 0.0, k = 0; k < 3; k++) { - dx[k] = xi[k] - parts_j[j].x[k]; - r2 += dx[k] * dx[k]; - } - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt(r2); - w = const_G * ir * ir * ir; - mj = parts_j[j].mass; - for (k = 0; k < 3; k++) { - double wdx = w * dx[k]; - parts_j[j].a[k] += wdx * mi; - ai[k] -= wdx * mj; - } - -#if ICHECK >= 0 - if (parts_i[i].id == ICHECK) - printf("[NEW] Interaction with particle id= %d (pair i)\n", - parts_j[j].id); - - if (parts_j[j].id == ICHECK) - printf("[NEW] Interaction with particle id= %d (pair j) h_i= %f h_j= " - "%f ci= %p cj= %p count_i= %d count_j= %d d_i= %d d_j= %d\n", - parts_i[i].id, ci->h, cj->h, ci, cj, count_i, count_j, ci->res, - cj->res); -#endif - - } /* loop over every other particle. */ - - /* Store the accumulated acceleration on the ith part. */ - for (k = 0; k < 3; k++) parts_i[i].a[k] += ai[k]; - - } /* loop over all particles. */ - - /* Otherwise, compute the interaction recursively over the progeny. */ - } else { - - /* We can split one of the two cells. Let's try the biggest one first.*/ - if (ci->h > cj->h) { - if (ci->split) { - for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) - iact_pair(cp, cj); - } else if (cj->split) { - for (cp = cj->firstchild; cp != cj->sibling; cp = cp->sibling) - iact_pair(ci, cp); - } - - } else { - if (cj->split) { - for (cp = cj->firstchild; cp != cj->sibling; cp = cp->sibling) - iact_pair(ci, cp); - } else if (ci->split) { - for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) - iact_pair(cp, cj); - } - } - } -} - -/** - * @brief Compute the interactions between all particles in a cell. - * - * @param c The #cell. - */ -void iact_self(struct cell *c) { - int i, j, k, count = c->count; - double xi[3], ai[3], mi, mj, dx[3], r2, ir, w; - struct part *parts = c->parts; - struct cell *cp, *cps; - - /* Early abort? */ - if (count == 0) return; - - /* message( "cell=[%.3e,%.3e,%.3e], h=%.3e.", - c->loc[0], c->loc[1], c->loc[2], c->h ); */ - - /* If the cell is split, interact each progeny with itself, and with - each of its siblings. */ - if (c->split) { - for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { - iact_self(cp); - for (cps = cp->sibling; cps != c->sibling; cps = cps->sibling) - iact_pair(cp, cps); - } - - /* Otherwise, compute the interactions directly. */ - } else { - - /* Loop over all particles... */ - for (i = 0; i < count; i++) { - - /* Init the ith particle's data. */ - for (k = 0; k < 3; k++) { - xi[k] = parts[i].x[k]; - ai[k] = 0.0; - } - mi = parts[i].mass; - - /* Loop over every following particle. */ - for (j = i + 1; j < count; j++) { - - /* Compute the pairwise distance. */ - for (r2 = 0.0, k = 0; k < 3; k++) { - dx[k] = xi[k] - parts[j].x[k]; - r2 += dx[k] * dx[k]; - } - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt(r2); - w = const_G * ir * ir * ir; - mj = parts[j].mass; - for (k = 0; k < 3; k++) { - double wdx = w * dx[k]; - parts[j].a[k] += wdx * mi; - ai[k] -= wdx * mj; - } - -#if ICHECK >= 0 - if (parts[i].id == ICHECK) - message("[NEW] Interaction with particle id= %d (self i)", - parts[j].id); - - if (parts[j].id == ICHECK) - message("[NEW] Interaction with particle id= %d (self j)", - parts[i].id); -#endif - - } /* loop over every other particle. */ - - /* Store the accumulated acceleration on the ith part. */ - for (k = 0; k < 3; k++) parts[i].a[k] += ai[k]; - - } /* loop over all particles. */ - - } /* otherwise, compute interactions directly. */ -} - -/** - * @brief Create the tasks for the cell pair/self. - * - * @param s The #sched in which to create the tasks. - * @param ci The first #cell. - * @param cj The second #cell. - */ -void create_tasks(struct qsched *s, struct cell *ci, struct cell *cj) { - - int k; - qsched_task_t tid; - struct cell *data[2], *cp, *cps; - - /* If either cell is empty, stop. */ - if (ci->count == 0 || (cj != NULL && cj->count == 0)) return; - - /* Single cell? */ - if (cj == NULL) { - - /* Is this cell split? */ - if (ci->split && ci->count > task_limit) { - - /* Loop over each of this cell's progeny. */ - for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) { - - /* Make self-interaction task. */ - create_tasks(s, cp, NULL); - - /* Make all pair-interaction tasks. */ - for (cps = cp->sibling; cps != ci->sibling; cps = cps->sibling) - create_tasks(s, cp, cps); - } - - /* Otherwise, add a self-interaction task. */ - } else { - - /* Set the data. */ - data[0] = ci; - data[1] = NULL; - - /* Create the task. */ - tid = - qsched_addtask(s, task_type_self, task_flag_none, data, - sizeof(struct cell *) * 2, ci->count * ci->count / 2); - - /* Add the resource (i.e. the cell) to the new task. */ - qsched_addlock(s, tid, ci->res); - - /* If this call might recurse, add a dependency on the cell's COM - task. */ - if (ci->split) qsched_addunlock(s, ci->com_tid, tid); - } - - /* Otherwise, it's a pair. */ - } else { - - double dx, r2 = 0.0, r2_i = 0.0, r2_j = 0.0; - - /* Distance between the cells */ - for (k = 0; k < 3; k++) { - dx = fabs(ci->loc[k] - cj->loc[k]); - - r2 += dx * dx; - r2_i += (dx - 0.5 * ci->h) * (dx - 0.5 * ci->h); - r2_j += (dx - 0.5 * cj->h) * (dx - 0.5 * cj->h); - } - - /* Check whether we can use the multipoles. */ - if ((dist_min * dist_min * r2_j > ci->h * ci->h) && - (dist_min * dist_min * r2_i > cj->h * cj->h)) { - data[0] = ci; - data[1] = cj; - tid = qsched_addtask(s, task_type_pair_pc, task_flag_none, data, - sizeof(struct cell *) * 2, ci->count); - qsched_addlock(s, tid, ci->res); - qsched_addunlock(s, cj->com_tid, tid); - - data[0] = cj; - data[1] = ci; - tid = qsched_addtask(s, task_type_pair_pc, task_flag_none, data, - sizeof(struct cell *) * 2, cj->count); - qsched_addlock(s, tid, cj->res); - qsched_addunlock(s, ci->com_tid, tid); - - /* Otherwise, if neither cells are split, generate a part-part task. */ - } else if (!ci->split && !cj->split) { - - /* Set the data. */ - data[0] = ci; - data[1] = cj; - - /* Create the task. */ - tid = qsched_addtask(s, task_type_pair, task_flag_none, data, - sizeof(struct cell *) * 2, ci->count * cj->count); - - /* Add the resources. */ - qsched_addlock(s, tid, ci->res); - qsched_addlock(s, tid, cj->res); - qsched_addunlock(s, ci->com_tid, tid); - qsched_addunlock(s, cj->com_tid, tid); - - /* Otherwise, compute the interaction recursively over the progeny. */ - } else if (ci->count > task_limit && cj->count > task_limit) { - - /* We can split one of the two cells. Let's try the biggest one */ - if (ci->h > cj->h) { - - if (ci->split) { - for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) - create_tasks(s, cp, cj); - } else if (cj->split) { - for (cp = cj->firstchild; cp != cj->sibling; cp = cp->sibling) - create_tasks(s, ci, cp); - } - - } else { - - if (cj->split) { - for (cp = cj->firstchild; cp != cj->sibling; cp = cp->sibling) - create_tasks(s, ci, cp); - } else if (ci->split) { - for (cp = ci->firstchild; cp != ci->sibling; cp = cp->sibling) - create_tasks(s, cp, cj); - } - } - - /* Create a task if too few particles */ - } else { - /* Set the data. */ - data[0] = ci; - data[1] = cj; - - /* Create the task. */ - tid = qsched_addtask(s, task_type_pair, task_flag_none, data, - sizeof(struct cell *) * 2, ci->count * cj->count); - - /* Add the resources. */ - qsched_addlock(s, tid, ci->res); - qsched_addlock(s, tid, cj->res); - - /* Depend on the COMs in case this task recurses. */ - if (ci->split || cj->split) { - qsched_addunlock(s, ci->com_tid, tid); - qsched_addunlock(s, cj->com_tid, tid); - } - } - - } /* otherwise, it's a pair. */ -} - -/* -------------------------------------------------------------------------- */ -/* Legacy tree walk */ -/* -------------------------------------------------------------------------- */ - -/** - * @brief Compute the center of mass of a given cell recursively. - * - * @param c The #cell. - */ -void legacy_comp_com(struct cell *c, int *countCoMs) { - - int k, count = c->count; - struct cell *cp; - struct part *p, *parts = c->parts; - double com[3] = {0.0, 0.0, 0.0}, mass = 0.0; - - ++(*countCoMs); - - /* Is the cell split? */ - if (c->split) { - - /* Loop over the progeny. */ - for (cp = c->firstchild; cp != c->sibling; cp = cp->sibling) { - /* Recurse */ - legacy_comp_com(cp, countCoMs); - - /* Collect multipole information */ - double cp_mass = cp->legacy.mass; - com[0] += cp->legacy.com[0] * cp_mass; - com[1] += cp->legacy.com[1] * cp_mass; - com[2] += cp->legacy.com[2] * cp_mass; - mass += cp_mass; - } - - /* Otherwise, collect the multipole from local data. */ - } else { - - for (k = 0; k < count; k++) { - p = &parts[k]; - double p_mass = p->mass; - com[0] += p->x[0] * p_mass; - com[1] += p->x[1] * p_mass; - com[2] += p->x[2] * p_mass; - mass += p_mass; - } - } - - /* Finish multipole calculation */ - if (mass > 0.0) { - double imass = 1.0 / mass; - c->legacy.com[0] = com[0] * imass; - c->legacy.com[1] = com[1] * imass; - c->legacy.com[2] = com[2] * imass; - c->legacy.mass = mass; - } else { - c->legacy.com[0] = 0.0; - c->legacy.com[1] = 0.0; - c->legacy.com[2] = 0.0; - c->legacy.mass = 0.0; - } - -} - -/** - * @brief Interacts a particle with a cell recursively using the original B-H - * tree walk procedure - * - * @param parts The array of particles - * @param i The particle of interest - * @param root The root of the tree under which we will search. - * @param monitor If set to @c parts[i].id, will produce debug output when - * ICHECK is set. - * @param cell The cell the particle interacts with - */ -void legacy_interact(struct part *parts, int i, struct cell *root, int monitor, - int *countMultipoles, int *countPairs) { - - int j, k; - double r2, dx[3], ir, w; - double a[3] = {0.0, 0.0, 0.0}; - double pix[3] = {parts[i].x[0], parts[i].x[1], parts[i].x[2]}; - int pid = parts[i].id; - struct cell *cell = root; - - /* Traverse the cells of the tree. */ - while (cell != NULL) { - - /* Are we in a leaf ? */ - if (!cell->split) { - - /* Interact the particle with the particles in the leaf */ - for (j = 0; j < cell->count; ++j) { - if (cell->parts[j].id == pid) continue; - -#if ICHECK >= 0 - if (pid == monitor) - message("[BH_] Interaction with particle id= %d", cell->parts[j].id); -#endif - - /* Compute the pairwise distance. */ - for (r2 = 0.0, k = 0; k < 3; k++) { - dx[k] = cell->parts[j].x[k] - pix[k]; - r2 += dx[k] * dx[k]; - } - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt(r2); - w = cell->parts[j].mass * const_G * ir * ir * ir; - for (k = 0; k < 3; k++) a[k] += w * dx[k]; - - (*countPairs)++; - } - - cell = cell->sibling; - } else { - - /* We are in a node */ - for (r2 = 0.0, k = 0; k < 3; k++) { - dx[k] = cell->legacy.com[k] - pix[k]; - r2 += dx[k] * dx[k]; - } - -#if ICHECK >= 0 - if (pid == monitor) - message("This is a node with %d particles h= %f. r= %f theta= %f", - cell->count, cell->h, sqrt(r2), dist_min); -#endif - - /* Is the cell far enough ? */ - if (dist_min * dist_min * r2 < cell->h * cell->h) { - -#if ICHECK >= 0 - if (pid == monitor) printf("Recursing...\n"); -#endif - cell = cell->firstchild; - continue; - } - -#if ICHECK >= 0 - if (pid == monitor) - message("[BH_] Can interact with the monopole. x= %f %f %f m= %f h= %f", - cell->legacy.com[0], cell->legacy.com[1], cell->legacy.com[2], - cell->legacy.mass, cell->h); -#endif - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt(r2); - w = cell->legacy.mass * const_G * ir * ir * ir; - for (k = 0; k < 3; k++) a[k] += w * dx[k]; - - (*countMultipoles)++; - - /* Move to the next node */ - cell = cell->sibling; - } - } - - /* Store the locally computed acceleration back to the particle. */ - for (k = 0; k < 3; k++) parts[i].a_legacy[k] += a[k]; -} - -/** - * @brief Does a tree walk as in the B-H original work for all particles - * - * @param N The number of particles - * @param parts The array of particles - * @param root The root cell of the tree - * @param monitor ID of the particle to monitor and output interactions to - * stdout - */ -void legacy_tree_walk(int N, struct part *parts, struct cell *root, int monitor, - int *countMultipoles, int *countPairs, int *countCoMs) { - - int i; - - /* Compute multipoles (recursively) */ - legacy_comp_com(root, countCoMs); - - //#pragma omp parallel for - for (i = 0; i < N; ++i) { - if (parts[i].id == monitor) - message("tree walk for particle %d x= %f %f %f", parts[i].id, - parts[i].x[0], parts[i].x[1], parts[i].x[2]); - - legacy_interact(parts, i, root, monitor, countMultipoles, countPairs); - - if (parts[i].id == monitor) - message("\n[LEGACY] acceleration for particle %d a= %f %f %f", - parts[i].id, parts[i].a_legacy[0], parts[i].a_legacy[1], - parts[i].a_legacy[2]); - } -} - -/* -------------------------------------------------------------------------- */ -/* Exact interaction */ -/* -------------------------------------------------------------------------- */ - -/** - * @brief Solve the particle interactions using the stupid N^2 algorithm - * - * @param N The number of particles - * @param parts The array of particles - */ -void interact_exact(int N, struct part *parts, int monitor) { - - int i, j, k; - double ir, w, r2, dx[3]; - - /* Loop over all particles. */ - for (i = 0; i < N; ++i) { - - /* Some things to store locally. */ - double pix[3] = {parts[i].x[0], parts[i].x[1], parts[i].x[2]}; - double mi = parts[i].mass; - - /* Loop over every other particle. */ - for (j = i + 1; j < N; ++j) { - - /* Compute the pairwise distance. */ - for (r2 = 0.0, k = 0; k < 3; k++) { - dx[k] = parts[j].x[k] - pix[k]; - r2 += dx[k] * dx[k]; - } - - /* Apply the gravitational acceleration. */ - ir = 1.0 / sqrt(r2); - w = const_G * ir * ir * ir; - - for (k = 0; k < 3; k++) { - double wdx = w * dx[k]; - parts[j].a_exact[k] -= wdx * mi; - parts[i].a_exact[k] += wdx * parts[j].mass; - } - } - - } - - for (i = 0; i < N; ++i) - if (parts[i].id == monitor) - message("[EXACT ] acceleration for particle %d a= %f %f %f\n", - parts[i].id, parts[i].a_exact[0], parts[i].a_exact[1], - parts[i].a_exact[2]); -} - -/** - * @brief Set up and run a task-based Barnes-Hutt N-body solver. - * - * @param N The number of random particles to use. - * @param nr_threads Number of threads to use. - * @param runs Number of force evaluations to use as a benchmark. - * @param fileName Input file name. If @c NULL or an empty string, random - * particle positions will be used. - */ -void test_bh(int N, int nr_threads, int runs, char *fileName) { - - int i, k; - struct cell *root; - struct part *parts; - FILE *file; - struct qsched s; - ticks tic, toc_run, tot_setup = 0, tot_run = 0, tic_exact, toc_exact; - int countMultipoles, countPairs, countCoMs; - - /* Runner function. */ - void runner(int type, void * data) { - - ticks tic = getticks(); - - /* Decode the data. */ - struct cell **d = (struct cell **)data; - - /* Decode and execute the task. */ - switch (type) { - case task_type_self: - iact_self(d[0]); - break; - case task_type_pair: - iact_pair(d[0], d[1]); - break; - case task_type_pair_pc: - iact_pair_pc(d[0], d[1]); - break; - case task_type_com: - comp_com(d[0]); - break; - default: - error("Unknown task type."); - } - - atomic_add(&task_timers[type], getticks() - tic); - } - - /* Initialize the per-task type timers. */ - for (k = 0; k < task_type_count; k++) task_timers[k] = 0; - - /* Initialize the scheduler. */ - qsched_init(&s, nr_threads, qsched_flag_noreown); - - /* Init and fill the particle array. */ - if ((parts = (struct part *)malloc(sizeof(struct part) * N)) == NULL) - error("Failed to allocate particle buffer."); - - /* If no input file was specified, generate random particle positions. */ - if (fileName == NULL || fileName[0] == 0) { - for (k = 0; k < N; k++) { - parts[k].id = k; - parts[k].x[0] = ((double)rand()) / RAND_MAX; - parts[k].x[1] = ((double)rand()) / RAND_MAX; - parts[k].x[2] = ((double)rand()) / RAND_MAX; - parts[k].mass = ((double)rand()) / RAND_MAX; - parts[k].a_legacy[0] = 0.0; - parts[k].a_legacy[1] = 0.0; - parts[k].a_legacy[2] = 0.0; - } - - /* Otherwise, read them from a file. */ - } else { - file = fopen(fileName, "r"); - if (file) { - for (k = 0; k < N; k++) { - if (fscanf(file, "%d", &parts[k].id) != 1) - error("Failed to read ID of part %i.", k); - if (fscanf(file, "%lf%lf%lf", &parts[k].x[0], &parts[k].x[1], - &parts[k].x[2]) != - 3) - error("Failed to read position of part %i.", k); - if (fscanf(file, "%lf", &parts[k].mass) != 1) - error("Failed to read mass of part %i.", k); - } - fclose(file); - } - } - - /* Init the cells. */ - root = cell_get(); - root->loc[0] = 0.0; - root->loc[1] = 0.0; - root->loc[2] = 0.0; - root->h = 1.0; - root->count = N; - root->parts = parts; - cell_split(root, &s); - - printf("----------------------------------------------------------\n"); - - /* Do a N^2 interactions calculation */ - - tic_exact = getticks(); - //interact_exact( N , parts , ICHECK ); - toc_exact = getticks(); - - printf("Exact calculation (1 thread) took %lli (= %e) ticks\n", - toc_exact - tic_exact, (float)(toc_exact - tic_exact)); - - printf("----------------------------------------------------------\n"); - - /* Create the tasks. */ - tic = getticks(); - create_tasks(&s, root, NULL); - tot_setup += getticks() - tic; - - /* Dump the number of tasks. */ - message("total nr of tasks: %i.", s.count); - message("total nr of deps: %i.", s.count_deps); - message("total nr of res: %i.", s.count_res); - message("total nr of locks: %i.", s.count_locks); - message("total nr of uses: %i.", s.count_uses); - int counts[task_type_count]; - for (k = 0; k < task_type_count; k++) counts[k] = 0; - for (k = 0; k < s.count; k++) counts[s.tasks[k].type] += 1; - - char buffer[200]; - sprintf(buffer, "timings_legacy_%d_%d.dat", cell_maxparts, nr_threads); - FILE *fileTime = fopen(buffer, "w"); - - /* Loop over the number of runs. */ - for (k = 0; k < runs; k++) { - - countMultipoles = 0; - countPairs = 0; - countCoMs = 0; - - /* Execute the legacy walk. */ - tic = getticks(); - legacy_tree_walk(N, parts, root, ICHECK, &countMultipoles, &countPairs, - &countCoMs); - toc_run = getticks(); - - /* Dump some timings. */ - message("%ith run took %lli (= %e) ticks...", k, toc_run - tic, - (float)(toc_run - tic)); - tot_run += toc_run - tic; - fprintf(fileTime, "%lli %e\n", toc_run - tic, (float)(toc_run - tic)); - } - - fclose(fileTime); - - printf("task counts: [ %8s %8s %8s %8s ]\n", "self", "direct", "m-poles", - "CoMs"); - printf("task counts: [ %8i %8i %8i %8i ] (legacy).\n", 0, countPairs, - countMultipoles, countCoMs); - printf("task counts: [ "); - for (k = 0; k < task_type_count; k++) printf("%8i ", counts[k]); - printf("] (new).\n"); - - /* Loop over the number of runs. */ - for (k = 0; k < runs; k++) { - - for (i = 0; i < N; ++i) { - parts[i].a[0] = 0.; - parts[i].a[1] = 0.; - parts[i].a[2] = 0.; - } - - /* Execute the tasks. */ - tic = getticks(); - qsched_run(&s, nr_threads, runner); - toc_run = getticks(); - message("%ith run took %lli (= %e) ticks...", k, toc_run - tic, - (float)(toc_run - tic)); - tot_run += toc_run - tic; - } - - message("[check] root mass= %f %f", root->legacy.mass, root->new.mass); - message("[check] root CoMx= %f %f", root->legacy.com[0], root->new.com[0]); - message("[check] root CoMy= %f %f", root->legacy.com[1], root->new.com[1]); - message("[check] root CoMz= %f %f", root->legacy.com[2], root->new.com[2]); - - /* Dump the tasks. */ - /* for ( k = 0 ; k < s.count ; k++ ) */ - /* printf( " %i %i %lli %lli\n" , s.tasks[k].type , s.tasks[k].qid , - * s.tasks[k].tic , s.tasks[k].toc ); */ - - /* Dump the costs. */ - message("costs: setup=%lli ticks, run=%lli ticks.", tot_setup, - tot_run / runs); - - /* Dump the timers. */ - for (k = 0; k < qsched_timer_count; k++) - message("timer %s is %lli ticks.", qsched_timer_names[k], - s.timers[k] / runs); - - /* Dump the per-task type timers. */ - printf("task timers: [ "); - for (k = 0; k < task_type_count; k++) printf("%lli ", task_timers[k] / runs); - printf("] ticks.\n"); - - /* Dump the particles to a file */ - /*file = fopen("particle_dump.dat", "w"); - fprintf(file, "# x y z a_exact.x a_exact.y a_exact.z a_legacy.x " - "a_legacy.y a_legacy.z a_new.x a_new.y a_new.z\n"); - for (k = 0; k < N; ++k) - fprintf(file, "%d %e %e %e %e %e %e %e %e %e %e %e %e\n", parts[k].id, - parts[k].x[0], parts[k].x[1], parts[k].x[2], parts[k].a_exact[0], - parts[k].a_exact[1], parts[k].a_exact[2], parts[k].a_legacy[0], - parts[k].a_legacy[1], parts[k].a_legacy[2], parts[k].a[0], - parts[k].a[1], parts[k].a[2]); - fclose(file);*/ - - /* Clean up. */ - qsched_free(&s); -} - -/** - * @brief Main function. - */ - -int main(int argc, char *argv[]) { - - int c, nr_threads; - int N = 1000, runs = 1; - char fileName[100] = {0}; - -/* Get the number of threads. */ -#pragma omp parallel shared(nr_threads) - { - if (omp_get_thread_num() == 0) nr_threads = omp_get_num_threads(); - } - - /* Parse the options */ - while ((c = getopt(argc, argv, "n:r:t:f:")) != -1) switch (c) { - case 'n': - if (sscanf(optarg, "%d", &N) != 1) - error("Error parsing number of particles."); - break; - case 'r': - if (sscanf(optarg, "%d", &runs) != 1) - error("Error parsing number of runs."); - break; - case 't': - if (sscanf(optarg, "%d", &nr_threads) != 1) - error("Error parsing number of threads."); - omp_set_num_threads(nr_threads); - break; - case 'f': - if (sscanf(optarg, "%s", &fileName[0]) != 1) - error("Error parsing file name."); - break; - case '?': - fprintf(stderr, - "Usage: %s [-t nr_threads] [-n N] [-r runs] [-f file]\n", - argv[0]); - fprintf(stderr, "Solves the N-body problem using a Barnes-Hutt\n" - "tree code with N random particles read from a file in " - "[0,1]^3 using\n" - "nr_threads threads.\n"); - exit(EXIT_FAILURE); - } - - /* Tree node information */ - printf("Size of cell: %zu bytes.\n", sizeof(struct cell)); - - /* Dump arguments. */ - if (fileName[0] == 0) { - message("Computing the N-body problem over %i random particles using %i " - "threads (%i runs).", - N, nr_threads, runs); - } else { - message("Computing the N-body problem over %i particles read from '%s' " - "using %i threads (%i runs).", - N, fileName, nr_threads, runs); - } - - /* Run the test. */ - test_bh(N, nr_threads, runs, fileName); - - return 0; -} diff --git a/examples/test_bh_sorted.c b/examples/test_bh_sorted.c index fbdf55acb1a11b844df24be92356952e6ba8192b..1e596813280a5c7265bc9eb3e0f65730d2b29357 100644 --- a/examples/test_bh_sorted.c +++ b/examples/test_bh_sorted.c @@ -42,7 +42,7 @@ #define const_G 1 // 6.6738e-8 #define dist_min 0.5 /* Used for legacy walk only */ #define dist_cutoff_ratio 1.5 -#define iact_pair_direct iact_pair_direct_sorted_dipole +#define iact_pair_direct iact_pair_direct_sorted #define ICHECK -1 #define NO_SANITY_CHECKS @@ -116,7 +116,7 @@ struct dipole { float axis[3]; double x[3]; float mass; - double da, da2; + float da, da2; }; static inline void dipole_init(struct dipole *d, const float *axis) { @@ -164,7 +164,7 @@ static inline void dipole_iact(const struct dipole *d, const double *x, /* Compute the first dipole. */ for (r2 = 0.0f, k = 0; k < 3; k++) { - dx[k] = x[k] - d->x[k] * inv_mass + offset * d->axis[k]; + dx[k] = x[k] - (d->x[k] * inv_mass + offset * d->axis[k]); r2 += dx[k] * dx[k]; } ir = 1.0f / sqrtf(r2); @@ -173,7 +173,7 @@ static inline void dipole_iact(const struct dipole *d, const double *x, /* Compute the second dipole. */ for (r2 = 0.0f, k = 0; k < 3; k++) { - dx[k] = x[k] - d->x[k] * inv_mass - offset * d->axis[k]; + dx[k] = x[k] - (d->x[k] * inv_mass - offset * d->axis[k]); r2 += dx[k] * dx[k]; } ir = 1.0f / sqrtf(r2); @@ -377,7 +377,7 @@ void indices_sort(struct index *sort, int N) { * @param axis The normalized axis along which to sort. * @param aid The axis ID at which to store the indices. */ -void cell_sort(struct cell *c, float *axis, int aid) { +void cell_sort(struct cell *c, const float *axis, int aid) { /* Has the indices array even been allocated? */ if (c->indices == NULL) { @@ -1609,6 +1609,16 @@ static inline void iact_pair_direct_sorted_dipole(struct cell *ci, for (k = 0; k < 3; k++) parts_i[i].a[k] += ai[k]; } /* loop over all particles in ci. */ + + /* Dump extensive info on the dipole. */ + /* message("dip[0] has x=[%e,%e,%e], axis=[%e,%e,%e], da=%e, da2=%e, mass=%e.", + dip[0].x[0], dip[0].x[1], dip[0].x[2], + dip[0].axis[0], dip[0].axis[1], dip[0].axis[2], + dip[0].da, dip[0].da2, dip[0].mass); + for (j = count_j; j < cj->count; j++) { + message(" particle x=[%e,%e,%e], mass=%e.", + parts_j[j].x[0], parts_j[j].x[1], parts_j[j].x[2], parts_j[j].mass); + } */ /* Re-init some values. */ count_j = cj->count; diff --git a/examples/test_qr_ompss.c b/examples/test_qr_ompss.c index 8d9286775f01d4ac3b8514f6a657c20f2eede68f..475e892376f6412dd5a71c2de5c10a49793db6c9 100644 --- a/examples/test_qr_ompss.c +++ b/examples/test_qr_ompss.c @@ -417,6 +417,7 @@ void test_qr(int m, int n, int K, int nr_threads, int runs) { int k, j, i, r; double* A, *A_orig, *tau; ticks tic, toc_run, tot_run = 0; + double tid[ m * n ]; /* Allocate and fill the original matrix. */ if ((A = (double*)malloc(sizeof(double)* m* n* K* K)) == NULL || @@ -539,7 +540,7 @@ int main(int argc, char* argv[]) { /* Dump arguments. */ message("Computing the tiled QR decomposition of a %ix%i matrix using %i " "threads (%i runs).", - 32 * M, 32 * N, nr_threads, runs); + K * M, K * N, nr_threads, runs); /* Initialize the timers. */ if ((timers = (struct timer*)malloc(sizeof(struct timer) * M * M * N)) ==