Skip to content
Snippets Groups Projects
Commit d1648725 authored by d74ksy's avatar d74ksy
Browse files

Added the initial test_bh_mpi file. Fixed a bug in adding child resources...

Added the initial test_bh_mpi file. Fixed a bug in adding child resources where it used position based on integer spacing instead of character spacing (i.e. skipped 4*offset bytes as opposed to offset bytes)
parent 4e5eacc1
Branches
No related tags found
No related merge requests found
......@@ -23,8 +23,13 @@ AM_CFLAGS = -I../src -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-v
AM_LDFLAGS =
METIS_LIBS = @METIS_LIBS@
MPI_THREAD_LIBS = @MPI_THREAD_LIBS@
MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS)
# Set-up the library
bin_PROGRAMS = test test_bh test_bh_sorted test_fmm_sorted
bin_PROGRAMS = test test_bh test_bh_sorted test_fmm_sorted test_bh_mpi
if HAVECBLAS
bin_PROGRAMS += test_qr
endif
......@@ -53,3 +58,8 @@ test_bh_sorted_LDADD = ../src/.libs/libquicksched.a
test_fmm_sorted_SOURCES = test_fmm_sorted.c
test_fmm_sorted_CFLAGS = $(AM_CFLAGS)
test_fmm_sorted_LDADD = ../src/.libs/libquicksched.a
test_bh_mpi_SOURCES = test_bh_mpi.c
test_bh_mpi_CFLAGS = $(AM_CFLAGS) -DWITH_MPI
test_bh_mpi_LDADD = ../src/.libs/libquickschedMPI.a $(METIS_LIBS)
test_bh_mpi_LDFLAGS = $(MPI_THREAD_LIBS)
/*******************************************************************************
* This file is part of QuickSched.
* Coypright (c) 2014 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
* Matthieu Schaller (matthieu.schaller@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* *****************************************************************************/
/* Config parameters. */
#include "../config.h"
/* Standard includes. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <math.h>
#include <float.h>
#include <limits.h>
#include <omp.h>
#include <fenv.h>
#include <mpi.h>
/* Local includes. */
#include "quicksched.h"
/* Some local constants. */
#define cell_pool_grow 1000
#define cell_maxparts 100
#define task_limit 1e8
#define const_G 1 // 6.6738e-8
#define dist_min 0.5 /* Used for legacy walk only */
#define dist_cutoff_ratio 1.5
#define ICHECK -1
#define NO_SANITY_CHECKS
#define NO_COM_AS_TASK
#define NO_COUNTERS
/** Data structure for the particles. */
struct part {
double x[3];
union {
float a[3];
float a_legacy[3];
float a_exact[3];
};
float mass;
int id;
}; // __attribute__((aligned(32)));
struct part_local {
float x[3];
float a[3];
float mass;
} __attribute__((aligned(32)));
struct multipole {
double com[3];
float mass;
};
/** Data structure for the BH tree cell. */
struct cell {
double loc[3];
double h;
int count;
unsigned short int split, sorted;
struct part *parts;
qsched_res_t firstchild; /* Next node if opening */
qsched_res_t sibling; /* Next node */
/* We keep both CoMs and masses to make sure the comp_com calculation is
* correct (use an anonymous union to keep variable names compact). */
union {
/* Information for the legacy walk */
struct multipole legacy;
/* Information for the QuickShed walk */
struct multipole new;
};
long long int res,res_parts, com_tid;
struct index *indices;
} __attribute__((aligned(128)));
/** Global variable for the pool of allocated cells. */
struct cell *cell_pool = NULL;
struct cell *cell_get(struct qsched *s) {
struct cell *res;
// int k;
long long int resource = qsched_addres(s, qsched_owner_none, sizeof(struct cell), (void**) &res );
res->res = resource;
res->firstchild = -1;
#ifdef THINK_CAN_BE_DELETED
/* Allocate a new batch? */
if (cell_pool == NULL) {
/* Allocate the cell array. */
if ((cell_pool =
(struct cell *)calloc(cell_pool_grow, sizeof(struct cell))) ==
NULL)
error("Failed to allocate fresh batch of cells.");
/* Link them up via their progeny pointers. */
for (k = 1; k < cell_pool_grow; k++)
cell_pool[k - 1].firstchild = cell_pool[k].res;
}
/* Pick a cell off the pool. */
res = cell_pool;
cell_pool = cell_pool->firstchild;
/* Clean up a few things. */
res->res = qsched_res_none;
res->firstchild = 0;
#endif
/* Return the cell. */
return res;
}
/**
* @brief Sort the parts into eight bins along the given pivots and
* fill the multipoles. Also adds the hierarchical resources
* to the sched.
*
* @param c The #cell to be split.
* @param N The total number of parts.
* @param s The #sched to store the resources.
*/
void cell_split(struct cell *c, struct qsched *s) {
int i, j, k, kk, count = c->count;
struct part temp, *parts = qsched_getresdata( s, c->res_parts );
struct cell *cp;
int left[8], right[8];
double pivot[3];
static struct cell *root = NULL;
struct cell *progenitors[8];
/* Set the root cell. */
if (root == NULL) {
root = c;
c->sibling = 0;
}
/* Add a resource for this cell if it doesn't have one yet. */
//if (c->res == qsched_res_none)
//c->res = qsched_addres_local(s, qsched_owner_none, qsched_res_none);
/* Does this cell need to be split? */
if (count > cell_maxparts) {
/* Mark this cell as split. */
c->split = 1;
/* Create the progeny. */
for (k = 0; k < 8; k++) {
progenitors[k] = cp = cell_get(s);
cp->loc[0] = c->loc[0];
cp->loc[1] = c->loc[1];
cp->loc[2] = c->loc[2];
cp->h = c->h * 0.5;
//cp->res = qsched_addres_local(s, qsched_owner_none, c->res);
if (k & 4) cp->loc[0] += cp->h;
if (k & 2) cp->loc[1] += cp->h;
if (k & 1) cp->loc[2] += cp->h;
}
/* Init the pivots. */
for (k = 0; k < 3; k++) pivot[k] = c->loc[k] + c->h * 0.5;
/* Split along the x-axis. */
i = 0;
j = count - 1;
while (i <= j) {
while (i <= count - 1 && parts[i].x[0] < pivot[0]) i += 1;
while (j >= 0 && parts[j].x[0] >= pivot[0]) j -= 1;
if (i < j) {
temp = parts[i];
parts[i] = parts[j];
parts[j] = temp;
}
}
left[1] = i;
right[1] = count - 1;
left[0] = 0;
right[0] = j;
/* Split along the y axis, twice. */
for (k = 1; k >= 0; k--) {
i = left[k];
j = right[k];
while (i <= j) {
while (i <= right[k] && parts[i].x[1] < pivot[1]) i += 1;
while (j >= left[k] && parts[j].x[1] >= pivot[1]) j -= 1;
if (i < j) {
temp = parts[i];
parts[i] = parts[j];
parts[j] = temp;
}
}
left[2 * k + 1] = i;
right[2 * k + 1] = right[k];
left[2 * k] = left[k];
right[2 * k] = j;
}
/* Split along the z axis, four times. */
for (k = 3; k >= 0; k--) {
i = left[k];
j = right[k];
while (i <= j) {
while (i <= right[k] && parts[i].x[2] < pivot[2]) i += 1;
while (j >= left[k] && parts[j].x[2] >= pivot[2]) j -= 1;
if (i < j) {
temp = parts[i];
parts[i] = parts[j];
parts[j] = temp;
}
}
left[2 * k + 1] = i;
right[2 * k + 1] = right[k];
left[2 * k] = left[k];
right[2 * k] = j;
}
/* Store the counts and offsets. */
for (k = 0; k < 8; k++) {
progenitors[k]->count = right[k] - left[k] + 1;
// progenitors[k]->parts = &c->parts[left[k]];
progenitors[k]->res_parts = qsched_addchildres(s, c->res_parts, qsched_owner_none, (right[k] - left[k] + 1)*sizeof(struct part), left[k]*sizeof(struct part), (void**) &progenitors[k]->parts );
}
/* Find the first non-empty progenitor */
for (k = 0; k < 8; k++)
if (progenitors[k]->count > 0) {
c->firstchild = progenitors[k]->res;
break;
}
/* Prepare the pointers. */
for (k = 0; k < 8; k++) {
/* Find the next non-empty sibling */
for (kk = k + 1; kk < 8; ++kk) {
if (progenitors[kk]->count > 0) {
progenitors[k]->sibling = progenitors[kk]->res;
break;
}
}
/* No non-empty sibling ? Go back a level */
if (kk == 8) progenitors[k]->sibling = c->sibling;
}
/* Recurse. */
for (k = 0; k < 8; k++)
if (progenitors[k]->count > 0) cell_split(progenitors[k], s);
/* Otherwise, we're at a leaf, so create the cell's particle-cell task. */
} else {
//struct cell *data[2] = {root, c};
/*int tid = qsched_addtask_local(s, task_type_self_pc, task_flag_none, data,
2 * sizeof(struct cell *), 1);
qsched_addlock(s, tid, c->res);*/
} /* does the cell need to be split? */
/* Set this cell's resources ownership. */
/*qsched_res_own(s, c->res,
s->nr_queues * (c->parts - root->parts) / root->count);*/
}
/**
* @brief Set up and run a task-based Barnes-Hutt N-body solver.
*
* @param N The number of random particles to use.
* @param nr_threads Number of threads to use.
* @param runs Number of force evaluations to use as a benchmark.
* @param fileName Input file name. If @c NULL or an empty string, random
* particle positions will be used.
*/
void test_bh(int N, int nr_threads, int runs, char *fileName) {
int k;
struct cell *root;
struct part *parts;
qsched_res_t parts_res;
FILE *file;
struct qsched s;
// ticks tic, toc_run, tot_setup = 0, tot_run = 0;
// int countMultipoles = 0, countPairs = 0, countCoMs = 0;
char processor_name[MPI_MAX_PROCESSOR_NAME];
int name_len;
/* Runner function. */
void runner(int type, void * data) {
// ticks tic = getticks();
/* Decode the data. */
// struct cell **d = (struct cell **)data;
/* Decode and execute the task. */
/* switch (type) {
case task_type_self:
iact_self_direct(d[0]);
break;
case task_type_pair:
iact_pair(d[0], d[1]);
break;
case task_type_self_pc:
iact_self_pc(d[0], d[1], NULL);
break;
case task_type_com:
comp_com(d[0]);
break;
default:
error("Unknown task type.");
}
atomic_add(&task_timers[type], getticks() - tic);*/
}
// Initialize the MPI environment
int MpiThreadLevel;
MPI_Init_thread(NULL, NULL, MPI_THREAD_MULTIPLE, &MpiThreadLevel);
if(MpiThreadLevel != MPI_THREAD_MULTIPLE)
error("We didn't get thread multiple!");
MPI_Get_processor_name(processor_name, &name_len);
/* Initialize the per-task type timers. */
// for (k = 0; k < task_type_count; k++) task_timers[k] = 0;
/* Initialize the scheduler. */
qsched_init(&s, nr_threads, qsched_flag_noreown, MPI_COMM_WORLD);
/* Init and fill the particle array. */
// if ((parts = (struct part *)malloc(sizeof(struct part) * N)) == NULL)
// error("Failed to allocate particle buffer.");
parts_res = qsched_addres(&s, qsched_owner_none, sizeof(struct part) * N, (void**) &parts);
/* If no input file was specified, generate random particle positions. */
if (fileName == NULL || fileName[0] == 0) {
for (k = 0; k < N; k++) {
parts[k].id = k;
parts[k].x[0] = ((double)rand()) / RAND_MAX;
parts[k].x[1] = ((double)rand()) / RAND_MAX;
parts[k].x[2] = ((double)rand()) / RAND_MAX;
parts[k].mass = ((double)rand()) / RAND_MAX;
parts[k].a_legacy[0] = 0.0;
parts[k].a_legacy[1] = 0.0;
parts[k].a_legacy[2] = 0.0;
}
/* Otherwise, read them from a file. */
} else {
file = fopen(fileName, "r");
if (file) {
for (k = 0; k < N; k++) {
if (fscanf(file, "%d", &parts[k].id) != 1)
error("Failed to read ID of part %i.", k);
if (fscanf(file, "%lf%lf%lf", &parts[k].x[0], &parts[k].x[1],
&parts[k].x[2]) !=
3)
error("Failed to read position of part %i.", k);
if (fscanf(file, "%f", &parts[k].mass) != 1)
error("Failed to read mass of part %i.", k);
}
fclose(file);
}
}
/* Init the cells. */
root = cell_get(&s);
root->loc[0] = 0.0;
root->loc[1] = 0.0;
root->loc[2] = 0.0;
root->h = 1.0;
root->count = N;
root->parts = parts;
root->res_parts = parts_res;
cell_split(root, &s);
/* Iterate over the cells and get the average number of particles
per leaf. */
struct cell *c = root;
int nr_leaves = 0;
while (c != NULL && (c->sibling > 0 || c->firstchild > 0)) {
if (!c->split) {
nr_leaves++;
c = (struct cell*) qsched_getresdata( &s, c->sibling );
} else {
c = (struct cell*) qsched_getresdata( &s, c->firstchild );
}
}
message("Average number of parts per leaf is %f.", ((double)N) / nr_leaves);
// Print off a hello world message
printf("Hello world from processor %s, rank = %i, count_ranks = %i\n",
processor_name, s.rank, s.count_ranks);
// Finalize the MPI environment.
MPI_Finalize();
}
/**
* @brief Main function.
*/
int main(int argc, char *argv[]) {
int c, nr_threads;
int N = 1000, runs = 1;
char fileName[100] = {0};
/* Die on FP-exceptions. */
feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
/* Get the number of threads. */
#pragma omp parallel shared(nr_threads)
{
if (omp_get_thread_num() == 0) nr_threads = omp_get_num_threads();
}
/* Parse the options */
while ((c = getopt(argc, argv, "n:r:t:f:c:i:")) != -1) switch (c) {
case 'n':
if (sscanf(optarg, "%d", &N) != 1)
error("Error parsing number of particles.");
break;
case 'r':
if (sscanf(optarg, "%d", &runs) != 1)
error("Error parsing number of runs.");
break;
case 't':
if (sscanf(optarg, "%d", &nr_threads) != 1)
error("Error parsing number of threads.");
omp_set_num_threads(nr_threads);
break;
case 'f':
if (sscanf(optarg, "%s", &fileName[0]) != 1)
error("Error parsing file name.");
break;
case '?':
fprintf(stderr, "Usage: %s [-t nr_threads] [-n N] [-r runs] [-f file] "
"[-c Nparts] [-i Niterations] \n",
argv[0]);
fprintf(stderr, "Solves the N-body problem using a Barnes-Hut\n"
"tree code with N random particles read from a file in "
"[0,1]^3 using"
"nr_threads threads.\n"
"A test of the neighbouring cells interaction with "
"Nparts per cell is also run Niterations times.\n");
exit(EXIT_FAILURE);
}
/* Tree node information */
printf("Size of cell: %zu bytes.\n", sizeof(struct cell));
/* Part information */
printf("Size of part: %zu bytes.\n", sizeof(struct part));
/* Dump arguments. */
if (fileName[0] == 0) {
message("Computing the N-body problem over %i random particles using %i "
"threads (%i runs).",
N, nr_threads, runs);
} else {
message("Computing the N-body problem over %i particles read from '%s' "
"using %i threads (%i runs).",
N, fileName, nr_threads, runs);
}
/* Run the BH test. */
test_bh(N, nr_threads, runs, fileName);
return 0;
}
......@@ -3896,7 +3896,7 @@ int index = getindex(id,s);
s->res[ index ].parent = parent;
s->res[ index ].offset = position;
s->res[ index ].node = s->rank;
int *temp = (int*) s->res[getindex(parent,s)].data;
char *temp = (char*) s->res[getindex(parent,s)].data;
s->res[ index ].data = &temp[position];
s->res[ index ].size = size;
s->res[ index ].users = NULL;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment