Commit bda5342c authored by Pedro Gonnet
Browse files

added distributed-memory parallelism using MPI, overhaul of error and message routines.


Former-commit-id: 41472e03d439a86511b8a379cf77da84c8dd4d0e
parent 036faeff
......@@ -20,8 +20,8 @@
AUTOMAKE_OPTIONS=gnu
# Add the debug flag to the whole thing
AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \
-funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) \
AM_CFLAGS = -g -O3 -std=gnu99 -Wall -Werror -ffast-math -fstrict-aliasing \
-ftree-vectorize -funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) \
-DTIMER -DCOUNTER -DCPU_TPS=2.30e9
# AM_CFLAGS = -Wall -Werror $(OPENMP_CFLAGS) \
# -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
......@@ -31,10 +31,24 @@ AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) $(HDF5_LDFLAGS) -version-info 0:0:0
# Build the libswiftsim library
lib_LTLIBRARIES = libswiftsim.la
libswiftsim_la_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
io.c timers.c debug.c scheduler.c
# Build a MPI-enabled version too?
if HAVEMPI
lib_LTLIBRARIES += libswiftsim_mpi.la
endif
# List required headers
include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
engine.h swift.h io.h timers.h debug.h scheduler.h
engine.h swift.h io.h timers.h debug.h scheduler.h proxy.h
# Common source files
AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
io.c timers.c debug.c scheduler.c proxy.c
# Sources and flags for regular library
libswiftsim_la_SOURCES = $(AM_SOURCES)
# Sources and flags for MPI library
libswiftsim_mpi_la_SOURCES = $(AM_SOURCES)
libswiftsim_mpi_la_CFLAGS = $(AM_CFLAGS) -DWITH_MPI
libswiftsim_mpi_la_SHORTNAME = mpi
......@@ -29,22 +29,144 @@
#include <limits.h>
#include <math.h>
/* MPI headers. */
#ifdef WITH_MPI
#include <mpi.h>
#endif
/* Switch off timers. */
#ifdef TIMER
#undef TIMER
#endif
/* Local headers. */
#include "const.h"
#include "cycle.h"
#include "lock.h"
#include "task.h"
#include "timers.h"
#include "part.h"
#include "space.h"
#include "cell.h"
#include "error.h"
#include "inline.h"
/**
 * @brief Count the cells in the subtree rooted at a given cell.
 *
 * The cell itself is always counted. Its progeny are only visited if the
 * cell is flagged as split, and only non-NULL progeny are followed.
 *
 * @param c The #cell at the root of the subtree.
 *
 * @return The number of cells in the subtree, including @c c itself.
 */
int cell_getsize ( struct cell *c ) {

    int total = 1;

    /* A cell that is not split contributes only itself. */
    if ( !c->split )
        return total;

    /* Otherwise add the size of every existing child subtree. */
    for ( int j = 0 ; j < 8 ; j++ ) {
        struct cell *child = c->progeny[j];
        if ( child != NULL )
            total += cell_getsize( child );
    }

    return total;
}
/**
 * @brief Rebuild a cell and its sub-cells from their packed representation.
 *
 * The statistics and particle count stored in @c pc are copied into @c c,
 * and @c parts is attached as the cell's particle array. For every progeny
 * index in @c pc that is non-negative, a new cell is obtained from
 * space_getcell(), set up as the corresponding octant of @c c (half the
 * width, next depth, same nodeID) and unpacked recursively before moving on
 * to the next progeny. Each child is handed the part of @c parts that
 * follows the particles of the children unpacked before it.
 *
 * @param pc An array of packed #pcell; @c pc[0] describes @c c and the
 *        progeny entries are offsets into this same array.
 * @param c The #cell in which to unpack the #pcell.
 * @param s The #space from which new cells are obtained.
 * @param parts The #part array holding the particle data.
 *
 * @return The number of cells unpacked, counting @c c itself.
 */
int cell_unpack ( struct pcell *pc , struct cell *c , struct space *s , struct part *parts ) {

    int unpacked = 1;

    /* Copy the packed data into the cell and attach its particles. */
    c->h_max = pc->h_max;
    c->dt_min = pc->dt_min;
    c->dt_max = pc->dt_max;
    c->count = pc->count;
    c->parts = parts;

    /* Walk over the eight possible octants. */
    for ( int k = 0 ; k < 8 ; k++ ) {

        /* A negative index means there is no progeny in this octant. */
        if ( pc->progeny[k] < 0 )
            continue;

        /* Get a fresh cell and give it the geometry of octant k:
           bit 4 of k selects the upper half along x, bit 2 along y
           and bit 1 along z. */
        struct cell *child = space_getcell( s );
        child->count = 0;
        for ( int d = 0 ; d < 3 ; d++ ) {
            child->loc[d] = c->loc[d];
            child->h[d] = c->h[d]/2;
            if ( k & ( 4 >> d ) )
                child->loc[d] += child->h[d];
        }
        child->dmin = c->dmin/2;

        /* Remaining bookkeeping for the new cell. */
        child->depth = c->depth + 1;
        child->split = 0;
        child->dx_max = 0.0;
        child->nodeID = c->nodeID;
        child->parent = c;

        /* Hook the child into its parent, which is now split. */
        c->progeny[k] = child;
        c->split = 1;

        /* Recurse, then step past the particles the child has claimed. */
        unpacked += cell_unpack( pc + pc->progeny[k] , child , s , parts );
        parts += child->count;
    }

    return unpacked;
}
/**
 * @brief Pack the data of the given cell and all its sub-cells.
 *
 * The cell's statistics and particle count are written to @c pc[0]. Each
 * non-NULL progeny is then packed recursively into the entries that follow,
 * and its offset relative to @c pc is recorded in @c pc->progeny. Missing
 * progeny are marked with -1.
 *
 * @param c The #cell.
 * @param pc Pointer to an array of packed cells in which the
 *        cells will be packed.
 *
 * @return The number of packed cells written, counting @c c itself.
 */
int cell_pack ( struct cell *c , struct pcell *pc ) {

    int used = 1;

    /* The first entry holds the data of the cell itself. */
    pc->h_max = c->h_max;
    pc->dt_min = c->dt_min;
    pc->dt_max = c->dt_max;
    pc->count = c->count;

    /* Pack each existing child right behind what has been written so far. */
    for ( int k = 0 ; k < 8 ; k++ ) {
        struct cell *child = c->progeny[k];

        /* No child in this octant. */
        if ( child == NULL ) {
            pc->progeny[k] = -1;
            continue;
        }

        pc->progeny[k] = used;
        used += cell_pack( child , pc + used );
    }

    return used;
}
/**
* @brief Lock a cell and hold its parents.
*
......@@ -195,7 +317,7 @@ void cell_split ( struct cell *c ) {
}
/* for ( int kk = left[k] ; kk <= j ; kk++ )
if ( parts[kk].x[1] > pivot[1] ) {
printf( "cell_split: ival=[%i,%i], i=%i, j=%i.\n" , left[k] , right[k] , i , j );
message( "ival=[%i,%i], i=%i, j=%i." , left[k] , right[k] , i , j );
error( "sorting failed (left)." );
}
for ( int kk = i ; kk <= right[k] ; kk++ )
......@@ -220,12 +342,12 @@ void cell_split ( struct cell *c ) {
}
/* for ( int kk = left[k] ; kk <= j ; kk++ )
if ( parts[kk].x[2] > pivot[2] ) {
printf( "cell_split: ival=[%i,%i], i=%i, j=%i.\n" , left[k] , right[k] , i , j );
message( "ival=[%i,%i], i=%i, j=%i." , left[k] , right[k] , i , j );
error( "sorting failed (left)." );
}
for ( int kk = i ; kk <= right[k] ; kk++ )
if ( parts[kk].x[2] < pivot[2] ) {
printf( "cell_split: ival=[%i,%i], i=%i, j=%i.\n" , left[k] , right[k] , i , j );
message( "ival=[%i,%i], i=%i, j=%i." , left[k] , right[k] , i , j );
error( "sorting failed (right)." );
} */
left[2*k+1] = i; right[2*k+1] = right[k];
......
......@@ -21,6 +21,21 @@
#define cell_sid_dt 13
/* Packed cell: the per-cell data that cell_pack() writes and
   cell_unpack() reads back. */
struct pcell {

    /* Stats on this cell's particles. */
    double h_max;
    double dt_min;
    double dt_max;

    /* Number of particles in this cell. */
    int count;

    /* Indices of the cell's progeny, relative to this entry in the
       packed array; negative if there is no progeny in that octant. */
    int progeny[8];

};
/* Structure to store the data of a single cell. */
struct cell {
......@@ -78,12 +93,15 @@ struct cell {
int sortsize;
/* The tasks computing this cell's density. */
struct task *density[27];
int nr_density;
struct task *density[27], *force[27];
int nr_density, nr_force;
/* The ghost task to link density to interactions. */
struct task *ghost, *kick1, *kick2;
/* Task receiving data. */
struct task *recv_xv, *recv_rho;
/* Number of tasks that are associated with this cell. */
int nr_tasks;
......@@ -111,6 +129,16 @@ struct cell {
/* Linking pointer for "memory management". */
struct cell *next;
/* ID of the node this cell lives on. */
int nodeID;
/* Bit mask of the proxies this cell is registered with. */
unsigned int sendto;
/* Pointer to this cell's packed representation. */
struct pcell *pcell;
int pcell_size;
} __attribute__((aligned (64)));
......@@ -118,3 +146,6 @@ struct cell {
void cell_split ( struct cell *c );
int cell_locktree( struct cell *c );
void cell_unlocktree( struct cell *c );
/* Pack c and its sub-cells into the array pc; returns the number of packed cells. */
int cell_pack ( struct cell *c , struct pcell *pc );
/* Unpack pc into c, creating sub-cells in s and attaching parts; returns the number of cells unpacked. */
int cell_unpack ( struct pcell *pc , struct cell *c , struct space *s , struct part *parts );
/* Number of cells in the subtree rooted at c, including c itself. */
int cell_getsize ( struct cell *c );
This diff is collapsed.
......@@ -28,11 +28,17 @@
#define engine_policy_fixdt 16
#define engine_policy_multistep 32
#define engine_policy_cputight 64
#define engine_policy_mpi 128
#define engine_policy_setaffinity 256
#define engine_queue_scale 1.2
#define engine_maxtaskspercell 32
/* The rank of the engine as a global variable (for messages). */
extern int engine_rank;
/* Data structure for the engine. */
struct engine {
......@@ -77,12 +83,21 @@ struct engine {
pthread_cond_t barrier_cond;
volatile int barrier_running, barrier_launch, barrier_launchcount;
/* ID of the node this engine lives on. */
int nr_nodes, nodeID;
/* Proxies for the other nodes in this simulation. */
struct proxy *proxies;
int nr_proxies, *proxy_ind;
};
/* Function prototypes. */
void engine_barrier( struct engine *e , int tid );
void engine_init ( struct engine *e , struct space *s , float dt , int nr_threads , int nr_queues , int policy );
void engine_init ( struct engine *e , struct space *s , float dt , int nr_threads , int nr_queues , int nr_nodes , int nodeID , int policy );
void engine_prepare ( struct engine *e );
void engine_step ( struct engine *e );
void engine_maketasks ( struct engine *e );
void engine_split ( struct engine *e , int *grid );
int engine_exchange_strays ( struct engine *e , struct part *parts , struct xpart *xparts , int *ind , int N );
......@@ -20,8 +20,26 @@
#include <stdio.h>
/**
* @brief Error macro. Prints the message given in argument and aborts.
*
*/
#define error(s) { fprintf( stderr , "%s:%s():%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); }
/*
 * error( fmt , ... ): print "file:function():line: <message>" followed by a
 * newline to stderr, then terminate the program. When built with WITH_MPI
 * the line is prefixed with the zero-padded value of engine_rank and
 * MPI_Abort() is called on MPI_COMM_WORLD before falling back to abort().
 *
 * The format must be a string literal, since it is pasted between the
 * location prefix and the trailing "\n"; it should not end in a newline
 * itself. Additional arguments are forwarded to fprintf().
 *
 * The body is wrapped in do { } while (0) so that "error( ... );" forms a
 * single statement and can be used in an unbraced if/else branch.
 * MPI_Abort() is given a non-zero error code so that the invoking
 * environment does not see the failure reported as success.
 *
 * NOTE(review): the macros rely on <stdlib.h> (abort) and, with WITH_MPI,
 * <mpi.h> being included by the including file -- confirm.
 */
#ifdef WITH_MPI
extern int engine_rank;
#define error(s, ...) do { fprintf( stderr , "[%03i] %s:%s():%i: " s "\n" , engine_rank , __FILE__ , __FUNCTION__ , __LINE__ , ##__VA_ARGS__ ); MPI_Abort( MPI_COMM_WORLD , 1 ); abort(); } while (0)
#else
#define error(s, ...) do { fprintf( stderr , "%s:%s():%i: " s "\n" , __FILE__ , __FUNCTION__ , __LINE__ , ##__VA_ARGS__ ); abort(); } while (0)
#endif
/**
 * @brief Print a formatted message to stdout, tagged with the name of the
 *        calling function.
 *
 * The output is "function: <message>" followed by a newline; when built
 * with WITH_MPI the function name is followed by the zero-padded value of
 * engine_rank in square brackets. The format @c s must be a string literal,
 * as it is pasted between the prefix and the trailing newline; any further
 * arguments are forwarded to printf(). The macro expands to the printf()
 * call itself.
 */
#ifndef WITH_MPI
#define message(s, ...) \
    printf( "%s: " s "\n" , __FUNCTION__ , ##__VA_ARGS__ )
#else
extern int engine_rank;
#define message(s, ...) \
    printf( "%s[%03i]: " s "\n" , \
            __FUNCTION__ , engine_rank , ##__VA_ARGS__ )
#endif
......@@ -32,6 +32,12 @@
#include <hdf5.h>
#include <math.h>
/* MPI headers. */
#ifdef WITH_MPI
#include <mpi.h>
#endif
#include "const.h"
#include "cycle.h"
#include "lock.h"
#include "task.h"
......@@ -120,17 +126,13 @@ void readAttribute(hid_t grp, char* name, enum DATA_TYPE type, void* data)
h_attr = H5Aopen(grp, name, H5P_DEFAULT);
if(h_attr < 0)
{
char buf[100];
sprintf(buf, "Error while opening attribute '%s'\n", name);
error(buf);
error( "Error while opening attribute '%s'" , name );
}
h_err = H5Aread(h_attr, hdf5Type(type), data);
if(h_err < 0)
{
char buf[100];
sprintf(buf, "Error while reading attribute '%s'\n", name);
error(buf);
error( "Error while reading attribute '%s'" , name );
}
H5Aclose(h_attr);
......@@ -168,21 +170,17 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, int dim
exist = H5Lexists(grp, name, 0);
if(exist < 0)
{
char buf[100];
sprintf(buf, "Error while checking the existence of data set '%s'\n", name);
error(buf);
error( "Error while checking the existence of data set '%s'." , name );
}
else if(exist == 0)
{
if(importance == COMPULSORY)
{
char buf[100];
sprintf(buf, "Compulsory data set '%s' not present in the file.\n", name);
error(buf);
error( "Compulsory data set '%s' not present in the file." , name );
}
else
{
/* printf("readArray: Optional data set '%s' not present. Zeroing this particle field...\n", name); */
/* message("Optional data set '%s' not present. Zeroing this particle field...", name); */
for(i=0; i<N; ++i)
memset(part_c+i*partSize, 0, copySize);
......@@ -191,15 +189,13 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, int dim
}
}
/* printf("readArray: Reading %s '%s' array...\n", importance == COMPULSORY ? "compulsory": "optional ", name); */
/* message( "Reading %s '%s' array...", importance == COMPULSORY ? "compulsory": "optional ", name); */
/* Open data space */
h_data = H5Dopen1(grp, name);
if(h_data < 0)
{
char buf[100];
sprintf(buf, "Error while opening data space '%s'\n", name);
error(buf);
error( "Error while opening data space '%s'." , name );
}
/* Check data type */
......@@ -220,9 +216,7 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, int dim
h_err = H5Dread(h_data, hdf5Type(type), H5S_ALL, H5S_ALL, H5P_DEFAULT, temp);
if(h_err < 0)
{
char buf[100];
sprintf(buf, "Error while reading data array '%s'\n", name);
error(buf);
error( "Error while reading data array '%s'." , name );
}
/* Copy temporary buffer to particle data */
......@@ -277,17 +271,15 @@ void read_ic ( char* fileName, double dim[3], struct part **parts, int* N, int*
int numParticles[6]={0}; /* GADGET has 6 particle types. We only keep the type 0*/
/* Open file */
/* printf("read_ic: Opening file '%s' as IC.\n", fileName); */
/* message("Opening file '%s' as IC.", fileName); */
h_file = H5Fopen(fileName, H5F_ACC_RDONLY, H5P_DEFAULT);
if(h_file < 0)
{
char buf[200];
sprintf(buf, "Error while opening file '%s'", fileName);
error(buf);
error( "Error while opening file '%s'." , fileName );
}
/* Open header to read simulation properties */
/* printf("read_ic: Reading runtime parameters...\n"); */
/* message("Reading runtime parameters..."); */
h_grp = H5Gopen1(h_file, "/RuntimePars");
if(h_grp < 0)
error("Error while opening runtime parameters\n");
......@@ -299,7 +291,7 @@ void read_ic ( char* fileName, double dim[3], struct part **parts, int* N, int*
H5Gclose(h_grp);
/* Open header to read simulation properties */
/* printf("read_ic: Reading file header...\n"); */
/* message("Reading file header..."); */
h_grp = H5Gopen1(h_file, "/Header");
if(h_grp < 0)
error("Error while opening file header\n");
......@@ -313,7 +305,7 @@ void read_ic ( char* fileName, double dim[3], struct part **parts, int* N, int*
dim[1] = ( boxSize[1] < 0 ) ? boxSize[0] : boxSize[1];
dim[2] = ( boxSize[2] < 0 ) ? boxSize[0] : boxSize[2];
/* printf("read_ic: Found %d particles in a %speriodic box of size [%f %f %f]\n", */
/* message("Found %d particles in a %speriodic box of size [%f %f %f].", */
/* *N, (periodic ? "": "non-"), dim[0], dim[1], dim[2]); */
/* Close header */
......@@ -324,10 +316,10 @@ void read_ic ( char* fileName, double dim[3], struct part **parts, int* N, int*
error("Error while allocating memory for particles");
bzero( *parts , *N * sizeof(struct part) );
/* printf("read_ic: Allocated %8.2f MB for particles.\n", *N * sizeof(struct part) / (1024.*1024.)); */
/* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) / (1024.*1024.)); */
/* Open SPH particles group */
/* printf("read_ic: Reading particle arrays...\n"); */
/* message("Reading particle arrays..."); */
h_grp = H5Gopen1(h_file, "/PartType0");
if(h_grp < 0)
error( "Error while opening particle group.\n");
......@@ -346,7 +338,7 @@ void read_ic ( char* fileName, double dim[3], struct part **parts, int* N, int*
/* Close particle group */
H5Gclose(h_grp);
/* printf("read_ic: Done Reading particles...\n"); */
/* message("Done Reading particles..."); */
/* Close file */
H5Fclose(h_file);
......@@ -385,33 +377,25 @@ void writeAttribute(hid_t grp, char* name, enum DATA_TYPE type, void* data, int
h_space = H5Screate(H5S_SIMPLE);
if(h_space < 0)
{
char buf[100];
sprintf(buf, "Error while creating dataspace for attribute '%s'\n", name);
error(buf);
error( "Error while creating dataspace for attribute '%s'." , name );
}
h_err = H5Sset_extent_simple(h_space, 1, dim, NULL);
if(h_err < 0)
{
char buf[100];
sprintf(buf, "Error while changing dataspace shape for attribute '%s'\n", name);
error(buf);
error( "Error while changing dataspace shape for attribute '%s'." , name );
}
h_attr = H5Acreate1(grp, name, hdf5Type(type), h_space, H5P_DEFAULT);
if(h_attr < 0)
{
char buf[100];
sprintf(buf, "Error while creating attribute '%s'\n", name);
error(buf);
error( "Error while creating attribute '%s'.", name );
}
h_err = H5Awrite(h_attr, hdf5Type(type), data);
if(h_err < 0)
{
char buf[100];
sprintf(buf, "Error while reading attribute '%s'\n", name);
error(buf);
error( "Error while reading attribute '%s'." , name );
}
H5Sclose(h_space);
......@@ -435,41 +419,31 @@ void writeStringAttribute(hid_t grp, char* name, char* str, int length)
h_space = H5Screate(H5S_SCALAR);
if(h_space < 0)
{
char buf[100];
sprintf(buf, "Error while creating dataspace for attribute '%s'\n", name);
error(buf);
error( "Error while creating dataspace for attribute '%s'." , name );
}
h_type = H5Tcopy(H5T_C_S1);
if(h_type < 0)
{
char buf[100];
sprintf(buf, "Error while copying datatype 'H5T_C_S1'\n");
error(buf);
error( "Error while copying datatype 'H5T_C_S1'." );
}
h_err = H5Tset_size(h_type, length);
if(h_err < 0)
{
char buf[100];
sprintf(buf, "Error while resizing attribute tyep to '%i'\n", length);
error(buf);
error( "Error while resizing attribute tyep to '%i'." , length );
}
h_attr = H5Acreate1(grp, name, h_type, h_space, H5P_DEFAULT);
if(h_attr < 0)
{
char buf[100];
sprintf(buf, "Error while creating attribute '%s'\n", name);
error(buf);
error( "Error while creating attribute '%s'." , name );
}
h_err = H5Awrite(h_attr, h_type, str );
if(h_err < 0)
{
char buf[100];
sprintf(buf, "Error while reading attribute '%s'\n", name);
error(buf);
error( "Error while reading attribute '%s'." , name );
}
H5Tclose(h_type);
......@@ -562,7 +536,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, enu
char* temp_c = 0;
hsize_t shape[2];
/* printf("writeArray: Writing '%s' array...\n", name); */
/* message("Writing '%s' array...", name); */
/* Allocate temporary buffer */
temp = malloc(N * dim * sizeOfType(type));
......@@ -578,9 +552,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, enu
h_space = H5Screate(H5S_SIMPLE);
if(h_space < 0)
{
char buf[100];
sprintf(buf, "Error while creating data space for field '%s'\n", name);
error(buf);
error( "Error while creating data space for field '%s'." , name );
}
if(dim > 1)
......@@ -598,27 +570,21 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, enu
h_err = H5Sset_extent_simple(h_space, rank, shape, NULL);
if(h_err < 0)
{
char buf[100];
sprintf(buf, "Error while changing data space shape for field '%s'\n", name);
error(buf);
error( "Error while changing data space shape for field '%s'." , name );
}
/* Create dataset */
h_data = H5Dcreate1(grp, name, hdf5Type(type), h_space, H5P_DEFAULT);
if(h_data < 0)
{
char buf[100];
sprintf(buf, "Error while creating dataspace '%s'\n", name);
error(buf);
error( "Error while creating dataspace '%s'." , name );
}
/* Write temporary buffer to HDF5 dataspace */
h_err = H5Dwrite(h_data, hdf5Type(type), h_space, H5S_ALL, H5P_DEFAULT, temp);
if(h_err < 0)
{
char buf[100];
sprintf(buf, "Error while reading data array '%s'\n", name);
error(buf);
error( "Error while reading data array '%s'." , name );
}
/* Write XMF description for this data set */
......@@ -689,17 +655,15 @@ void write_output (struct engine *e)
/* Open file */
/* printf("write_output: Opening file '%s'.\n", fileName); */
/* message("Opening file '%s'.", fileName); */
h_file = H5Fcreate(fileName, H5F_ACC_TRUNC, H5P_DEFAULT,H5P_DEFAULT);
if(h_file < 0)
{
char buf[200];
sprintf(buf, "Error while opening file '%s'", fileName);
error(buf);
error( "Error while opening file '%s'." , fileName );
}
/* Open header to read simulation properties */
/* printf("write_output: Writing runtime parameters...\n"); */
/* message("Writing runtime parameters..."); */
h_grp = H5Gcreate1(h_file, "/RuntimePars", 0);
if(h_grp < 0)
error("Error while creating runtime parameters group\n");
......@@ -711,7 +675,7 @@ void write_output (struct engine *e)
H5Gclose(h_grp);