Commit 4739cede authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

add gravity, still work in progress...


Former-commit-id: 6ae814f4b6e04f232d3e3e52ae7c50e13b23fea9
parent 15e1c515
......@@ -22,12 +22,13 @@ AUTOMAKE_OPTIONS=gnu
# Add the debug flag to the whole thing
AM_CFLAGS = -g -O3 -std=gnu99 -Wall -Werror -ffast-math -fstrict-aliasing \
-ftree-vectorize -funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) \
-DTIMER -DCOUNTER -DCPU_TPS=2.30e9
-DTIMER -DCOUNTER -DCPU_TPS=2.30e9 \
# -fsanitize=address -fno-omit-frame-pointer
# AM_CFLAGS = -Wall -Werror $(OPENMP_CFLAGS) \
# -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
# Assign a "safe" version number
AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) $(HDF5_LDFLAGS) -version-info 0:0:0
AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) $(HDF5_LDFLAGS) -version-info 0:0:0 # -fsanitize=address
# Build the libswiftsim library
lib_LTLIBRARIES = libswiftsim.la
......@@ -38,11 +39,13 @@ endif
# List required headers
include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
engine.h swift.h serial_io.h timers.h debug.h scheduler.h proxy.h parallel_io.h common_io.h
engine.h swift.h serial_io.h timers.h debug.h scheduler.h proxy.h parallel_io.h \
common_io.h multipole.h
# Common source files
AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c units.c common_io.c
serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \
units.c common_io.c multipole.c
# Sources and flags for regular library
libswiftsim_la_SOURCES = $(AM_SOURCES)
......
......@@ -47,6 +47,7 @@
#include "timers.h"
#include "part.h"
#include "space.h"
#include "multipole.h"
#include "cell.h"
#include "error.h"
#include "inline.h"
......@@ -265,6 +266,72 @@ int cell_locktree( struct cell *c ) {
}
int cell_glocktree( struct cell *c ) {
struct cell *finger, *finger2;
TIMER_TIC
/* First of all, try to lock this cell. */
if ( c->ghold || lock_trylock( &c->glock ) != 0 ) {
TIMER_TOC(timer_locktree);
return 1;
}
/* Did somebody hold this cell in the meantime? */
if ( c->ghold ) {
/* Unlock this cell. */
if ( lock_unlock( &c->glock ) != 0 )
error( "Failed to unlock cell." );
/* Admit defeat. */
TIMER_TOC(timer_locktree);
return 1;
}
/* Climb up the tree and lock/hold/unlock. */
for ( finger = c->parent ; finger != NULL ; finger = finger->parent ) {
/* Lock this cell. */
if ( lock_trylock( &finger->glock ) != 0 )
break;
/* Increment the hold. */
__sync_fetch_and_add( &finger->ghold , 1 );
/* Unlock the cell. */
if ( lock_unlock( &finger->glock ) != 0 )
error( "Failed to unlock cell." );
}
/* If we reached the top of the tree, we're done. */
if ( finger == NULL ) {
TIMER_TOC(timer_locktree);
return 0;
}
/* Otherwise, we hit a snag. */
else {
/* Undo the holds up to finger. */
for ( finger2 = c->parent ; finger2 != finger ; finger2 = finger2->parent )
__sync_fetch_and_sub( &finger2->ghold , 1 );
/* Unlock this cell. */
if ( lock_unlock( &c->glock ) != 0 )
error( "Failed to unlock cell." );
/* Admit defeat. */
TIMER_TOC(timer_locktree);
return 1;
}
}
/**
* @brief Unock a cell's parents.
*
......@@ -289,6 +356,24 @@ void cell_unlocktree( struct cell *c ) {
}
void cell_gunlocktree( struct cell *c ) {
struct cell *finger;
TIMER_TIC
/* First of all, try to unlock this cell. */
if ( lock_unlock( &c->glock ) != 0 )
error( "Failed to unlock cell." );
/* Climb up the tree and unhold the parents. */
for ( finger = c->parent ; finger != NULL ; finger = finger->parent )
__sync_fetch_and_sub( &finger->ghold , 1 );
TIMER_TOC(timer_locktree);
}
/**
* @brief Sort the parts into eight bins along the given pivots.
*
......@@ -297,20 +382,21 @@ void cell_unlocktree( struct cell *c ) {
void cell_split ( struct cell *c ) {
int i, j, k;
int i, j, k, count = c->count, gcount = c->gcount;
struct part temp, *parts = c->parts;
struct xpart xtemp, *xparts = c->xparts;
struct gpart gtemp, *gparts = c->gparts;
int left[8], right[8];
double pivot[3];
/* Init the pivot. */
/* Init the pivots. */
for ( k = 0 ; k < 3 ; k++ )
pivot[k] = c->loc[k] + c->h[k]/2;
/* Split along the x-axis. */
i = 0; j = c->count - 1;
i = 0; j = count - 1;
while ( i <= j ) {
while ( i <= c->count-1 && parts[i].x[0] <= pivot[0] )
while ( i <= count-1 && parts[i].x[0] <= pivot[0] )
i += 1;
while ( j >= 0 && parts[j].x[0] > pivot[0] )
j -= 1;
......@@ -322,10 +408,10 @@ void cell_split ( struct cell *c ) {
/* for ( k = 0 ; k <= j ; k++ )
if ( parts[k].x[0] > pivot[0] )
error( "cell_split: sorting failed." );
for ( k = i ; k < c->count ; k++ )
for ( k = i ; k < count ; k++ )
if ( parts[k].x[0] < pivot[0] )
error( "cell_split: sorting failed." ); */
left[1] = i; right[1] = c->count - 1;
left[1] = i; right[1] = count - 1;
left[0] = 0; right[0] = j;
/* Split along the y axis, twice. */
......@@ -387,13 +473,17 @@ void cell_split ( struct cell *c ) {
c->progeny[k]->xparts = &c->xparts[ left[k] ];
}
/* Re-link the gparts. */
for ( k = 0 ; k < count ; k++ )
parts[k].gpart->part = &parts[k];
/* Verify that _all_ the parts have been assigned to a cell. */
/* for ( k = 1 ; k < 8 ; k++ )
if ( &c->progeny[k-1]->parts[ c->progeny[k-1]->count ] != c->progeny[k]->parts )
error( "Particle sorting failed (internal consistency)." );
if ( c->progeny[0]->parts != c->parts )
error( "Particle sorting failed (left edge)." );
if ( &c->progeny[7]->parts[ c->progeny[7]->count ] != &c->parts[ c->count ] )
if ( &c->progeny[7]->parts[ c->progeny[7]->count ] != &c->parts[ count ] )
error( "Particle sorting failed (right edge)." ); */
/* Verify a few sub-cells. */
......@@ -413,6 +503,65 @@ void cell_split ( struct cell *c ) {
c->progeny[2]->parts[k].x[2] > pivot[2] )
error( "Sorting failed (progeny=2)." ); */
/* Now do the same song and dance for the gparts. */
/* Split along the x-axis. */
i = 0; j = gcount - 1;
while ( i <= j ) {
while ( i <= gcount-1 && gparts[i].x[0] <= pivot[0] )
i += 1;
while ( j >= 0 && gparts[j].x[0] > pivot[0] )
j -= 1;
if ( i < j ) {
gtemp = gparts[i]; gparts[i] = gparts[j]; gparts[j] = gtemp;
}
}
left[1] = i; right[1] = gcount - 1;
left[0] = 0; right[0] = j;
/* Split along the y axis, twice. */
for ( k = 1 ; k >= 0 ; k-- ) {
i = left[k]; j = right[k];
while ( i <= j ) {
while ( i <= right[k] && gparts[i].x[1] <= pivot[1] )
i += 1;
while ( j >= left[k] && gparts[j].x[1] > pivot[1] )
j -= 1;
if ( i < j ) {
gtemp = gparts[i]; gparts[i] = gparts[j]; gparts[j] = gtemp;
}
}
left[2*k+1] = i; right[2*k+1] = right[k];
left[2*k] = left[k]; right[2*k] = j;
}
/* Split along the z axis, four times. */
for ( k = 3 ; k >= 0 ; k-- ) {
i = left[k]; j = right[k];
while ( i <= j ) {
while ( i <= right[k] && gparts[i].x[2] <= pivot[2] )
i += 1;
while ( j >= left[k] && gparts[j].x[2] > pivot[2] )
j -= 1;
if ( i < j ) {
gtemp = gparts[i]; gparts[i] = gparts[j]; gparts[j] = gtemp;
}
}
left[2*k+1] = i; right[2*k+1] = right[k];
left[2*k] = left[k]; right[2*k] = j;
}
/* Store the counts and offsets. */
for ( k = 0 ; k < 8 ; k++ ) {
c->progeny[k]->gcount = right[k] - left[k] + 1;
c->progeny[k]->gparts = &c->gparts[ left[k] ];
}
/* Re-link the parts. */
for ( k = 0 ; k < gcount ; k++ )
if ( gparts[k].id > 0 )
gparts[k].part->gpart = &gparts[k];
}
......@@ -67,7 +67,7 @@ struct cell {
int depth, split, maxdepth;
/* Nr of parts. */
int count;
int count, gcount;
/* Pointers to the particle data. */
struct part *parts;
......@@ -75,12 +75,12 @@ struct cell {
/* Pointers to the extra particle data. */
struct xpart *xparts;
/* Pointers for the sorted indices. */
struct entry *sort;
unsigned int sorted;
/* Pointers to the gravity particle data. */
struct gpart *gparts;
/* Number of pairs associated with this cell. */
// int nr_pairs;
/* Pointers for the sorted indices. */
struct entry *sort, *gsort;
unsigned int sorted, gsorted;
/* Pointers to the next level of cells. */
struct cell *progeny[8];
......@@ -91,13 +91,13 @@ struct cell {
/* Super cell, i.e. the highest-level supercell that has interactions. */
struct cell *super;
/* The tasks computing this cell's sorts. */
struct task *sorts;
int sortsize;
/* The task computing this cell's sorts. */
struct task *sorts, *gsorts;
int sortsize, gsortsize;
/* The tasks computing this cell's density. */
struct link *density, *force;
int nr_density, nr_force;
struct link *density, *force, *grav;
int nr_density, nr_force, nr_grav;
/* The ghost task to link density to interactions. */
struct task *ghost, *kick1, *kick2;
......@@ -105,17 +105,17 @@ struct cell {
/* Task receiving data. */
struct task *recv_xv, *recv_rho;
/* Tasks for gravity tree. */
struct task *grav_up, *grav_down;
/* Number of tasks that are associated with this cell. */
int nr_tasks;
/* Number of tasks this cell is waiting for and whether it is in use. */
int wait;
/* Is the data of this cell being used in a sub-cell? */
int hold;
int hold, ghold;
/* Spin lock for various uses. */
lock_type lock;
lock_type lock, glock;
/* ID of the previous owner, e.g. runner. */
int owner;
......@@ -143,6 +143,9 @@ struct cell {
int pcell_size;
int tag;
/* This cell's multipole. */
struct multipole multipole;
} __attribute__((aligned (64)));
......@@ -150,6 +153,8 @@ struct cell {
void cell_split ( struct cell *c );
int cell_locktree( struct cell *c );
void cell_unlocktree( struct cell *c );
int cell_glocktree( struct cell *c );
void cell_gunlocktree( struct cell *c );
int cell_pack ( struct cell *c , struct pcell *pc );
int cell_unpack ( struct pcell *pc , struct cell *c , struct space *s );
int cell_getsize ( struct cell *c );
......
......@@ -41,6 +41,18 @@
#define const_delta_nwneigh 1.f
#define CUBIC_SPLINE_KERNEL
/* Gravity stuff. */
#define const_theta_max 0.57735f /* Opening criteria, which is the ratio of the
cell distance over the cell width. */
#define const_G 6.67384e-8f /* Gravitational constant. */
#define const_epsilon 0.0014f /* Gravity blending distance. */
#define const_iepsilon 714.285714286f /* Inverse gravity blending distance. */
#define const_iepsilon2 (const_iepsilon*const_iepsilon)
#define const_iepsilon3 (const_iepsilon2*const_iepsilon)
#define const_iepsilon4 (const_iepsilon2*const_iepsilon2)
#define const_iepsilon5 (const_iepsilon3*const_iepsilon2)
#define const_iepsilon6 (const_iepsilon3*const_iepsilon3)
/* SPH variant to use */
#define LEGACY_GADGET2_SPH
......
......@@ -34,14 +34,14 @@
*
* (Should be used for debugging only as it runs in O(N).)
*/
void printParticle ( struct part *parts , long long int id, int N ) {
int i;
int i, found = 0;
/* Look for the particle. */
for ( i = 0 ; i < N && parts[i].id != id; i++ );
if ( i < N )
for ( i = 0 ; i < N ; i++ )
if ( parts[i].id == id ) {
printf("## Particle[%d]: id=%lld, x=[%.16e,%.16e,%.16e], v=[%.3e,%.3e,%.3e], a=[%.3e,%.3e,%.3e], h=%.3e, h_dt=%.3e, wcount=%.3e, m=%.3e, rho=%.3e, rho_dh=%.3e, div_v=%.3e, u=%.3e, dudt=%.3e, bals=%.3e, POrho2=%.3e, v_sig=%.3e, dt=%.3e\n",
i,
parts[i].id,
......@@ -61,17 +61,47 @@ void printParticle ( struct part *parts , long long int id, int N ) {
parts[i].force.v_sig,
parts[i].dt
);
else
found = 1;
}
if ( !found )
printf("## Particles[???] id=%lld not found\n", id);
}
void printgParticle ( struct gpart *parts , long long int id, int N ) {
int i, found = 0;
/* Look for the particle. */
for ( i = 0 ; i < N ; i++ )
if ( parts[i].id == -id || ( parts[i].id > 0 && parts[i].part->id == id ) ) {
printf("## gParticle[%d]: id=%lld, x=[%.16e,%.16e,%.16e], v=[%.3e,%.3e,%.3e], a=[%.3e,%.3e,%.3e], m=%.3e, dt=%.3e\n",
i,
(parts[i].id < 0) ? -parts[i].id : parts[i].part->id ,
parts[i].x[0], parts[i].x[1], parts[i].x[2],
parts[i].v[0], parts[i].v[1], parts[i].v[2],
parts[i].a[0], parts[i].a[1], parts[i].a[2],
parts[i].mass,
parts[i].dt
);
found = 1;
}
if ( !found )
printf("## Particles[???] id=%lld not found\n", id);
}
/**
* @brief Prints the details of a given particle to stdout
*
* @param p The particle to print
*
*/
void printParticle_single ( struct part *p ) {
printf("## Particle: id=%lld, x=[%e,%e,%e], v=[%.3e,%.3e,%.3e], a=[%.3e,%.3e,%.3e], h=%.3e, h_dt=%.3e, wcount=%.3e, m=%.3e, rho=%.3e, rho_dh=%.3e, div_v=%.3e, u=%.3e, dudt=%.3e, bals=%.3e, POrho2=%.3e, v_sig=%.3e, dt=%.3e\n",
......
......@@ -21,4 +21,5 @@
void printParticle(struct part *parts, long long int i, int N);
void printgParticle(struct gpart *parts, long long int i, int N);
void printParticle_single ( struct part *p );
......@@ -54,6 +54,7 @@
#include "part.h"
#include "debug.h"
#include "space.h"
#include "multipole.h"
#include "cell.h"
#include "queue.h"
#include "scheduler.h"
......@@ -553,6 +554,30 @@ void engine_repartition ( struct engine *e ) {
}
/**
* @brief Add up/down gravity tasks to a cell hierarchy.
*
* @param e The #engine.
* @param c The #cell
* @param up The upward gravity #task.
* @param down The downward gravity #task.
*/
void engine_addtasks_grav ( struct engine *e , struct cell *c , struct task *up , struct task *down ) {
/* Link the tasks to this cell. */
c->grav_up = up;
c->grav_down = down;
/* Recurse? */
if ( c->split )
for ( int k = 0 ; k < 8 ; k++ )
if ( c->progeny[k] != NULL )
engine_addtasks_grav( e , c->progeny[k] , up , down );
}
/**
* @brief Add send tasks to a hierarchy of cells.
*
......@@ -887,6 +912,9 @@ void engine_maketasks ( struct engine *e ) {
struct space *s = e->s;
struct scheduler *sched = &e->sched;
struct cell *cells = s->cells;
int nr_cells = s->nr_cells;
int nodeID = e->nodeID;
int i, j, k, ii, jj, kk, iii, jjj, kkk, cid, cjd, sid;
int *cdim = s->cdim;
struct task *t, *t2;
......@@ -900,12 +928,12 @@ void engine_maketasks ( struct engine *e ) {
for ( j = 0 ; j < cdim[1] ; j++ )
for ( k = 0 ; k < cdim[2] ; k++ ) {
cid = cell_getid( cdim , i , j , k );
if ( s->cells[cid].count == 0 )
if ( cells[cid].count == 0 )
continue;
ci = &s->cells[cid];
ci = &cells[cid];
if ( ci->count == 0 )
continue;
if ( ci->nodeID == e->nodeID )
if ( ci->nodeID == nodeID )
scheduler_addtask( sched , task_type_self , task_subtype_density , 0 , 0 , ci , NULL , 0 );
for ( ii = -1 ; ii < 2 ; ii++ ) {
iii = i + ii;
......@@ -923,15 +951,22 @@ void engine_maketasks ( struct engine *e ) {
continue;
kkk = ( kkk + cdim[2] ) % cdim[2];
cjd = cell_getid( cdim , iii , jjj , kkk );
cj = &s->cells[cjd];
cj = &cells[cjd];
if ( cid >= cjd || cj->count == 0 ||
( ci->nodeID != e->nodeID && cj->nodeID != e->nodeID ) )
( ci->nodeID != nodeID && cj->nodeID != nodeID ) )
continue;
sid = sortlistID[ (kk+1) + 3*( (jj+1) + 3*(ii+1) ) ];
t = scheduler_addtask( sched , task_type_pair , task_subtype_density , sid , 0 , ci , cj , 1 );
scheduler_addtask( sched , task_type_pair , task_subtype_density , sid , 0 , ci , cj , 1 );
}
}
}
}
/* Add the gravity mm tasks. */
for ( i = 0 ; i < nr_cells ; i++ ) {
scheduler_addtask( sched , task_type_grav_mm , task_subtype_none , -1 , 0 , &cells[i] , NULL , 0 );
for ( j = i+1 ; j < nr_cells ; j++ )
scheduler_addtask( sched , task_type_grav_mm , task_subtype_none , -1 , 0 , &cells[i] , &cells[j] , 0 );
}
/* Split the tasks. */
......@@ -944,27 +979,44 @@ void engine_maketasks ( struct engine *e ) {
error( "Failed to allocate cell-task links." );
e->nr_links = 0;
/* Add the gravity up/down tasks at the top-level cells and push them down. */
for ( k = 0 ; k < nr_cells ; k++ )
if ( cells[k].nodeID == nodeID ) {
/* Create tasks at top level. */
struct task *up = scheduler_addtask( sched , task_type_grav_up , task_subtype_none , 0 , 0 , &cells[k] , NULL , 0 );
struct task *down = scheduler_addtask( sched , task_type_grav_down , task_subtype_none , 0 , 0 , &cells[k] , NULL , 0 );
/* Push tasks down the cell hierarchy. */
engine_addtasks_grav( e , &cells[k] , up , down );
}
/* Count the number of tasks associated with each cell and
store the density tasks in each cell, and make each sort
depend on the sorts of its progeny. */
// #pragma omp parallel for private(t,j)
for ( k = 0 ; k < sched->nr_tasks ; k++ ) {
/* Get the current task. */
t = &sched->tasks[k];
if ( t->skip )
continue;
/* Link sort tasks together. */
if ( t->type == task_type_sort && t->ci->split )
for ( j = 0 ; j < 8 ; j++ )
if ( t->ci->progeny[j] != NULL && t->ci->progeny[j]->sorts != NULL ) {
t->ci->progeny[j]->sorts->skip = 0;
scheduler_addunlock( sched , t->ci->progeny[j]->sorts , t );
}
/* Link density tasks to cells. */
if ( t->type == task_type_self ) {
atomic_inc( &t->ci->nr_tasks );
if ( t->subtype == task_subtype_density ) {
t->ci->density = engine_addlink( e , t->ci->density , t );
atomic_inc( &t->ci->nr_density );
if ( t->ci->nr_density > 27*8 )
error( "Density overflow." );
}
}
else if ( t->type == task_type_pair ) {
......@@ -975,8 +1027,6 @@ void engine_maketasks ( struct engine *e ) {
atomic_inc( &t->ci->nr_density );
t->cj->density = engine_addlink( e , t->cj->density , t );
atomic_inc( &t->cj->nr_density );
if ( t->ci->nr_density > 8*27 || t->cj->nr_density > 8*27 )
error( "Density overflow." );
}
}
else if ( t->type == task_type_sub ) {
......@@ -989,16 +1039,28 @@ void engine_maketasks ( struct engine *e ) {
if ( t->cj != NULL ) {
t->cj->density = engine_addlink( e , t->cj->density , t );
atomic_inc( &t->cj->nr_density );
if ( t->cj->nr_density > 8*27 )
error( "Density overflow." );
}
}
}
/* Link gravity multipole tasks to the up/down tasks. */
if ( t->type == task_type_grav_mm ||
( t->type == task_type_sub && t->subtype == task_subtype_grav ) ) {
atomic_inc( &t->ci->nr_tasks );
scheduler_addunlock( sched , t->ci->grav_up , t );
scheduler_addunlock( sched , t , t->ci->grav_down );
if ( t->cj != NULL && t->ci->grav_up != t->cj->grav_up ) {
scheduler_addunlock( sched , t->cj->grav_up , t );
scheduler_addunlock( sched , t , t->cj->grav_down );
}
}
/* Append a ghost task to each cell. */
for ( k = 0 ; k < s->nr_cells ; k++ )
engine_mkghosts( e , &s->cells[k] , NULL );
}
/* Append a ghost task to each cell, and add kick2 tasks to the
super cells. */
for ( k = 0 ; k < nr_cells ; k++ )
engine_mkghosts( e , &cells[k] , NULL );
/* if ( e->policy & engine_policy_fixdt )
space_map_cells_pre( s , 1 , &scheduler_map_mkghosts_nokick1 , sched );
else
......@@ -1031,12 +1093,12 @@ void engine_maketasks ( struct engine *e ) {
/* Otherwise, pair interaction? */
else if ( t->type == task_type_pair && t->subtype == task_subtype_density ) {
t2 = scheduler_addtask( sched , task_type_pair , task_subtype_force , 0 , 0 , t->ci , t->cj , 0 );