Commit 838ec603 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

made the second kick a regular task.


Former-commit-id: e7c80f9cd2e92e223a2311302aa6d2a7e8c68a73
parent 10014e35
......@@ -22,3 +22,4 @@
#define atomic_add(v,i) __sync_fetch_and_add( v , i )
#define atomic_inc(v) atomic_add( v , 1 )
#define atomic_dec(v) atomic_add( v , -1 )
......@@ -88,7 +88,7 @@ struct cell {
int nr_density;
/* The ghost task to link density to interactions. */
struct task *ghost;
struct task *ghost, *kick2;
/* Number of tasks that are associated with this cell. */
int nr_tasks;
......@@ -105,6 +105,15 @@ struct cell {
/* ID of the previous owner, e.g. runner. */
int owner;
/* Momentum of particles in cell. */
float mom[3], ang[3];
/* Potential and kinetic energy of particles in this cell. */
float epot, ekin;
/* Number of particles updated in this cell. */
int updated;
/* Linking pointer for "memory management". */
struct cell *next;
......
......@@ -33,6 +33,7 @@
/* Local headers. */
#include "cycle.h"
#include "atomic.h"
#include "timers.h"
#include "const.h"
#include "vector.h"
......@@ -94,7 +95,7 @@ void engine_prepare ( struct engine *e ) {
if ( s->tasks[k].skip )
continue;
for ( j = 0 ; j < s->tasks[k].nr_unlock_tasks ; j++ )
__sync_add_and_fetch( &s->tasks[k].unlock_tasks[j]->wait , 1 );
atomic_inc( &s->tasks[k].unlock_tasks[j]->wait );
}
// printf( "engine_prepare: preparing task dependencies took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
......@@ -288,6 +289,51 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
}
/**
* @brief Mapping function to collect the data from the second kick.
*/
void engine_collect_kick2 ( struct cell *c ) {
int k, updated = 0;
float dt_min = FLT_MAX, dt_max = 0.0f, ekin = 0.0f, epot = 0.0f;
float mom[3] = { 0.0f , 0.0f , 0.0f }, ang[3] = { 0.0f , 0.0f , 0.0f };
struct cell *cp;
/* If I am a super-cell, return immediately. */
if ( c->kick2 != NULL )
return;
/* If this cell is not split, I'm in trouble. */
if ( !c->split )
error( "Cell has no super-cell." );
/* Collect the values from the progeny. */
for ( k = 0 ; k < 8 ; k++ )
if ( c->progeny[k] != NULL ) {
cp = c->progeny[k];
engine_collect_kick2( cp );
dt_min = fminf( dt_min , cp->dt_min );
dt_max = fmaxf( dt_max , cp->dt_max );
updated += cp->updated;
ekin += cp->ekin;
epot += cp->epot;
mom[0] += cp->mom[0]; mom[1] += cp->mom[1]; mom[2] += cp->mom[2];
ang[0] += cp->ang[0]; ang[1] += cp->ang[1]; ang[2] += cp->ang[2];
}
/* Store the collected values in the cell. */
c->dt_min = dt_min;
c->dt_max = dt_max;
c->updated = updated;
c->ekin = ekin;
c->epot = epot;
c->mom[0] = mom[0]; c->mom[1] = mom[1]; c->mom[2] = mom[2];
c->ang[0] = ang[0]; c->ang[1] = ang[1]; c->ang[2] = ang[2];
}
/**
* @brief Compute the force on a single particle brute-force.
*/
......@@ -354,16 +400,13 @@ void engine_single ( double *dim , long long int pid , struct part *__restrict__
void engine_step ( struct engine *e , int sort_queues ) {
int k, nr_parts = e->s->nr_parts;
struct part *restrict parts = e->s->parts, *restrict p;
struct xpart *restrict xp;
float dt = e->dt, hdt = 0.5*dt, dt_step, dt_max, dt_min, ldt_min, ldt_max;
float pdt, h, m, v[3], u;
double x[3];
double epot = 0.0, ekin = 0.0, lepot, lekin, lmom[3], mom[3] = { 0.0 , 0.0 , 0.0 };
double lang[3], ang[3] = { 0.0 , 0.0 , 0.0 };
int k;
float dt = e->dt, dt_step, dt_max = 0.0f, dt_min = FLT_MAX;
float epot = 0.0, ekin = 0.0, mom[3] = { 0.0 , 0.0 , 0.0 };
float ang[3] = { 0.0 , 0.0 , 0.0 };
// double lent, ent = 0.0;
int threadID, nthreads, count = 0, lcount;
int count = 0;
struct cell *c;
TIMER_TIC2
......@@ -424,83 +467,19 @@ void engine_step ( struct engine *e , int sort_queues ) {
// printParticle( parts , 432626 );
// printParticle( parts , 432628 );
/* Second kick. */
TIMER_TIC_ND
dt_min = FLT_MAX; dt_max = 0.0f;
#pragma omp parallel private(p,xp,k,ldt_min,lcount,ldt_max,lmom,lang,lekin,lepot,threadID,nthreads,pdt,v,h,m,x,u)
{
threadID = omp_get_thread_num();
nthreads = omp_get_num_threads();
ldt_min = FLT_MAX; ldt_max = 0.0f;
lmom[0] = 0.0; lmom[1] = 0.0; lmom[2] = 0.0;
lang[0] = 0.0; lang[1] = 0.0; lang[2] = 0.0;
lekin = 0.0; lepot = 0.0; /* lent=0.0; */ lcount = 0;
for ( k = nr_parts * threadID / nthreads ; k < nr_parts * (threadID + 1) / nthreads ; k++ ) {
/* Get a handle on the part. */
p = &parts[k];
xp = p->xtras;
/* Get local copies of particle data. */
pdt = p->dt;
h = p->h;
m = p->mass;
x[0] = p->x[0]; x[1] = p->x[1]; x[2] = p->x[2];
/* Scale the derivatives if they're freshly computed. */
if ( pdt <= dt_step ) {
p->force.h_dt *= h * 0.333333333f;
lcount += 1;
}
/* Update the particle's time step. */
p->dt = pdt = const_cfl * h / ( p->force.v_sig );
/* Update positions and energies at the half-step. */
p->v[0] = v[0] = xp->v_old[0] + hdt * p->a[0];
p->v[1] = v[1] = xp->v_old[1] + hdt * p->a[1];
p->v[2] = v[2] = xp->v_old[2] + hdt * p->a[2];
p->u = u = xp->u_old + hdt * p->force.u_dt;
/* Get the smallest/largest dt. */
ldt_min = fminf( ldt_min , pdt );
ldt_max = fmaxf( ldt_max , pdt );
/* Collect total energy. */
lekin += 0.5 * m * ( v[0]*v[0] + v[1]*v[1] + v[2]*v[2] );
lepot += m * u;
/* Collect momentum */
lmom[0] += m * p->v[0];
lmom[1] += m * p->v[1];
lmom[2] += m * p->v[2];
/* Collect angular momentum */
lang[0] += m * ( x[1]*v[2] - v[2]*x[1] );
lang[1] += m * ( x[2]*v[0] - v[0]*x[2] );
lang[2] += m * ( x[0]*v[1] - v[1]*x[0] );
/* Collect entropic function */
// lent += u * pow( p->rho, 1.f-const_gamma );
}
#pragma omp critical
{
dt_min = fminf( dt_min , ldt_min );
dt_max = fmaxf( dt_max , ldt_max );
mom[0] += lmom[0];
mom[1] += lmom[1];
mom[2] += lmom[2];
ang[0] += lang[0];
ang[1] += lang[1];
ang[2] += lang[2];
// ent += (const_gamma -1.) * lent;
epot += lepot;
ekin += lekin;
count += lcount;
/* Collect the cell data from the second kick. */
for ( k = 0 ; k < e->s->nr_cells ; k++ ) {
c = &e->s->cells[k];
engine_collect_kick2( c );
dt_min = fminf( dt_min , c->dt_min );
dt_max = fmaxf( dt_max , c->dt_max );
ekin += c->ekin;
epot += c->epot;
count += c->updated;
mom[0] += c->mom[0]; mom[1] += c->mom[1]; mom[2] += c->mom[2];
ang[0] += c->ang[0]; ang[1] += c->ang[1]; ang[2] += c->ang[2];
}
}
TIMER_TOC( timer_kick2 );
e->dt_min = dt_min;
// printParticle( parts , 432626 );
printf( "engine_step: dt_min/dt_max is %e/%e.\n" , dt_min , dt_max ); fflush(stdout);
......
......@@ -309,8 +309,8 @@ struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep
continue;
/* Get the score for this task. */
if ( res->type == task_type_self || res->type == task_type_ghost || res->type == task_type_sort || ( res->type == task_type_sub && res->cj == NULL ) )
score = ( res->ci->owner == rid );
if ( res->cj == NULL )
score = 2 * ( res->ci->owner == rid );
else
score = ( res->ci->owner == rid ) + ( res->cj->owner == rid );
if ( score <= score_best )
......@@ -352,8 +352,7 @@ struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep
hits += 1;
/* Should we bother looking any farther? */
if ( ( qtasks[ qtid[ ind_best ] ].cj == NULL && score_best == 1 ) ||
score_best == 2 );
if ( score_best == 2 );
break;
} /* loop over the task IDs. */
......
......@@ -33,6 +33,7 @@
/* Local headers. */
#include "cycle.h"
#include "atomic.h"
#include "timers.h"
#include "const.h"
#include "lock.h"
......@@ -331,7 +332,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags , int clock )
* @brief Intermediate task between density and force
*
* @param r The runner thread.
* @param c THe cell.
* @param c The cell.
*/
void runner_doghost ( struct runner *r , struct cell *c ) {
......@@ -506,6 +507,96 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
#endif
}
/**
* @brief Compute the second kick of the given cell.
*
* @param r The runner thread.
* @param c The cell.
*/
void runner_dokick2 ( struct runner *r , struct cell *c ) {
int k, count = 0, nr_parts = c->count;
float dt_min = FLT_MAX, dt_max = 0.0f, ekin = 0.0f, epot = 0.0f;
float mom[3] = { 0.0f , 0.0f , 0.0f }, ang[3] = { 0.0f , 0.0f , 0.0f };
float x[3], v[3], u, h, pdt, m;
float dt_step = r->e->dt_step, dt = r->e->dt, hdt = 0.5f*dt;
struct part *p, *parts = c->parts;
struct xpart *xp;
TIMER_TIC
/* Loop over the particles and kick them. */
for ( k = 0 ; k < nr_parts ; k++ ) {
/* Get a handle on the part. */
p = &parts[k];
xp = p->xtras;
/* Get local copies of particle data. */
pdt = p->dt;
h = p->h;
m = p->mass;
x[0] = p->x[0]; x[1] = p->x[1]; x[2] = p->x[2];
/* Scale the derivatives if they're freshly computed. */
if ( pdt <= dt_step ) {
p->force.h_dt *= h * 0.333333333f;
count += 1;
}
/* Update the particle's time step. */
p->dt = pdt = const_cfl * h / ( p->force.v_sig );
/* Update positions and energies at the half-step. */
p->v[0] = v[0] = xp->v_old[0] + hdt * p->a[0];
p->v[1] = v[1] = xp->v_old[1] + hdt * p->a[1];
p->v[2] = v[2] = xp->v_old[2] + hdt * p->a[2];
p->u = u = xp->u_old + hdt * p->force.u_dt;
/* Get the smallest/largest dt. */
dt_min = fminf( dt_min , pdt );
dt_max = fmaxf( dt_max , pdt );
/* Collect total energy. */
ekin += 0.5 * m * ( v[0]*v[0] + v[1]*v[1] + v[2]*v[2] );
epot += m * u;
/* Collect momentum */
mom[0] += m * p->v[0];
mom[1] += m * p->v[1];
mom[2] += m * p->v[2];
/* Collect angular momentum */
ang[0] += m * ( x[1]*v[2] - v[2]*x[1] );
ang[1] += m * ( x[2]*v[0] - v[0]*x[2] );
ang[2] += m * ( x[0]*v[1] - v[1]*x[0] );
/* Collect entropic function */
// lent += u * pow( p->rho, 1.f-const_gamma );
}
#ifdef TIMER_VERBOSE
printf( "runner_dokick2[%02i]: %i parts at depth %i took %.3f ms.\n" ,
r->id , c->count , c->depth ,
((double)TIMER_TOC(timer_kick2)) / CPU_TPS * 1000 ); fflush(stdout);
#else
TIMER_TOC(timer_kick2);
#endif
/* Store the computed values in the cell. */
c->dt_min = dt_min;
c->dt_max = dt_max;
c->updated = count;
c->ekin = ekin;
c->epot = epot;
c->mom[0] = mom[0]; c->mom[1] = mom[1]; c->mom[2] = mom[2];
c->ang[0] = ang[0]; c->ang[1] = ang[1]; c->ang[2] = ang[2];
}
/**
......@@ -660,14 +751,17 @@ void *runner_main ( void *data ) {
if ( ci->super == ci )
runner_doghost( r , ci );
break;
case task_type_kick2:
runner_dokick2( r , ci );
break;
default:
error( "Unknown task type." );
}
/* Resolve any dependencies. */
for ( k = 0 ; k < t->nr_unlock_tasks ; k++ )
if ( __sync_fetch_and_sub( &t->unlock_tasks[k]->wait , 1 ) == 0 )
abort();
if ( atomic_dec( &t->unlock_tasks[k]->wait ) == 0 )
error( "Task negative wait." );
} /* main loop. */
......
......@@ -106,8 +106,8 @@ int space_marktasks ( struct space *s ) {
/* Single-cell task? */
else if ( t->type == task_type_self ||
t->type == task_type_ghost ||
( t->type == task_type_sub && t->cj == NULL ) ) {
t->type == task_type_ghost ||
( t->type == task_type_sub && t->cj == NULL ) ) {
/* Set this task's skip. */
t->skip = ( t->ci->dt_min > dt_step );
......@@ -736,6 +736,8 @@ void space_map_clearsort ( struct cell *c , void *data ) {
* Looks for the super cell, e.g. the highest-level cell above each
* cell for which a pair is defined. All ghosts below this cell will
* depend on the ghost of their parents (sounds spooky, but it isn't).
*
* A kick2-task is appended to each super cell.
*/
void space_map_mkghosts ( struct cell *c , void *data ) {
......@@ -754,11 +756,15 @@ void space_map_mkghosts ( struct cell *c , void *data ) {
if ( c->super != c || c->nr_tasks > 0 )
c->ghost = space_addtask( s , task_type_ghost , task_subtype_none , 0 , 0 , c , NULL , 0 );
/* Append a kick task if we are the active super cell. */
if ( c->super == c && c->nr_tasks > 0 )
c->kick2 = space_addtask( s , task_type_kick2 , task_subtype_none , 0 , 0 , c , NULL , 0 );
/* If we are not the super cell ourselves, make our ghost depend
on our parent cell. */
if ( c->super != c )
task_addunlock( c->parent->ghost , c->ghost );
}
......@@ -1601,7 +1607,9 @@ void space_maketasks ( struct space *s , int do_sort ) {
/* Append a ghost task to each cell. */
space_map_cells_pre( s , 1 , &space_map_mkghosts , s );
/* Run through the tasks and make force tasks for each density task. */
/* Run through the tasks and make force tasks for each density task.
Each force task depends on the cell ghosts and unlocks the kick2 task
of its super-cell. */
kk = s->nr_tasks;
#pragma omp parallel for private(t,t2)
for ( k = 0 ; k < kk ; k++ ) {
......@@ -1618,6 +1626,7 @@ void space_maketasks ( struct space *s , int do_sort ) {
task_addunlock( t , t->ci->super->ghost );
t2 = space_addtask( s , task_type_self , task_subtype_force , 0 , 0 , t->ci , NULL , 0 );
task_addunlock( t->ci->ghost , t2 );
task_addunlock( t2 , t->ci->super->kick2 );
}
/* Otherwise, pair interaction? */
......@@ -1628,17 +1637,23 @@ void space_maketasks ( struct space *s , int do_sort ) {
t2 = space_addtask( s , task_type_pair , task_subtype_force , 0 , 0 , t->ci , t->cj , 0 );
task_addunlock( t->ci->ghost , t2 );
task_addunlock( t->cj->ghost , t2 );
task_addunlock( t2 , t->ci->super->kick2 );
if ( t->ci->super != t->cj->super )
task_addunlock( t2 , t->cj->super->kick2 );
}
/* Otherwise, sub interaction? */
else if ( t->type == task_type_sub && t->subtype == task_subtype_density ) {
task_addunlock( t , t->ci->super->ghost );
if ( t->cj != NULL )
if ( t->cj != NULL && t->ci->super != t->cj->super )
task_addunlock( t , t->cj->super->ghost );
t2 = space_addtask( s , task_type_sub , task_subtype_force , t->flags , 0 , t->ci , t->cj , 0 );
task_addunlock( t->ci->ghost , t2 );
if ( t->cj != NULL )
task_addunlock( t->cj->ghost , t2 );
task_addunlock( t2 , t->ci->super->kick2 );
if ( t->cj != NULL && t->ci->super != t->cj->super )
task_addunlock( t2 , t->cj->super->kick2 );
}
}
......
......@@ -28,7 +28,7 @@
#define space_subsize_default 1000
#define space_dosub 0
#define space_stretch 1.05
#define space_maxtaskspercell 30
#define space_maxtaskspercell 31
/* Convert cell location to ID. */
......
......@@ -31,6 +31,7 @@ enum task_types {
task_type_pair,
task_type_sub,
task_type_ghost,
task_type_kick2,
task_type_count
};
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment