Commit aae80764 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

added force calculation alongside density calculation.


Former-commit-id: 48ebccf90e923839f803f63c2d03f8d8b286aa2a
parent c11145cb
......@@ -22,6 +22,7 @@
/* Local headers. */
#include "cycle.h"
#include "const.h"
#include "lock.h"
#include "task.h"
#include "part.h"
......
......@@ -21,6 +21,7 @@
/* Some constants. */
#define part_maxwait 3
#define part_maxunlock 39
#define part_dtmax 10
/* Data of a single particle. */
......@@ -30,14 +31,17 @@ struct part {
float h;
/* Particle time-step. */
float dt;
int dt;
/* Particle ID. */
int id;
/* Particle mass. */
float mass;
/* Particle density. */
float rho;
/* Particle ID. */
int id;
/* Particle position. */
double x[3];
......@@ -50,8 +54,8 @@ struct part {
/* Particle pressure. */
float P;
/* Particle mass. */
float m;
/* Aggregate quantities. */
float POrho2;
/* Particle internal energy. */
float u;
......@@ -59,11 +63,15 @@ struct part {
/* Change in particle energy over time. */
float u_dt;
/* Change in smoothing length over time. */
float h_dt;
/* Derivative of the density with respect to this particle's smoothing length. */
float rho_dh;
/* Particle number density. */
int icount;
float wcount;
} __attribute__((aligned (32)));
......
......@@ -306,7 +306,7 @@ struct task *queue_gettask_new ( struct queue *q , int rid , int blocking , int
continue;
/* Get the score for this task. */
if ( res->type == task_type_self || res->type == task_type_sort || ( res->type == task_type_sub && res->cj == NULL ) )
if ( res->type == task_type_self || res->type == task_type_ghost || res->type == task_type_sort || ( res->type == task_type_sub && res->cj == NULL ) )
score = ( res->ci->owner == rid );
else
score = ( res->ci->owner == rid ) + ( res->cj->owner == rid );
......
......@@ -33,6 +33,7 @@
/* Local headers. */
#include "cycle.h"
#include "const.h"
#include "lock.h"
#include "task.h"
#include "part.h"
......@@ -485,6 +486,56 @@ void runner_dosort ( struct runner_thread *rt , struct cell *c , int flags ) {
#endif
}
/**
* @brief Intermediate task between density and force
*
* @param rt The runner thread.
* @param ci THe cell.
*/
void runner_doghost ( struct runner_thread *r , struct cell *c ) {
struct part *p;
int i, k;
TIMER_TIC
/* If this cell has progeny, don't bother. */
if ( c->split )
return;
/* Loop over the parts in this cell. */
for ( i = 0 ; i < c->count ; i++ ) {
/* Get a direct pointer on the part. */
p = &c->parts[i];
/* Reset the acceleration. */
for ( k = 0 ; k < 3 ; k++ )
p->a[k] = 0.0f;
/* Reset the time derivatives. */
p->u_dt = 0.0f;
p->h_dt = 0.0f;
/* Compute the pressure. */
p->P = p->rho * p->u * ( const_gamma - 1.0f );
/* Compute the P/Omega/rho2. */
p->POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f );
}
#ifdef TIMER_VERBOSE
printf( "runner_doghost[%02i]: %i parts at depth %i took %.3f ms.\n" ,
rt->id , c->count , c->depth ,
((double)TIMER_TOC(runner_timer_doghost)) / CPU_TPS * 1000 ); fflush(stdout);
#else
TIMER_TOC(runner_timer_doghost);
#endif
}
/**
......@@ -677,6 +728,7 @@ void *runner_main ( void *data ) {
cell_unlocktree( cj );
break;
case task_type_ghost:
runner_doghost( rt , ci );
break;
default:
error( "Unknown task type." );
......
......@@ -33,9 +33,13 @@
enum {
runner_timer_none = 0,
runner_timer_dosort,
runner_timer_doself,
runner_timer_dopair,
runner_timer_dosub,
runner_timer_doself_density,
runner_timer_doself_force,
runner_timer_dopair_density,
runner_timer_dopair_force,
runner_timer_dosub_density,
runner_timer_dosub_force,
runner_timer_doghost,
runner_timer_getpair,
runner_timer_steal,
runner_timer_stalled,
......@@ -155,6 +159,7 @@ struct runner {
/* Function prototypes. */
void runner_run ( struct runner *r , int sort_queues );
void runner_doghost ( struct runner_thread *rt , struct cell *c );
void runner_dopair_density ( struct runner_thread *rt , struct cell *ci , struct cell *cj );
void runner_doself_density ( struct runner_thread *rt , struct cell *c );
void runner_dosub_density ( struct runner_thread *rt , struct cell *ci , struct cell *cj , int flags );
......
......@@ -41,6 +41,15 @@
#define IACT2(f) PASTE(runner_iact,f)
#define IACT IACT2(FUNCTION)
#define TIMER_DOSELF2(f) PASTE(runner_timer_doself,f)
#define TIMER_DOSELF TIMER_DOSELF2(FUNCTION)
#define TIMER_DOPAIR2(f) PASTE(runner_timer_dopair,f)
#define TIMER_DOPAIR TIMER_DOPAIR2(FUNCTION)
#define TIMER_DOSUB2(f) PASTE(runner_timer_dosub,f)
#define TIMER_DOSUB TIMER_DOSUB2(FUNCTION)
/**
* @brief Compute the interactions between a cell pair.
......@@ -56,7 +65,8 @@ void DOPAIR_NAIVE ( struct runner_thread *rt , struct cell *ci , struct cell *cj
int pid, pjd, k, count_i = ci->count, count_j = cj->count;
double shift[3] = { 0.0 , 0.0 , 0.0 };
struct part *pi, *pj, *parts_i = ci->parts, *parts_j = cj->parts;
double dx[3], pix[3], hi, hi2, r2;
double pix[3];
float dx[3], hi, hi2, r2;
TIMER_TIC
/* Get the relative distance between the pairs, wrapping. */
......@@ -98,7 +108,7 @@ void DOPAIR_NAIVE ( struct runner_thread *rt , struct cell *ci , struct cell *cj
/* Hit or miss? */
if ( r2 < hi2 || r2 < pj->h*pj->h ) {
IACT( r2 , hi , pj->h , pi , pj );
IACT( r2 , dx , hi , pj->h , pi , pj );
}
......@@ -107,9 +117,9 @@ void DOPAIR_NAIVE ( struct runner_thread *rt , struct cell *ci , struct cell *cj
} /* loop over the parts in ci. */
#ifdef TIMER_VERBOSE
printf( "runner_dopair_naive[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n" , rt->id , count_i , count_j , ci->depth , ci->r_max , cj->r_max , ((double)TIMER_TOC(runner_timer_dopair)) / CPU_TPS * 1000 );
printf( "runner_dopair_naive[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n" , rt->id , count_i , count_j , ci->depth , ci->r_max , cj->r_max , ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000 );
#else
TIMER_TOC(runner_timer_dopair);
TIMER_TOC(TIMER_DOPAIR);
#endif
......@@ -132,7 +142,8 @@ void DOPAIR ( struct runner_thread *rt , struct cell *ci , struct cell *cj ) {
struct cell *temp;
struct entry *sort_i, *sort_j;
struct part *pi, *pj, *parts_i, *parts_j;
double dx[3], pix[3], pjx[3], hi, hi2, hj, hj2, r2, di, dj;
double pix[3], pjx[3], di, dj;
float dx[3], hi, hi2, hj, hj2, r2;
double hi_max, hj_max, di_max, dj_min;
int count_i, count_j;
TIMER_TIC
......@@ -214,7 +225,7 @@ void DOPAIR ( struct runner_thread *rt , struct cell *ci , struct cell *cj ) {
/* Hit or miss? */
if ( r2 < hi2 ) {
IACT( r2 , hi , pj->h , pi , pj );
IACT( r2 , dx , hi , pj->h , pi , pj );
}
......@@ -255,7 +266,7 @@ void DOPAIR ( struct runner_thread *rt , struct cell *ci , struct cell *cj ) {
/* Hit or miss? */
if ( r2 < hj2 && r2 > pi->h*pi->h ) {
IACT( r2 , pi->h , hj , pi , pj );
IACT( r2 , dx , pi->h , hj , pi , pj );
}
......@@ -264,9 +275,9 @@ void DOPAIR ( struct runner_thread *rt , struct cell *ci , struct cell *cj ) {
} /* loop over the parts in ci. */
#ifdef TIMER_VERBOSE
printf( "runner_dopair[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f, h=%.3f) took %.3f ms.\n" , rt->id , count_i , count_j , ci->depth , ci->r_max , cj->r_max , fmax(ci->h[0],fmax(ci->h[1],ci->h[2])) , ((double)(TIMER_TOC(runner_timer_dopair))) / CPU_TPS * 1000 );
printf( "runner_dopair[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f, h=%.3f) took %.3f ms.\n" , rt->id , count_i , count_j , ci->depth , ci->r_max , cj->r_max , fmax(ci->h[0],fmax(ci->h[1],ci->h[2])) , ((double)(TIMER_TOC(TIMER_DOPAIR))) / CPU_TPS * 1000 );
#else
TIMER_TOC(runner_timer_dopair);
TIMER_TOC(TIMER_DOPAIR);
#endif
}
......@@ -282,7 +293,8 @@ void DOPAIR ( struct runner_thread *rt , struct cell *ci , struct cell *cj ) {
void DOSELF ( struct runner_thread *rt , struct cell *c ) {
int k, pid, pjd, count = c->count;
double pix[3], dx[3], hi, hi2, r2;
double pix[3];
float dx[3], hi, hi2, r2;
struct part *pi, *pj, *parts = c->parts;
TIMER_TIC
......@@ -317,7 +329,7 @@ void DOSELF ( struct runner_thread *rt , struct cell *c ) {
/* Hit or miss? */
if ( r2 < hi2 || r2 < pj->h*pj->h ) {
IACT( r2 , hi , pj->h , pi , pj );
IACT( r2 , dx , hi , pj->h , pi , pj );
}
......@@ -326,9 +338,9 @@ void DOSELF ( struct runner_thread *rt , struct cell *c ) {
} /* loop over all particles. */
#ifdef TIMER_VERBOSE
printf( "runner_doself[%02i]: %i parts at depth %i took %.3f ms.\n" , rt->id , count , c->depth , ((double)TIMER_TOC(runner_timer_doself)) / CPU_TPS * 1000 );
printf( "runner_doself[%02i]: %i parts at depth %i took %.3f ms.\n" , rt->id , count , c->depth , ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000 );
#else
TIMER_TOC(runner_timer_doself);
TIMER_TOC(TIMER_DOSELF);
#endif
}
......@@ -533,9 +545,9 @@ void DOSUB ( struct runner_thread *rt , struct cell *ci , struct cell *cj , int
#ifdef TIMER_VERBOSE
printf( "runner_dosub[%02i]: flags=%i at depth %i took %.3f ms.\n" , rt->id , flags , ci->depth , ((double)TIMER_TOC(runner_timer_dosub)) / CPU_TPS * 1000 );
printf( "runner_dosub[%02i]: flags=%i at depth %i took %.3f ms.\n" , rt->id , flags , ci->depth , ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000 );
#else
TIMER_TOC(runner_timer_dosub);
TIMER_TOC(TIMER_DOSUB);
#endif
}
......
......@@ -16,59 +16,88 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
/* Coefficients for the kernel. */
#define kernel_degree 3
#define kernel_ivals 2
#define kernel_gamma 0.5f
#define kernel_igamma 2.0f
#define kernel_igamma3 8.0f
#define kernel_igamma4 16.0f
static float kernel_coeffs[ (kernel_degree + 1) * (kernel_ivals + 1) ] =
{ 3.0/4.0*M_1_PI , -3.0/2.0*M_1_PI , 0.0 , M_1_PI ,
-0.25*M_1_PI , 3.0/2.0*M_1_PI , -3.0*M_1_PI , M_2_PI ,
0.0 , 0.0 , 0.0 , 0.0 };
#define kernel_root(h) ( 1.0f/((h)*(h)*(h))*kernel_igamma3*kernel_coeffs[ kernel_degree ] )
/**
* @brief Helper function to evaluate the kernel at a given x.
*/
__attribute__ ((always_inline)) INLINE static void kernel_deval ( float x , float *W , float *dW_dx ) {
int ind = fmin( x , kernel_ivals );
float *coeffs = &kernel_coeffs[ ind*(kernel_degree + 1) ];
float w = coeffs[0]*x + coeffs[1];
float dw_dx = coeffs[0];
for ( int k = 2 ; k <= kernel_degree ; k++ ) {
dw_dx = dw_dx*x + w;
w = x*w + coeffs[k];
}
*W = w;
*dW_dx = dw_dx;
}
__attribute__ ((always_inline)) INLINE static void kernel_eval ( float x , float *W ) {
int ind = fmin( x , kernel_ivals );
float *coeffs = &kernel_coeffs[ ind*(kernel_degree + 1) ];
float w = coeffs[0]*x + coeffs[1];
for ( int k = 2 ; k <= kernel_degree ; k++ )
w = x*w + coeffs[k];
*W = w;
}
__attribute__ ((always_inline)) INLINE void runner_iact_density ( float r2 , float hi , float hj , struct part *pi , struct part *pj ) {
/**
* @brief Density kernel
*/
#define KERNEL_COEFF_1 2.546479089470f
#define KERNEL_COEFF_2 15.278874536822f
#define KERNEL_COEFF_3 45.836623610466f
#define KERNEL_COEFF_4 30.557749073644f
#define KERNEL_COEFF_5 5.092958178941f
#define KERNEL_COEFF_6 (-15.278874536822f)
#define NORM_COEFF 4.188790204786f
__attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r2 , float *dx , float hi , float hj , struct part *pi , struct part *pj ) {
float r = sqrtf( r2 );
float ui, uj, wi, wj;
float ui_dh, uj_dh, wi_dh, wj_dh;
float xi, xj;
float hg_inv, hg2_inv;
float wi, wj, wi_dx, wj_dx;
if ( r2 < hi*hi && pi != NULL ) {
ui = r / hi;
ui_dh = -r / hi / hi;
if ( ui < 0.5f ) {
wi = KERNEL_COEFF_1 + KERNEL_COEFF_2 * (ui - 1.0f) * ui * ui;
wi_dh = KERNEL_COEFF_2 * ui_dh * ui * ui
+ 2 * KERNEL_COEFF_2 * (ui - 1.0f) * ui_dh * ui;
}
else {
wi = KERNEL_COEFF_5 * (1.0f - ui) * (1.0f - ui) * (1.0f - ui);
wi_dh = -3 * KERNEL_COEFF_5 * ui_dh * (1.0f - ui) * (1.0f - ui);
}
pi->rho += NORM_COEFF * wi;
pi->rho_dh += NORM_COEFF * wi_dh;
hg_inv = kernel_igamma / hi;
hg2_inv = hg_inv * hg_inv;
xi = r * hg_inv;
kernel_deval( xi , &wi , &wi_dx );
pi->rho += pj->mass * hg_inv * hg2_inv * wi;
pi->rho_dh += -pj->mass * hg2_inv * hg2_inv * ( 3.0*wi + xi*wi_dx );
pi->wcount += wi * 4.0f * M_PI / 3.0f * kernel_igamma3;
pi->icount += 1;
}
if ( r2 < hj*hj && pj != NULL ) {
uj = r / hj;
uj_dh = -r / hj / hj;
if ( uj < 0.5f ) {
wj = KERNEL_COEFF_1 + KERNEL_COEFF_2 * (uj - 1.0f) * uj * uj;
wj_dh = KERNEL_COEFF_2 * uj_dh * uj * uj
+ 2 * KERNEL_COEFF_2 * (uj - 1.0f) * uj_dh * uj;
}
else {
wj = KERNEL_COEFF_5 * (1.0f - uj) * (1.0f - uj) * (1.0f - uj);
wj_dh = -3 * KERNEL_COEFF_5 * uj_dh * (1.0f - uj) * (1.0f - uj);
}
pj->rho += NORM_COEFF * wj;
pj->rho_dh += NORM_COEFF * wj_dh;
hg_inv = kernel_igamma / hj;
hg2_inv = hg_inv * hg_inv;
xj = r * hg_inv;
kernel_deval( xj , &wj , &wj_dx );
pj->rho += pi->mass * hg_inv * hg2_inv * wj;
pj->rho_dh += -pi->mass * hg2_inv * hg2_inv * ( 3.0*wj + xj*wj_dx );
pj->wcount += wj * 4.0f * M_PI / 3.0f * kernel_igamma3;
pj->icount += 1;
}
#ifdef HIST
......@@ -82,15 +111,57 @@ __attribute__ ((always_inline)) INLINE void runner_iact_density ( float r2 , flo
__attribute__ ((always_inline)) INLINE void runner_iact_force ( float r2 , float hi , float hj , struct part *pi , struct part *pj ) {
__attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2 , float *dx , float hi , float hj , struct part *pi , struct part *pj ) {
#define KERNEL_COEFF_1 2.546479089470f
#define KERNEL_COEFF_2 15.278874536822f
#define KERNEL_COEFF_3 45.836623610466f
#define KERNEL_COEFF_4 30.557749073644f
#define KERNEL_COEFF_5 5.092958178941f
#define KERNEL_COEFF_6 (-15.278874536822f)
#define NORM_COEFF 4.188790204786f
float r = sqrtf( r2 ), ri = 1.0f / r;
float xi, xj;
float hig_inv, hig2_inv;
float hjg_inv, hjg2_inv;
float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr;
float f;
int k;
/* Get the kernel for hi. */
hig_inv = kernel_igamma / hi;
hig2_inv = hig_inv * hig_inv;
xi = r * hig_inv;
kernel_deval( xi , &wi , &wi_dx );
wi_dr = hig2_inv * hig2_inv * wi_dx;
/* Get the kernel for hj. */
hjg_inv = kernel_igamma / hj;
hjg2_inv = hjg_inv * hjg_inv;
xj = r * hjg_inv;
kernel_deval( xj , &wj , &wj_dx );
wj_dr = hjg2_inv *hjg2_inv * wj_dx;
/* Get the common factor out. */
w = ri * ( pi->POrho2 * wi_dr + pj->POrho2 * wj_dr );
/* Use the force, Luke! */
for ( k = 0 ; k < 3 ; k++ ) {
f = dx[k] * w;
pi->a[k] += pj->mass * f;
pj->a[k] += -pi->mass * f;
}
/* Compute dv dot r. */
dvdr = ( pi->v[0] - pj->v[0] ) * dx[0] + ( pi->v[1] - pj->v[1] ) * dx[1] + ( pi->v[2] - pj->v[2] ) * dx[2];
/* Get the time derivative for u. */
pi->u_dt += pj->mass * dvdr * wi_dr;
pj->u_dt += pi->mass * dvdr * wj_dr;
/* Get the time derivative for h. */
pi->h_dt += pj->mass / pj->rho * dvdr * wi_dr;
pj->h_dt += pi->mass / pi->rho * dvdr * wj_dr;
#ifdef HIST
if ( hi > hj )
runner_hist_hit( hi / hj );
else
runner_hist_hit( hj / hi );
#endif
}
......
......@@ -960,6 +960,17 @@ void space_maketasks ( struct space *s , int do_sort ) {
task_addunlock( t->cj->ghost , t2 );
}
/* Otherwise, sub interaction? */
else if ( t->type == task_type_sub && t->subtype == task_subtype_density ) {
task_addunlock( t , t->ci->ghost );
if ( t->cj != NULL )
task_addunlock( t , t->cj->ghost );
t2 = space_addtask( s , task_type_sub , task_subtype_force , t->flags , 0 , t->ci , t->cj , NULL , 0 , NULL , 0 );
task_addunlock( t->ci->ghost , t2 );
if ( t->cj != NULL )
task_addunlock( t->cj->ghost , t2 );
}
}
/* Did we already create indices? */
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment