Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
c207d539
Commit
c207d539
authored
Mar 17, 2013
by
Pedro Gonnet
Browse files
fix bug in runner_doghost.
Former-commit-id: cbd37d27f2028c64a464e5251fb0cffe6077ae0b
parent
f17cb9fc
Changes
9
Hide whitespace changes
Inline
Side-by-side
src/debug.c
View file @
c207d539
...
...
@@ -30,7 +30,7 @@ void printParticle ( struct part *parts , long long int id ) {
/* Look for the particle. */
for
(
i
=
0
;
parts
[
i
].
id
!=
id
;
i
++
);
printf
(
"## Particle[%d]: id=%lld, x=[%
f,%f,%f], v=[%f,%f,%f], a=[%f,%f,%f
], h=%.3e, h_dt=%.3e, wcount=%
f
, m=%.3e, rho=%
f
, u=%
f
, dudt=%
f
, dt=%.3e
\n
"
,
printf
(
"## Particle[%d]: id=%lld, x=[%
.3e,%.3e,%.3e], v=[%.3e,%.3e,%.3e], a=[%.3e,%.3e,%.3e
], h=%.3e, h_dt=%.3e, wcount=%
.3e
, m=%.3e, rho=%
.3e, rho_dh=%.3e, div_v=%.3e
, u=%
.3e
, dudt=%
.3e, bals=%.3e, POrho2=%.3e, v_sig=%.3e
, dt=%.3e
\n
"
,
i
,
parts
[
i
].
id
,
parts
[
i
].
x
[
0
],
parts
[
i
].
x
[
1
],
parts
[
i
].
x
[
2
],
...
...
@@ -40,9 +40,13 @@ void printParticle ( struct part *parts , long long int id ) {
parts
[
i
].
force
.
h_dt
,
parts
[
i
].
wcount
,
parts
[
i
].
mass
,
parts
[
i
].
rho
,
parts
[
i
].
rho
,
parts
[
i
].
rho_dh
,
parts
[
i
].
density
.
div_v
,
parts
[
i
].
u
,
parts
[
i
].
force
.
u_dt
,
parts
[
i
].
force
.
balsara
,
parts
[
i
].
force
.
POrho2
,
parts
[
i
].
force
.
v_sig
,
parts
[
i
].
dt
);
}
...
...
src/engine.c
View file @
c207d539
...
...
@@ -97,8 +97,6 @@ void engine_prepare ( struct engine *e ) {
continue
;
for
(
j
=
0
;
j
<
s
->
tasks
[
k
].
nr_unlock_tasks
;
j
++
)
__sync_add_and_fetch
(
&
s
->
tasks
[
k
].
unlock_tasks
[
j
]
->
wait
,
1
);
for
(
j
=
0
;
j
<
s
->
tasks
[
k
].
nr_unlock_cells
;
j
++
)
__sync_add_and_fetch
(
&
s
->
tasks
[
k
].
unlock_cells
[
j
]
->
wait
,
1
);
}
// printf( "engine_prepare: preparing task dependencies took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
...
...
@@ -278,6 +276,59 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
}
void
engine_single2
(
double
*
dim
,
long
long
int
pid
,
struct
part
*
__restrict__
parts
,
int
N
,
int
periodic
)
{
int
i
,
k
;
double
r2
,
dx
[
3
];
float
fdx
[
3
],
ihg
;
struct
part
p
,
p2
;
/* Find "our" part. */
for
(
k
=
0
;
k
<
N
&&
parts
[
k
].
id
!=
pid
;
k
++
);
if
(
k
==
N
)
error
(
"Part not found."
);
p
=
parts
[
k
];
/* Clear accumulators. */
ihg
=
kernel_igamma
/
p
.
h
;
p
.
a
[
0
]
=
0
.
0
f
;
p
.
a
[
1
]
=
0
.
0
f
;
p
.
a
[
2
]
=
0
.
0
f
;
p
.
force
.
u_dt
=
0
.
0
f
;
p
.
force
.
h_dt
=
0
.
0
f
;
p
.
force
.
v_sig
=
0
.
0
f
;
/* Loop over all particle pairs (force). */
for
(
k
=
0
;
k
<
N
;
k
++
)
{
if
(
parts
[
k
].
id
==
p
.
id
)
continue
;
for
(
i
=
0
;
i
<
3
;
i
++
)
{
dx
[
i
]
=
p
.
x
[
i
]
-
parts
[
k
].
x
[
i
];
if
(
periodic
)
{
if
(
dx
[
i
]
<
-
dim
[
i
]
/
2
)
dx
[
i
]
+=
dim
[
i
];
else
if
(
dx
[
i
]
>
dim
[
i
]
/
2
)
dx
[
i
]
-=
dim
[
i
];
}
fdx
[
i
]
=
dx
[
i
];
}
r2
=
fdx
[
0
]
*
fdx
[
0
]
+
fdx
[
1
]
*
fdx
[
1
]
+
fdx
[
2
]
*
fdx
[
2
];
if
(
r2
<
p
.
h
*
p
.
h
||
r2
<
parts
[
k
].
h
*
parts
[
k
].
h
)
{
p
.
a
[
0
]
=
0
.
0
f
;
p
.
a
[
1
]
=
0
.
0
f
;
p
.
a
[
2
]
=
0
.
0
f
;
p
.
force
.
u_dt
=
0
.
0
f
;
p
.
force
.
h_dt
=
0
.
0
f
;
p
.
force
.
v_sig
=
0
.
0
f
;
p2
=
parts
[
k
];
runner_iact_nonsym_force
(
r2
,
fdx
,
p
.
h
,
p2
.
h
,
&
p
,
&
p2
);
printf
(
"pairs_simple: interacting particles %lli and %lli (rho=%.3e,r=%e): a=[%.3e,%.3e,%.3e], u_dt=%.3e, h_dt=%.3e, v_sig=%.3e.
\n
"
,
pid
,
p2
.
id
,
p2
.
rho
,
sqrtf
(
r2
)
,
p
.
a
[
0
]
,
p
.
a
[
1
]
,
p
.
a
[
2
]
,
p
.
force
.
u_dt
,
p
.
force
.
h_dt
,
p
.
force
.
v_sig
);
}
}
/* Dump the result. */
ihg
=
kernel_igamma
/
p
.
h
;
printf
(
"pairs_single: part %lli (h=%e) has wcount=%f, rho=%f.
\n
"
,
p
.
id
,
p
.
h
,
p
.
wcount
+
32
.
0
/
3
,
ihg
*
ihg
*
ihg
*
(
p
.
rho
+
p
.
mass
*
kernel_root
)
);
fflush
(
stdout
);
}
/**
* @brief Let the #engine loose to compute the forces.
*
...
...
@@ -307,6 +358,8 @@ void engine_step ( struct engine *e , int sort_queues ) {
e
->
s
->
dt_step
=
dt_step
;
printf
(
"engine_step: dt_step set to %.3e.
\n
"
,
dt_step
);
fflush
(
stdout
);
// printParticle( parts , 432626 );
/* First kick. */
TIMER_TIC
space_map_cells_post
(
e
->
s
,
1
,
&
engine_map_kick_first
,
e
);
...
...
@@ -314,7 +367,8 @@ void engine_step ( struct engine *e , int sort_queues ) {
// for(k=0; k<10; ++k)
// printParticle(parts, k);
// printParticle( parts , 494849 );
// printParticle( parts , 432626 );
// printParticle( parts , 432628 );
/* Prepare the space. */
engine_prepare
(
e
);
...
...
@@ -346,12 +400,14 @@ void engine_step ( struct engine *e , int sort_queues ) {
// for(k=0; k<10; ++k)
// printParticle(parts, k);
// printParticle( parts , 494849 );
// engine_single2( e->s->dim , 432626 , e->s->parts , e->s->nr_parts , e->s->periodic );
// printParticle( parts , 432626 );
// printParticle( parts , 432628 );
/* Second kick. */
TIMER_TIC_ND
dt_min
=
FLT_MAX
;
dt_max
=
0
.
0
f
;
#pragma omp parallel private(p,xp,k,ldt_min,lcount,ldt_max,lmom,lang,lent,lekin,lepot,threadID,nthreads)
#pragma omp parallel private(p,xp,k,ldt_min,lcount,ldt_max,lmom,lang,lent,lekin,lepot,threadID,nthreads)
{
threadID
=
omp_get_thread_num
();
nthreads
=
omp_get_num_threads
();
...
...
@@ -372,7 +428,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
}
/* Update the particle's time step. */
p
->
dt
=
const_cfl
*
p
->
h
/
(
p
->
force
.
v_sig
);
p
->
dt
=
const_cfl
*
p
->
h
/
(
p
->
force
.
v_sig
);
/* Update positions and energies at the half-step. */
p
->
v
[
0
]
=
xp
->
v_old
[
0
]
+
hdt
*
p
->
a
[
0
];
...
...
@@ -393,13 +449,13 @@ void engine_step ( struct engine *e , int sort_queues ) {
lmom
[
1
]
+=
p
->
mass
*
p
->
v
[
1
];
lmom
[
2
]
+=
p
->
mass
*
p
->
v
[
2
];
/* Collect angular momentum */
lang
[
0
]
+=
p
->
mass
*
(
p
->
x
[
1
]
*
p
->
v
[
2
]
-
p
->
v
[
2
]
*
p
->
x
[
1
]
);
lang
[
1
]
+=
p
->
mass
*
(
p
->
x
[
2
]
*
p
->
v
[
0
]
-
p
->
v
[
0
]
*
p
->
x
[
2
]
);
lang
[
2
]
+=
p
->
mass
*
(
p
->
x
[
0
]
*
p
->
v
[
1
]
-
p
->
v
[
1
]
*
p
->
x
[
0
]
);
/* Collect angular momentum */
lang
[
0
]
+=
p
->
mass
*
(
p
->
x
[
1
]
*
p
->
v
[
2
]
-
p
->
v
[
2
]
*
p
->
x
[
1
]
);
lang
[
1
]
+=
p
->
mass
*
(
p
->
x
[
2
]
*
p
->
v
[
0
]
-
p
->
v
[
0
]
*
p
->
x
[
2
]
);
lang
[
2
]
+=
p
->
mass
*
(
p
->
x
[
0
]
*
p
->
v
[
1
]
-
p
->
v
[
1
]
*
p
->
x
[
0
]
);
/* Collect entropic function */
lent
+=
p
->
u
*
pow
(
p
->
rho
,
1
.
f
-
const_gamma
);
/* Collect entropic function */
lent
+=
p
->
u
*
pow
(
p
->
rho
,
1
.
f
-
const_gamma
);
}
#pragma omp critical
...
...
@@ -412,7 +468,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
ang
[
0
]
+=
lang
[
0
];
ang
[
1
]
+=
lang
[
1
];
ang
[
2
]
+=
lang
[
2
];
ent
+=
(
const_gamma
-
1
.)
*
lent
;
ent
+=
(
const_gamma
-
1
.)
*
lent
;
epot
+=
lepot
;
ekin
+=
lekin
;
count
+=
lcount
;
...
...
@@ -420,6 +476,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
}
TIMER_TOC
(
timer_kick2
);
e
->
dt_min
=
dt_min
;
// printParticle( parts , 432626 );
printf
(
"engine_step: dt_min/dt_max is %e/%e.
\n
"
,
dt_min
,
dt_max
);
fflush
(
stdout
);
printf
(
"engine_step: etot is %e (ekin=%e, epot=%e).
\n
"
,
ekin
+
epot
,
ekin
,
epot
);
fflush
(
stdout
);
printf
(
"engine_step: total momentum is [ %e , %e , %e ].
\n
"
,
mom
[
0
]
,
mom
[
1
]
,
mom
[
2
]
);
fflush
(
stdout
);
...
...
src/part.h
View file @
c207d539
...
...
@@ -70,7 +70,7 @@ struct part {
float
rho_dh
;
/* Store density/force specific stuff. */
union
{
//
union {
struct
{
...
...
@@ -107,7 +107,7 @@ struct part {
}
force
;
};
//
};
/* Particle pressure. */
// float P;
...
...
src/runner.c
View file @
c207d539
...
...
@@ -334,7 +334,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
struct
cell
*
finger
;
int
i
,
k
,
redo
,
count
=
c
->
count
;
int
*
pid
;
float
ihg
,
ihg2
,
ihg4
,
h_corr
;
float
h
,
hg
,
ihg
,
ihg2
,
ihg4
,
h_corr
;
float
normDiv_v
,
normCurl_v
;
float
dt_step
=
r
->
e
->
dt_step
;
TIMER_TIC
...
...
@@ -368,21 +368,27 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Is this part within the timestep? */
if
(
cp
->
dt
<=
dt_step
)
{
/* Some smoothing length multiples. */
ihg
=
kernel_igamma
/
p
->
h
;
h
=
p
->
h
;
hg
=
kernel_gamma
*
h
;
ihg
=
1
.
0
f
/
hg
;
ihg2
=
ihg
*
ihg
;
ihg4
=
ihg2
*
ihg2
;
/* Adjust the computed weighted number of neighbours. */
p
->
wcount
+=
kernel_wroot
;
/* Final operation on the density. */
p
->
rho
=
ihg
*
ihg2
*
(
p
->
rho
+
p
->
mass
*
kernel_root
);
p
->
rho_dh
*=
ihg4
;
/* Compute the smoothing length update (Newton step). */
h_corr
=
(
const_nwneigh
-
p
->
wcount
)
/
p
->
density
.
wcount_dh
;
/* Truncate to the range [ -p->h/2 , p->h ]. */
h_corr
=
fminf
(
h_corr
,
p
->
h
);
h_corr
=
fmaxf
(
h_corr
,
-
p
->
h
/
2
);
h_corr
=
fminf
(
h_corr
,
h
);
h_corr
=
fmaxf
(
h_corr
,
-
h
/
2
);
/* Apply the correction to p->h. */
p
->
h
+=
h_corr
;
...
...
@@ -391,7 +397,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Did we get the right number density? */
if
(
p
->
wcount
>
const_nwneigh
+
const_delta_nwneigh
||
p
->
wcount
<
const_nwneigh
-
const_delta_nwneigh
)
{
// printf( "runner_doghost: particle %lli (h=%e,
h_dt=%e,
depth=%i) has bad wcount=%.3f.\n" , p->id , p->h ,
p->h_dt ,
c->depth , p->wcount ); fflush(stdout);
// printf( "runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%.3f.\n" , p->id , p->h , c->depth , p->wcount ); fflush(stdout);
// p->h += ( p->wcount + kernel_root - const_nwneigh ) / p->density.wcount_dh;
pid
[
redo
]
=
pid
[
i
];
redo
+=
1
;
...
...
@@ -405,22 +411,24 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
continue
;
}
/* Final operation on the density. */
p
->
rho
=
ihg
*
ihg2
*
(
p
->
rho
+
p
->
mass
*
kernel_root
);
p
->
rho_dh
*=
ihg4
;
if
(
p
->
id
==
432626
)
printf
(
"runner_doghost: updating particle %lli.
\n
"
,
p
->
id
);
/* Pre-compute some stuff for the balsara switch. */
normDiv_v
=
fabs
(
p
->
density
.
div_v
/
p
->
rho
*
ihg4
);
normCurl_v
=
sqrtf
(
p
->
density
.
curl_v
[
0
]
*
p
->
density
.
curl_v
[
0
]
+
p
->
density
.
curl_v
[
1
]
*
p
->
density
.
curl_v
[
1
]
+
p
->
density
.
curl_v
[
2
]
*
p
->
density
.
curl_v
[
2
]
)
/
p
->
rho
*
ihg4
;
/* As of here, particle force variables will be set. Do _NOT_
try to read any particle density variables! */
/* Compute this particle's sound speed. */
p
->
force
.
c
=
sqrtf
(
const_gamma
*
(
const_gamma
-
1
.
0
f
)
*
p
->
u
);
/* Compute the P/Omega/rho2. */
p
->
force
.
POrho2
=
p
->
u
*
(
const_gamma
-
1
.
0
f
)
/
(
p
->
rho
+
p
->
h
*
p
->
rho_dh
/
3
.
0
f
);
p
->
force
.
POrho2
=
p
->
u
*
(
const_gamma
-
1
.
0
f
)
/
(
p
->
rho
+
h
g
*
p
->
rho_dh
/
3
.
0
f
);
/* Balsara switch */
p
->
force
.
balsara
=
normCurl_v
/
(
normDiv_v
+
normCurl_v
+
0
.
0001
f
*
p
->
force
.
c
/
p
->
h
);
p
->
force
.
balsara
=
normCurl_v
/
(
normDiv_v
+
normCurl_v
+
0
.
0001
f
*
p
->
force
.
c
*
ihg
);
/* Reset the acceleration. */
for
(
k
=
0
;
k
<
3
;
k
++
)
...
...
@@ -652,8 +660,6 @@ void *runner_main ( void *data ) {
for
(
k
=
0
;
k
<
t
->
nr_unlock_tasks
;
k
++
)
if
(
__sync_fetch_and_sub
(
&
t
->
unlock_tasks
[
k
]
->
wait
,
1
)
==
0
)
abort
();
for
(
k
=
0
;
k
<
t
->
nr_unlock_cells
;
k
++
)
__sync_fetch_and_sub
(
&
t
->
unlock_cells
[
k
]
->
wait
,
1
);
}
/* main loop. */
...
...
src/runner_doiact.h
View file @
c207d539
...
...
@@ -663,46 +663,6 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
for
(
rshift
=
0
.
0
,
k
=
0
;
k
<
3
;
k
++
)
rshift
+=
shift
[
k
]
*
runner_shift
[
3
*
sid
+
k
];
/* for ( k = 0 ; k < ci->count ; k++ )
if ( ci->parts[k].id == 5245989477229 )
break;
if ( k < ci->count ) {
printf( "runner_dopair: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts, h_max=%g/%g, and shift = [ %g %g %g ] (rshift=%g).\n" ,
ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] ,
ci->count , cj->count , ci->h_max , cj->h_max , shift[0] , shift[1] , shift[2] , rshift ); fflush(stdout);
for ( pjd = 0 ; pjd < cj->count ; pjd++ ) {
pi = &ci->parts[k];
pj = &cj->parts[pjd];
r2 = sqrtf( (pi->x[0] - pj->x[0])*(pi->x[0] - pj->x[0]) + (pi->x[1] - pj->x[1])*(pi->x[1] - pj->x[1]) + (pi->x[2] - pj->x[2])*(pi->x[2] - pj->x[2]) );
printf( "runner_dopair1: inspecting particles %lli [%i,%i,%i] and %lli [%i,%i,%i], r=%e.\n" ,
pi->id , (int)(ci->loc[0]/ci->h[0]) , (int)(ci->loc[1]/ci->h[1]) , (int)(ci->loc[2]/ci->h[2]) ,
pj->id , (int)(cj->loc[0]/cj->h[0]) , (int)(cj->loc[1]/cj->h[1]) , (int)(cj->loc[2]/cj->h[2]) ,
r2 );
}
} */
/* for ( k = 0 ; k < cj->count ; k++ )
if ( cj->parts[k].id == 5245989477229 )
break;
if ( k < cj->count ) {
printf( "runner_dopair: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts, h_max=%g/%g, and shift = [ %g %g %g ] (rshift=%g).\n" ,
ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] ,
ci->count , cj->count , ci->h_max , cj->h_max , shift[0] , shift[1] , shift[2] , rshift ); fflush(stdout);
for ( pjd = 0 ; pjd < ci->count ; pjd++ ) {
pj = &cj->parts[k];
pi = &ci->parts[pjd];
r2 = sqrtf( (pi->x[0] - pj->x[0])*(pi->x[0] - pj->x[0]) + (pi->x[1] - pj->x[1])*(pi->x[1] - pj->x[1]) + (pi->x[2] - pj->x[2])*(pi->x[2] - pj->x[2]) );
printf( "runner_dopair1: inspecting particles %lli [%i,%i,%i] and %lli [%i,%i,%i], r=%e.\n" ,
pi->id , (int)(ci->loc[0]/ci->h[0]) , (int)(ci->loc[1]/ci->h[1]) , (int)(ci->loc[2]/ci->h[2]) ,
pj->id , (int)(cj->loc[0]/cj->h[0]) , (int)(cj->loc[1]/cj->h[1]) , (int)(cj->loc[2]/cj->h[2]) ,
r2 );
}
} */
/* for ( hi = 0 , k = 0 ; k < ci->count ; k++ )
hi += ci->parts[k].r;
for ( hj = 0 , k = 0 ; k < cj->count ; k++ )
hj += cj->parts[k].r;
printf( "runner_dopair: avg. radii %g/%g for h=%g at depth=%i.\n" , hi/ci->count , hj/cj->count , ci->h[0] , ci->depth ); fflush(stdout); */
/* Pick-out the sorted lists. */
sort_i
=
&
ci
->
sort
[
sid
*
(
ci
->
count
+
1
)
];
sort_j
=
&
cj
->
sort
[
sid
*
(
cj
->
count
+
1
)
];
...
...
@@ -914,23 +874,6 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
for
(
rshift
=
0
.
0
,
k
=
0
;
k
<
3
;
k
++
)
rshift
+=
shift
[
k
]
*
runner_shift
[
3
*
sid
+
k
];
/* for ( k = 0 ; k < ci->count ; k++ )
if ( ci->parts[k].id == 561590 )
break;
if ( k == ci->count )
for ( k = 0 ; k < cj->count ; k++ )
if ( cj->parts[k].id == 561590 )
break;
if ( k < cj->count )
printf( "runner_dopair: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts, h_max=%g/%g, and shift = [ %g %g %g ] (rshift=%g).\n" ,
ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] ,
ci->count , cj->count , ci->h_max , cj->h_max , shift[0] , shift[1] , shift[2] , rshift ); fflush(stdout); */
/* for ( hi = 0 , k = 0 ; k < ci->count ; k++ )
hi += ci->parts[k].r;
for ( hj = 0 , k = 0 ; k < cj->count ; k++ )
hj += cj->parts[k].r;
printf( "runner_dopair: avg. radii %g/%g for h=%g at depth=%i.\n" , hi/ci->count , hj/cj->count , ci->h[0] , ci->depth ); fflush(stdout); */
/* Pick-out the sorted lists. */
sort_i
=
&
ci
->
sort
[
sid
*
(
ci
->
count
+
1
)
];
sort_j
=
&
cj
->
sort
[
sid
*
(
cj
->
count
+
1
)
];
...
...
@@ -971,9 +914,6 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
}
}
/* if ( ci->split && cj->split && sid == 4 )
printf( "boing!\n" ); */
/* Loop over the parts in ci. */
for
(
pid
=
count_i
-
1
;
pid
>=
0
&&
sort_i
[
pid
].
d
+
hi_max
>
dj_min
;
pid
--
)
{
...
...
src/runner_iact.h
View file @
c207d539
...
...
@@ -59,7 +59,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
for
(
k
=
0
;
k
<
3
;
k
++
)
curlvr
[
k
]
*=
ri
;
if
(
r2
<
hi
*
hi
&&
pi
!=
NULL
)
{
if
(
r2
<
hi
*
hi
)
{
h_inv
=
1
.
0
/
hi
;
hg_inv
=
kernel_igamma
*
h_inv
;
...
...
@@ -78,7 +78,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
}
if
(
r2
<
hj
*
hj
&&
pj
!=
NULL
)
{
if
(
r2
<
hj
*
hj
)
{
h_inv
=
1
.
0
/
hj
;
hg_inv
=
kernel_igamma
*
h_inv
;
...
...
@@ -242,7 +242,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density (
for
(
k
=
0
;
k
<
3
;
k
++
)
curlvr
[
k
]
*=
ri
;
if
(
r2
<
hi
*
hi
&&
pi
!=
NULL
)
{
if
(
r2
<
hi
*
hi
)
{
h_inv
=
1
.
0
/
hi
;
hg_inv
=
kernel_igamma
*
h_inv
;
...
...
@@ -255,9 +255,9 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density (
pi
->
density
.
wcount_dh
-=
xi
*
h_inv
*
wi_dx
*
(
4
.
0
f
*
M_PI
/
3
.
0
f
*
kernel_igamma3
);
// pi->icount += 1;
pi
->
density
.
div_v
+=
pj
->
mass
*
dvdr
*
wi_dx
;
for
(
k
=
0
;
k
<
3
;
k
++
)
pi
->
density
.
curl_v
[
k
]
+=
pj
->
mass
*
curlvr
[
k
]
*
wi_dx
;
pi
->
density
.
div_v
+=
pj
->
mass
*
dvdr
*
wi_dx
;
for
(
k
=
0
;
k
<
3
;
k
++
)
pi
->
density
.
curl_v
[
k
]
+=
pj
->
mass
*
curlvr
[
k
]
*
wi_dx
;
}
}
...
...
@@ -380,7 +380,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
dvdr
*=
ri
;
/* Compute the relative velocity. (This is 0 if the particles move away from each other and negative otherwise) */
omega_ij
=
fminf
(
dvdr
,
0
.
f
);
omega_ij
=
fminf
(
dvdr
,
0
.
f
);
/* Compute signal velocity */
v_sig
=
pi
->
force
.
c
+
pj
->
force
.
c
-
3
.
*
omega_ij
;
...
...
@@ -392,7 +392,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
Pi_ij
*=
(
pi
->
force
.
balsara
+
pj
->
force
.
balsara
);
/* Get the common factor out. */
w
=
ri
*
(
(
pi
->
force
.
POrho2
*
wi_dr
+
pj
->
force
.
POrho2
*
wj_dr
)
-
0
.
25
f
*
Pi_ij
*
(
wi_dr
+
wj_dr
)
);
w
=
ri
*
(
(
pi
->
force
.
POrho2
*
wi_dr
+
pj
->
force
.
POrho2
*
wj_dr
)
+
0
.
25
f
*
Pi_ij
*
(
wi_dr
+
wj_dr
)
);
/* Use the force, Luke! */
for
(
k
=
0
;
k
<
3
;
k
++
)
{
...
...
@@ -402,12 +402,12 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
}
/* Get the time derivative for u. */
pi
->
force
.
u_dt
+=
p
i
->
force
.
POrho2
*
pj
->
mass
*
dvdr
*
wi_dr
+
0
.
125
f
*
pj
->
mass
*
Pi_ij
*
dvdr
*
(
wi_dr
+
wj_dr
);
pj
->
force
.
u_dt
+=
p
j
->
force
.
POrho2
*
pi
->
mass
*
dvdr
*
wj_dr
+
0
.
125
f
*
pi
->
mass
*
Pi_ij
*
dvdr
*
(
wi_dr
+
wj_dr
);
pi
->
force
.
u_dt
+=
p
j
->
mass
*
dvdr
*
(
pi
->
force
.
POrho2
*
wi_dr
+
0
.
125
f
*
Pi_ij
*
(
wi_dr
+
wj_dr
)
);
pj
->
force
.
u_dt
+=
p
i
->
mass
*
dvdr
*
(
pj
->
force
.
POrho2
*
wj_dr
+
0
.
125
f
*
Pi_ij
*
(
wi_dr
+
wj_dr
)
);
/* Get the time derivative for h. */
pi
->
force
.
h_dt
-=
pj
->
mass
/
pj
->
rho
*
dvdr
*
wi_dr
;
pj
->
force
.
h_dt
-=
pi
->
mass
/
pi
->
rho
*
dvdr
*
wj_dr
;
pi
->
force
.
h_dt
-=
pj
->
mass
*
dvdr
/
pj
->
rho
*
wi_dr
;
pj
->
force
.
h_dt
-=
pi
->
mass
*
dvdr
/
pi
->
rho
*
wj_dr
;
/* Update the signal velocity. */
pi
->
force
.
v_sig
=
fmaxf
(
pi
->
force
.
v_sig
,
v_sig
);
...
...
@@ -538,7 +538,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
Pi_ij
.
v
*=
(
wi_dr
.
v
+
wj_dr
.
v
);
/* Get the common factor out. */
w
.
v
=
ri
.
v
*
(
(
piPOrho2
.
v
*
wi_dr
.
v
+
pjPOrho2
.
v
*
wj_dr
.
v
)
-
vec_set1
(
0
.
25
f
)
*
Pi_ij
.
v
);
w
.
v
=
ri
.
v
*
(
(
piPOrho2
.
v
*
wi_dr
.
v
+
pjPOrho2
.
v
*
wj_dr
.
v
)
+
vec_set1
(
0
.
25
f
)
*
Pi_ij
.
v
);
/* Use the force, Luke! */
for
(
k
=
0
;
k
<
3
;
k
++
)
{
...
...
@@ -590,8 +590,8 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
float
hi_inv
,
hi2_inv
;
float
hj_inv
,
hj2_inv
;
float
wi
,
wj
,
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
w
,
dvdr
;
float
v_sig
,
omega_ij
,
Pi_ij
;
float
f
;
float
omega_ij
,
Pi_ij
,
v_sig
;
int
k
;
/* Get the kernel for hi. */
...
...
@@ -607,13 +607,13 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
xj
=
r
*
hj_inv
*
kernel_igamma
;
kernel_deval
(
xj
,
&
wj
,
&
wj_dx
);
wj_dr
=
hj2_inv
*
hj2_inv
*
wj_dx
;
/* Compute dv dot r. */
dvdr
=
(
pi
->
v
[
0
]
-
pj
->
v
[
0
]
)
*
dx
[
0
]
+
(
pi
->
v
[
1
]
-
pj
->
v
[
1
]
)
*
dx
[
1
]
+
(
pi
->
v
[
2
]
-
pj
->
v
[
2
]
)
*
dx
[
2
];
dvdr
*=
ri
;
/* Compute the relative velocity. (This is 0 if the particles move away from each other and negative otherwise) */
omega_ij
=
fminf
(
dvdr
,
0
.
f
);
omega_ij
=
fminf
(
dvdr
,
0
.
f
);
/* Compute signal velocity */
v_sig
=
pi
->
force
.
c
+
pj
->
force
.
c
-
3
.
*
omega_ij
;
...
...
@@ -625,25 +625,31 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
Pi_ij
*=
(
pi
->
force
.
balsara
+
pj
->
force
.
balsara
);
/* Get the common factor out. */
w
=
ri
*
(
(
pi
->
force
.
POrho2
*
wi_dr
+
pj
->
force
.
POrho2
*
wj_dr
)
-
0
.
25
f
*
Pi_ij
*
(
wi_dr
+
wj_dr
)
);
w
=
ri
*
(
(
pi
->
force
.
POrho2
*
wi_dr
+
pj
->
force
.
POrho2
*
wj_dr
)
+
0
.
25
f
*
Pi_ij
*
(
wi_dr
+
wj_dr
)
);
/* Use the force, Luke! */
for
(
k
=
0
;
k
<
3
;
k
++
)
{
f
=
dx
[
k
]
*
w
;
pi
->
a
[
k
]
-=
pj
->
mass
*
f
;
}
/* Get the time derivative for u. */
pi
->
force
.
u_dt
+=
p
i
->
force
.
POrho2
*
pj
->
mass
*
dvdr
*
wi_dr
+
0
.
125
f
*
pj
->
mass
*
Pi_ij
*
dvdr
*
(
wi_dr
+
wj_dr
);
pi
->
force
.
u_dt
+=
p
j
->
mass
*
dvdr
*
(
pi
->
force
.
POrho2
*
wi_dr
+
0
.
125
f
*
Pi_ij
*
(
wi_dr
+
wj_dr
)
);
/* Get the time derivative for h. */
pi
->
force
.
h_dt
-=
pj
->
mass
/
pj
->
rho
*
dvdr
*
wi_dr
;
pi
->
force
.
h_dt
-=
pj
->
mass
*
dvdr
/
pj
->
rho
*
wi_dr
;
/* Update the signal velocity. */
pi
->
force
.
v_sig
=
fmaxf
(
pi
->
force
.
v_sig
,
v_sig
);
pj
->
force
.
v_sig
=
fmaxf
(
pj
->
force
.
v_sig
,
v_sig
);
#ifdef HIST
if
(
hi
>
hj
)
runner_hist_hit
(
hi
/
hj
);
else
runner_hist_hit
(
hj
/
hi
);
#endif
}
...
...
@@ -759,7 +765,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
Pi_ij
.
v
*=
(
wi_dr
.
v
+
wj_dr
.
v
);
/* Get the common factor out. */
w
.
v
=
ri
.
v
*
(
(
piPOrho2
.
v
*
wi_dr
.
v
+
pjPOrho2
.
v
*
wj_dr
.
v
)
-
vec_set1
(
0
.
25
f
)
*
Pi_ij
.
v
);
w
.
v
=
ri
.
v
*
(
(
piPOrho2
.
v
*
wi_dr
.
v
+
pjPOrho2
.
v
*
wj_dr
.
v
)
+
vec_set1
(
0
.
25
f
)
*
Pi_ij
.
v
);
/* Use the force, Luke! */
for
(
k
=
0
;
k
<
3
;
k
++
)
{
...
...
src/space.c
View file @
c207d539
...
...
@@ -134,7 +134,7 @@ int space_marktasks ( struct space *s ) {
/* Too much particle movement? */
if
(
!
t
->
skip
&&
t
->
tight
&&
(
t
->
ci
->
dx_max
>
t
->
ci
->
dmin
||
t
->
cj
->
dx_max
>
t
->
cj
->
dmin
)
)
b
re
ak
;
re
turn
1
;
/* Set the sort flags. */
if
(
!
t
->
skip
)
{
...
...
@@ -154,10 +154,6 @@ int space_marktasks ( struct space *s ) {
}
/* Did this not go through? */
if
(
k
<
s
->
nr_tasks
)
return
1
;
/* Run through the tasks and mark as skip or not. */
for
(
k
=
0
;
k
<
s
->
nr_tasks
;
k
++
)
{
...
...
@@ -295,7 +291,7 @@ void space_prepare ( struct space *s ) {
/* Run through the tasks and mark as skip or not. */
tic
=
getticks
();
rebuild
=
space_marktasks
(
s
);
printf
(
"space_prepare: space_marktasks
tasks
took %.3f ms.
\n
"
,
(
double
)(
getticks
()
-
tic
)
/
CPU_TPS
*
1000
);
printf
(
"space_prepare: space_marktasks took %.3f ms.
\n
"
,
(
double
)(
getticks
()
-
tic
)
/
CPU_TPS
*
1000
);
/* Did this not go through? */
if
(
rebuild
)
{
...
...
@@ -305,15 +301,11 @@ void space_prepare ( struct space *s ) {
space_rebuild
(
s
,
0
.
0
);
printf
(
"space_prepare: space_rebuild took %.3f ms.
\n
"
,
(
double
)(
getticks
()
-
tic
)
/
CPU_TPS
*
1000
);
/* Traverse the cells and set their dt_min and dx_max. */
tic
=
getticks
();
space_map_cells_post
(
s
,
1
,
&
space_map_prepare
,
NULL
);
printf
(
"space_prepare: space_map_prepare took %.3f ms.
\n
"
,
(
double
)(
getticks
()
-
tic
)
/
CPU_TPS
*
1000
);
/* Run through the tasks and mark as skip or not. */
tic
=
getticks
();
rebuild
=
space_marktasks
(
s
);
printf
(
"space_prepare: space_marktasks tasks took %.3f ms.
\n
"
,
(
double
)(
getticks
()
-
tic
)
/
CPU_TPS
*
1000
);
if
(
space_marktasks
(
s
)
)
error
(
"space_marktasks failed after space_rebuild."
);
printf
(
"space_prepare: space_marktasks took %.3f ms.
\n
"
,
(
double
)(
getticks
()
-
tic
)
/
CPU_TPS
*
1000
);
}
...
...
@@ -503,6 +495,7 @@ int space_rebuild_recurse ( struct space *s , struct cell *c ) {
c
->
h_max
=
h_max
;
c
->
dt_min
=
dt_min
;
c
->
dt_max
=
dt_max
;
c
->
dx_max
=
0
;
/* Un-split? */
if
(
count
<
c
->
count
*
space_splitratio
||
c
->
count
<
space_splitsize
)
{
...
...
@@ -529,6 +522,7 @@ int space_rebuild_recurse ( struct space *s , struct cell *c ) {
temp
->
h
[
0
]
=
c
->
h
[
0
]
/
2
;
temp
->
h
[
1
]
=
c
->
h
[
1
]
/
2
;
temp
->
h
[
2
]
=
c
->
h
[
2
]
/
2
;
temp
->
dmin
=
c
->
dmin
/
2
;
if
(
k
&
4
)
temp
->
loc
[
0
]
+=
temp
->
h
[
0
];
if
(
k
&
2
)
...
...
@@ -538,6 +532,7 @@ int space_rebuild_recurse ( struct space *s , struct cell *c ) {
temp
->
depth
=
c
->
depth
+
1
;
temp
->
split
=
0
;
temp
->
h_max
=
0
.
0
;
temp
->
dx_max
=
0
.
0
;
temp
->
parent
=
c
;
c
->
progeny
[
k
]
=
temp
;
}
...
...
@@ -589,7 +584,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
int
i
,
j
,
k
,
cdim
[
3
];
struct
cell
*
restrict
c
;
struct
part
*
restrict
finger
,
*
restrict
p
;
struct
cpart
*
restrict
cfinger
;
struct
cpart
*
restrict
cp
,
*
restrict
cfinger
;
int
*
ind
;
ticks
tic
;
...
...
@@ -664,6 +659,20 @@ void space_rebuild ( struct space *s , double cell_max ) {
}
/* re-build upper-level cells? */
// printf( "space_rebuild: rebuilding upper-level cells took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Otherwise, just clean up the cells. */
else
{
/* Free the old cells, if they were allocated. */
for
(
k
=
0
;
k
<
s
->
nr_cells
;
k
++
)
{
space_rebuild_recycle
(
s
,
&
s
->
cells
[
k
]
);
s
->
cells
[
k
].
sorts
=
NULL
;
s
->
cells
[
k
].
nr_tasks
=
0
;
s
->
cells
[
k
].
nr_density
=
0
;