Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
d48d9285
Commit
d48d9285
authored
Mar 19, 2013
by
Pedro Gonnet
Browse files
switch to using pseudo-verlet lists, i.e. sorts are only re-computed when the grid is rebuilt.
Former-commit-id: 07817db9de2a4f724322d80ffaae29480835f556
parent
2c057c42
Changes
12
Hide whitespace changes
Inline
Side-by-side
src/cell.h
View file @
d48d9285
...
...
@@ -51,7 +51,7 @@ struct cell {
float
slack
;
/* Maximum particle movement in this cell. */
float
dx_max
;
float
dx_max
,
h2dx_max
;
/* The depth of this cell in the tree. */
int
depth
,
split
;
...
...
src/debug.c
View file @
d48d9285
...
...
@@ -49,7 +49,7 @@ void printParticle ( struct part *parts , long long int id, int N ) {
parts
[
i
].
a
[
0
],
parts
[
i
].
a
[
1
],
parts
[
i
].
a
[
2
],
parts
[
i
].
h
,
parts
[
i
].
force
.
h_dt
,
parts
[
i
].
wcount
,
parts
[
i
].
density
.
wcount
,
parts
[
i
].
mass
,
parts
[
i
].
rho
,
parts
[
i
].
rho_dh
,
parts
[
i
].
density
.
div_v
,
...
...
src/engine.c
View file @
d48d9285
...
...
@@ -164,7 +164,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
int
j
,
k
;
struct
engine
*
e
=
(
struct
engine
*
)
data
;
float
pdt
,
dt_step
=
e
->
dt_step
,
dt
=
e
->
dt
,
hdt
=
0
.
5
f
*
dt
;
float
dt_min
,
dt_max
,
h_max
,
dx_max
,
a
[
3
],
v
[
3
],
u
,
u_dt
,
h
,
h_dt
,
rho
,
v_old
[
3
];
float
dt_min
,
dt_max
,
h_max
,
dx
,
h2dx_max
,
dx_max
,
a
[
3
],
v
[
3
],
u
,
u_dt
,
h
,
h_dt
,
rho
,
v_old
[
3
];
double
x
[
3
],
x_old
[
3
];
struct
part
*
restrict
p
;
struct
xpart
*
restrict
xp
;
...
...
@@ -178,6 +178,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
dt_max
=
0
.
0
f
;
h_max
=
0
.
0
f
;
dx_max
=
0
.
0
f
;
h2dx_max
=
0
.
0
f
;
/* Loop over parts. */
for
(
k
=
0
;
k
<
c
->
count
;
k
++
)
{
...
...
@@ -212,9 +213,11 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
p
->
x
[
0
]
=
x
[
0
]
+=
dt
*
v_old
[
0
];
p
->
x
[
1
]
=
x
[
1
]
+=
dt
*
v_old
[
1
];
p
->
x
[
2
]
=
x
[
2
]
+=
dt
*
v_old
[
2
];
dx_max
=
fmaxf
(
dx_max
,
sqrtf
(
(
x
[
0
]
-
x_old
[
0
])
*
(
x
[
0
]
-
x_old
[
0
])
+
(
x
[
1
]
-
x_old
[
1
])
*
(
x
[
1
]
-
x_old
[
1
])
+
(
x
[
2
]
-
x_old
[
2
])
*
(
x
[
2
]
-
x_old
[
2
])
)
*
2
+
h
);
dx
=
sqrtf
(
(
x
[
0
]
-
x_old
[
0
])
*
(
x
[
0
]
-
x_old
[
0
])
+
(
x
[
1
]
-
x_old
[
1
])
*
(
x
[
1
]
-
x_old
[
1
])
+
(
x
[
2
]
-
x_old
[
2
])
*
(
x
[
2
]
-
x_old
[
2
])
);
dx_max
=
fmaxf
(
dx_max
,
dx
);
h2dx_max
=
fmaxf
(
h2dx_max
,
dx
*
2
+
h
);
/* Update positions and energies at the half-step. */
p
->
v
[
0
]
=
v
[
0
]
+=
dt
*
a
[
0
];
...
...
@@ -236,11 +239,10 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
/* Init fields for density calculation. */
if
(
pdt
>
dt_step
)
{
rho
=
p
->
rho
*=
expf
(
-
3
.
0
f
*
h_dt
/
h
*
dt
);
// rho = p->rho += cbrt( h_dt ) * dt;
p
->
force
.
POrho2
=
u
*
(
const_gamma
-
1
.
0
f
)
/
(
rho
+
h
*
p
->
rho_dh
/
3
.
0
f
);
}
else
{
p
->
wcount
=
0
.
0
f
;
p
->
density
.
wcount
=
0
.
0
f
;
p
->
density
.
wcount_dh
=
0
.
0
f
;
p
->
rho
=
0
.
0
f
;
p
->
rho_dh
=
0
.
0
f
;
...
...
@@ -262,6 +264,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
dt_max
=
c
->
progeny
[
k
]
->
dt_max
;
h_max
=
c
->
progeny
[
k
]
->
h_max
;
dx_max
=
c
->
progeny
[
k
]
->
dx_max
;
h2dx_max
=
c
->
progeny
[
k
]
->
h2dx_max
;
/* Loop over the remaining progeny. */
for
(
k
+=
1
;
k
<
8
;
k
++
)
...
...
@@ -270,6 +273,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
dt_max
=
fmaxf
(
dt_max
,
c
->
progeny
[
k
]
->
dt_max
);
h_max
=
fmaxf
(
h_max
,
c
->
progeny
[
k
]
->
h_max
);
dx_max
=
fmaxf
(
dx_max
,
c
->
progeny
[
k
]
->
dx_max
);
h2dx_max
=
fmaxf
(
h2dx_max
,
c
->
progeny
[
k
]
->
h2dx_max
);
}
}
...
...
@@ -279,7 +283,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
c
->
dt_max
=
dt_max
;
c
->
h_max
=
h_max
;
c
->
dx_max
=
dx_max
;
c
->
sorted
=
0
;
c
->
h2dx_max
=
h2dx_max
;
/* Clean out the task pointers. */
// c->sorts[0] = NULL;
...
...
@@ -340,7 +344,7 @@ void engine_single ( double *dim , long long int pid , struct part *__restrict__
/* Dump the result. */
ihg
=
kernel_igamma
/
p
.
h
;
printf
(
"pairs_single: part %lli (h=%e) has wcount=%f, rho=%f.
\n
"
,
p
.
id
,
p
.
h
,
p
.
wcount
+
32
.
0
/
3
,
ihg
*
ihg
*
ihg
*
(
p
.
rho
+
p
.
mass
*
kernel_root
)
);
printf
(
"pairs_single: part %lli (h=%e) has wcount=%f, rho=%f.
\n
"
,
p
.
id
,
p
.
h
,
p
.
density
.
wcount
+
32
.
0
/
3
,
ihg
*
ihg
*
ihg
*
(
p
.
rho
+
p
.
mass
*
kernel_root
)
);
fflush
(
stdout
);
}
...
...
@@ -365,6 +369,8 @@ void engine_step ( struct engine *e , int sort_queues ) {
double
lang
[
3
],
ang
[
3
]
=
{
0
.
0
,
0
.
0
,
0
.
0
};
// double lent, ent = 0.0;
int
threadID
,
nthreads
,
count
=
0
,
lcount
;
TIMER_TIC2
/* Get the maximum dt. */
dt_step
=
2
.
0
f
*
dt
;
...
...
@@ -375,7 +381,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
/* Set the maximum dt. */
e
->
dt_step
=
dt_step
;
e
->
s
->
dt_step
=
dt_step
;
printf
(
"engine_step: dt_step set to %.3e.
\n
"
,
dt_step
);
fflush
(
stdout
);
printf
(
"engine_step: dt_step set to %.3e
(dt=%.3e)
.
\n
"
,
dt_step
,
e
->
dt
);
fflush
(
stdout
);
// printParticle( parts , 432626 );
...
...
@@ -415,7 +421,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
error
(
"Error while waiting for barrier."
);
/* Stop the clock. */
TIMER_TOC
(
timer_
step
);
TIMER_TOC
(
timer_
runners
);
// for(k=0; k<10; ++k)
// printParticle(parts, k);
...
...
@@ -523,6 +529,8 @@ void engine_step ( struct engine *e , int sort_queues ) {
e
->
step
/=
2
;
printf
(
"engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.
\n
"
,
e
->
dt
);
}
TIMER_TOC2
(
timer_step
);
}
...
...
@@ -533,12 +541,13 @@ void engine_step ( struct engine *e , int sort_queues ) {
*
* @param e The #engine.
* @param s The #space in which this #runner will run.
* @param dt The initial time step to use.
* @param nr_threads The number of threads to spawn.
* @param nr_queues The number of task queues to create.
* @param policy The queueing policy to use.
*/
void
engine_init
(
struct
engine
*
e
,
struct
space
*
s
,
int
nr_threads
,
int
nr_queues
,
int
policy
)
{
void
engine_init
(
struct
engine
*
e
,
struct
space
*
s
,
float
dt
,
int
nr_threads
,
int
nr_queues
,
int
policy
)
{
#if defined(HAVE_SETAFFINITY)
cpu_set_t
cpuset
;
...
...
@@ -550,7 +559,6 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_
e
->
nr_threads
=
nr_threads
;
e
->
nr_queues
=
nr_queues
;
e
->
policy
=
policy
;
e
->
dt_min
=
0
.
0
f
;
e
->
step
=
0
;
/* First of all, init the barrier and lock it. */
...
...
@@ -562,6 +570,13 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_
error
(
"Failed to lock barrier mutex."
);
e
->
barrier_count
=
0
;
/* Run through the parts and get the minimum time step. */
for
(
k
=
0
;
k
<
s
->
nr_parts
;
k
++
)
if
(
s
->
parts
[
k
].
dt
>
0
.
0
f
)
while
(
s
->
parts
[
k
].
dt
<
dt
)
dt
*=
0
.
5
;
e
->
dt_min
=
dt
;
/* Allocate the queues. */
if
(
posix_memalign
(
(
void
*
)(
&
e
->
queues
)
,
64
,
nr_queues
*
sizeof
(
struct
queue
)
)
!=
0
)
error
(
"Failed to allocate queues."
);
...
...
src/engine.h
View file @
d48d9285
...
...
@@ -72,6 +72,6 @@ struct engine {
/* Function prototypes. */
void
engine_barrier
(
struct
engine
*
e
);
void
engine_init
(
struct
engine
*
e
,
struct
space
*
s
,
int
nr_threads
,
int
nr_queues
,
int
policy
);
void
engine_init
(
struct
engine
*
e
,
struct
space
*
s
,
float
dt
,
int
nr_threads
,
int
nr_queues
,
int
policy
);
void
engine_prepare
(
struct
engine
*
e
);
void
engine_step
(
struct
engine
*
e
,
int
sort_queues
);
src/part.h
View file @
d48d9285
...
...
@@ -57,11 +57,23 @@ struct xpart {
/* Data of a single particle. */
struct
part
{
/* Particle acceleration. */
float
a
[
3
];
/* Particle velocity. */
float
v
[
3
];
/* Particle mass. */
float
mass
;
/* Particle position. */
double
x
[
3
];
/* Particle cutoff radius. */
float
h
;
/* Particle time-step. */
float
dt
;
/* Particle internal energy. */
float
u
;
/* Particle density. */
float
rho
;
...
...
@@ -83,6 +95,9 @@ struct part {
/* Particle velocity curl. */
float
curl_v
[
3
];
/* Particle number density. */
float
wcount
;
}
density
;
struct
{
...
...
@@ -112,14 +127,8 @@ struct part {
/* Particle pressure. */
// float P;
/* Particle acceleration. */
float
a
[
3
];
/* Particle number density. */
float
wcount
;
/* Particle internal energy. */
float
u
;
/* Particle mass. */
float
mass
;
/* Particle ID. */
unsigned
long
long
id
;
...
...
@@ -127,15 +136,6 @@ struct part {
/* Pointer to extra particle data. */
struct
xpart
*
xtras
;
/* Particle position. */
double
x
[
3
];
/* Particle cutoff radius. */
float
h
;
/* Particle time-step. */
float
dt
;
}
__attribute__
((
aligned
(
32
)));
src/runner.c
View file @
d48d9285
...
...
@@ -165,9 +165,11 @@ void runner_dosort_ascending ( struct entry *sort , int N ) {
* @param r The #runner.
* @param c The #cell.
* @param flags Cell flag.
* @param clock Flag indicating whether to record the timing or not, needed
* for recursive calls.
*/
void
runner_dosort
(
struct
runner
*
r
,
struct
cell
*
c
,
int
flags
)
{
void
runner_dosort
(
struct
runner
*
r
,
struct
cell
*
c
,
int
flags
,
int
clock
)
{
struct
entry
*
finger
;
struct
entry
*
fingers
[
8
];
...
...
@@ -176,8 +178,14 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
int
i
,
ind
,
off
[
8
],
inds
[
8
],
temp_i
,
missing
;
// float shift[3];
float
buff
[
8
],
px
[
3
];
TIMER_TIC
/* Clean-up the flags, i.e. filter out what's already been sorted. */
flags
&=
~
c
->
sorted
;
if
(
flags
==
0
)
return
;
/* start by allocating the entry arrays. */
if
(
lock_lock
(
&
c
->
lock
)
!=
0
)
error
(
"Failed to lock cell."
);
...
...
@@ -203,7 +211,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
else
missing
=
(
c
->
progeny
[
k
]
->
sorts
->
flags
^
flags
)
&
flags
;
if
(
missing
)
runner_dosort
(
r
,
c
->
progeny
[
k
]
,
missing
);
runner_dosort
(
r
,
c
->
progeny
[
k
]
,
missing
,
0
);
}
/* Loop over the 13 different sort arrays. */
...
...
@@ -314,7 +322,8 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
(
flags
&
0x1000
)
>>
12
,
(
flags
&
0x800
)
>>
11
,
(
flags
&
0x400
)
>>
10
,
(
flags
&
0x200
)
>>
9
,
(
flags
&
0x100
)
>>
8
,
(
flags
&
0x80
)
>>
7
,
(
flags
&
0x40
)
>>
6
,
(
flags
&
0x20
)
>>
5
,
(
flags
&
0x10
)
>>
4
,
(
flags
&
0x8
)
>>
3
,
(
flags
&
0x4
)
>>
2
,
(
flags
&
0x2
)
>>
1
,
(
flags
&
0x1
)
>>
0
,
((
double
)
TIMER_TOC
(
timer_dosort
))
/
CPU_TPS
*
1000
);
fflush
(
stdout
);
#else
TIMER_TOC
(
timer_dosort
);
if
(
clock
)
TIMER_TOC
(
timer_dosort
);
#endif
}
...
...
@@ -377,31 +386,35 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
ihg4
=
ihg2
*
ihg2
;
/* Adjust the computed weighted number of neighbours. */
p
->
wcount
+=
kernel_wroot
;
p
->
density
.
wcount
+=
kernel_wroot
;
/* Final operation on the density. */
p
->
rho
=
ihg
*
ihg2
*
(
p
->
rho
+
p
->
mass
*
kernel_root
);
p
->
rho_dh
*=
ihg4
;
/* Compute the smoothing length update (Newton step). */
h_corr
=
(
const_nwneigh
-
p
->
wcount
)
/
p
->
density
.
wcount_dh
;
/* Truncate to the range [ -p->h/2 , p->h ]. */
h_corr
=
fminf
(
h_corr
,
h
);
h_corr
=
fmaxf
(
h_corr
,
-
h
/
2
);
if
(
p
->
density
.
wcount_dh
==
0
.
0
f
)
h_corr
=
p
->
h
;
else
{
h_corr
=
(
const_nwneigh
-
p
->
density
.
wcount
)
/
p
->
density
.
wcount_dh
;
/* Truncate to the range [ -p->h/2 , p->h ]. */
h_corr
=
fminf
(
h_corr
,
h
);
h_corr
=
fmaxf
(
h_corr
,
-
h
/
2
);
}
/* Apply the correction to p->h. */
p
->
h
+=
h_corr
;
cp
->
h
=
p
->
h
;
/* Did we get the right number density? */
if
(
p
->
wcount
>
const_nwneigh
+
const_delta_nwneigh
||
p
->
wcount
<
const_nwneigh
-
const_delta_nwneigh
)
{
// printf( "runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%.3f.\n" , p->id , p->h , c->depth , p->wcount ); fflush(stdout);
// p->h += ( p->wcount + kernel_root - const_nwneigh ) / p->density.wcount_dh;
if
(
p
->
density
.
wcount
>
const_nwneigh
+
const_delta_nwneigh
||
p
->
density
.
wcount
<
const_nwneigh
-
const_delta_nwneigh
)
{
// printf( "runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%.3f.\n" , p->id , p->h , c->depth , p->
density.
wcount ); fflush(stdout);
// p->h += ( p->
density.
wcount + kernel_root - const_nwneigh ) / p->density.wcount_dh;
pid
[
redo
]
=
pid
[
i
];
redo
+=
1
;
p
->
wcount
=
0
.
0
;
p
->
density
.
wcount
=
0
.
0
;
p
->
density
.
wcount_dh
=
0
.
0
;
p
->
rho
=
0
.
0
;
p
->
rho_dh
=
0
.
0
;
...
...
@@ -632,7 +645,7 @@ void *runner_main ( void *data ) {
cell_unlocktree
(
cj
);
break
;
case
task_type_sort
:
runner_dosort
(
r
,
ci
,
t
->
flags
);
runner_dosort
(
r
,
ci
,
t
->
flags
,
1
);
break
;
case
task_type_sub
:
if
(
t
->
subtype
==
task_subtype_density
)
...
...
src/runner.h
View file @
d48d9285
...
...
@@ -81,5 +81,5 @@ void runner_doghost ( struct runner *r , struct cell *c );
void
runner_dopair_density
(
struct
runner
*
r
,
struct
cell
*
ci
,
struct
cell
*
cj
);
void
runner_doself_density
(
struct
runner
*
r
,
struct
cell
*
c
);
void
runner_dosub_density
(
struct
runner
*
r
,
struct
cell
*
ci
,
struct
cell
*
cj
,
int
flags
);
void
runner_dosort
(
struct
runner
*
r
,
struct
cell
*
c
,
int
flag
);
void
runner_dosort
(
struct
runner
*
r
,
struct
cell
*
c
,
int
flag
,
int
clock
);
void
*
runner_main
(
void
*
data
);
src/runner_doiact.h
View file @
d48d9285
...
...
@@ -328,7 +328,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
struct
part
*
restrict
pi
,
*
restrict
parts_j
=
cj
->
parts
;
struct
cpart
*
restrict
cpj
,
*
restrict
cparts_j
=
cj
->
cparts
;
double
pix
[
3
];
float
dx
[
3
],
hi
,
hi2
,
r2
,
di
;
float
dx
[
3
],
hi
,
hi2
,
r2
,
di
,
dxj
;
struct
entry
*
sort_j
;
#ifdef VECTORIZE
int
icount
=
0
;
...
...
@@ -363,6 +363,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
/* Pick-out the sorted lists. */
sort_j
=
&
cj
->
sort
[
sid
*
(
cj
->
count
+
1
)
];
dxj
=
cj
->
dx_max
;
/* Parts are on the left? */
if
(
!
flipped
)
{
...
...
@@ -376,7 +377,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
pix
[
k
]
=
pi
->
x
[
k
]
-
shift
[
k
];
hi
=
pi
->
h
;
hi2
=
hi
*
hi
;
di
=
hi
+
pix
[
0
]
*
runner_shift
[
3
*
sid
+
0
]
+
pix
[
1
]
*
runner_shift
[
3
*
sid
+
1
]
+
pix
[
2
]
*
runner_shift
[
3
*
sid
+
2
];
di
=
hi
+
dxj
+
pix
[
0
]
*
runner_shift
[
3
*
sid
+
0
]
+
pix
[
1
]
*
runner_shift
[
3
*
sid
+
1
]
+
pix
[
2
]
*
runner_shift
[
3
*
sid
+
2
];
/* Loop over the parts in cj. */
for
(
pjd
=
0
;
pjd
<
count_j
&&
sort_j
[
pjd
].
d
<
di
;
pjd
++
)
{
...
...
@@ -439,7 +440,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
pix
[
k
]
=
pi
->
x
[
k
]
-
shift
[
k
];
hi
=
pi
->
h
;
hi2
=
hi
*
hi
;
di
=
-
hi
+
pix
[
0
]
*
runner_shift
[
3
*
sid
+
0
]
+
pix
[
1
]
*
runner_shift
[
3
*
sid
+
1
]
+
pix
[
2
]
*
runner_shift
[
3
*
sid
+
2
];
di
=
-
hi
-
dxj
+
pix
[
0
]
*
runner_shift
[
3
*
sid
+
0
]
+
pix
[
1
]
*
runner_shift
[
3
*
sid
+
1
]
+
pix
[
2
]
*
runner_shift
[
3
*
sid
+
2
];
/* Loop over the parts in cj. */
for
(
pjd
=
count_j
-
1
;
pjd
>=
0
&&
di
<
sort_j
[
pjd
].
d
;
pjd
--
)
{
...
...
@@ -633,7 +634,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
struct
cpart
*
restrict
cpi
,
*
restrict
cparts_i
;
struct
cpart
*
restrict
cpj
,
*
restrict
cparts_j
;
double
pix
[
3
],
pjx
[
3
],
di
,
dj
;
float
dx
[
3
],
hi
,
hi2
,
hj
,
hj2
,
r2
;
float
dx
[
3
],
hi
,
hi2
,
hj
,
hj2
,
r2
,
dx_max
;
double
hi_max
,
hj_max
;
double
di_max
,
dj_min
;
int
count_i
,
count_j
;
...
...
@@ -674,13 +675,14 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
cparts_i
=
ci
->
cparts
;
cparts_j
=
cj
->
cparts
;
di_max
=
sort_i
[
count_i
-
1
].
d
-
rshift
;
dj_min
=
sort_j
[
0
].
d
;
dx_max
=
(
ci
->
dx_max
+
cj
->
dx_max
);
/* if ( ci->split && cj->split && sid == 4 )
printf( "boing!\n" ); */
/* Loop over the parts in ci. */
for
(
pid
=
count_i
-
1
;
pid
>=
0
&&
sort_i
[
pid
].
d
+
hi_max
>
dj_min
;
pid
--
)
{
for
(
pid
=
count_i
-
1
;
pid
>=
0
&&
sort_i
[
pid
].
d
+
hi_max
+
dx_max
>
dj_min
;
pid
--
)
{
/* Get a hold of the ith part in ci. */
pi
=
&
parts_i
[
sort_i
[
pid
].
i
];
...
...
@@ -688,7 +690,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
if
(
cpi
->
dt
>
dt_step
)
continue
;
hi
=
cpi
->
h
;
di
=
sort_i
[
pid
].
d
+
hi
-
rshift
;
di
=
sort_i
[
pid
].
d
+
hi
+
dx_max
-
rshift
;
if
(
di
<
dj_min
)
continue
;
...
...
@@ -747,7 +749,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
tic = getticks(); */
/* Loop over the parts in cj. */
for
(
pjd
=
0
;
pjd
<
count_j
&&
sort_j
[
pjd
].
d
-
hj_max
<
di_max
;
pjd
++
)
{
for
(
pjd
=
0
;
pjd
<
count_j
&&
sort_j
[
pjd
].
d
-
hj_max
-
dx_max
<
di_max
;
pjd
++
)
{
/* Get a hold of the jth part in cj. */
pj
=
&
parts_j
[
sort_j
[
pjd
].
i
];
...
...
@@ -755,7 +757,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
if
(
cpj
->
dt
>
dt_step
)
continue
;
hj
=
cpj
->
h
;
dj
=
sort_j
[
pjd
].
d
-
hj
-
rshift
;
dj
=
sort_j
[
pjd
].
d
-
hj
-
dx_max
-
rshift
;
if
(
dj
>
di_max
)
continue
;
...
...
@@ -838,7 +840,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
struct
cpart
*
restrict
cpi
,
*
restrict
cparts_i
;
struct
cpart
*
restrict
cpj
,
*
restrict
cparts_j
;
double
pix
[
3
],
pjx
[
3
],
di
,
dj
;
float
dx
[
3
],
hi
,
hi2
,
hj
,
hj2
,
r2
;
float
dx
[
3
],
hi
,
hi2
,
hj
,
hj2
,
r2
,
dx_max
;
double
hi_max
,
hj_max
;
double
di_max
,
dj_min
;
int
count_i
,
count_j
;
...
...
@@ -885,6 +887,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
cparts_i
=
ci
->
cparts
;
cparts_j
=
cj
->
cparts
;
di_max
=
sort_i
[
count_i
-
1
].
d
-
rshift
;
dj_min
=
sort_j
[
0
].
d
;
dx_max
=
(
ci
->
dx_max
+
cj
->
dx_max
);
/* Collect the number of parts left and right below dt. */
if
(
ci
->
dt_max
<=
dt_step
)
{
...
...
@@ -915,13 +918,13 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
}
/* Loop over the parts in ci. */
for
(
pid
=
count_i
-
1
;
pid
>=
0
&&
sort_i
[
pid
].
d
+
hi_max
>
dj_min
;
pid
--
)
{
for
(
pid
=
count_i
-
1
;
pid
>=
0
&&
sort_i
[
pid
].
d
+
hi_max
+
dx_max
>
dj_min
;
pid
--
)
{
/* Get a hold of the ith part in ci. */
pi
=
&
parts_i
[
sort_i
[
pid
].
i
];
cpi
=
&
cparts_i
[
sort_i
[
pid
].
i
];
hi
=
cpi
->
h
;
di
=
sort_i
[
pid
].
d
+
hi
-
rshift
;
di
=
sort_i
[
pid
].
d
+
hi
+
dx_max
-
rshift
;
if
(
di
<
dj_min
)
continue
;
...
...
@@ -1065,13 +1068,13 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
tic = getticks(); */
/* Loop over the parts in cj. */
for
(
pjd
=
0
;
pjd
<
count_j
&&
sort_j
[
pjd
].
d
-
hj_max
<
di_max
;
pjd
++
)
{
for
(
pjd
=
0
;
pjd
<
count_j
&&
sort_j
[
pjd
].
d
-
hj_max
-
dx_max
<
di_max
;
pjd
++
)
{
/* Get a hold of the jth part in cj. */
pj
=
&
parts_j
[
sort_j
[
pjd
].
i
];
cpj
=
&
cparts_j
[
sort_j
[
pjd
].
i
];
hj
=
cpj
->
h
;
dj
=
sort_j
[
pjd
].
d
-
hj
-
rshift
;
dj
=
sort_j
[
pjd
].
d
-
hj
-
dx_max
-
rshift
;
if
(
dj
>
di_max
)
continue
;
...
...
src/runner_iact.h
View file @
d48d9285
...
...
@@ -68,7 +68,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
pi
->
rho
+=
pj
->
mass
*
wi
;
pi
->
rho_dh
-=
pj
->
mass
*
(
3
.
0
*
wi
+
xi
*
wi_dx
);
pi
->
wcount
+=
wi
*
(
4
.
0
f
*
M_PI
/
3
.
0
f
*
kernel_igamma3
);
pi
->
density
.
wcount
+=
wi
*
(
4
.
0
f
*
M_PI
/
3
.
0
f
*
kernel_igamma3
);
pi
->
density
.
wcount_dh
-=
xi
*
h_inv
*
wi_dx
*
(
4
.
0
f
*
M_PI
/
3
.
0
f
*
kernel_igamma3
);
// pi->icount += 1;
...
...
@@ -87,7 +87,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
pj
->
rho
+=
pi
->
mass
*
wj
;
pj
->
rho_dh
-=
pi
->
mass
*
(
3
.
0
*
wj
+
xj
*
wj_dx
);
pj
->
wcount
+=
wj
*
(
4
.
0
f
*
M_PI
/
3
.
0
f
*
kernel_igamma3
);
pj
->
density
.
wcount
+=
wj
*
(
4
.
0
f
*
M_PI
/
3
.
0
f
*
kernel_igamma3
);
pj
->
density
.
wcount_dh
-=
xj
*
h_inv
*
wj_dx
*
(
4
.
0
f
*
M_PI
/
3
.
0
f
*
kernel_igamma3
);
// pj->icount += 1;
...
...
@@ -192,14 +192,14 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_density ( flo
for
(
k
=
0
;
k
<
VEC_SIZE
;
k
++
)
{
pi
[
k
]
->
rho
+=
rhoi
.
f
[
k
];
pi
[
k
]
->
rho_dh
-=
rhoi_dh
.
f
[
k
];
pi
[
k
]
->
wcount
+=
wcounti
.
f
[
k
];
pi
[
k
]
->
density
.
wcount
+=
wcounti
.
f
[
k
];
pi
[
k
]
->
density
.
wcount_dh
-=
wcounti_dh
.
f
[
k
];
pi
[
k
]
->
density
.
div_v
+=
div_vi
.
f
[
k
];
for
(
j
=
0
;
j
<
3
;
j
++
)
pi
[
k
]
->
density
.
curl_v
[
j
]
+=
curl_vi
[
j
].
f
[
k
];
pj
[
k
]
->
rho
+=
rhoj
.
f
[
k
];
pj
[
k
]
->
rho_dh
-=
rhoj_dh
.
f
[
k
];
pj
[
k
]
->
wcount
+=
wcountj
.
f
[
k
];
pj
[
k
]
->
density
.
wcount
+=
wcountj
.
f
[
k
];
pj
[
k
]
->
density
.
wcount_dh
-=
wcountj_dh
.
f
[
k
];
pj
[
k
]
->
density
.
div_v
+=
div_vj
.
f
[
k
];
for
(
j
=
0
;
j
<
3
;
j
++
)
...
...
@@ -251,7 +251,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density (
pi
->
rho
+=
pj
->
mass
*
wi
;
pi
->
rho_dh
-=
pj
->
mass
*
(
3
.
0
*
wi
+
xi
*
wi_dx
);
pi
->
wcount
+=
wi
*
(
4
.
0
f
*
M_PI
/
3
.
0
f
*
kernel_igamma3
);
pi
->
density
.
wcount
+=
wi
*
(
4
.
0
f
*
M_PI
/
3
.
0
f
*
kernel_igamma3
);
pi
->
density
.
wcount_dh
-=
xi
*
h_inv
*
wi_dx
*
(
4
.
0
f
*
M_PI
/
3
.
0
f
*
kernel_igamma3
);
// pi->icount += 1;
...
...
@@ -329,7 +329,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_densit
for
(
k
=
0
;
k
<
VEC_SIZE
;
k
++
)
{
pi
[
k
]
->
rho
+=
rhoi
.
f
[
k
];
pi
[
k
]
->
rho_dh
-=
rhoi_dh
.
f
[
k
];
pi
[
k
]
->
wcount
+=
wcounti
.
f
[
k
];
pi
[
k
]
->
density
.
wcount
+=
wcounti
.
f
[
k
];
pi
[
k
]
->
density
.
wcount_dh
-=
wcounti_dh
.
f
[
k
];
pi
[
k
]
->
density
.
div_v
+=
div_vi
.
f
[
k
];
for
(
j
=
0
;
j
<
3
;
j
++
)
...
...
src/space.c
View file @
d48d9285
...
...
@@ -133,7 +133,7 @@ int space_marktasks ( struct space *s ) {
/* Too much particle movement? */
if
(
!
t
->
skip
&&
t
->
tight
&&
(
t
->
ci
->
dx_max
>
t
->
ci
->
dmin
||
t
->
cj
->
dx_max
>
t
->
cj
->
dmin
)
)
(
t
->
ci
->
h2
dx_max
>
t
->
ci
->
dmin
||
t
->
cj
->
h2
dx_max
>
t
->
cj
->
dmin
)
)
return
1
;
/* Set the sort flags. */
...
...
@@ -176,94 +176,6 @@ int space_marktasks ( struct space *s ) {
}
/**
* @brief Mapping function to set dt_min and dt_max.
*/
void
space_map_prepare
(
struct
cell
*
c
,
void
*
data
)
{
int
k
;
float
dt_min
,
dt_max
,
h_max
,
dx_max
;
struct
part
*
restrict
p
;
struct
xpart
*
restrict
xp
;
struct
cpart
*
restrict
cp
;
/* No children? */
if
(
!
c
->
split
)
{
/* Init with first part. */
p
=
&
c
->
parts
[
0
];
xp
=
p
->
xtras
;
cp
=
&
c
->
cparts
[
0
];
dt_min
=
p
->
dt
;
dt_max
=
p
->
dt
;
h_max
=
p
->
h
;
dx_max
=
sqrtf
(
(
p
->
x
[
0
]
-
xp
->
x_old
[
0
])
*
(
p
->
x
[
0
]
-
xp
->
x_old
[
0
])
+
(
p
->
x
[
1
]
-
xp
->
x_old
[
1
])
*
(
p
->
x
[
1
]
-
xp
->
x_old
[
1
])
+
(
p
->
x
[
2
]
-
xp
->
x_old
[
2
])
*
(
p
->
x
[
2
]
-
xp
->
x_old
[
2
])
)
*
2
+
p
->
h
;
cp
->
x
[
0
]
=
p
->
x
[
0
];
cp
->
x
[
1
]
=
p
->
x
[
1
];
cp
->
x
[
2
]
=
p
->
x
[
2
];
cp
->
h
=
p
->
h
;
cp
->
dt
=
p
->
dt
;
/* Loop over parts. */
for
(
k
=
1
;
k
<
c
->
count
;
k
++
)
{
p
=
&
c
->
parts
[
k
];
xp
=
p
->
xtras
;
cp
=
&
c
->
cparts
[
k
];
dt_min
=
fminf
(
dt_min
,
p
->
dt
);
dt_max
=
fmaxf
(
dt_max
,
p
->
dt
);
h_max
=
fmaxf
(
h_max
,
p
->
h
);
dx_max
=
fmaxf
(
dx_max
,
sqrtf
(
(
p
->
x
[
0
]
-
xp
->
x_old
[
0
])
*
(
p
->
x
[
0
]
-
xp
->
x_old
[
0
])
+
(
p
->
x
[
1
]
-
xp
->
x_old
[
1
])
*
(
p
->
x
[
1
]
-
xp
->
x_old
[
1
])
+
(
p
->
x
[
2
]
-
xp
->
x_old
[
2
])
*
(
p
->
x
[
2
]
-
xp
->
x_old
[
2
])
)
*
2
+
p
->
h
);
cp
->
x
[
0
]
=
p
->
x
[
0
];
cp
->
x
[
1
]
=
p
->
x
[
1
];
cp
->
x
[
2
]
=
p
->
x
[
2
];
cp
->
h
=
p
->
h
;
cp
->
dt
=
p
->
dt
;
}
}
/* Otherwise, agregate from children. */
else
{
/* Init with the first non-null child. */
for
(
k
=
0
;
c
->
progeny
[
k
]
==
NULL
;
k
++
);