Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
6f6f3f54
Commit
6f6f3f54
authored
Feb 12, 2013
by
Pedro Gonnet
Browse files
several fixes to dopair2 and doself2 for multiple time-stepping.
Former-commit-id: fa77a1b6804434b679dc88ffed39f6b96ceaa5f4
parent
3f62f44a
Changes
10
Expand all
Hide whitespace changes
Inline
Side-by-side
src/Makefile.am
View file @
6f6f3f54
...
...
@@ -20,11 +20,11 @@
AUTOMAKE_OPTIONS
=
gnu
# Add the debug flag to the whole thing
# AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \
# -funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) \
# -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
AM_CFLAGS
=
-Wall
-Werror
$(OPENMP_CFLAGS)
\
AM_CFLAGS
=
-g
-O3
-Wall
-Werror
-ffast-math
-fstrict-aliasing
-ftree-vectorize
\
-funroll-loops
$(SIMD_FLAGS)
$(OPENMP_CFLAGS)
\
-DTIMER
-DCOUNTER
-DCPU_TPS
=
2.67e9
# AM_CFLAGS = -Wall -Werror $(OPENMP_CFLAGS) \
# -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
# Assign a "safe" version number
AM_LDFLAGS
=
$(LAPACK_LIBS)
$(BLAS_LIBS)
$(HDF5_LDFLAGS)
-version-info
0:0:0
...
...
src/debug.c
View file @
6f6f3f54
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2012 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
* Copyright (c) 2013 Matthieu Schaller (matthieu.schaller@durham.ac.uk),
* Pedro Gonnet (pedro.gonnet@durham.ac.uk).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
...
...
@@ -29,7 +30,7 @@ void printParticle ( struct part *parts , long long int id ) {
/* Look for the particle. */
for
(
i
=
0
;
parts
[
i
].
id
!=
id
;
i
++
);
printf
(
"## Particle[%d]: id=%lld, x=
(
%f,%f,%f
)
, v=
(
%f,%f,%f
)
, a=
(
%f,%f,%f
)
, h=%
f
, h_dt=%
f
, wcount=%f, m=%
f
, rho=%f, u=%f, dudt=%f, dt=%.3e
\n
"
,
printf
(
"## Particle[%d]: id=%lld, x=
[
%f,%f,%f
]
, v=
[
%f,%f,%f
]
, a=
[
%f,%f,%f
]
, h=%
.3e
, h_dt=%
.3e
, wcount=%f, m=%
.3e
, rho=%f, u=%f, dudt=%f, dt=%.3e
\n
"
,
i
,
parts
[
i
].
id
,
parts
[
i
].
x
[
0
],
parts
[
i
].
x
[
1
],
parts
[
i
].
x
[
2
],
...
...
src/engine.c
View file @
6f6f3f54
...
...
@@ -66,7 +66,7 @@ void engine_prepare ( struct engine *e ) {
int
j
,
k
,
qid
;
struct
space
*
s
=
e
->
s
;
struct
queue
*
q
;
float
dt_
max
=
e
->
dt_
max
;
float
dt_
step
=
e
->
dt_
step
;
TIMER_TIC
...
...
@@ -95,7 +95,7 @@ void engine_prepare ( struct engine *e ) {
// tic = getticks();
#pragma omp parallel for schedule(static)
for
(
k
=
0
;
k
<
s
->
nr_parts
;
k
++
)
if
(
s
->
parts
[
k
].
dt
<=
dt_
max
)
{
if
(
s
->
parts
[
k
].
dt
<=
dt_
step
)
{
s
->
parts
[
k
].
wcount
=
0
.
0
f
;
s
->
parts
[
k
].
wcount_dh
=
0
.
0
f
;
s
->
parts
[
k
].
rho
=
0
.
0
f
;
...
...
@@ -179,71 +179,70 @@ void engine_step ( struct engine *e , int sort_queues ) {
int
k
,
nr_parts
=
e
->
s
->
nr_parts
;
struct
part
*
restrict
parts
=
e
->
s
->
parts
,
*
restrict
p
;
floa
t
*
restrict
v_bar
;
float
dt
=
e
->
dt
,
hdt
=
0
.
5
*
dt
,
dt_max
,
dt_min
,
ldt_min
,
ldt_max
;
double
e
t
ot
=
0
.
0
,
le
t
ot
,
lmom
[
3
],
mom
[
3
]
=
{
0
.
0
,
0
.
0
,
0
.
0
};
int
threadID
,
nthreads
;
struct
xpar
t
*
restrict
xp
;
float
dt
=
e
->
dt
,
hdt
=
0
.
5
*
dt
,
dt_step
,
dt_max
,
dt_min
,
ldt_min
,
ldt_max
;
double
e
p
ot
=
0
.
0
,
ekin
=
0
.
0
,
le
p
ot
,
lekin
,
lmom
[
3
],
mom
[
3
]
=
{
0
.
0
,
0
.
0
,
0
.
0
};
int
threadID
,
nthreads
,
count
=
0
,
lcount
;
// #ifdef __SSE2__
// VEC_MACRO(4,float) hdtv = _mm_set1_ps( hdt );
// #endif
/* Get the maximum dt. */
dt_
max
=
2
.
0
f
*
dt
;
dt_
step
=
2
.
0
f
*
dt
;
for
(
k
=
0
;
k
<
32
&&
(
e
->
step
&
(
1
<<
k
))
==
0
;
k
++
)
dt_
max
*=
2
;
dt_max
=
1
;
dt_
step
*=
2
;
// dt_step = FLT_MAX
;
/* Set the maximum dt. */
e
->
dt_
max
=
dt_
max
;
e
->
s
->
dt_
max
=
dt_
max
;
printf
(
"engine_step: dt_
max
set to %.3e.
\n
"
,
dt_
max
);
fflush
(
stdout
);
e
->
dt_
step
=
dt_
step
;
e
->
s
->
dt_
step
=
dt_
step
;
printf
(
"engine_step: dt_
step
set to %.3e.
\n
"
,
dt_
step
);
fflush
(
stdout
);
/* Allocate a buffer for the old velocities. */
if
(
posix_memalign
(
(
void
**
)
&
v_bar
,
16
,
sizeof
(
float
)
*
nr_parts
*
4
)
!=
0
)
error
(
"Failed to allocate v_old buffer."
);
/* First kick. */
TIMER_TIC
//
#pragma omp parallel for schedule(static) private(p)
#pragma omp parallel for schedule(static) private(p
,xp
)
for
(
k
=
0
;
k
<
nr_parts
;
k
++
)
{
/* Get a handle on the part. */
p
=
&
parts
[
k
];
xp
=
p
->
xtras
;
/* Step and store the velocity and internal energy. */
// #ifdef __SSE__
// _mm_store_ps( &v_bar[4*k] ,
_mm_add_ps(
_mm_load_ps( &p->v[0] )
, _mm_mul_ps(
hdtv
,
_mm_load_ps( &p->a[0] )
) )
);
// _mm_store_ps( &v_bar[4*k] , _mm_load_ps( &p->v[0] )
+
hdtv
*
_mm_load_ps( &p->a[0] ) );
// #else
v_bar
[
4
*
k
+
0
]
=
p
->
v
[
0
]
+
hdt
*
p
->
a
[
0
];
v_bar
[
4
*
k
+
1
]
=
p
->
v
[
1
]
+
hdt
*
p
->
a
[
1
];
v_bar
[
4
*
k
+
2
]
=
p
->
v
[
2
]
+
hdt
*
p
->
a
[
2
];
xp
->
v_old
[
0
]
=
p
->
v
[
0
]
+
hdt
*
p
->
a
[
0
];
xp
->
v_old
[
1
]
=
p
->
v
[
1
]
+
hdt
*
p
->
a
[
1
];
xp
->
v_old
[
2
]
=
p
->
v
[
2
]
+
hdt
*
p
->
a
[
2
];
// #endif
v_bar
[
4
*
k
+
3
]
=
p
->
u
+
hdt
*
p
->
u_dt
;
// xp->u_old = fmaxf( p->u + hdt * p->u_dt , FLT_EPSILON );
xp
->
u_old
=
p
->
u
+
hdt
*
p
->
u_dt
;
/* Move the particles with the velocities at the half-step. */
p
->
x
[
0
]
+=
dt
*
v_bar
[
4
*
k
+
0
];
p
->
x
[
1
]
+=
dt
*
v_bar
[
4
*
k
+
1
];
p
->
x
[
2
]
+=
dt
*
v_bar
[
4
*
k
+
2
];
p
->
x
[
0
]
+=
dt
*
xp
->
v_old
[
0
];
p
->
x
[
1
]
+=
dt
*
xp
->
v_old
[
1
];
p
->
x
[
2
]
+=
dt
*
xp
->
v_old
[
2
];
/* Update positions and energies at the half-step. */
p
->
v
[
0
]
+=
dt
*
p
->
a
[
0
];
p
->
v
[
1
]
+=
dt
*
p
->
a
[
1
];
p
->
v
[
2
]
+=
dt
*
p
->
a
[
2
];
p
->
u
*=
expf
(
p
->
u_dt
/
p
->
u
*
dt
);
//
p->h *= expf( p->h_dt / p->h * dt );
p
->
h
*=
expf
(
p
->
h_dt
/
p
->
h
*
dt
);
/* Integrate other values if this particle will not be updated. */
//
if ( p->dt > dt_
max
) {
//
p->rho *= expf( -3.0f * p->h_dt / p->h * dt );
//
p->POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f );
//
}
if
(
p
->
dt
>
dt_
step
)
{
p
->
rho
*=
expf
(
-
3
.
0
f
*
p
->
h_dt
/
p
->
h
*
dt
);
p
->
POrho2
=
p
->
u
*
(
const_gamma
-
1
.
0
f
)
/
(
p
->
rho
+
p
->
h
*
p
->
rho_dh
/
3
.
0
f
);
}
}
TIMER_TOC
(
timer_kick1
);
// for(k=0; k<10; ++k)
// printParticle(parts, k);
// printParticle( parts , 494849 );
/* Prepare the space. */
engine_prepare
(
e
);
...
...
@@ -274,44 +273,52 @@ void engine_step ( struct engine *e , int sort_queues ) {
// for(k=0; k<10; ++k)
// printParticle(parts, k);
// printParticle( parts , 494849 );
/* Second kick. */
TIMER_TIC_ND
dt_min
=
FLT_MAX
;
dt_max
=
0
.
0
f
;
#pragma omp parallel private(p,k,ldt_min,ldt_max,lmom,le
t
ot,threadID,nthreads)
#pragma omp parallel private(p,
xp,
k,ldt_min,
lcount,
ldt_max,lmom,le
kin,lep
ot,threadID,nthreads)
{
threadID
=
omp_get_thread_num
();
nthreads
=
omp_get_num_threads
();
ldt_min
=
FLT_MAX
;
ldt_max
=
0
.
0
f
;
lmom
[
0
]
=
0
.
0
;
lmom
[
1
]
=
0
.
0
;
lmom
[
2
]
=
0
.
0
;
le
tot
=
0
.
0
;
le
kin
=
0
.
0
;
lepot
=
0
.
0
;
lcount
=
0
;
for
(
k
=
nr_parts
*
threadID
/
nthreads
;
k
<
nr_parts
*
(
threadID
+
1
)
/
nthreads
;
k
++
)
{
/* Get a handle on the part. */
p
=
&
parts
[
k
];
xp
=
p
->
xtras
;
/* Scale the derivatives if they're freshly computed. */
if
(
p
->
dt
<=
dt_
max
)
{
if
(
p
->
dt
<=
dt_
step
)
{
p
->
u_dt
*=
p
->
POrho2
;
p
->
h_dt
*=
p
->
h
*
0
.
333333333
f
;
lcount
+=
1
;
}
/* Update the particle's time step. */
p
->
dt
=
const_cfl
*
p
->
h
/
(
p
->
c
+
p
->
v_sig
);
/* Update positions and energies at the half-step. */
// #ifdef __SSE__
// _mm_store_ps( &p->v[0] ,
_mm_add_ps(
_mm_load_ps( &v_bar[4*k] )
, _mm_mul_ps(
hdtv
,
_mm_load_ps( &p->a[0] )
) )
);
// _mm_store_ps( &p->v[0] , _mm_load_ps( &v_bar[4*k] )
+
hdtv
*
_mm_load_ps( &p->a[0] ) );
// #else
p
->
v
[
0
]
=
v_bar
[
4
*
k
+
0
]
+
hdt
*
p
->
a
[
0
];
p
->
v
[
1
]
=
v_bar
[
4
*
k
+
1
]
+
hdt
*
p
->
a
[
1
];
p
->
v
[
2
]
=
v_bar
[
4
*
k
+
2
]
+
hdt
*
p
->
a
[
2
];
p
->
v
[
0
]
=
xp
->
v_old
[
0
]
+
hdt
*
p
->
a
[
0
];
p
->
v
[
1
]
=
xp
->
v_old
[
1
]
+
hdt
*
p
->
a
[
1
];
p
->
v
[
2
]
=
xp
->
v_old
[
2
]
+
hdt
*
p
->
a
[
2
];
// #endif
p
->
u
=
v_bar
[
4
*
k
+
3
]
+
hdt
*
p
->
u_dt
;
// p->u = fmaxf( xp->u_old + hdt * p->u_dt , FLT_EPSILON );
p
->
u
=
xp
->
u_old
+
hdt
*
p
->
u_dt
;
/* Get the smallest/largest dt. */
ldt_min
=
fminf
(
ldt_min
,
p
->
dt
);
ldt_max
=
fmaxf
(
ldt_max
,
p
->
dt
);
/* Collect total energy. */
letot
+=
0
.
5
*
p
->
mass
*
(
p
->
v
[
0
]
*
p
->
v
[
0
]
+
p
->
v
[
1
]
*
p
->
v
[
1
]
+
p
->
v
[
2
]
*
p
->
v
[
2
]
)
+
p
->
mass
*
p
->
u
;
lekin
+=
0
.
5
*
p
->
mass
*
(
p
->
v
[
0
]
*
p
->
v
[
0
]
+
p
->
v
[
1
]
*
p
->
v
[
1
]
+
p
->
v
[
2
]
*
p
->
v
[
2
]
);
lepot
+=
p
->
mass
*
p
->
u
;
/* Collect momentum */
lmom
[
0
]
+=
p
->
mass
*
p
->
v
[
0
];
...
...
@@ -326,31 +333,33 @@ void engine_step ( struct engine *e , int sort_queues ) {
mom
[
0
]
+=
lmom
[
0
];
mom
[
1
]
+=
lmom
[
1
];
mom
[
2
]
+=
lmom
[
2
];
etot
+=
letot
;
epot
+=
lepot
;
ekin
+=
lekin
;
count
+=
lcount
;
}
}
TIMER_TOC
(
timer_kick2
);
e
->
dt_min
=
dt_min
;
printf
(
"engine_step: dt_min/dt_max is %e/%e.
\n
"
,
dt_min
,
dt_max
);
fflush
(
stdout
);
printf
(
"engine_step: etot is %e
.
\n
"
,
et
ot
);
fflush
(
stdout
);
printf
(
"engine_step: etot is %e
(ekin=%e, epot=%e).
\n
"
,
ekin
+
epot
,
ekin
,
ep
ot
);
fflush
(
stdout
);
printf
(
"engine_step: total momentum is [ %e , %e , %e ].
\n
"
,
mom
[
0
]
,
mom
[
1
]
,
mom
[
2
]
);
fflush
(
stdout
);
printf
(
"engine_step: updated %i parts (dt_step=%.3e).
\n
"
,
count
,
dt_step
);
fflush
(
stdout
);
/* Increase the step counter. */
e
->
step
+=
1
;
/* Does the time step need adjusting? */
if
(
dt_min
<
e
->
dt
)
{
while
(
dt_min
<
e
->
dt
)
{
e
->
dt
*=
0
.
5
;
e
->
step
*=
2
;
printf
(
"engine_step: dt_min dropped below time step, adjusting to dt=%e.
\n
"
,
e
->
dt
);
}
else
if
(
dt_min
>
2
*
e
->
dt
)
{
while
(
dt_min
>
2
*
e
->
dt
&&
(
e
->
step
&
1
)
==
0
)
{
e
->
dt
*=
2
.
0
;
e
->
step
/=
2
;
printf
(
"engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.
\n
"
,
e
->
dt
);
}
/* Clean up. */
free
(
v_bar
);
/* Increase the step counter. */
e
->
step
+=
1
;
}
...
...
src/engine.h
View file @
6f6f3f54
...
...
@@ -50,8 +50,10 @@ struct engine {
/* The queues. */
struct
queue
*
queues
;
/* The maximum dt to step. */
float
dt_max
;
/* The maximum dt to step (current). */
float
dt_step
;
/* The minimum dt over all particles in the system. */
float
dt_min
;
/* The system time step. */
...
...
src/part.h
View file @
6f6f3f54
...
...
@@ -38,6 +38,21 @@ struct cpart {
}
__attribute__
((
aligned
(
32
)));
/* Extra per-particle data that is not needed during the computation. */
struct xpart {

    /* Position recorded at the last tree rebuild. */
    double x_old[3];

    /* Velocity saved by the first kick in engine_step
       (v + 0.5*dt*a) and read back by the second kick. */
    float v_old[3];

    /* Value of u saved by the first kick in engine_step
       (u + 0.5*dt*u_dt) and read back by the second kick. */
    float u_old;

} __attribute__((aligned(32)));
/* Data of a single particle. */
struct
part
{
...
...
@@ -66,6 +81,9 @@ struct part {
/* Particle acceleration. */
float
a
[
3
]
__attribute__
((
aligned
(
16
)));
/* Speed c = sqrt( gamma * (gamma-1) * u ) and maximum signal velocity over the neighbours. */
float
c
,
v_sig
;
/* Derivative of the density with respect to this particle's smoothing length. */
float
rho_dh
;
...
...
@@ -80,8 +98,8 @@ struct part {
/* Particle ID. */
unsigned
long
long
id
;
/*
Old position, at last tree rebuild
. */
double
x_old
[
3
]
;
/*
Pointer to extra particle data
. */
struct
xpart
*
xtras
;
/* Particle position. */
double
x
[
3
];
...
...
src/runner.c
View file @
6f6f3f54
...
...
@@ -329,8 +329,8 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
struct
cell
*
finger
;
int
i
,
k
,
redo
,
count
=
c
->
count
;
int
*
pid
;
float
ihg
,
ihg2
;
float
dt_
max
=
r
->
e
->
dt_
max
;
float
ihg
,
ihg2
,
h_corr
;
float
dt_
step
=
r
->
e
->
dt_
step
;
TIMER_TIC
/* Recurse? */
...
...
@@ -361,7 +361,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
cp
=
&
c
->
cparts
[
pid
[
i
]
];
/* Is this part within the timestep? */
if
(
cp
->
dt
<=
dt_
max
)
{
if
(
cp
->
dt
<=
dt_
step
)
{
/* Adjust the computed rho. */
ihg
=
kernel_igamma
/
p
->
h
;
...
...
@@ -370,8 +370,15 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
p
->
rho_dh
*=
ihg2
*
ihg2
;
p
->
wcount
+=
kernel_wroot
;
/* Update the smoothing length. */
p
->
h
-=
(
p
->
wcount
-
const_nwneigh
)
/
p
->
wcount_dh
;
/* Compute the smoothing length update (Newton step). */
h_corr
=
(
const_nwneigh
-
p
->
wcount
)
/
p
->
wcount_dh
;
/* Truncate to the range [ -p->h/2 , p->h ]. */
h_corr
=
fminf
(
h_corr
,
p
->
h
);
h_corr
=
fmaxf
(
h_corr
,
-
p
->
h
/
2
);
/* Apply the correction to p->h. */
p
->
h
+=
h_corr
;
cp
->
h
=
p
->
h
;
/* Did we get the right number density? */
...
...
@@ -389,8 +396,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
}
/* Compute this particle's time step. */
p
->
dt
=
const_cfl
*
p
->
h
/
sqrtf
(
const_gamma
*
(
const_gamma
-
1
.
0
f
)
*
p
->
u
);
cp
->
dt
=
p
->
dt
;
p
->
c
=
sqrtf
(
const_gamma
*
(
const_gamma
-
1
.
0
f
)
*
p
->
u
);
/* Compute the pressure. */
// p->P = p->rho * p->u * ( const_gamma - 1.0f );
...
...
@@ -405,6 +411,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Reset the time derivatives. */
p
->
u_dt
=
0
.
0
f
;
p
->
h_dt
=
0
.
0
f
;
p
->
v_sig
=
0
.
0
f
;
}
...
...
src/runner_doiact.h
View file @
6f6f3f54
This diff is collapsed.
Click to expand it.
src/runner_iact.h
View file @
6f6f3f54
...
...
@@ -329,26 +329,26 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
float
r
=
sqrtf
(
r2
),
ri
=
1
.
0
f
/
r
;
float
xi
,
xj
;
float
hi
g
_inv
,
hi
g
2_inv
;
float
hj
g
_inv
,
hj
g
2_inv
;
float
hi_inv
,
hi2_inv
;
float
hj_inv
,
hj2_inv
;
float
wi
,
wj
,
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
w
,
dvdr
;
float
f
;
int
k
;
/* Get the kernel for hi. */
hi
g
_inv
=
kernel_igamma
/
hi
;
hi
g
2_inv
=
hi
g
_inv
*
hi
g
_inv
;
xi
=
r
*
hi
g
_inv
;
hi_inv
=
1
.
0
f
/
hi
;
hi2_inv
=
hi_inv
*
hi_inv
;
xi
=
r
*
hi_inv
*
kernel_igamma
;
kernel_deval
(
xi
,
&
wi
,
&
wi_dx
);
wi_dr
=
hi
g
2_inv
*
hi
g
2_inv
*
wi_dx
;
wi_dr
=
hi2_inv
*
hi2_inv
*
wi_dx
;
/* Get the kernel for hj. */
hj
g
_inv
=
kernel_igamma
/
hj
;
hj
g
2_inv
=
hj
g
_inv
*
hj
g
_inv
;
xj
=
r
*
hj
g
_inv
;
hj_inv
=
1
.
0
f
/
hj
;
hj2_inv
=
hj_inv
*
hj_inv
;
xj
=
r
*
hj_inv
*
kernel_igamma
;
kernel_deval
(
xj
,
&
wj
,
&
wj_dx
);
wj_dr
=
hj
g
2_inv
*
hj
g
2_inv
*
wj_dx
;
wj_dr
=
hj2_inv
*
hj2_inv
*
wj_dx
;
/* Get the common factor out. */
w
=
ri
*
(
pi
->
POrho2
*
wi_dr
+
pj
->
POrho2
*
wj_dr
);
...
...
@@ -370,7 +370,11 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
/* Get the time derivative for h. */
pi
->
h_dt
-=
pj
->
mass
/
pj
->
rho
*
dvdr
*
wi_dr
;
pj
->
h_dt
-=
pi
->
mass
/
pi
->
rho
*
dvdr
*
wj_dr
;
/* Update the signal velocity. */
pi
->
v_sig
=
fmaxf
(
pi
->
v_sig
,
pj
->
c
-
3
*
dvdr
);
pj
->
v_sig
=
fmaxf
(
pj
->
v_sig
,
pi
->
c
-
3
*
dvdr
);
#ifdef HIST
if
(
hi
>
hj
)
runner_hist_hit
(
hi
/
hj
);
...
...
@@ -388,8 +392,8 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
vector
r
,
r2
,
ri
;
vector
xi
,
xj
;
vector
hi
,
hj
,
hi_inv
,
hj_inv
;
vector
hig_inv
,
hi
g
2_inv
;
vector
hjg_inv
,
hj
g
2_inv
;
vector
hig_inv
,
hi2_inv
;
vector
hjg_inv
,
hj2_inv
;
vector
wi
,
wj
,
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
dvdr
;
vector
w
;
vector
piPOrho2
,
pjPOrho2
,
pirho
,
pjrho
;
...
...
@@ -444,20 +448,20 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
hi_inv
.
v
=
vec_rcp
(
hi
.
v
);
hi_inv
.
v
=
hi_inv
.
v
-
hi_inv
.
v
*
(
hi
.
v
*
hi_inv
.
v
-
vec_set1
(
1
.
0
f
)
);
hig_inv
.
v
=
vec_set1
(
kernel_igamma
)
*
hi_inv
.
v
;
hi
g
2_inv
.
v
=
hi
g
_inv
.
v
*
hi
g
_inv
.
v
;
hi2_inv
.
v
=
hi_inv
.
v
*
hi_inv
.
v
;
xi
.
v
=
r
.
v
*
hig_inv
.
v
;
kernel_deval_vec
(
&
xi
,
&
wi
,
&
wi_dx
);
wi_dr
.
v
=
hi
g
2_inv
.
v
*
hi
g
2_inv
.
v
*
wi_dx
.
v
;
wi_dr
.
v
=
hi2_inv
.
v
*
hi2_inv
.
v
*
wi_dx
.
v
;
/* Get the kernel for hj. */
hj
.
v
=
vec_load
(
Hj
);
hj_inv
.
v
=
vec_rcp
(
hj
.
v
);
hj_inv
.
v
=
hj_inv
.
v
-
hj_inv
.
v
*
(
hj
.
v
*
hj_inv
.
v
-
vec_set1
(
1
.
0
f
)
);
hjg_inv
.
v
=
vec_set1
(
kernel_igamma
)
*
hj_inv
.
v
;
hj
g
2_inv
.
v
=
hj
g
_inv
.
v
*
hj
g
_inv
.
v
;
hj2_inv
.
v
=
hj_inv
.
v
*
hj_inv
.
v
;
xj
.
v
=
r
.
v
*
hjg_inv
.
v
;
kernel_deval_vec
(
&
xj
,
&
wj
,
&
wj_dx
);
wj_dr
.
v
=
hj
g
2_inv
.
v
*
hj
g
2_inv
.
v
*
wj_dx
.
v
;
wj_dr
.
v
=
hj2_inv
.
v
*
hj2_inv
.
v
*
wj_dx
.
v
;
/* Get the common factor out. */
w
.
v
=
ri
.
v
*
(
piPOrho2
.
v
*
wi_dr
.
v
+
pjPOrho2
.
v
*
wj_dr
.
v
);
...
...
@@ -508,25 +512,25 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
float
r
=
sqrtf
(
r2
),
ri
=
1
.
0
f
/
r
;
float
xi
,
xj
;
float
hi
g
_inv
,
hi
g
2_inv
;
float
hj
g
_inv
,
hj
g
2_inv
;
float
hi_inv
,
hi2_inv
;
float
hj_inv
,
hj2_inv
;
float
wi
,
wj
,
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
w
,
dvdr
;
float
f
;
int
k
;
/* Get the kernel for hi. */
hi
g
_inv
=
kernel_igamma
/
hi
;
hi
g
2_inv
=
hi
g
_inv
*
hi
g
_inv
;
xi
=
r
*
hi
g
_inv
;
hi_inv
=
1
.
0
f
/
hi
;
hi2_inv
=
hi_inv
*
hi_inv
;
xi
=
r
*
hi_inv
*
kernel_igamma
;
kernel_deval
(
xi
,
&
wi
,
&
wi_dx
);
wi_dr
=
hi
g
2_inv
*
hi
g
2_inv
*
wi_dx
;
wi_dr
=
hi2_inv
*
hi2_inv
*
wi_dx
;
/* Get the kernel for hj. */
hj
g
_inv
=
kernel_igamma
/
hj
;
hj
g
2_inv
=
hj
g
_inv
*
hj
g
_inv
;
xj
=
r
*
hj
g
_inv
;
hj_inv
=
1
.
0
f
/
hj
;
hj2_inv
=
hj_inv
*
hj_inv
;
xj
=
r
*
hj_inv
*
kernel_igamma
;
kernel_deval
(
xj
,
&
wj
,
&
wj_dx
);
wj_dr
=
hj
g
2_inv
*
hj
g
2_inv
*
wj_dx
;
wj_dr
=
hj2_inv
*
hj2_inv
*
wj_dx
;
/* Get the common factor out. */
w
=
ri
*
(
pi
->
POrho2
*
wi_dr
+
pj
->
POrho2
*
wj_dr
);
...
...
@@ -547,6 +551,10 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
/* Get the time derivative for h. */
pi
->
h_dt
-=
pj
->
mass
/
pj
->
rho
*
dvdr
*
wi_dr
;
/* Update the signal velocity (this is always symmetrical). */
pi
->
v_sig
=
fmaxf
(
pi
->
v_sig
,
pj
->
c
-
3
*
dvdr
);
pj
->
v_sig
=
fmaxf
(
pj
->
v_sig
,
pi
->
c
-
3
*
dvdr
);
}
...
...
@@ -557,8 +565,8 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
vector
r
,
r2
,
ri
;
vector
xi
,
xj
;
vector
hi
,
hj
,
hi_inv
,
hj_inv
;
vector
hig_inv
,
hi
g
2_inv
;
vector
hjg_inv
,
hj
g
2_inv
;
vector
hig_inv
,
hi2_inv
;
vector
hjg_inv
,
hj2_inv
;
vector
wi
,
wj
,
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
dvdr
;
vector
w
;
vector
piPOrho2
,
pjPOrho2
,
pjrho
;
...
...
@@ -609,20 +617,20 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
hi_inv
.
v
=
vec_rcp
(
hi
.
v
);
hi_inv
.
v
=
hi_inv
.
v
-
hi_inv
.
v
*
(
hi
.
v
*
hi_inv
.
v
-
vec_set1
(
1
.
0
f
)
);
hig_inv
.
v
=
vec_set1
(
kernel_igamma
)
*
hi_inv
.
v
;
hi
g
2_inv
.
v
=
hi
g
_inv
.
v
*
hi
g
_inv
.
v
;
hi2_inv
.
v
=
hi_inv
.
v
*
hi_inv
.
v
;
xi
.
v
=
r
.
v
*
hig_inv
.
v
;
kernel_deval_vec
(
&
xi
,
&
wi
,
&
wi_dx
);
wi_dr
.
v
=
hi
g
2_inv
.
v
*
hi
g
2_inv
.
v
*
wi_dx
.
v
;
wi_dr
.
v
=
hi2_inv
.
v
*
hi2_inv
.
v
*
wi_dx
.
v
;
/* Get the kernel for hj. */
hj
.
v
=
vec_load
(
Hj
);
hj_inv
.
v
=
vec_rcp
(
hj
.
v
);
hj_inv
.
v
=
hj_inv
.
v
-
hj_inv
.
v
*
(
hj
.
v
*
hj_inv
.
v
-
vec_set1
(
1
.
0
f
)
);
hjg_inv
.
v
=
vec_set1
(
kernel_igamma
)
*
hj_inv
.
v
;
hj
g
2_inv
.
v
=
hj
g
_inv
.
v
*
hj
g
_inv
.
v
;
hj2_inv
.
v
=
hj_inv
.
v
*
hj_inv
.
v
;
xj
.
v
=
r
.
v
*
hjg_inv
.
v
;
kernel_deval_vec
(
&
xj
,
&
wj
,
&
wj_dx
);
wj_dr
.
v
=
hj
g
2_inv
.
v
*
hj
g
2_inv
.
v
*
wj_dx
.
v
;
wj_dr
.
v
=
hj2_inv
.
v
*
hj2_inv
.
v
*
wj_dx
.
v
;
/* Get the common factor out. */
w
.
v
=
ri
.
v
*
(
piPOrho2
.
v
*
wi_dr
.
v
+
pjPOrho2
.
v
*
wj_dr
.
v
);
...
...
src/space.c
View file @
6f6f3f54
...
...
@@ -86,28 +86,31 @@ void space_map_prepare ( struct cell *c , void *data ) {
int
k
;
float
dt_min
,
dt_max
,
h_max
,
dx_max
;
struct
part
*
p
;
struct
xpart
*
xp
;
/* No children? */
if
(
!
c
->
split
)
{
/* Init with first part. */
p
=
&
c
->
parts
[
0
];
xp
=
p
->
xtras
;
dt_min
=
p
->
dt
;
dt_max
=
p
->
dt
;
h_max
=
p
->
h
;
dx_max
=
sqrtf
(
(
p
->
x
[
0
]
-
p
->
x_old
[
0
])
*
(
p
->
x
[
0
]
-
p
->
x_old
[
0
])
+
(
p
->
x
[
1
]
-
p
->
x_old
[
1
])
*
(
p
->
x
[
1
]
-
p
->
x_old
[
1
])
+
(
p
->
x
[
2
]
-
p
->
x_old
[
2
])
*
(
p
->
x
[
2
]
-
p
->
x_old
[
2
])
)
*
2
+
p
->
h
;
dx_max
=
sqrtf
(
(
p
->
x
[
0
]
-
x
p
->
x_old
[
0
])
*
(
p
->
x
[
0
]
-
x
p
->
x_old
[
0
])
+
(
p
->
x
[
1
]
-
x
p
->
x_old
[
1
])
*
(
p
->
x
[
1
]
-
x
p
->
x_old
[
1
])
+
(
p
->
x
[
2
]
-
x
p
->
x_old
[
2
])
*
(
p
->
x
[
2
]
-
x
p
->
x_old
[
2
])
)
*
2
+
p
->
h
;
/* Loop over parts. */
for
(
k
=
1
;
k
<
c
->
count
;
k
++
)
{
p
=
&
c
->
parts
[
k
];
xp
=
p
->
xtras
;
dt_min
=
fminf
(
dt_min
,
p
->
dt
);
dt_max
=
fmaxf
(
dt_max
,
p
->
dt
);
h_max
=
fmaxf
(
h_max
,
p
->
h
);
dx_max
=
fmaxf
(
dx_max
,
sqrtf
(
(
p
->
x
[
0
]
-
p
->
x_old
[
0
])
*
(
p
->
x
[
0
]
-
p
->
x_old
[
0
])
+
(
p
->
x
[
1
]
-
p
->
x_old
[
1
])
*
(
p
->
x
[
1
]
-
p
->
x_old
[
1
])
+
(
p
->
x
[
2
]
-
p
->
x_old
[
2
])
*
(
p
->
x
[
2
]
-
p
->
x_old
[
2
])
)
*
2
+
p
->
h
);
dx_max
=
fmaxf
(
dx_max
,
sqrtf
(
(
p
->
x
[
0
]
-
x
p
->
x_old
[
0
])
*
(
p
->
x
[
0
]
-
x
p
->
x_old
[
0
])
+
(
p
->
x
[
1
]
-
x
p
->
x_old
[
1
])
*
(
p
->
x
[
1
]
-
x
p
->
x_old
[
1
])
+
(
p
->
x
[
2
]
-
x
p
->
x_old
[
2
])
*
(
p
->
x
[
2
]
-
x
p
->
x_old
[
2
])
)
*
2
+
p
->
h
);
}
}
...
...
@@ -161,10 +164,10 @@ void space_prepare ( struct space *s ) {
int
k
;
struct
task
*
t
;
float
dt_
max
=
s
->
dt_
max
,
dx_max
=
0
.
0
f
;
float
dt_
step
=
s
->
dt_
step
,
dx_max
=
0
.
0
f
;
int
counts
[
task_type_count
+
1
];
/* Traverse the cells and set their dt_min and d
t
_max. */
/* Traverse the cells and set their dt_min and d
x
_max. */
space_map_cells_post
(
s
,
1
,
&
space_map_prepare
,
NULL
);
/* Get the maximum displacement in the whole system. */
...
...
@@ -179,9 +182,9 @@ void space_prepare ( struct space *s ) {
t
->
type
==
task_type_self
||
t
->
type
==
task_type_ghost
||
(
t
->
type
==
task_type_sub
&&
t
->
cj
==
NULL
)
)
t
->
skip
=
(
t
->
ci
->
dt_min
>
dt_
max
);
t
->
skip
=
(
t
->
ci
->
dt_min
>
dt_
step
);
else
if
(
t
->
type
==
task_type_pair
||
(
t
->
type
==
task_type_sub
&&
t
->
cj
!=
NULL
)
)
{