Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
6f6f3f54
Commit
6f6f3f54
authored
Feb 12, 2013
by
Pedro Gonnet
Browse files
several fixes to dopair2 and doself2 for multiple time-stepping.
Former-commit-id: fa77a1b6804434b679dc88ffed39f6b96ceaa5f4
parent
3f62f44a
Changes
10
Expand all
Hide whitespace changes
Inline
Side-by-side
src/Makefile.am
View file @
6f6f3f54
...
@@ -20,11 +20,11 @@
...
@@ -20,11 +20,11 @@
AUTOMAKE_OPTIONS
=
gnu
AUTOMAKE_OPTIONS
=
gnu
# Add the debug flag to the whole thing
# Add the debug flag to the whole thing
# AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \
AM_CFLAGS
=
-g
-O3
-Wall
-Werror
-ffast-math
-fstrict-aliasing
-ftree-vectorize
\
# -funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) \
-funroll-loops
$(SIMD_FLAGS)
$(OPENMP_CFLAGS)
\
# -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
AM_CFLAGS
=
-Wall
-Werror
$(OPENMP_CFLAGS)
\
-DTIMER
-DCOUNTER
-DCPU_TPS
=
2.67e9
-DTIMER
-DCOUNTER
-DCPU_TPS
=
2.67e9
# AM_CFLAGS = -Wall -Werror $(OPENMP_CFLAGS) \
# -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
# Assign a "safe" version number
# Assign a "safe" version number
AM_LDFLAGS
=
$(LAPACK_LIBS)
$(BLAS_LIBS)
$(HDF5_LDFLAGS)
-version-info
0:0:0
AM_LDFLAGS
=
$(LAPACK_LIBS)
$(BLAS_LIBS)
$(HDF5_LDFLAGS)
-version-info
0:0:0
...
...
src/debug.c
View file @
6f6f3f54
/*******************************************************************************
/*******************************************************************************
* This file is part of SWIFT.
* This file is part of SWIFT.
* Coypright (c) 2012 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
* Coypright (c) 2013 Matthieu Schaller (matthieu.schaller@durham.ac.uk),
* Pedro Gonnet (pedro.gonnet@durham.ac.uk).
*
*
* This program is free software: you can redistribute it and/or modify
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* it under the terms of the GNU Lesser General Public License as published
...
@@ -29,7 +30,7 @@ void printParticle ( struct part *parts , long long int id ) {
...
@@ -29,7 +30,7 @@ void printParticle ( struct part *parts , long long int id ) {
/* Look for the particle. */
/* Look for the particle. */
for
(
i
=
0
;
parts
[
i
].
id
!=
id
;
i
++
);
for
(
i
=
0
;
parts
[
i
].
id
!=
id
;
i
++
);
printf
(
"## Particle[%d]: id=%lld, x=
(
%f,%f,%f
)
, v=
(
%f,%f,%f
)
, a=
(
%f,%f,%f
)
, h=%
f
, h_dt=%
f
, wcount=%f, m=%
f
, rho=%f, u=%f, dudt=%f, dt=%.3e
\n
"
,
printf
(
"## Particle[%d]: id=%lld, x=
[
%f,%f,%f
]
, v=
[
%f,%f,%f
]
, a=
[
%f,%f,%f
]
, h=%
.3e
, h_dt=%
.3e
, wcount=%f, m=%
.3e
, rho=%f, u=%f, dudt=%f, dt=%.3e
\n
"
,
i
,
i
,
parts
[
i
].
id
,
parts
[
i
].
id
,
parts
[
i
].
x
[
0
],
parts
[
i
].
x
[
1
],
parts
[
i
].
x
[
2
],
parts
[
i
].
x
[
0
],
parts
[
i
].
x
[
1
],
parts
[
i
].
x
[
2
],
...
...
src/engine.c
View file @
6f6f3f54
...
@@ -66,7 +66,7 @@ void engine_prepare ( struct engine *e ) {
...
@@ -66,7 +66,7 @@ void engine_prepare ( struct engine *e ) {
int
j
,
k
,
qid
;
int
j
,
k
,
qid
;
struct
space
*
s
=
e
->
s
;
struct
space
*
s
=
e
->
s
;
struct
queue
*
q
;
struct
queue
*
q
;
float
dt_
max
=
e
->
dt_
max
;
float
dt_
step
=
e
->
dt_
step
;
TIMER_TIC
TIMER_TIC
...
@@ -95,7 +95,7 @@ void engine_prepare ( struct engine *e ) {
...
@@ -95,7 +95,7 @@ void engine_prepare ( struct engine *e ) {
// tic = getticks();
// tic = getticks();
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for
(
k
=
0
;
k
<
s
->
nr_parts
;
k
++
)
for
(
k
=
0
;
k
<
s
->
nr_parts
;
k
++
)
if
(
s
->
parts
[
k
].
dt
<=
dt_
max
)
{
if
(
s
->
parts
[
k
].
dt
<=
dt_
step
)
{
s
->
parts
[
k
].
wcount
=
0
.
0
f
;
s
->
parts
[
k
].
wcount
=
0
.
0
f
;
s
->
parts
[
k
].
wcount_dh
=
0
.
0
f
;
s
->
parts
[
k
].
wcount_dh
=
0
.
0
f
;
s
->
parts
[
k
].
rho
=
0
.
0
f
;
s
->
parts
[
k
].
rho
=
0
.
0
f
;
...
@@ -179,71 +179,70 @@ void engine_step ( struct engine *e , int sort_queues ) {
...
@@ -179,71 +179,70 @@ void engine_step ( struct engine *e , int sort_queues ) {
int
k
,
nr_parts
=
e
->
s
->
nr_parts
;
int
k
,
nr_parts
=
e
->
s
->
nr_parts
;
struct
part
*
restrict
parts
=
e
->
s
->
parts
,
*
restrict
p
;
struct
part
*
restrict
parts
=
e
->
s
->
parts
,
*
restrict
p
;
floa
t
*
restrict
v_bar
;
struct
xpar
t
*
restrict
xp
;
float
dt
=
e
->
dt
,
hdt
=
0
.
5
*
dt
,
dt_max
,
dt_min
,
ldt_min
,
ldt_max
;
float
dt
=
e
->
dt
,
hdt
=
0
.
5
*
dt
,
dt_step
,
dt_max
,
dt_min
,
ldt_min
,
ldt_max
;
double
e
t
ot
=
0
.
0
,
le
t
ot
,
lmom
[
3
],
mom
[
3
]
=
{
0
.
0
,
0
.
0
,
0
.
0
};
double
e
p
ot
=
0
.
0
,
ekin
=
0
.
0
,
le
p
ot
,
lekin
,
lmom
[
3
],
mom
[
3
]
=
{
0
.
0
,
0
.
0
,
0
.
0
};
int
threadID
,
nthreads
;
int
threadID
,
nthreads
,
count
=
0
,
lcount
;
// #ifdef __SSE2__
// #ifdef __SSE2__
// VEC_MACRO(4,float) hdtv = _mm_set1_ps( hdt );
// VEC_MACRO(4,float) hdtv = _mm_set1_ps( hdt );
// #endif
// #endif
/* Get the maximum dt. */
/* Get the maximum dt. */
dt_
max
=
2
.
0
f
*
dt
;
dt_
step
=
2
.
0
f
*
dt
;
for
(
k
=
0
;
k
<
32
&&
(
e
->
step
&
(
1
<<
k
))
==
0
;
k
++
)
for
(
k
=
0
;
k
<
32
&&
(
e
->
step
&
(
1
<<
k
))
==
0
;
k
++
)
dt_
max
*=
2
;
dt_
step
*=
2
;
dt_max
=
1
;
// dt_step = FLT_MAX
;
/* Set the maximum dt. */
/* Set the maximum dt. */
e
->
dt_
max
=
dt_
max
;
e
->
dt_
step
=
dt_
step
;
e
->
s
->
dt_
max
=
dt_
max
;
e
->
s
->
dt_
step
=
dt_
step
;
printf
(
"engine_step: dt_
max
set to %.3e.
\n
"
,
dt_
max
);
fflush
(
stdout
);
printf
(
"engine_step: dt_
step
set to %.3e.
\n
"
,
dt_
step
);
fflush
(
stdout
);
/* Allocate a buffer for the old velocities. */
if
(
posix_memalign
(
(
void
**
)
&
v_bar
,
16
,
sizeof
(
float
)
*
nr_parts
*
4
)
!=
0
)
error
(
"Failed to allocate v_old buffer."
);
/* First kick. */
/* First kick. */
TIMER_TIC
TIMER_TIC
//
#pragma omp parallel for schedule(static) private(p)
#pragma omp parallel for schedule(static) private(p
,xp
)
for
(
k
=
0
;
k
<
nr_parts
;
k
++
)
{
for
(
k
=
0
;
k
<
nr_parts
;
k
++
)
{
/* Get a handle on the part. */
/* Get a handle on the part. */
p
=
&
parts
[
k
];
p
=
&
parts
[
k
];
xp
=
p
->
xtras
;
/* Step and store the velocity and internal energy. */
/* Step and store the velocity and internal energy. */
// #ifdef __SSE__
// #ifdef __SSE__
// _mm_store_ps( &v_bar[4*k] ,
_mm_add_ps(
_mm_load_ps( &p->v[0] )
, _mm_mul_ps(
hdtv
,
_mm_load_ps( &p->a[0] )
) )
);
// _mm_store_ps( &v_bar[4*k] , _mm_load_ps( &p->v[0] )
+
hdtv
*
_mm_load_ps( &p->a[0] ) );
// #else
// #else
v_bar
[
4
*
k
+
0
]
=
p
->
v
[
0
]
+
hdt
*
p
->
a
[
0
];
xp
->
v_old
[
0
]
=
p
->
v
[
0
]
+
hdt
*
p
->
a
[
0
];
v_bar
[
4
*
k
+
1
]
=
p
->
v
[
1
]
+
hdt
*
p
->
a
[
1
];
xp
->
v_old
[
1
]
=
p
->
v
[
1
]
+
hdt
*
p
->
a
[
1
];
v_bar
[
4
*
k
+
2
]
=
p
->
v
[
2
]
+
hdt
*
p
->
a
[
2
];
xp
->
v_old
[
2
]
=
p
->
v
[
2
]
+
hdt
*
p
->
a
[
2
];
// #endif
// #endif
v_bar
[
4
*
k
+
3
]
=
p
->
u
+
hdt
*
p
->
u_dt
;
// xp->u_old = fmaxf( p->u + hdt * p->u_dt , FLT_EPSILON );
xp
->
u_old
=
p
->
u
+
hdt
*
p
->
u_dt
;
/* Move the particles with the velocitie at the half-step. */
/* Move the particles with the velocitie at the half-step. */
p
->
x
[
0
]
+=
dt
*
v_bar
[
4
*
k
+
0
];
p
->
x
[
0
]
+=
dt
*
xp
->
v_old
[
0
];
p
->
x
[
1
]
+=
dt
*
v_bar
[
4
*
k
+
1
];
p
->
x
[
1
]
+=
dt
*
xp
->
v_old
[
1
];
p
->
x
[
2
]
+=
dt
*
v_bar
[
4
*
k
+
2
];
p
->
x
[
2
]
+=
dt
*
xp
->
v_old
[
2
];
/* Update positions and energies at the half-step. */
/* Update positions and energies at the half-step. */
p
->
v
[
0
]
+=
dt
*
p
->
a
[
0
];
p
->
v
[
0
]
+=
dt
*
p
->
a
[
0
];
p
->
v
[
1
]
+=
dt
*
p
->
a
[
1
];
p
->
v
[
1
]
+=
dt
*
p
->
a
[
1
];
p
->
v
[
2
]
+=
dt
*
p
->
a
[
2
];
p
->
v
[
2
]
+=
dt
*
p
->
a
[
2
];
p
->
u
*=
expf
(
p
->
u_dt
/
p
->
u
*
dt
);
p
->
u
*=
expf
(
p
->
u_dt
/
p
->
u
*
dt
);
//
p->h *= expf( p->h_dt / p->h * dt );
p
->
h
*=
expf
(
p
->
h_dt
/
p
->
h
*
dt
);
/* Integrate other values if this particle will not be updated. */
/* Integrate other values if this particle will not be updated. */
//
if ( p->dt > dt_
max
) {
if
(
p
->
dt
>
dt_
step
)
{
//
p->rho *= expf( -3.0f * p->h_dt / p->h * dt );
p
->
rho
*=
expf
(
-
3
.
0
f
*
p
->
h_dt
/
p
->
h
*
dt
);
//
p->POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f );
p
->
POrho2
=
p
->
u
*
(
const_gamma
-
1
.
0
f
)
/
(
p
->
rho
+
p
->
h
*
p
->
rho_dh
/
3
.
0
f
);
//
}
}
}
}
TIMER_TOC
(
timer_kick1
);
TIMER_TOC
(
timer_kick1
);
// for(k=0; k<10; ++k)
// for(k=0; k<10; ++k)
// printParticle(parts, k);
// printParticle(parts, k);
// printParticle( parts , 494849 );
/* Prepare the space. */
/* Prepare the space. */
engine_prepare
(
e
);
engine_prepare
(
e
);
...
@@ -274,44 +273,52 @@ void engine_step ( struct engine *e , int sort_queues ) {
...
@@ -274,44 +273,52 @@ void engine_step ( struct engine *e , int sort_queues ) {
// for(k=0; k<10; ++k)
// for(k=0; k<10; ++k)
// printParticle(parts, k);
// printParticle(parts, k);
// printParticle( parts , 494849 );
/* Second kick. */
/* Second kick. */
TIMER_TIC_ND
TIMER_TIC_ND
dt_min
=
FLT_MAX
;
dt_max
=
0
.
0
f
;
dt_min
=
FLT_MAX
;
dt_max
=
0
.
0
f
;
#pragma omp parallel private(p,k,ldt_min,ldt_max,lmom,le
t
ot,threadID,nthreads)
#pragma omp parallel private(p,
xp,
k,ldt_min,
lcount,
ldt_max,lmom,le
kin,lep
ot,threadID,nthreads)
{
{
threadID
=
omp_get_thread_num
();
threadID
=
omp_get_thread_num
();
nthreads
=
omp_get_num_threads
();
nthreads
=
omp_get_num_threads
();
ldt_min
=
FLT_MAX
;
ldt_max
=
0
.
0
f
;
ldt_min
=
FLT_MAX
;
ldt_max
=
0
.
0
f
;
lmom
[
0
]
=
0
.
0
;
lmom
[
1
]
=
0
.
0
;
lmom
[
2
]
=
0
.
0
;
lmom
[
0
]
=
0
.
0
;
lmom
[
1
]
=
0
.
0
;
lmom
[
2
]
=
0
.
0
;
le
tot
=
0
.
0
;
le
kin
=
0
.
0
;
lepot
=
0
.
0
;
lcount
=
0
;
for
(
k
=
nr_parts
*
threadID
/
nthreads
;
k
<
nr_parts
*
(
threadID
+
1
)
/
nthreads
;
k
++
)
{
for
(
k
=
nr_parts
*
threadID
/
nthreads
;
k
<
nr_parts
*
(
threadID
+
1
)
/
nthreads
;
k
++
)
{
/* Get a handle on the part. */
/* Get a handle on the part. */
p
=
&
parts
[
k
];
p
=
&
parts
[
k
];
xp
=
p
->
xtras
;
/* Scale the derivatives if they're freshly computed. */
/* Scale the derivatives if they're freshly computed. */
if
(
p
->
dt
<=
dt_
max
)
{
if
(
p
->
dt
<=
dt_
step
)
{
p
->
u_dt
*=
p
->
POrho2
;
p
->
u_dt
*=
p
->
POrho2
;
p
->
h_dt
*=
p
->
h
*
0
.
333333333
f
;
p
->
h_dt
*=
p
->
h
*
0
.
333333333
f
;
lcount
+=
1
;
}
}
/* Update the particle's time step. */
p
->
dt
=
const_cfl
*
p
->
h
/
(
p
->
c
+
p
->
v_sig
);
/* Update positions and energies at the half-step. */
/* Update positions and energies at the half-step. */
// #ifdef __SSE__
// #ifdef __SSE__
// _mm_store_ps( &p->v[0] ,
_mm_add_ps(
_mm_load_ps( &v_bar[4*k] )
, _mm_mul_ps(
hdtv
,
_mm_load_ps( &p->a[0] )
) )
);
// _mm_store_ps( &p->v[0] , _mm_load_ps( &v_bar[4*k] )
+
hdtv
*
_mm_load_ps( &p->a[0] ) );
// #else
// #else
p
->
v
[
0
]
=
v_bar
[
4
*
k
+
0
]
+
hdt
*
p
->
a
[
0
];
p
->
v
[
0
]
=
xp
->
v_old
[
0
]
+
hdt
*
p
->
a
[
0
];
p
->
v
[
1
]
=
v_bar
[
4
*
k
+
1
]
+
hdt
*
p
->
a
[
1
];
p
->
v
[
1
]
=
xp
->
v_old
[
1
]
+
hdt
*
p
->
a
[
1
];
p
->
v
[
2
]
=
v_bar
[
4
*
k
+
2
]
+
hdt
*
p
->
a
[
2
];
p
->
v
[
2
]
=
xp
->
v_old
[
2
]
+
hdt
*
p
->
a
[
2
];
// #endif
// #endif
p
->
u
=
v_bar
[
4
*
k
+
3
]
+
hdt
*
p
->
u_dt
;
// p->u = fmaxf( xp->u_old + hdt * p->u_dt , FLT_EPSILON );
p
->
u
=
xp
->
u_old
+
hdt
*
p
->
u_dt
;
/* Get the smallest/largest dt. */
/* Get the smallest/largest dt. */
ldt_min
=
fminf
(
ldt_min
,
p
->
dt
);
ldt_min
=
fminf
(
ldt_min
,
p
->
dt
);
ldt_max
=
fmaxf
(
ldt_max
,
p
->
dt
);
ldt_max
=
fmaxf
(
ldt_max
,
p
->
dt
);
/* Collect total energy. */
/* Collect total energy. */
letot
+=
0
.
5
*
p
->
mass
*
(
p
->
v
[
0
]
*
p
->
v
[
0
]
+
p
->
v
[
1
]
*
p
->
v
[
1
]
+
p
->
v
[
2
]
*
p
->
v
[
2
]
)
+
p
->
mass
*
p
->
u
;
lekin
+=
0
.
5
*
p
->
mass
*
(
p
->
v
[
0
]
*
p
->
v
[
0
]
+
p
->
v
[
1
]
*
p
->
v
[
1
]
+
p
->
v
[
2
]
*
p
->
v
[
2
]
);
lepot
+=
p
->
mass
*
p
->
u
;
/* Collect momentum */
/* Collect momentum */
lmom
[
0
]
+=
p
->
mass
*
p
->
v
[
0
];
lmom
[
0
]
+=
p
->
mass
*
p
->
v
[
0
];
...
@@ -326,31 +333,33 @@ void engine_step ( struct engine *e , int sort_queues ) {
...
@@ -326,31 +333,33 @@ void engine_step ( struct engine *e , int sort_queues ) {
mom
[
0
]
+=
lmom
[
0
];
mom
[
0
]
+=
lmom
[
0
];
mom
[
1
]
+=
lmom
[
1
];
mom
[
1
]
+=
lmom
[
1
];
mom
[
2
]
+=
lmom
[
2
];
mom
[
2
]
+=
lmom
[
2
];
etot
+=
letot
;
epot
+=
lepot
;
ekin
+=
lekin
;
count
+=
lcount
;
}
}
}
}
TIMER_TOC
(
timer_kick2
);
TIMER_TOC
(
timer_kick2
);
e
->
dt_min
=
dt_min
;
e
->
dt_min
=
dt_min
;
printf
(
"engine_step: dt_min/dt_max is %e/%e.
\n
"
,
dt_min
,
dt_max
);
fflush
(
stdout
);
printf
(
"engine_step: dt_min/dt_max is %e/%e.
\n
"
,
dt_min
,
dt_max
);
fflush
(
stdout
);
printf
(
"engine_step: etot is %e
.
\n
"
,
et
ot
);
fflush
(
stdout
);
printf
(
"engine_step: etot is %e
(ekin=%e, epot=%e).
\n
"
,
ekin
+
epot
,
ekin
,
ep
ot
);
fflush
(
stdout
);
printf
(
"engine_step: total momentum is [ %e , %e , %e ].
\n
"
,
mom
[
0
]
,
mom
[
1
]
,
mom
[
2
]
);
fflush
(
stdout
);
printf
(
"engine_step: total momentum is [ %e , %e , %e ].
\n
"
,
mom
[
0
]
,
mom
[
1
]
,
mom
[
2
]
);
fflush
(
stdout
);
printf
(
"engine_step: updated %i parts (dt_step=%.3e).
\n
"
,
count
,
dt_step
);
fflush
(
stdout
);
/* Increase the step counter. */
e
->
step
+=
1
;
/* Does the time step need adjusting? */
/* Does the time step need adjusting? */
if
(
dt_min
<
e
->
dt
)
{
while
(
dt_min
<
e
->
dt
)
{
e
->
dt
*=
0
.
5
;
e
->
dt
*=
0
.
5
;
e
->
step
*=
2
;
printf
(
"engine_step: dt_min dropped below time step, adjusting to dt=%e.
\n
"
,
e
->
dt
);
printf
(
"engine_step: dt_min dropped below time step, adjusting to dt=%e.
\n
"
,
e
->
dt
);
}
}
else
if
(
dt_min
>
2
*
e
->
dt
)
{
while
(
dt_min
>
2
*
e
->
dt
&&
(
e
->
step
&
1
)
==
0
)
{
e
->
dt
*=
2
.
0
;
e
->
dt
*=
2
.
0
;
e
->
step
/=
2
;
printf
(
"engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.
\n
"
,
e
->
dt
);
printf
(
"engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.
\n
"
,
e
->
dt
);
}
}
/* Clean up. */
free
(
v_bar
);
/* Increase the step counter. */
e
->
step
+=
1
;
}
}
...
...
src/engine.h
View file @
6f6f3f54
...
@@ -50,8 +50,10 @@ struct engine {
...
@@ -50,8 +50,10 @@ struct engine {
/* The queues. */
/* The queues. */
struct
queue
*
queues
;
struct
queue
*
queues
;
/* The maximum dt to step. */
/* The maximum dt to step (current). */
float
dt_max
;
float
dt_step
;
/* The minimum dt over all particles in the system. */
float
dt_min
;
float
dt_min
;
/* The system time step. */
/* The system time step. */
...
...
src/part.h
View file @
6f6f3f54
...
@@ -38,6 +38,21 @@ struct cpart {
...
@@ -38,6 +38,21 @@ struct cpart {
}
__attribute__
((
aligned
(
32
)));
}
__attribute__
((
aligned
(
32
)));
/* Extra particle data not needed during the computation. */
struct
xpart
{
/* Old position, at last tree rebuild. */
double
x_old
[
3
];
/* Old velocity. */
float
v_old
[
3
];
/* Old entropy. */
float
u_old
;
}
__attribute__
((
aligned
(
32
)));
/* Data of a single particle. */
/* Data of a single particle. */
struct
part
{
struct
part
{
...
@@ -66,6 +81,9 @@ struct part {
...
@@ -66,6 +81,9 @@ struct part {
/* Particle acceleration. */
/* Particle acceleration. */
float
a
[
3
]
__attribute__
((
aligned
(
16
)));
float
a
[
3
]
__attribute__
((
aligned
(
16
)));
/* Maximum neighbouring u. */
float
c
,
v_sig
;
/* Derivative of the density with respect to this particle's smoothing length. */
/* Derivative of the density with respect to this particle's smoothing length. */
float
rho_dh
;
float
rho_dh
;
...
@@ -80,8 +98,8 @@ struct part {
...
@@ -80,8 +98,8 @@ struct part {
/* Particle ID. */
/* Particle ID. */
unsigned
long
long
id
;
unsigned
long
long
id
;
/*
Old position, at last tree rebuild
. */
/*
Pointer to extra particle data
. */
double
x_old
[
3
]
;
struct
xpart
*
xtras
;
/* Particle position. */
/* Particle position. */
double
x
[
3
];
double
x
[
3
];
...
...
src/runner.c
View file @
6f6f3f54
...
@@ -329,8 +329,8 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
...
@@ -329,8 +329,8 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
struct
cell
*
finger
;
struct
cell
*
finger
;
int
i
,
k
,
redo
,
count
=
c
->
count
;
int
i
,
k
,
redo
,
count
=
c
->
count
;
int
*
pid
;
int
*
pid
;
float
ihg
,
ihg2
;
float
ihg
,
ihg2
,
h_corr
;
float
dt_
max
=
r
->
e
->
dt_
max
;
float
dt_
step
=
r
->
e
->
dt_
step
;
TIMER_TIC
TIMER_TIC
/* Recurse? */
/* Recurse? */
...
@@ -361,7 +361,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
...
@@ -361,7 +361,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
cp
=
&
c
->
cparts
[
pid
[
i
]
];
cp
=
&
c
->
cparts
[
pid
[
i
]
];
/* Is this part within the timestep? */
/* Is this part within the timestep? */
if
(
cp
->
dt
<=
dt_
max
)
{
if
(
cp
->
dt
<=
dt_
step
)
{
/* Adjust the computed rho. */
/* Adjust the computed rho. */
ihg
=
kernel_igamma
/
p
->
h
;
ihg
=
kernel_igamma
/
p
->
h
;
...
@@ -370,8 +370,15 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
...
@@ -370,8 +370,15 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
p
->
rho_dh
*=
ihg2
*
ihg2
;
p
->
rho_dh
*=
ihg2
*
ihg2
;
p
->
wcount
+=
kernel_wroot
;
p
->
wcount
+=
kernel_wroot
;
/* Update the smoothing length. */
/* Compute the smoothing length update (Newton step). */
p
->
h
-=
(
p
->
wcount
-
const_nwneigh
)
/
p
->
wcount_dh
;
h_corr
=
(
const_nwneigh
-
p
->
wcount
)
/
p
->
wcount_dh
;
/* Truncate to the range [ -p->h/2 , p->h ]. */
h_corr
=
fminf
(
h_corr
,
p
->
h
);
h_corr
=
fmaxf
(
h_corr
,
-
p
->
h
/
2
);
/* Apply the correction to p->h. */
p
->
h
+=
h_corr
;
cp
->
h
=
p
->
h
;
cp
->
h
=
p
->
h
;
/* Did we get the right number density? */
/* Did we get the right number density? */
...
@@ -389,8 +396,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
...
@@ -389,8 +396,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
}
}
/* Compute this particle's time step. */
/* Compute this particle's time step. */
p
->
dt
=
const_cfl
*
p
->
h
/
sqrtf
(
const_gamma
*
(
const_gamma
-
1
.
0
f
)
*
p
->
u
);
p
->
c
=
sqrtf
(
const_gamma
*
(
const_gamma
-
1
.
0
f
)
*
p
->
u
);
cp
->
dt
=
p
->
dt
;
/* Compute the pressure. */
/* Compute the pressure. */
// p->P = p->rho * p->u * ( const_gamma - 1.0f );
// p->P = p->rho * p->u * ( const_gamma - 1.0f );
...
@@ -405,6 +411,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
...
@@ -405,6 +411,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Reset the time derivatives. */
/* Reset the time derivatives. */
p
->
u_dt
=
0
.
0
f
;
p
->
u_dt
=
0
.
0
f
;
p
->
h_dt
=
0
.
0
f
;
p
->
h_dt
=
0
.
0
f
;
p
->
v_sig
=
0
.
0
f
;
}
}
...
...
src/runner_doiact.h
View file @
6f6f3f54
This diff is collapsed.
Click to expand it.
src/runner_iact.h
View file @
6f6f3f54
...
@@ -329,26 +329,26 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
...
@@ -329,26 +329,26 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
float
r
=
sqrtf
(
r2
),
ri
=
1
.
0
f
/
r
;
float
r
=
sqrtf
(
r2
),
ri
=
1
.
0
f
/
r
;
float
xi
,
xj
;
float
xi
,
xj
;
float
hi
g
_inv
,
hi
g
2_inv
;
float
hi_inv
,
hi2_inv
;
float
hj
g
_inv
,
hj
g
2_inv
;
float
hj_inv
,
hj2_inv
;
float
wi
,
wj
,
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
w
,
dvdr
;
float
wi
,
wj
,
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
w
,
dvdr
;
float
f
;
float
f
;
int
k
;
int
k
;
/* Get the kernel for hi. */
/* Get the kernel for hi. */
hi
g
_inv
=
kernel_igamma
/
hi
;
hi_inv
=
1
.
0
f
/
hi
;
hi
g
2_inv
=
hi
g
_inv
*
hi
g
_inv
;
hi2_inv
=
hi_inv
*
hi_inv
;
xi
=
r
*
hi
g
_inv
;
xi
=
r
*
hi_inv
*
kernel_igamma
;
kernel_deval
(
xi
,
&
wi
,
&
wi_dx
);
kernel_deval
(
xi
,
&
wi
,
&
wi_dx
);
wi_dr
=
hi
g
2_inv
*
hi
g
2_inv
*
wi_dx
;
wi_dr
=
hi2_inv
*
hi2_inv
*
wi_dx
;
/* Get the kernel for hj. */
/* Get the kernel for hj. */
hj
g
_inv
=
kernel_igamma
/
hj
;
hj_inv
=
1
.
0
f
/
hj
;
hj
g
2_inv
=
hj
g
_inv
*
hj
g
_inv
;
hj2_inv
=
hj_inv
*
hj_inv
;
xj
=
r
*
hj
g
_inv
;
xj
=
r
*
hj_inv
*
kernel_igamma
;
kernel_deval
(
xj
,
&
wj
,
&
wj_dx
);
kernel_deval
(
xj
,
&
wj
,
&
wj_dx
);
wj_dr
=
hj
g
2_inv
*
hj
g
2_inv
*
wj_dx
;
wj_dr
=
hj2_inv
*
hj2_inv
*
wj_dx
;
/* Get the common factor out. */
/* Get the common factor out. */
w
=
ri
*
(
pi
->
POrho2
*
wi_dr
+
pj
->
POrho2
*
wj_dr
);
w
=
ri
*
(
pi
->
POrho2
*
wi_dr
+
pj
->
POrho2
*
wj_dr
);
...
@@ -370,7 +370,11 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
...
@@ -370,7 +370,11 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
/* Get the time derivative for h. */
/* Get the time derivative for h. */
pi
->
h_dt
-=
pj
->
mass
/
pj
->
rho
*
dvdr
*
wi_dr
;
pi
->
h_dt
-=
pj
->
mass
/
pj
->
rho
*
dvdr
*
wi_dr
;
pj
->
h_dt
-=
pi
->
mass
/
pi
->
rho
*
dvdr
*
wj_dr
;
pj
->
h_dt
-=
pi
->
mass
/
pi
->
rho
*
dvdr
*
wj_dr
;
/* Update the signal velocity. */
pi
->
v_sig
=
fmaxf
(
pi
->
v_sig
,
pj
->
c
-
3
*
dvdr
);
pj
->
v_sig
=
fmaxf
(
pj
->
v_sig
,
pi
->
c
-
3
*
dvdr
);
#ifdef HIST
#ifdef HIST
if
(
hi
>
hj
)
if
(
hi
>
hj
)
runner_hist_hit
(
hi
/
hj
);
runner_hist_hit
(
hi
/
hj
);
...
@@ -388,8 +392,8 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
...
@@ -388,8 +392,8 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
vector
r
,
r2
,
ri
;
vector
r
,
r2
,
ri
;
vector
xi
,
xj
;
vector
xi
,
xj
;
vector
hi
,
hj
,
hi_inv
,
hj_inv
;
vector
hi
,
hj
,
hi_inv
,
hj_inv
;
vector
hig_inv
,
hi
g
2_inv
;
vector
hig_inv
,
hi2_inv
;
vector
hjg_inv
,
hj
g
2_inv
;
vector
hjg_inv
,
hj2_inv
;
vector
wi
,
wj
,
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
dvdr
;
vector
wi
,
wj
,
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
dvdr
;
vector
w
;
vector
w
;
vector
piPOrho2
,
pjPOrho2
,
pirho
,
pjrho
;
vector
piPOrho2
,
pjPOrho2
,
pirho
,
pjrho
;
...
@@ -444,20 +448,20 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
...
@@ -444,20 +448,20 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
hi_inv
.
v
=
vec_rcp
(
hi
.
v
);
hi_inv
.
v
=
vec_rcp
(
hi
.
v
);
hi_inv
.
v
=
hi_inv
.
v
-
hi_inv
.
v
*
(
hi
.
v
*
hi_inv
.
v
-
vec_set1
(
1
.
0
f
)
);
hi_inv
.
v
=
hi_inv
.
v
-
hi_inv
.
v
*
(
hi
.
v
*
hi_inv
.
v
-
vec_set1
(
1
.
0
f
)
);
hig_inv
.
v
=
vec_set1
(
kernel_igamma
)
*
hi_inv
.
v
;
hig_inv
.
v
=
vec_set1
(
kernel_igamma
)
*
hi_inv
.
v
;
hi
g
2_inv
.
v
=
hi
g
_inv
.
v
*
hi
g
_inv
.
v
;
hi2_inv
.
v
=
hi_inv
.
v
*
hi_inv
.
v
;
xi
.
v
=
r
.
v
*
hig_inv
.
v
;
xi
.
v
=
r
.
v
*
hig_inv
.
v
;
kernel_deval_vec
(
&
xi
,
&
wi
,
&
wi_dx
);
kernel_deval_vec
(
&
xi
,
&
wi
,
&
wi_dx
);
wi_dr
.
v
=
hi
g
2_inv
.
v
*
hi
g
2_inv
.
v
*
wi_dx
.
v
;
wi_dr
.
v
=
hi2_inv
.
v
*
hi2_inv
.
v
*
wi_dx
.
v
;
/* Get the kernel for hj. */
/* Get the kernel for hj. */