Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
c969365b
Commit
c969365b
authored
Feb 07, 2013
by
Pedro Gonnet
Browse files
several bug fixes using the new test cases.
Former-commit-id: 36567220a6b658f9dbff8091561e53ed45c4bf7a
parent
cfe714f1
Changes
11
Hide whitespace changes
Inline
Side-by-side
examples/PertubedBox/makeIC.py
View file @
c969365b
...
...
@@ -32,7 +32,7 @@ L = 50 # Number of particles along one axis
rho
=
1.
# Density
P
=
1.
# Pressure
gamma
=
5.
/
3.
# Gas adiabatic index
pert
=
0.1
# Perturbation scale (in units of the interparticle separation)
pert
=
0.
0
1
# Perturbation scale (in units of the interparticle separation)
fileName
=
"perturbedBox.hdf5"
...
...
examples/UniformBox/makeIC.py
View file @
c969365b
...
...
@@ -28,7 +28,7 @@ from numpy import *
periodic
=
1
# 1 For periodic box
boxSize
=
1.
L
=
50
# Number of particles along one axis
rho
=
1
.
# Density
rho
=
2
.
# Density
P
=
1.
# Pressure
gamma
=
5.
/
3.
# Gas adiabatic index
fileName
=
"uniformBox.hdf5"
...
...
examples/test.c
View file @
c969365b
...
...
@@ -28,6 +28,7 @@
#include
<string.h>
#include
<pthread.h>
#include
<math.h>
#include
<fenv.h>
#include
<omp.h>
/* Conditional headers. */
...
...
@@ -193,6 +194,24 @@ void map_wcount_max ( struct part *p , struct cell *c , void *data ) {
}
/**
 * @brief Mapping function that keeps track of the particle with the smallest h.
 *
 * @param p The #part currently being visited.
 * @param c The #cell passed along by the mapping routine (not used here).
 * @param data Address of a struct part pointer holding the best candidate so
 *        far; it is overwritten with @c p when @c p->h is strictly smaller.
 */
void map_h_min ( struct part *p , struct cell *c , void *data ) {

    struct part **smallest = data;

    /* Replace the stored candidate only on a strict improvement. */
    if ( (*smallest)->h > p->h )
        *smallest = p;

    }
/**
 * @brief Mapping function that keeps track of the particle with the largest h.
 *
 * @param p The #part currently being visited.
 * @param c The #cell passed along by the mapping routine (not used here).
 * @param data Address of a struct part pointer holding the best candidate so
 *        far; it is overwritten with @c p when @c p->h is strictly larger.
 */
void map_h_max ( struct part *p , struct cell *c , void *data ) {

    struct part **largest = data;

    /* Replace the stored candidate only on a strict improvement. */
    if ( (*largest)->h < p->h )
        *largest = p;

    }
/**
* @brief Mapping function for neighbour count.
...
...
@@ -689,6 +708,9 @@ int main ( int argc , char *argv[] ) {
float
dt_max
=
0
.
0
f
;
ticks
tic
;
/* Choke on FP-exceptions. */
feenableexcept
(
FE_DIVBYZERO
|
FE_INVALID
|
FE_OVERFLOW
);
/* Init the space. */
bzero
(
&
s
,
sizeof
(
struct
space
)
);
...
...
@@ -862,6 +884,7 @@ int main ( int argc , char *argv[] ) {
parts
[
k
].
x
[
2
]
+=
shift
[
2
];
}
/* Dump the first few particles. */
for
(
k
=
0
;
k
<
10
;
++
k
)
printParticle
(
parts
,
k
);
...
...
@@ -910,12 +933,6 @@ int main ( int argc , char *argv[] ) {
/* Dump the particle positions. */
// space_map_parts( &s , &map_dump , shift );
/* Dump the acceleration of the first particle. */
for
(
k
=
0
;
k
<
3
;
k
++
)
{
printf
(
"main: parts[%lli].a is [ %.16e %.16e %.16e ].
\n
"
,
s
.
parts
[
k
].
id
,
s
.
parts
[
k
].
a
[
0
]
,
s
.
parts
[
k
].
a
[
1
]
,
s
.
parts
[
k
].
a
[
2
]
);
printf
(
"main: parts[%lli].a has h=%e, rho=%e, wcount=%.3f.
\n
"
,
s
.
parts
[
k
].
id
,
s
.
parts
[
k
].
h
,
s
.
parts
[
k
].
rho
,
s
.
parts
[
k
].
wcount
+
32
.
0
/
3
);
}
/* Initialize the runner with this space. */
tic
=
getticks
();
engine_init
(
&
e
,
&
s
,
nr_threads
,
nr_queues
,
engine_policy_steal
|
engine_policy_keep
);
...
...
@@ -943,6 +960,23 @@ int main ( int argc , char *argv[] ) {
/* Take a step. */
engine_step
(
&
e
,
0
);
/* Dump the first few particles. */
for
(
k
=
0
;
k
<
10
;
++
k
)
printParticle
(
parts
,
k
);
printParticle
(
parts
,
113531
);
/* Get the particle with the lowest h. */
p
=
&
s
.
parts
[
0
];
space_map_parts
(
&
s
,
&
map_h_min
,
&
p
);
printf
(
"main: particle %lli/%i at [ %e %e %e ] has minimum h=%.3e (h_dt=%.3e).
\n
"
,
p
->
id
,
(
int
)(
p
-
s
.
parts
)
,
p
->
x
[
0
]
,
p
->
x
[
1
]
,
p
->
x
[
2
]
,
p
->
h
,
p
->
h_dt
);
/* Get the particle with the highest h. */
p
=
&
s
.
parts
[
0
];
space_map_parts
(
&
s
,
&
map_h_max
,
&
p
);
printf
(
"main: particle %lli/%i at [ %e %e %e ] has maximum h=%.3e (h_dt=%.3e).
\n
"
,
p
->
id
,
(
int
)(
p
-
s
.
parts
)
,
p
->
x
[
0
]
,
p
->
x
[
1
]
,
p
->
x
[
2
]
,
p
->
h
,
p
->
h_dt
);
/* Output. */
#ifdef TIMER
printf
(
"main: runner timers are [ %.3f"
,
timers
[
0
]
/
CPU_TPS
*
1000
);
...
...
@@ -1010,11 +1044,9 @@ int main ( int argc , char *argv[] ) {
// space_map_parts( &s , &map_icount , &icount );
// printf( "main: average neighbours per particle is %.3f.\n" , (double)icount / s.nr_parts );
/* Dump the acceleration of the first particle. */
for
(
k
=
0
;
k
<
3
;
k
++
)
{
printf
(
"main: parts[%lli].a is [ %.16e %.16e %.16e ].
\n
"
,
s
.
parts
[
k
].
id
,
s
.
parts
[
k
].
a
[
0
]
,
s
.
parts
[
k
].
a
[
1
]
,
s
.
parts
[
k
].
a
[
2
]
);
printf
(
"main: parts[%lli].a has h=%e, rho=%e, wcount=%.3f.
\n
"
,
s
.
parts
[
k
].
id
,
s
.
parts
[
k
].
h
,
s
.
parts
[
k
].
rho
,
s
.
parts
[
k
].
wcount
);
}
/* Dump the first few particles. */
for
(
k
=
0
;
k
<
10
;
++
k
)
printParticle
(
parts
,
k
);
/* Get all the cells of a certain depth. */
// icount = 1;
...
...
src/Makefile.am
View file @
c969365b
...
...
@@ -20,11 +20,11 @@
AUTOMAKE_OPTIONS
=
gnu
# Add the debug flag to the whole thing
AM_CFLAGS
=
-g
-O3
-Wall
-Werror
-ffast-math
-fstrict-aliasing
-ftree-vectorize
\
-funroll-loops
$(SIMD_FLAGS)
$(OPENMP_CFLAGS)
\
-DTIMER
-DCOUNTER
-DCPU_TPS
=
2.67e9
# AM_CFLAGS = -Wall -Werror $(OPENMP_CFLAGS) \
# AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \
# -funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) \
# -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
AM_CFLAGS
=
-Wall
-Werror
$(OPENMP_CFLAGS)
\
-DTIMER
-DCOUNTER
-DCPU_TPS
=
2.67e9
# Assign a "safe" version number
AM_LDFLAGS
=
$(LAPACK_LIBS)
$(BLAS_LIBS)
$(HDF5_LDFLAGS)
-version-info
0:0:0
...
...
src/debug.c
View file @
c969365b
...
...
@@ -22,17 +22,26 @@
#include
"part.h"
void
printParticle
(
struct
part
*
parts
,
int
i
)
{
printf
(
"## Particle[%d]: id= %lld x=( %f, %f, %f) v=( %f, %f, %f) h= %f m= %f rho= %f u= %f dt= %f
\n
"
,
/**
 * @brief Print the main fields of the particle carrying a given id.
 *
 * @param parts The array of #part to search.
 * @param id The id of the particle to print.
 *
 * The array is scanned linearly from index 0 until an entry whose id matches
 * is found; that entry's index and fields are then written to stdout.
 *
 * NOTE(review): the scan has no upper bound. If no entry in @c parts has the
 * requested id, it keeps reading past the end of the array. Callers must pass
 * an id that is known to exist.
 */
void printParticle ( struct part *parts , long long int id ) {

    int idx = 0;
    struct part *p;

    /* Walk the array until the matching id turns up. */
    while ( parts[idx].id != id )
        idx += 1;
    p = &parts[idx];

    /* Dump everything on a single line. */
    printf( "## Particle[%d]: id=%lld, x=(%f,%f,%f), v=(%f,%f,%f), a=(%f,%f,%f), h=%f, h_dt=%f, wcount=%f, m=%f, rho=%f, u=%f, dudt=%f, dt=%.3e\n" ,
        idx ,
        p->id ,
        p->x[0] , p->x[1] , p->x[2] ,
        p->v[0] , p->v[1] , p->v[2] ,
        p->a[0] , p->a[1] , p->a[2] ,
        p->h ,
        p->h_dt ,
        p->wcount ,
        p->mass ,
        p->rho ,
        p->u ,
        p->u_dt ,
        p->dt );

    }
...
...
src/debug.h
View file @
c969365b
...
...
@@ -20,4 +20,4 @@
void
printParticle
(
struct
part
*
parts
,
int
i
);
/**
 * @brief Print the particle with the given id.
 *
 * @param parts The array of #part to search.
 * @param id The id of the particle to print (a particle id, not an array
 *        index).
 */
void printParticle ( struct part *parts , long long int id );
src/engine.c
View file @
c969365b
...
...
@@ -39,6 +39,7 @@
#include
"lock.h"
#include
"task.h"
#include
"part.h"
#include
"debug.h"
#include
"cell.h"
#include
"space.h"
#include
"queue.h"
...
...
@@ -65,6 +66,7 @@ void engine_prepare ( struct engine *e ) {
int
j
,
k
,
qid
;
struct
space
*
s
=
e
->
s
;
struct
queue
*
q
;
float
dt_max
=
e
->
dt_max
;
TIMER_TIC
...
...
@@ -92,12 +94,13 @@ void engine_prepare ( struct engine *e ) {
/* Re-set the particle data. */
// tic = getticks();
#pragma omp parallel for schedule(static)
for
(
k
=
0
;
k
<
s
->
nr_parts
;
k
++
)
{
s
->
parts
[
k
].
wcount
=
0
.
0
f
;
s
->
parts
[
k
].
wcount_dh
=
0
.
0
f
;
s
->
parts
[
k
].
rho
=
0
.
0
f
;
s
->
parts
[
k
].
rho_dh
=
0
.
0
f
;
}
for
(
k
=
0
;
k
<
s
->
nr_parts
;
k
++
)
if
(
s
->
parts
[
k
].
dt
<=
dt_max
)
{
s
->
parts
[
k
].
wcount
=
0
.
0
f
;
s
->
parts
[
k
].
wcount_dh
=
0
.
0
f
;
s
->
parts
[
k
].
rho
=
0
.
0
f
;
s
->
parts
[
k
].
rho_dh
=
0
.
0
f
;
}
// printf( "engine_prepare: re-setting particle data took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Run through the tasks and get all the waits right. */
...
...
@@ -177,15 +180,18 @@ void engine_step ( struct engine *e , int sort_queues ) {
int
k
,
nr_parts
=
e
->
s
->
nr_parts
;
struct
part
*
restrict
parts
=
e
->
s
->
parts
,
*
restrict
p
;
float
*
restrict
v_bar
;
float
dt
=
e
->
dt
,
hdt
=
0
.
5
*
dt
,
dt_max
;
#ifdef __SSE2__
VEC_MACRO
(
4
,
float
)
hdtv
=
_mm_set1_ps
(
hdt
);
#endif
float
dt
=
e
->
dt
,
hdt
=
0
.
5
*
dt
,
dt_max
,
dt_min
,
ldt_min
,
ldt_max
;
double
etot
=
0
.
0
,
letot
,
lmom
[
3
],
mom
[
3
]
=
{
0
.
0
,
0
.
0
,
0
.
0
};
int
threadID
,
nthreads
;
// #ifdef __SSE2__
// VEC_MACRO(4,float) hdtv = _mm_set1_ps( hdt );
// #endif
/* Get the maximum dt. */
dt_max
=
dt
;
dt_max
=
2
.
0
f
*
dt
;
for
(
k
=
0
;
k
<
32
&&
(
e
->
step
&
(
1
<<
k
))
==
0
;
k
++
)
dt_max
*=
2
;
dt_max
=
1
;
/* Set the maximum dt. */
e
->
dt_max
=
dt_max
;
...
...
@@ -198,43 +204,46 @@ void engine_step ( struct engine *e , int sort_queues ) {
/* First kick. */
TIMER_TIC
#pragma omp parallel for schedule(static) private(p)
//
#pragma omp parallel for schedule(static) private(p)
for
(
k
=
0
;
k
<
nr_parts
;
k
++
)
{
/* Get a handle on the part. */
p
=
&
parts
[
k
];
/* Step and store the velocity and internal energy. */
#ifdef __SSE__
_mm_store_ps
(
&
v_bar
[
4
*
k
]
,
_mm_add_ps
(
_mm_load_ps
(
&
p
->
v
[
0
]
)
,
_mm_mul_ps
(
hdtv
,
_mm_load_ps
(
&
p
->
a
[
0
]
)
)
)
);
#else
//
#ifdef __SSE__
//
_mm_store_ps( &v_bar[4*k] , _mm_add_ps( _mm_load_ps( &p->v[0] ) , _mm_mul_ps( hdtv , _mm_load_ps( &p->a[0] ) ) ) );
//
#else
v_bar
[
4
*
k
+
0
]
=
p
->
v
[
0
]
+
hdt
*
p
->
a
[
0
];
v_bar
[
4
*
k
+
1
]
=
p
->
v
[
1
]
+
hdt
*
p
->
a
[
1
];
v_bar
[
4
*
k
+
2
]
=
p
->
v
[
2
]
+
hdt
*
p
->
a
[
2
];
#endif
//
#endif
v_bar
[
4
*
k
+
3
]
=
p
->
u
+
hdt
*
p
->
u_dt
;
/* Move the particles with the velocities at the half-step. */
//
p->x[0] += dt * v_bar[
3
*k+0];
//
p->x[1] += dt * v_bar[
3
*k+1];
//
p->x[2] += dt * v_bar[
3
*k+2];
p
->
x
[
0
]
+=
dt
*
v_bar
[
4
*
k
+
0
];
p
->
x
[
1
]
+=
dt
*
v_bar
[
4
*
k
+
1
];
p
->
x
[
2
]
+=
dt
*
v_bar
[
4
*
k
+
2
];
/* Update positions and energies at the half-step. */
//
p->v[0] += dt * p->a[0];
//
p->v[1] += dt * p->a[1];
//
p->v[2] += dt * p->a[2];
//
p->u *= expf( p->u_dt / p->u * dt );
// p->h *= expf(
-1.0f *
p->h_dt / p->h * dt );
p
->
v
[
0
]
+=
dt
*
p
->
a
[
0
];
p
->
v
[
1
]
+=
dt
*
p
->
a
[
1
];
p
->
v
[
2
]
+=
dt
*
p
->
a
[
2
];
p
->
u
*=
expf
(
p
->
u_dt
/
p
->
u
*
dt
);
// p->h *= expf( p->h_dt / p->h * dt );
/* Integrate other values if this particle will not be updated. */
if
(
p
->
dt
>
dt_max
)
{
p
->
rho
*=
expf
(
-
3
.
0
f
*
p
->
h_dt
/
p
->
h
*
dt
);
p
->
POrho2
=
p
->
u
*
(
const_gamma
-
1
.
0
f
)
/
(
p
->
rho
+
p
->
h
*
p
->
rho_dh
/
3
.
0
f
);
}
//
if ( p->dt > dt_max ) {
//
p->rho *= expf( -3.0f * p->h_dt / p->h * dt );
//
p->POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f );
//
}
}
TIMER_TOC
(
timer_kick1
);
// for(k=0; k<10; ++k)
// printParticle(parts, k);
/* Prepare the space. */
engine_prepare
(
e
);
...
...
@@ -263,43 +272,79 @@ void engine_step ( struct engine *e , int sort_queues ) {
/* Stop the clock. */
TIMER_TOC
(
timer_step
);
// for(k=0; k<10; ++k)
// printParticle(parts, k);
/* Second kick. */
TIMER_TIC_ND
e
->
dt_min
=
FLT_MAX
;
#pragma omp parallel private(p,k)
dt_min
=
FLT_MAX
;
dt_max
=
0
.
0
f
;
#pragma omp parallel private(p,k
,ldt_min,ldt_max,lmom,letot,threadID,nthreads
)
{
int
threadID
=
omp_get_thread_num
();
int
nthreads
=
omp_get_num_threads
();
float
dt_min
=
FLT_MAX
;
threadID
=
omp_get_thread_num
();
nthreads
=
omp_get_num_threads
();
ldt_min
=
FLT_MAX
;
ldt_max
=
0
.
0
f
;
lmom
[
0
]
=
0
.
0
;
lmom
[
1
]
=
0
.
0
;
lmom
[
2
]
=
0
.
0
;
letot
=
0
.
0
;
for
(
k
=
nr_parts
*
threadID
/
nthreads
;
k
<
nr_parts
*
(
threadID
+
1
)
/
nthreads
;
k
++
)
{
/* Get a handle on the part. */
p
=
&
parts
[
k
];
/* Scale the derivatives. */
p
->
u_dt
*=
p
->
POrho2
;
p
->
h_dt
*=
p
->
h
*
0
.
333333333
f
;
/* Scale the derivatives if they're freshly computed. */
if
(
p
->
dt
<=
dt_max
)
{
p
->
u_dt
*=
p
->
POrho2
;
p
->
h_dt
*=
p
->
h
*
0
.
333333333
f
;
}
/* Update positions and energies at the half-step. */
#ifdef __SSE__
_mm_store_ps
(
&
p
->
v
[
0
]
,
_mm_add_ps
(
_mm_load_ps
(
&
v_bar
[
4
*
k
]
)
,
_mm_mul_ps
(
hdtv
,
_mm_load_ps
(
&
p
->
a
[
0
]
)
)
)
);
#else
//
#ifdef __SSE__
//
_mm_store_ps( &p->v[0] , _mm_add_ps( _mm_load_ps( &v_bar[4*k] ) , _mm_mul_ps( hdtv , _mm_load_ps( &p->a[0] ) ) ) );
//
#else
p
->
v
[
0
]
=
v_bar
[
4
*
k
+
0
]
+
hdt
*
p
->
a
[
0
];
p
->
v
[
1
]
=
v_bar
[
4
*
k
+
1
]
+
hdt
*
p
->
a
[
1
];
p
->
v
[
2
]
=
v_bar
[
4
*
k
+
2
]
+
hdt
*
p
->
a
[
2
];
#endif
//
p->u = v_bar[4*k+3] + hdt * p->u_dt;
//
#endif
p
->
u
=
v_bar
[
4
*
k
+
3
]
+
hdt
*
p
->
u_dt
;
/* Get the smallest dt. */
dt_min
=
fminf
(
dt_min
,
p
->
dt
);
/* Get the smallest/largest dt. */
ldt_min
=
fminf
(
ldt_min
,
p
->
dt
);
ldt_max
=
fmaxf
(
ldt_max
,
p
->
dt
);
/* Collect total energy. */
letot
+=
0
.
5
*
p
->
mass
*
(
p
->
v
[
0
]
*
p
->
v
[
0
]
+
p
->
v
[
1
]
*
p
->
v
[
1
]
+
p
->
v
[
2
]
*
p
->
v
[
2
]
)
+
p
->
mass
*
p
->
u
;
/* Collect momentum */
lmom
[
0
]
+=
p
->
mass
*
p
->
v
[
0
];
lmom
[
1
]
+=
p
->
mass
*
p
->
v
[
1
];
lmom
[
2
]
+=
p
->
mass
*
p
->
v
[
2
];
}
#pragma omp critical
e
->
dt_min
=
fminf
(
e
->
dt_min
,
dt_min
);
{
dt_min
=
fminf
(
dt_min
,
ldt_min
);
dt_max
=
fmaxf
(
dt_max
,
ldt_max
);
mom
[
0
]
+=
lmom
[
0
];
mom
[
1
]
+=
lmom
[
1
];
mom
[
2
]
+=
lmom
[
2
];
etot
+=
letot
;
}
}
TIMER_TOC
(
timer_kick2
);
printf
(
"engine_step: dt_min is %e.
\n
"
,
e
->
dt_min
);
fflush
(
stdout
);
e
->
dt_min
=
dt_min
;
printf
(
"engine_step: dt_min/dt_max is %e/%e.
\n
"
,
dt_min
,
dt_max
);
fflush
(
stdout
);
printf
(
"engine_step: etot is %e.
\n
"
,
etot
);
fflush
(
stdout
);
printf
(
"engine_step: total momentum is [ %e , %e , %e ].
\n
"
,
mom
[
0
]
,
mom
[
1
]
,
mom
[
2
]
);
fflush
(
stdout
);
/* Does the time step need adjusting? */
if
(
dt_min
<
e
->
dt
)
{
e
->
dt
*=
0
.
5
;
printf
(
"engine_step: dt_min dropped below time step, adjusting to dt=%e.
\n
"
,
e
->
dt
);
}
else
if
(
dt_min
>
2
*
e
->
dt
)
{
e
->
dt
*=
2
.
0
;
printf
(
"engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.
\n
"
,
e
->
dt
);
}
/* Clean up. */
free
(
v_bar
);
...
...
src/runner.c
View file @
c969365b
...
...
@@ -325,6 +325,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
void
runner_doghost
(
struct
runner
*
r
,
struct
cell
*
c
)
{
struct
part
*
p
;
struct
cpart
*
cp
;
struct
cell
*
finger
;
int
i
,
k
,
redo
,
count
=
c
->
count
;
int
*
pid
;
...
...
@@ -357,9 +358,10 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Get a direct pointer on the part. */
p
=
&
c
->
parts
[
pid
[
i
]
];
cp
=
&
c
->
cparts
[
pid
[
i
]
];
/* Is this part within the timestep? */
if
(
p
->
dt
<=
dt_max
)
{
if
(
c
p
->
dt
<=
dt_max
)
{
/* Adjust the computed rho. */
ihg
=
kernel_igamma
/
p
->
h
;
...
...
@@ -370,11 +372,12 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Update the smoothing length. */
p
->
h
-=
(
p
->
wcount
-
const_nwneigh
)
/
p
->
wcount_dh
;
cp
->
h
=
p
->
h
;
/* Did we get the right number density? */
if
(
p
->
wcount
>
const_nwneigh
+
1
||
p
->
wcount
<
const_nwneigh
-
1
)
{
printf
(
"runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%f.
\n
"
,
p
->
id
,
p
->
h
,
c
->
depth
,
p
->
wcount
);
fflush
(
stdout
);
//
printf( "runner_doghost: particle %lli (h=%e,
h_dt=%e,
depth=%i) has bad wcount=%
.3
f.\n" , p->id , p->h ,
p->h_dt ,
c->depth , p->wcount ); fflush(stdout);
// p->h += ( p->wcount + kernel_root - const_nwneigh ) / p->wcount_dh;
pid
[
redo
]
=
pid
[
i
];
redo
+=
1
;
...
...
@@ -387,6 +390,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Compute this particle's time step. */
p
->
dt
=
const_cfl
*
p
->
h
/
sqrtf
(
const_gamma
*
(
const_gamma
-
1
.
0
f
)
*
p
->
u
);
cp
->
dt
=
p
->
dt
;
/* Compute the pressure. */
// p->P = p->rho * p->u * ( const_gamma - 1.0f );
...
...
@@ -394,15 +398,15 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Compute the P/Omega/rho2. */
p
->
POrho2
=
p
->
u
*
(
const_gamma
-
1
.
0
f
)
/
(
p
->
rho
+
p
->
h
*
p
->
rho_dh
/
3
.
0
f
);
}
/* Reset the acceleration. */
for
(
k
=
0
;
k
<
3
;
k
++
)
p
->
a
[
k
]
=
0
.
0
f
;
/* Reset the
acceleration
. */
for
(
k
=
0
;
k
<
3
;
k
++
)
p
->
a
[
k
]
=
0
.
0
f
;
/* Reset the
time derivatives
. */
p
->
u_dt
=
0
.
0
f
;
p
->
h_dt
=
0
.
0
f
;
/* Reset the time derivatives. */
p
->
u_dt
=
0
.
0
f
;
p
->
h_dt
=
0
.
0
f
;
}
}
...
...
src/runner_iact.h
View file @
c969365b
...
...
@@ -368,7 +368,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
pj
->
u_dt
+=
pi
->
mass
*
dvdr
*
wj_dr
;
/* Get the time derivative for h. */
pi
->
h_dt
+
=
pj
->
mass
/
pj
->
rho
*
dvdr
*
wi_dr
;
pi
->
h_dt
-
=
pj
->
mass
/
pj
->
rho
*
dvdr
*
wi_dr
;
pj
->
h_dt
-=
pi
->
mass
/
pi
->
rho
*
dvdr
*
wj_dr
;
#ifdef HIST
...
...
@@ -485,8 +485,8 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
for
(
k
=
0
;
k
<
VEC_SIZE
;
k
++
)
{
pi
[
k
]
->
u_dt
+=
piu_dt
.
f
[
k
];
pj
[
k
]
->
u_dt
+=
pju_dt
.
f
[
k
];
pi
[
k
]
->
h_dt
+
=
pih_dt
.
f
[
k
];
pj
[
k
]
->
h_dt
+
=
pjh_dt
.
f
[
k
];
pi
[
k
]
->
h_dt
-
=
pih_dt
.
f
[
k
];
pj
[
k
]
->
h_dt
-
=
pjh_dt
.
f
[
k
];
for
(
j
=
0
;
j
<
3
;
j
++
)
{
pi
[
k
]
->
a
[
j
]
-=
pia
[
j
].
f
[
k
];
pj
[
k
]
->
a
[
j
]
+=
pja
[
j
].
f
[
k
];
...
...
@@ -545,7 +545,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
pi
->
u_dt
+=
pj
->
mass
*
dvdr
*
wi_dr
;
/* Get the time derivative for h. */
pi
->
h_dt
+
=
pj
->
mass
/
pj
->
rho
*
dvdr
*
wi_dr
;
pi
->
h_dt
-
=
pj
->
mass
/
pj
->
rho
*
dvdr
*
wi_dr
;
}
...
...
@@ -646,7 +646,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
/* Store the forces back on the particles. */
for
(
k
=
0
;
k
<
VEC_SIZE
;
k
++
)
{
pi
[
k
]
->
u_dt
+=
piu_dt
.
f
[
k
];
pi
[
k
]
->
h_dt
+
=
pih_dt
.
f
[
k
];
pi
[
k
]
->
h_dt
-
=
pih_dt
.
f
[
k
];
for
(
j
=
0
;
j
<
3
;
j
++
)
pi
[
k
]
->
a
[
j
]
-=
pia
[
j
].
f
[
k
];
}
...
...
src/space.c
View file @
c969365b
...
...
@@ -161,12 +161,17 @@ void space_prepare ( struct space *s ) {
int
k
;
struct
task
*
t
;
float
dt_max
=
s
->
dt_max
;
float
dt_max
=
s
->
dt_max
,
dx_max
=
0
.
0
f
;
int
counts
[
task_type_count
+
1
];
/* Traverse the cells and set their dt_min and dt_max. */
space_map_cells_post
(
s
,
1
,
&
space_map_prepare
,
NULL
);
/* Get the maximum displacement in the whole system. */
for
(
k
=
0
;
k
<
s
->
nr_cells
;
k
++
)
dx_max
=
fmaxf
(
dx_max
,
s
->
cells
[
k
].
dx_max
);
printf
(
"space_prepare: dx_max is %e.
\n
"
,
dx_max
);
/* Run through the tasks and mark as skip or not. */
for
(
k
=
0
;
k
<
s
->
nr_tasks
;
k
++
)
{
t
=
&
s
->
tasks
[
k
];
...
...
@@ -192,15 +197,6 @@ void space_prepare ( struct space *s ) {
/* Traverse the cells and set their dt_min and dt_max. */
space_map_cells_post
(
s
,
1
,
&
space_map_prepare
,
NULL
);
/* Run through the tasks and mark as skip or not. */
for
(
k
=
0
;
k
<
s
->
nr_tasks
;
k
++
)
{
t
=
&
s
->
tasks
[
k
];
if
(
t
->
type
==
task_type_sort
||
t
->
type
==
task_type_self
||
t
->
type
==
task_type_ghost
)
t
->
skip
=
(
t
->
ci
->
dt_min
>
dt_max
);
else
if
(
t
->
type
==
task_type_pair
)
t
->
skip
=
(
t
->
ci
->
dt_min
>
dt_max
&&
t
->
cj
->
dt_min
>
dt_max
);
}
}
/* Store the condensed particle data. */
...
...
@@ -262,6 +258,10 @@ void space_ranktasks ( struct space *s ) {
temp
=
tid
[
j
];
tid
[
j
]
=
tid
[
k
];
tid
[
k
]
=
temp
;
j
+=
1
;
}
/* Did we get anything? */
if
(
j
==
left
)
error
(
"Unsatisfiable task dependencies detected."
);
/* Traverse the task tree and add tasks with no weight. */
for
(
i
=
left
;
i
<
j
;
i
++
)
{
...
...
@@ -488,6 +488,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
}
s
->
h_min
=
h_min
;
s
->
h_max
=
h_max
;
printf
(
"space_rebuild: h_min/h_max is %.3e/%.3e.
\n
"
,
h_min
,
h_max
);
// printf( "space_rebuild: getting h_min and h_max took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Get the new putative cell dimensions. */
...
...
@@ -537,6 +538,9 @@ void space_rebuild ( struct space *s , double cell_max ) {
c
->
dmin
=
dmin
;
c
->
depth
=
0
;
}
/* Be verbose about the change. */
printf
(
"space_rebuild: set cell dimensions to [ %i %i %i ].
\n
"
,
cdim
[
0
]
,
cdim
[
1
]
,
cdim
[
2
]
);
fflush
(
stdout
);
}
/* re-build upper-level cells? */
// printf( "space_rebuild: rebuilding upper-level cells took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
...
...
@@ -844,7 +848,7 @@ struct task *space_addtask ( struct space *s , int type , int subtype , int flag
t
->
ci
=
ci
;
t
->
cj
=
cj
;
t
->
skip
=
0
;
t
->
tight
=
0
;
t
->
tight
=
tight
;
t
->
nr_unlock_tasks
=
0
;
t
->
nr_unlock_cells
=
0
;
...
...
@@ -1398,9 +1402,14 @@ void space_maketasks ( struct space *s , int do_sort ) {
if
(
t
->
skip
)
continue
;
if
(
t
->
type
==
task_type_sort
&&
t
->
ci
->
split
)
for
(
j
=
0
;
j
<
8
;
j
++
)
if
(
t
->
ci
->
progeny
[
j
]
!=
NULL
&&
t
->
ci
->
progeny
[
j
]
->
sorts
[
0
]
!=
NULL
)
task_addunlock
(
t
->
ci
->
progeny
[
j
]
->
sorts
[
0
]
,
t
);
for
(
j
=
0
;
j
<
8
;
j
++
)
{
if
(
t
->
ci
->
progeny
[
j
]
==
NULL
)
continue
;
if
(
t
->
ci
->
progeny
[
j
]
->
sorts
[
0
]
==
NULL
)
t
->
ci
->
progeny
[
j
]
->
sorts
[
0
]
=
space_addtask
(
s
,
task_type_sort
,
task_subtype_none
,
0
/* t->flags? */
,
0
,
t
->
ci
,
NULL
,
0
);
t
->
ci
->
progeny
[
j
]
->
sorts
[
0
]
->
skip
=
0
;
task_addunlock
(
t
->
ci
->
progeny
[
j
]
->
sorts
[
0
]
,
t
);
}
}
/* Count the number of tasks associated with each cell and
...
...
src/vector.h
View file @
c969365b
...
...
@@ -27,8 +27,11 @@
#define VEC_MACRO(elcount, type) __attribute__((vector_size((elcount)*sizeof(type)))) type
/* So what will the vector size be? */
#ifdef __AVX__
#ifdef NO__AVX__
#define VECTORIZE
#define VEC_SIZE 8
#define VEC_FLOAT __m256
#define VEC_INT __m256i
#define vec_load(a) _mm256_load_ps(a)
#define vec_set1(a) _mm256_set1_ps(a)
#define vec_sqrt(a) _mm256_sqrt_ps(a)
...
...
@@ -36,8 +39,11 @@
#define vec_rsqrt(a) _mm256_rsqrt_ps(a)
#define vec_ftoi(a) _mm256_cvttps_epi32(a)
#define vec_fmin(a,b) _mm256_min_ps(a,b)
#else
#elif defined( NO__SSE2__ )
#define VECTORIZE
#define VEC_SIZE 4
#define VEC_FLOAT __m128
#define VEC_INT __m128i
#define vec_load(a) _mm_load_ps(a)
#define vec_set1(a) _mm_set1_ps(a)
#define vec_sqrt(a) _mm_sqrt_ps(a)
...
...
@@ -45,6 +51,8 @@
#define vec_rsqrt(a) _mm_rsqrt_ps(a)
#define vec_ftoi(a) _mm_cvttps_epi32(a)