Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
50a466ff
Commit
50a466ff
authored
Jan 31, 2013
by
Pedro Gonnet
Browse files
several fixes to the particle tree and task construction.
Former-commit-id: c6a968263d1d47644bfac82ba3459a44e618890e
parent
920b17ef
Changes
10
Hide whitespace changes
Inline
Side-by-side
src/Makefile.am
View file @
50a466ff
...
...
@@ -20,18 +20,19 @@
AUTOMAKE_OPTIONS
=
gnu
# Add the debug flag to the whole thing
# AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \
# -funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) \
# -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
AM_CFLAGS
=
-Wall
-Werror
$(OPENMP_CFLAGS)
\
AM_CFLAGS
=
-g
-O3
-Wall
-Werror
-ffast-math
-fstrict-aliasing
-ftree-vectorize
\
-funroll-loops
$(SIMD_FLAGS)
$(OPENMP_CFLAGS)
\
-DTIMER
-DCOUNTER
-DCPU_TPS
=
2.67e9
# AM_CFLAGS = -Wall -Werror $(OPENMP_CFLAGS) \
# -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
# Assign a "safe" version number
AM_LDFLAGS
=
$(LAPACK_LIBS)
$(BLAS_LIBS)
$(HDF5_LDFLAGS)
-version-info
0:0:0
# Build the libswiftsim library
lib_LTLIBRARIES
=
libswiftsim.la
libswiftsim_la_SOURCES
=
space.c runner.c queue.c task.c cell.c engine.c input.c output.c timers.c debug.c
libswiftsim_la_SOURCES
=
space.c runner.c queue.c task.c cell.c engine.c
\
input.c output.c timers.c debug.c
# List required headers
include_HEADERS
=
space.h runner.h queue.h task.h lock.h cell.h part.h const.h
\
...
...
src/cell.h
View file @
50a466ff
...
...
@@ -80,6 +80,7 @@ struct cell {
/* The tasks computing this cell's sorts. */
struct
task
*
sorts
[
13
];
int
sortsize
;
/* The tasks computing this cell's density. */
struct
task
*
density
[
27
];
...
...
src/engine.c
View file @
50a466ff
...
...
@@ -35,6 +35,7 @@
#include
"cycle.h"
#include
"timers.h"
#include
"const.h"
#include
"vector.h"
#include
"lock.h"
#include
"task.h"
#include
"part.h"
...
...
@@ -175,8 +176,11 @@ void engine_step ( struct engine *e , int sort_queues ) {
int
k
,
nr_parts
=
e
->
s
->
nr_parts
;
struct
part
*
restrict
parts
=
e
->
s
->
parts
,
*
restrict
p
;
float
*
v_bar
,
*
u
_bar
;
float
*
restrict
v
_bar
;
float
dt
=
e
->
dt
,
hdt
=
0
.
5
*
dt
,
dt_max
;
#ifdef __SSE2__
VEC_MACRO
(
4
,
float
)
hdtv
=
_mm_set1_ps
(
hdt
);
#endif
/* Get the maximum dt. */
dt_max
=
dt
;
...
...
@@ -189,12 +193,11 @@ void engine_step ( struct engine *e , int sort_queues ) {
printf
(
"engine_step: dt_max set to %.3e.
\n
"
,
dt_max
);
fflush
(
stdout
);
/* Allocate a buffer for the old velocities. */
if
(
(
v_bar
=
(
float
*
)
malloc
(
sizeof
(
float
)
*
nr_parts
*
3
)
)
==
NULL
)
error
(
"Failed to allocate v_old buffer."
);
if
(
(
u_bar
=
(
float
*
)
malloc
(
sizeof
(
float
)
*
nr_parts
)
)
==
NULL
)
if
(
posix_memalign
(
(
void
**
)
&
v_bar
,
16
,
sizeof
(
float
)
*
nr_parts
*
4
)
!=
0
)
error
(
"Failed to allocate v_old buffer."
);
/* First kick. */
TIMER_TIC
#pragma omp parallel for schedule(static) private(p)
for
(
k
=
0
;
k
<
nr_parts
;
k
++
)
{
...
...
@@ -202,10 +205,14 @@ void engine_step ( struct engine *e , int sort_queues ) {
p
=
&
parts
[
k
];
/* Step and store the velocity and internal energy. */
v_bar
[
3
*
k
+
0
]
=
p
->
v
[
0
]
+
hdt
*
p
->
a
[
0
];
v_bar
[
3
*
k
+
1
]
=
p
->
v
[
1
]
+
hdt
*
p
->
a
[
1
];
v_bar
[
3
*
k
+
2
]
=
p
->
v
[
2
]
+
hdt
*
p
->
a
[
2
];
u_bar
[
k
]
=
p
->
u
+
hdt
*
p
->
u_dt
;
#ifdef __SSE__
_mm_store_ps
(
&
v_bar
[
4
*
k
]
,
_mm_add_ps
(
_mm_load_ps
(
&
p
->
v
[
0
]
)
,
_mm_mul_ps
(
hdtv
,
_mm_load_ps
(
&
p
->
a
[
0
]
)
)
)
);
#else
v_bar
[
4
*
k
+
0
]
=
p
->
v
[
0
]
+
hdt
*
p
->
a
[
0
];
v_bar
[
4
*
k
+
1
]
=
p
->
v
[
1
]
+
hdt
*
p
->
a
[
1
];
v_bar
[
4
*
k
+
2
]
=
p
->
v
[
2
]
+
hdt
*
p
->
a
[
2
];
#endif
v_bar
[
4
*
k
+
3
]
=
p
->
u
+
hdt
*
p
->
u_dt
;
/* Move the particles with the velocitie at the half-step. */
// p->x[0] += dt * v_bar[3*k+0];
...
...
@@ -226,6 +233,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
}
}
TIMER_TOC
(
timer_kick1
);
/* Prepare the space. */
engine_prepare
(
e
);
...
...
@@ -240,7 +248,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
}
/* Start the clock. */
TIMER_TIC
TIMER_TIC
_ND
/* Cry havoc and let loose the dogs of war. */
e
->
barrier_count
=
-
e
->
barrier_count
;
...
...
@@ -256,6 +264,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
TIMER_TOC
(
timer_step
);
/* Second kick. */
TIMER_TIC_ND
e
->
dt_min
=
FLT_MAX
;
#pragma omp parallel private(p,k)
{
...
...
@@ -272,10 +281,14 @@ void engine_step ( struct engine *e , int sort_queues ) {
p
->
h_dt
*=
p
->
h
*
0
.
333333333
f
;
/* Update positions and energies at the half-step. */
p
->
v
[
0
]
=
v_bar
[
3
*
k
+
0
]
+
hdt
*
p
->
a
[
0
];
p
->
v
[
1
]
=
v_bar
[
3
*
k
+
0
]
+
hdt
*
p
->
a
[
1
];
p
->
v
[
2
]
=
v_bar
[
3
*
k
+
0
]
+
hdt
*
p
->
a
[
2
];
// p->u = u_bar[k] + hdt * p->u_dt;
#ifdef __SSE__
_mm_store_ps
(
&
p
->
v
[
0
]
,
_mm_add_ps
(
_mm_load_ps
(
&
v_bar
[
4
*
k
]
)
,
_mm_mul_ps
(
hdtv
,
_mm_load_ps
(
&
p
->
a
[
0
]
)
)
)
);
#else
p
->
v
[
0
]
=
v_bar
[
4
*
k
+
0
]
+
hdt
*
p
->
a
[
0
];
p
->
v
[
1
]
=
v_bar
[
4
*
k
+
1
]
+
hdt
*
p
->
a
[
1
];
p
->
v
[
2
]
=
v_bar
[
4
*
k
+
2
]
+
hdt
*
p
->
a
[
2
];
#endif
// p->u = v_bar[4*k+3] + hdt * p->u_dt;
/* Get the smallest dt. */
dt_min
=
fminf
(
dt_min
,
p
->
dt
);
...
...
@@ -284,11 +297,11 @@ void engine_step ( struct engine *e , int sort_queues ) {
#pragma omp critical
e
->
dt_min
=
fminf
(
e
->
dt_min
,
dt_min
);
}
TIMER_TOC
(
timer_kick2
);
printf
(
"engine_step: dt_min is %e.
\n
"
,
e
->
dt_min
);
fflush
(
stdout
);
/* Clean up. */
free
(
v_bar
);
free
(
u_bar
);
/* Increase the step counter. */
e
->
step
+=
1
;
...
...
src/input.c
View file @
50a466ff
...
...
@@ -130,7 +130,7 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, int dim
printf
(
"readArray: Reading %s '%s' array...
\n
"
,
importance
==
COMPULSORY
?
"compulsory"
:
"optional "
,
name
);
/* Open data space */
h_data
=
H5Dopen
(
grp
,
name
);
h_data
=
H5Dopen
1
(
grp
,
name
);
if
(
h_data
<
0
)
{
char
buf
[
100
];
...
...
@@ -224,7 +224,7 @@ void read_ic ( char* fileName, double dim[3], struct part **parts, int* N, int*
/* Open header to read simulation properties */
printf
(
"read_ic: Reading runtime parameters...
\n
"
);
h_grp
=
H5Gopen
(
h_file
,
"/RuntimePars"
);
h_grp
=
H5Gopen
1
(
h_file
,
"/RuntimePars"
);
if
(
h_grp
<
0
)
error
(
"Error while opening runtime parameters
\n
"
);
...
...
@@ -236,7 +236,7 @@ void read_ic ( char* fileName, double dim[3], struct part **parts, int* N, int*
/* Open header to read simulation properties */
printf
(
"read_ic: Reading file header...
\n
"
);
h_grp
=
H5Gopen
(
h_file
,
"/Header"
);
h_grp
=
H5Gopen
1
(
h_file
,
"/Header"
);
if
(
h_grp
<
0
)
error
(
"Error while opening file header
\n
"
);
...
...
@@ -262,7 +262,7 @@ void read_ic ( char* fileName, double dim[3], struct part **parts, int* N, int*
/* Open SPH particles group */
printf
(
"read_ic: Reading particle arrays...
\n
"
);
h_grp
=
H5Gopen
(
h_file
,
"/PartType0"
);
h_grp
=
H5Gopen
1
(
h_file
,
"/PartType0"
);
if
(
h_grp
<
0
)
error
(
"Error while opening particle group.
\n
"
);
...
...
src/output.c
View file @
50a466ff
...
...
@@ -77,7 +77,7 @@ void writeAttribute(hid_t grp, char* name, enum DATA_TYPE type, void* data, int
error
(
buf
);
}
h_attr
=
H5Acreate
(
grp
,
name
,
hdf5Type
(
type
),
h_space
,
H5P_DEFAULT
);
h_attr
=
H5Acreate
1
(
grp
,
name
,
hdf5Type
(
type
),
h_space
,
H5P_DEFAULT
);
if
(
h_attr
<
0
)
{
char
buf
[
100
];
...
...
@@ -169,7 +169,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, enu
}
/* Create dataset */
h_data
=
H5Dcreate
(
grp
,
name
,
hdf5Type
(
type
),
h_space
,
H5P_DEFAULT
);
h_data
=
H5Dcreate
1
(
grp
,
name
,
hdf5Type
(
type
),
h_space
,
H5P_DEFAULT
);
if
(
h_data
<
0
)
{
char
buf
[
100
];
...
...
@@ -248,7 +248,7 @@ void write_output ( char* fileName, double dim[3], struct part *parts, int N, i
/* Open header to read simulation properties */
printf
(
"write_output: Writing runtime parameters...
\n
"
);
h_grp
=
H5Gcreate
(
h_file
,
"/RuntimePars"
,
0
);
h_grp
=
H5Gcreate
1
(
h_file
,
"/RuntimePars"
,
0
);
if
(
h_grp
<
0
)
error
(
"Error while creating runtime parameters group
\n
"
);
...
...
@@ -262,7 +262,7 @@ void write_output ( char* fileName, double dim[3], struct part *parts, int N, i
/* Open header to write simulation properties */
printf
(
"write_output: Writing file header...
\n
"
);
h_grp
=
H5Gcreate
(
h_file
,
"/Header"
,
0
);
h_grp
=
H5Gcreate
1
(
h_file
,
"/Header"
,
0
);
if
(
h_grp
<
0
)
error
(
"Error while creating file header
\n
"
);
...
...
@@ -275,7 +275,7 @@ void write_output ( char* fileName, double dim[3], struct part *parts, int N, i
/* Create SPH particles group */
printf
(
"write_output: Writing particle arrays...
\n
"
);
h_grp
=
H5Gcreate
(
h_file
,
"/PartType0"
,
0
);
h_grp
=
H5Gcreate
1
(
h_file
,
"/PartType0"
,
0
);
if
(
h_grp
<
0
)
error
(
"Error while creating particle group.
\n
"
);
...
...
src/part.h
View file @
50a466ff
...
...
@@ -42,12 +42,12 @@ struct cpart {
/* Data of a single particle. */
struct
part
{
/* Particle velocity. */
float
v
[
3
]
__attribute__
((
aligned
(
16
)));
/* Particle mass. */
float
mass
;
/* Particle velocity. */
float
v
[
3
];
/* Particle density. */
float
rho
;
...
...
@@ -63,12 +63,12 @@ struct part {
/* Change in smoothing length over time. */
float
h_dt
;
/* Particle acceleration. */
float
a
[
3
]
__attribute__
((
aligned
(
16
)));
/* Derivative of the density with respect to this particle's smoothing length. */
float
rho_dh
;
/* Particle acceleration. */
float
a
[
3
];
/* Particle number density. */
// int icount;
float
wcount
;
...
...
src/runner.c
View file @
50a466ff
...
...
@@ -180,9 +180,13 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
/* start by allocating the entry arrays. */
if
(
lock_lock
(
&
c
->
lock
)
!=
0
)
error
(
"Failed to lock cell."
);
if
(
c
->
sort
==
NULL
)
if
(
(
c
->
sort
=
(
struct
entry
*
)
malloc
(
sizeof
(
struct
entry
)
*
(
c
->
count
+
1
)
*
13
)
)
==
NULL
)
if
(
c
->
sort
==
NULL
||
c
->
sortsize
<
c
->
count
)
{
if
(
c
->
sort
!=
NULL
)
free
(
c
->
sort
);
c
->
sortsize
=
c
->
count
*
1
.
1
;
if
(
(
c
->
sort
=
(
struct
entry
*
)
malloc
(
sizeof
(
struct
entry
)
*
(
c
->
sortsize
+
1
)
*
13
)
)
==
NULL
)
error
(
"Failed to allocate sort memory."
);
}
if
(
lock_unlock
(
&
c
->
lock
)
!=
0
)
error
(
"Failed to unlock cell."
);
...
...
src/runner_iact.h
View file @
50a466ff
...
...
@@ -19,43 +19,44 @@
/* Load the vector stuff. */
#ifdef __SSE2__
#define VECTORIZE
#include
<immintrin.h>
#ifdef __AVX__
typedef
union
{
__m256
v
;
__m256i
m
;
float
f
[
8
];
int
i
[
8
];
}
vector
;
#define VEC_SIZE 8
#define vec_load(a) _mm256_load_ps(a)
#define vec_set1(a) _mm256_set1_ps(a)
#define vec_sqrt(a) _mm256_sqrt_ps(a)
#define vec_rcp(a) _mm256_rcp_ps(a)
#define vec_rsqrt(a) _mm256_rsqrt_ps(a)
#define vec_ftoi(a) _mm256_cvttps_epi32(a)
#define vec_fmin(a,b) _mm256_min_ps(a,b)
#else
typedef
union
{
__m128
v
;
__m128i
m
;
float
f
[
4
];
int
i
[
4
];
}
vector
;
#define VEC_SIZE 4
#define vec_load(a) _mm_load_ps(a)
#define vec_set1(a) _mm_set1_ps(a)
#define vec_sqrt(a) _mm_sqrt_ps(a)
#define vec_rcp(a) _mm_rcp_ps(a)
#define vec_rsqrt(a) _mm_rsqrt_ps(a)
#define vec_ftoi(a) _mm_cvttps_epi32(a)
#define vec_fmin(a,b) _mm_min_ps(a,b)
#endif
#else
#define VEC_SIZE 4
#endif
// #ifdef __SSE2__
// #define VECTORIZE
// #include <immintrin.h>
// #ifdef __AVX__
// typedef union {
// __m256 v;
// __m256i m;
// float f[8];
// int i[8];
// } vector;
// #define VEC_SIZE 8
// #define vec_load(a) _mm256_load_ps(a)
// #define vec_set1(a) _mm256_set1_ps(a)
// #define vec_sqrt(a) _mm256_sqrt_ps(a)
// #define vec_rcp(a) _mm256_rcp_ps(a)
// #define vec_rsqrt(a) _mm256_rsqrt_ps(a)
// #define vec_ftoi(a) _mm256_cvttps_epi32(a)
// #define vec_fmin(a,b) _mm256_min_ps(a,b)
// #else
// typedef union {
// __m128 v;
// __m128i m;
// float f[4];
// int i[4];
// } vector;
// #define VEC_SIZE 4
// #define vec_load(a) _mm_load_ps(a)
// #define vec_set1(a) _mm_set1_ps(a)
// #define vec_sqrt(a) _mm_sqrt_ps(a)
// #define vec_rcp(a) _mm_rcp_ps(a)
// #define vec_rsqrt(a) _mm_rsqrt_ps(a)
// #define vec_ftoi(a) _mm_cvttps_epi32(a)
// #define vec_fmin(a,b) _mm_min_ps(a,b)
// #endif
// #else
// #define VEC_SIZE 4
// #endif
#include
"vector.h"
/* Coefficients for the kernel. */
#define kernel_degree 3
...
...
@@ -164,7 +165,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
kernel_deval
(
xj
,
&
wj
,
&
wj_dx
);
pj
->
rho
+=
pi
->
mass
*
wj
;
pj
->
rho_dh
+
=
-
pi
->
mass
*
(
3
.
0
*
wj
+
xj
*
wj_dx
);
pj
->
rho_dh
-
=
pi
->
mass
*
(
3
.
0
*
wj
+
xj
*
wj_dx
);
pj
->
wcount
+=
wj
*
(
4
.
0
f
*
M_PI
/
3
.
0
f
*
kernel_igamma3
);
pj
->
wcount_dh
-=
xj
*
h_inv
*
wj_dx
*
(
4
.
0
f
*
M_PI
/
3
.
0
f
*
kernel_igamma3
);
// pj->icount += 1;
...
...
@@ -266,7 +267,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density (
kernel_deval
(
xi
,
&
wi
,
&
wi_dx
);
pi
->
rho
+=
pj
->
mass
*
wi
;
pi
->
rho_dh
+
=
-
pj
->
mass
*
(
3
.
0
*
wi
+
xi
*
wi_dx
);
pi
->
rho_dh
-
=
pj
->
mass
*
(
3
.
0
*
wi
+
xi
*
wi_dx
);
pi
->
wcount
+=
wi
*
(
4
.
0
f
*
M_PI
/
3
.
0
f
*
kernel_igamma3
);
pi
->
wcount_dh
-=
xi
*
h_inv
*
wi_dx
*
(
4
.
0
f
*
M_PI
/
3
.
0
f
*
kernel_igamma3
);
// pi->icount += 1;
...
...
src/space.c
View file @
50a466ff
...
...
@@ -475,6 +475,9 @@ void space_rebuild ( struct space *s , double cell_max ) {
int
*
ind
;
// ticks tic;
/* Be verbose about this. */
printf
(
"space_rebuild: (re)building space...
\n
"
);
fflush
(
stdout
);
/* Run through the parts and get the current h_max. */
// tic = getticks();
for
(
k
=
0
;
k
<
s
->
nr_parts
;
k
++
)
{
...
...
src/timers.h
View file @
50a466ff
...
...
@@ -23,6 +23,8 @@
enum
{
timer_none
=
0
,
timer_prepare
,
timer_kick1
,
timer_kick2
,
timer_dosort
,
timer_doself_density
,
timer_doself_force
,
...
...
@@ -53,6 +55,8 @@ extern ticks timers[ timer_count ];
#endif
#endif
#ifdef TIMER
#define TIMER_TIC_ND tic = getticks();
#define TIMER_TIC2_ND ticks tic2 = getticks();
#define TIMER_TIC ticks tic = getticks();
#define TIMER_TOC(t) timers_toc( t , tic )
#define TIMER_TIC2 ticks tic2 = getticks();
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment