Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
aa89030c
Commit
aa89030c
authored
Apr 11, 2013
by
Pedro Gonnet
Browse files
several bug fixes.
Former-commit-id: 896c798930a15c2269b8d990aa54c7b3e10788fb
parent
0dea4d65
Changes
5
Hide whitespace changes
Inline
Side-by-side
src/engine.c
View file @
aa89030c
...
...
@@ -176,7 +176,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
struct
engine
*
e
=
(
struct
engine
*
)
data
;
float
pdt
,
dt_step
=
e
->
dt_step
,
dt
=
e
->
dt
,
hdt
=
0
.
5
f
*
dt
;
float
dt_min
,
dt_max
,
h_max
,
dx
,
h2dx_max
,
dx_max
;
float
a
[
3
],
v
[
3
],
u
,
u_dt
,
h
,
h_dt
,
rho
,
v_old
[
3
];
float
a
[
3
],
v
[
3
],
u
,
u_dt
,
h
,
h_dt
,
rho
,
v_old
[
3
]
,
w
;
double
x
[
3
],
x_old
[
3
];
struct
part
*
restrict
p
;
struct
xpart
*
restrict
xp
;
...
...
@@ -235,12 +235,16 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
p
->
v
[
0
]
=
v
[
0
]
+=
dt
*
a
[
0
];
p
->
v
[
1
]
=
v
[
1
]
+=
dt
*
a
[
1
];
p
->
v
[
2
]
=
v
[
2
]
+=
dt
*
a
[
2
];
p
->
u
=
u
+=
u_dt
*
dt
;
// p->h = h += h_dt * dt;
{
float
w
=
h_dt
/
h
*
dt
;
p
->
h
=
h
*=
1
.
0
f
+
w
*
(
-
1
.
0
f
+
w
*
(
0
.
5
f
+
w
*
(
-
1
.
0
f
/
6
.
0
f
+
1
.
0
f
/
24
.
0
*
w
)
)
);
}
w
=
u_dt
/
u
*
dt
;
if
(
w
>
0
.
9
f
&&
w
<
1
.
1
f
)
p
->
u
=
u
*=
1
.
0
f
+
w
*
(
-
1
.
0
f
+
w
*
(
0
.
5
f
-
w
*
1
.
0
f
/
6
.
0
f
)
);
else
p
->
u
=
u
*=
expf
(
w
);
w
=
h_dt
/
h
*
dt
;
if
(
w
>
0
.
9
f
&&
w
<
1
.
1
f
)
p
->
h
=
h
*=
1
.
0
f
+
w
*
(
-
1
.
0
f
+
w
*
(
0
.
5
f
-
w
*
1
.
0
f
/
6
.
0
f
)
);
else
p
->
h
=
h
*=
expf
(
w
);
h_max
=
fmaxf
(
h_max
,
h
);
...
...
@@ -421,7 +425,6 @@ void engine_step ( struct engine *e , int sort_queues ) {
float
dt
=
e
->
dt
,
dt_step
,
dt_max
=
0
.
0
f
,
dt_min
=
FLT_MAX
;
float
epot
=
0
.
0
,
ekin
=
0
.
0
,
mom
[
3
]
=
{
0
.
0
,
0
.
0
,
0
.
0
};
float
ang
[
3
]
=
{
0
.
0
,
0
.
0
,
0
.
0
};
// double lent, ent = 0.0;
int
count
=
0
;
struct
cell
*
c
;
...
...
@@ -518,6 +521,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
}
else
{
if
(
e
->
dt
==
0
)
{
// e->nullstep += 1;
e
->
dt
=
e
->
dt_orig
;
while
(
dt_min
<
e
->
dt
)
e
->
dt
*=
0
.
5
;
...
...
@@ -540,7 +544,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
}
/* Set the system time. */
e
->
time
=
e
->
dt
*
e
->
step
;
e
->
time
=
e
->
dt
*
(
e
->
step
-
e
->
nullstep
)
;
TIMER_TOC2
(
timer_step
);
...
...
@@ -573,6 +577,8 @@ void engine_init ( struct engine *e , struct space *s , float dt , int nr_thread
e
->
nr_queues
=
nr_queues
;
e
->
policy
=
policy
;
e
->
step
=
0
;
e
->
nullstep
=
0
;
e
->
time
=
0
.
0
;
/* First of all, init the barrier and lock it. */
if
(
pthread_mutex_init
(
&
e
->
barrier_mutex
,
NULL
)
!=
0
)
...
...
src/engine.h
View file @
aa89030c
...
...
@@ -61,7 +61,7 @@ struct engine {
float
dt
,
dt_orig
;
/* The current step number. */
int
step
;
int
step
,
nullstep
;
/* The current system time. */
float
time
;
...
...
src/runner.c
View file @
aa89030c
...
...
@@ -432,7 +432,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Balsara switch */
p
->
force
.
balsara
=
normCurl_v
/
(
normDiv_v
+
normCurl_v
+
0
.
0001
f
*
fc
*
ihg
);
/* Reset the acceleration. */
for
(
k
=
0
;
k
<
3
;
k
++
)
p
->
a
[
k
]
=
0
.
0
f
;
...
...
src/runner_iact.h
View file @
aa89030c
...
...
@@ -362,16 +362,16 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
int
k
;
/* Get the kernel for hi. */
hi_inv
=
1
.
0
f
/
hi
;
hi_inv
=
kernel_igamma
/
hi
;
hi2_inv
=
hi_inv
*
hi_inv
;
xi
=
r
*
hi_inv
*
kernel_igamma
;
xi
=
r
*
hi_inv
;
kernel_deval
(
xi
,
&
wi
,
&
wi_dx
);
wi_dr
=
hi2_inv
*
hi2_inv
*
wi_dx
;
/* Get the kernel for hj. */
hj_inv
=
1
.
0
f
/
hj
;
hj_inv
=
kernel_igamma
/
hj
;
hj2_inv
=
hj_inv
*
hj_inv
;
xj
=
r
*
hj_inv
*
kernel_igamma
;
xj
=
r
*
hj_inv
;
kernel_deval
(
xj
,
&
wj
,
&
wj_dx
);
wj_dr
=
hj2_inv
*
hj2_inv
*
wj_dx
;
...
...
@@ -389,7 +389,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
Pi_ij
=
-
const_viscosity_alpha
*
v_sig
*
omega_ij
/
(
pi
->
rho
+
pj
->
rho
);
/* Apply balsara switch */
Pi_ij
*=
(
pi
->
force
.
balsara
+
pj
->
force
.
balsara
);
Pi_ij
*=
(
pi
->
force
.
balsara
+
pj
->
force
.
balsara
);
/* Get the common factor out. */
w
=
ri
*
(
(
pi
->
force
.
POrho2
*
wi_dr
+
pj
->
force
.
POrho2
*
wj_dr
)
+
0
.
25
f
*
Pi_ij
*
(
wi_dr
+
wj_dr
)
);
...
...
@@ -434,8 +434,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
vector
r
,
r2
,
ri
;
vector
xi
,
xj
;
vector
hi
,
hj
,
hi_inv
,
hj_inv
;
vector
hig_inv
,
hi2_inv
;
vector
hjg_inv
,
hj2_inv
;
vector
hi2_inv
,
hj2_inv
;
vector
wi
,
wj
,
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
dvdr
;
vector
w
;
vector
piPOrho2
,
pjPOrho2
,
pirho
,
pjrho
;
...
...
@@ -500,22 +499,20 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
r
.
v
=
r2
.
v
*
ri
.
v
;
/* Get the kernel for hi. */
hi
.
v
=
vec_load
(
Hi
);
hi
.
v
=
vec_set1
(
kernel_gamma
)
*
vec_load
(
Hi
);
hi_inv
.
v
=
vec_rcp
(
hi
.
v
);
hi_inv
.
v
=
hi_inv
.
v
-
hi_inv
.
v
*
(
hi
.
v
*
hi_inv
.
v
-
vec_set1
(
1
.
0
f
)
);
hig_inv
.
v
=
vec_set1
(
kernel_igamma
)
*
hi_inv
.
v
;
hi2_inv
.
v
=
hi_inv
.
v
*
hi_inv
.
v
;
xi
.
v
=
r
.
v
*
hi
g
_inv
.
v
;
xi
.
v
=
r
.
v
*
hi_inv
.
v
;
kernel_deval_vec
(
&
xi
,
&
wi
,
&
wi_dx
);
wi_dr
.
v
=
hi2_inv
.
v
*
hi2_inv
.
v
*
wi_dx
.
v
;
/* Get the kernel for hj. */
hj
.
v
=
vec_load
(
Hj
);
hj
.
v
=
vec_set1
(
kernel_gamma
)
*
vec_load
(
Hj
);
hj_inv
.
v
=
vec_rcp
(
hj
.
v
);
hj_inv
.
v
=
hj_inv
.
v
-
hj_inv
.
v
*
(
hj
.
v
*
hj_inv
.
v
-
vec_set1
(
1
.
0
f
)
);
hjg_inv
.
v
=
vec_set1
(
kernel_igamma
)
*
hj_inv
.
v
;
hj2_inv
.
v
=
hj_inv
.
v
*
hj_inv
.
v
;
xj
.
v
=
r
.
v
*
hj
g
_inv
.
v
;
xj
.
v
=
r
.
v
*
hj_inv
.
v
;
kernel_deval_vec
(
&
xj
,
&
wj
,
&
wj_dx
);
wj_dr
.
v
=
hj2_inv
.
v
*
hj2_inv
.
v
*
wj_dx
.
v
;
...
...
@@ -595,16 +592,16 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
int
k
;
/* Get the kernel for hi. */
hi_inv
=
1
.
0
f
/
hi
;
hi_inv
=
kernel_igamma
/
hi
;
hi2_inv
=
hi_inv
*
hi_inv
;
xi
=
r
*
hi_inv
*
kernel_igamma
;
xi
=
r
*
hi_inv
;
kernel_deval
(
xi
,
&
wi
,
&
wi_dx
);
wi_dr
=
hi2_inv
*
hi2_inv
*
wi_dx
;
/* Get the kernel for hj. */
hj_inv
=
1
.
0
f
/
hj
;
hj_inv
=
kernel_igamma
/
hj
;
hj2_inv
=
hj_inv
*
hj_inv
;
xj
=
r
*
hj_inv
*
kernel_igamma
;
xj
=
r
*
hj_inv
;
kernel_deval
(
xj
,
&
wj
,
&
wj_dx
);
wj_dr
=
hj2_inv
*
hj2_inv
*
wj_dx
;
...
...
@@ -664,8 +661,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
vector
r
,
r2
,
ri
;
vector
xi
,
xj
;
vector
hi
,
hj
,
hi_inv
,
hj_inv
;
vector
hig_inv
,
hi2_inv
;
vector
hjg_inv
,
hj2_inv
;
vector
hi2_inv
,
hj2_inv
;
vector
wi
,
wj
,
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
dvdr
;
vector
w
;
vector
piPOrho2
,
pjPOrho2
,
pirho
,
pjrho
;
...
...
@@ -728,22 +724,20 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
r
.
v
=
r2
.
v
*
ri
.
v
;
/* Get the kernel for hi. */
hi
.
v
=
vec_load
(
Hi
);
hi
.
v
=
vec_set1
(
kernel_gamma
)
*
vec_load
(
Hi
);
hi_inv
.
v
=
vec_rcp
(
hi
.
v
);
hi_inv
.
v
=
hi_inv
.
v
-
hi_inv
.
v
*
(
hi
.
v
*
hi_inv
.
v
-
vec_set1
(
1
.
0
f
)
);
hig_inv
.
v
=
vec_set1
(
kernel_igamma
)
*
hi_inv
.
v
;
hi2_inv
.
v
=
hi_inv
.
v
*
hi_inv
.
v
;
xi
.
v
=
r
.
v
*
hi
g
_inv
.
v
;
xi
.
v
=
r
.
v
*
hi_inv
.
v
;
kernel_deval_vec
(
&
xi
,
&
wi
,
&
wi_dx
);
wi_dr
.
v
=
hi2_inv
.
v
*
hi2_inv
.
v
*
wi_dx
.
v
;
/* Get the kernel for hj. */
hj
.
v
=
vec_load
(
Hj
);
hj
.
v
=
vec_set1
(
kernel_gamma
)
*
vec_load
(
Hj
);
hj_inv
.
v
=
vec_rcp
(
hj
.
v
);
hj_inv
.
v
=
hj_inv
.
v
-
hj_inv
.
v
*
(
hj
.
v
*
hj_inv
.
v
-
vec_set1
(
1
.
0
f
)
);
hjg_inv
.
v
=
vec_set1
(
kernel_igamma
)
*
hj_inv
.
v
;
hj2_inv
.
v
=
hj_inv
.
v
*
hj_inv
.
v
;
xj
.
v
=
r
.
v
*
hj
g
_inv
.
v
;
xj
.
v
=
r
.
v
*
hj_inv
.
v
;
kernel_deval_vec
(
&
xj
,
&
wj
,
&
wj_dx
);
wj_dr
.
v
=
hj2_inv
.
v
*
hj2_inv
.
v
*
wj_dx
.
v
;
...
...
src/vector.h
View file @
aa89030c
...
...
@@ -27,7 +27,7 @@
#define VEC_MACRO(elcount, type) __attribute__((vector_size((elcount)*sizeof(type)))) type
/* So what will the vector size be? */
#ifdef
NO
__AVX__
#ifdef __AVX__
#define VECTORIZE
#define VEC_SIZE 8
#define VEC_FLOAT __m256
...
...
@@ -54,7 +54,7 @@
#define vec_dbl_ftoi(a) _mm256_cvttpd_epi32(a)
#define vec_dbl_fmin(a,b) _mm256_min_pd(a,b)
#define vec_dbl_fmax(a,b) _mm256_max_pd(a,b)
#elif defined(
NO
__SSE2__ )
#elif defined( __SSE2__ )
#define VECTORIZE
#define VEC_SIZE 4
#define VEC_FLOAT __m128
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment