Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
d86fc798
Commit
d86fc798
authored
Feb 13, 2013
by
Pedro Gonnet
Browse files
added new signal velocity for dt computation.
Former-commit-id: 472384e59f372e67228d9ef7fee6e32b8316f7a0
parent
6f6f3f54
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/runner_iact.h
View file @
d86fc798
...
...
@@ -404,6 +404,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
vector
pia
[
3
],
pja
[
3
];
vector
piu_dt
,
pju_dt
;
vector
pih_dt
,
pjh_dt
;
vector
ci
,
cj
,
vi_sig
,
vj_sig
;
int
j
,
k
;
/* Load stuff. */
...
...
@@ -414,6 +415,10 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
pjPOrho2
.
v
=
_mm256_set_ps
(
pj
[
7
]
->
POrho2
,
pj
[
6
]
->
POrho2
,
pj
[
5
]
->
POrho2
,
pj
[
4
]
->
POrho2
,
pj
[
3
]
->
POrho2
,
pj
[
2
]
->
POrho2
,
pj
[
1
]
->
POrho2
,
pj
[
0
]
->
POrho2
);
pirho
.
v
=
_mm256_set_ps
(
pi
[
7
]
->
rho
,
pi
[
6
]
->
rho
,
pi
[
5
]
->
rho
,
pi
[
4
]
->
rho
,
pi
[
3
]
->
rho
,
pi
[
2
]
->
rho
,
pi
[
1
]
->
rho
,
pi
[
0
]
->
rho
);
pjrho
.
v
=
_mm256_set_ps
(
pj
[
7
]
->
rho
,
pj
[
6
]
->
rho
,
pj
[
5
]
->
rho
,
pj
[
4
]
->
rho
,
pj
[
3
]
->
rho
,
pj
[
2
]
->
rho
,
pj
[
1
]
->
rho
,
pj
[
0
]
->
rho
);
ci
.
v
=
_mm256_set_ps
(
pi
[
7
]
->
c
,
pi
[
6
]
->
c
,
pi
[
5
]
->
c
,
pi
[
4
]
->
c
,
pi
[
3
]
->
c
,
pi
[
2
]
->
c
,
pi
[
1
]
->
c
,
pi
[
0
]
->
c
);
cj
.
v
=
_mm256_set_ps
(
pj
[
7
]
->
c
,
pj
[
6
]
->
c
,
pj
[
5
]
->
c
,
pj
[
4
]
->
c
,
pj
[
3
]
->
c
,
pj
[
2
]
->
c
,
pj
[
1
]
->
c
,
pj
[
0
]
->
c
);
vi_sig
.
v
=
_mm256_set_ps
(
pi
[
7
]
->
v_sig
,
pi
[
6
]
->
v_sig
,
pi
[
5
]
->
v_sig
,
pi
[
4
]
->
v_sig
,
pi
[
3
]
->
v_sig
,
pi
[
2
]
->
v_sig
,
pi
[
1
]
->
v_sig
,
pi
[
0
]
->
v_sig
);
vj_sig
.
v
=
_mm256_set_ps
(
pj
[
7
]
->
v_sig
,
pj
[
6
]
->
v_sig
,
pj
[
5
]
->
v_sig
,
pj
[
4
]
->
v_sig
,
pj
[
3
]
->
v_sig
,
pj
[
2
]
->
v_sig
,
pj
[
1
]
->
v_sig
,
pj
[
0
]
->
v_sig
);
for
(
k
=
0
;
k
<
3
;
k
++
)
{
vi
[
k
].
v
=
_mm256_set_ps
(
pi
[
7
]
->
v
[
k
]
,
pi
[
6
]
->
v
[
k
]
,
pi
[
5
]
->
v
[
k
]
,
pi
[
4
]
->
v
[
k
]
,
pi
[
3
]
->
v
[
k
]
,
pi
[
2
]
->
v
[
k
]
,
pi
[
1
]
->
v
[
k
]
,
pi
[
0
]
->
v
[
k
]
);
vj
[
k
].
v
=
_mm256_set_ps
(
pj
[
7
]
->
v
[
k
]
,
pj
[
6
]
->
v
[
k
]
,
pj
[
5
]
->
v
[
k
]
,
pj
[
4
]
->
v
[
k
]
,
pj
[
3
]
->
v
[
k
]
,
pj
[
2
]
->
v
[
k
]
,
pj
[
1
]
->
v
[
k
]
,
pj
[
0
]
->
v
[
k
]
);
...
...
@@ -427,6 +432,10 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
pjPOrho2
.
v
=
_mm_set_ps
(
pj
[
3
]
->
POrho2
,
pj
[
2
]
->
POrho2
,
pj
[
1
]
->
POrho2
,
pj
[
0
]
->
POrho2
);
pirho
.
v
=
_mm_set_ps
(
pi
[
3
]
->
rho
,
pi
[
2
]
->
rho
,
pi
[
1
]
->
rho
,
pi
[
0
]
->
rho
);
pjrho
.
v
=
_mm_set_ps
(
pj
[
3
]
->
rho
,
pj
[
2
]
->
rho
,
pj
[
1
]
->
rho
,
pj
[
0
]
->
rho
);
ci
.
v
=
_mm_set_ps
(
pi
[
3
]
->
c
,
pi
[
2
]
->
c
,
pi
[
1
]
->
c
,
pi
[
0
]
->
c
);
cj
.
v
=
_mm_set_ps
(
pj
[
3
]
->
c
,
pj
[
2
]
->
c
,
pj
[
1
]
->
c
,
pj
[
0
]
->
c
);
vi_sig
.
v
=
_mm_set_ps
(
pi
[
3
]
->
v_sig
,
pi
[
2
]
->
v_sig
,
pi
[
1
]
->
v_sig
,
pi
[
0
]
->
v_sig
);
vj_sig
.
v
=
_mm_set_ps
(
pj
[
3
]
->
v_sig
,
pj
[
2
]
->
v_sig
,
pj
[
1
]
->
v_sig
,
pj
[
0
]
->
v_sig
);
for
(
k
=
0
;
k
<
3
;
k
++
)
{
vi
[
k
].
v
=
_mm_set_ps
(
pi
[
3
]
->
v
[
k
]
,
pi
[
2
]
->
v
[
k
]
,
pi
[
1
]
->
v
[
k
]
,
pi
[
0
]
->
v
[
k
]
);
vj
[
k
].
v
=
_mm_set_ps
(
pj
[
3
]
->
v
[
k
]
,
pj
[
2
]
->
v
[
k
]
,
pj
[
1
]
->
v
[
k
]
,
pj
[
0
]
->
v
[
k
]
);
...
...
@@ -485,12 +494,18 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
pih_dt
.
v
=
mj
.
v
/
pjrho
.
v
*
dvdr
.
v
*
wi_dr
.
v
;
pjh_dt
.
v
=
mi
.
v
/
pirho
.
v
*
dvdr
.
v
*
wj_dr
.
v
;
/* compute the signal velocity (this is always symmetrical). */
vi_sig
.
v
=
vec_fmax
(
vi_sig
.
v
,
cj
.
v
-
3
.
0
f
*
dvdr
.
v
);
vj_sig
.
v
=
vec_fmax
(
vj_sig
.
v
,
ci
.
v
-
3
.
0
f
*
dvdr
.
v
);
/* Store the forces back on the particles. */
for
(
k
=
0
;
k
<
VEC_SIZE
;
k
++
)
{
pi
[
k
]
->
u_dt
+=
piu_dt
.
f
[
k
];
pj
[
k
]
->
u_dt
+=
pju_dt
.
f
[
k
];
pi
[
k
]
->
h_dt
-=
pih_dt
.
f
[
k
];
pj
[
k
]
->
h_dt
-=
pjh_dt
.
f
[
k
];
pi
[
k
]
->
v_sig
=
vi_sig
.
f
[
k
];
pj
[
k
]
->
v_sig
=
vj_sig
.
f
[
k
];
for
(
j
=
0
;
j
<
3
;
j
++
)
{
pi
[
k
]
->
a
[
j
]
-=
pia
[
j
].
f
[
k
];
pj
[
k
]
->
a
[
j
]
+=
pja
[
j
].
f
[
k
];
...
...
@@ -577,6 +592,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
vector
pia
[
3
];
vector
piu_dt
;
vector
pih_dt
;
vector
ci
,
cj
,
vi_sig
,
vj_sig
;
int
j
,
k
;
/* Load stuff. */
...
...
@@ -585,6 +601,10 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
piPOrho2
.
v
=
_mm256_set_ps
(
pi
[
7
]
->
POrho2
,
pi
[
6
]
->
POrho2
,
pi
[
5
]
->
POrho2
,
pi
[
4
]
->
POrho2
,
pi
[
3
]
->
POrho2
,
pi
[
2
]
->
POrho2
,
pi
[
1
]
->
POrho2
,
pi
[
0
]
->
POrho2
);
pjPOrho2
.
v
=
_mm256_set_ps
(
pj
[
7
]
->
POrho2
,
pj
[
6
]
->
POrho2
,
pj
[
5
]
->
POrho2
,
pj
[
4
]
->
POrho2
,
pj
[
3
]
->
POrho2
,
pj
[
2
]
->
POrho2
,
pj
[
1
]
->
POrho2
,
pj
[
0
]
->
POrho2
);
pjrho
.
v
=
_mm256_set_ps
(
pj
[
7
]
->
rho
,
pj
[
6
]
->
rho
,
pj
[
5
]
->
rho
,
pj
[
4
]
->
rho
,
pj
[
3
]
->
rho
,
pj
[
2
]
->
rho
,
pj
[
1
]
->
rho
,
pj
[
0
]
->
rho
);
ci
.
v
=
_mm256_set_ps
(
pi
[
7
]
->
c
,
pi
[
6
]
->
c
,
pi
[
5
]
->
c
,
pi
[
4
]
->
c
,
pi
[
3
]
->
c
,
pi
[
2
]
->
c
,
pi
[
1
]
->
c
,
pi
[
0
]
->
c
);
cj
.
v
=
_mm256_set_ps
(
pj
[
7
]
->
c
,
pj
[
6
]
->
c
,
pj
[
5
]
->
c
,
pj
[
4
]
->
c
,
pj
[
3
]
->
c
,
pj
[
2
]
->
c
,
pj
[
1
]
->
c
,
pj
[
0
]
->
c
);
vi_sig
.
v
=
_mm256_set_ps
(
pi
[
7
]
->
v_sig
,
pi
[
6
]
->
v_sig
,
pi
[
5
]
->
v_sig
,
pi
[
4
]
->
v_sig
,
pi
[
3
]
->
v_sig
,
pi
[
2
]
->
v_sig
,
pi
[
1
]
->
v_sig
,
pi
[
0
]
->
v_sig
);
vj_sig
.
v
=
_mm256_set_ps
(
pj
[
7
]
->
v_sig
,
pj
[
6
]
->
v_sig
,
pj
[
5
]
->
v_sig
,
pj
[
4
]
->
v_sig
,
pj
[
3
]
->
v_sig
,
pj
[
2
]
->
v_sig
,
pj
[
1
]
->
v_sig
,
pj
[
0
]
->
v_sig
);
for
(
k
=
0
;
k
<
3
;
k
++
)
{
vi
[
k
].
v
=
_mm256_set_ps
(
pi
[
7
]
->
v
[
k
]
,
pi
[
6
]
->
v
[
k
]
,
pi
[
5
]
->
v
[
k
]
,
pi
[
4
]
->
v
[
k
]
,
pi
[
3
]
->
v
[
k
]
,
pi
[
2
]
->
v
[
k
]
,
pi
[
1
]
->
v
[
k
]
,
pi
[
0
]
->
v
[
k
]
);
vj
[
k
].
v
=
_mm256_set_ps
(
pj
[
7
]
->
v
[
k
]
,
pj
[
6
]
->
v
[
k
]
,
pj
[
5
]
->
v
[
k
]
,
pj
[
4
]
->
v
[
k
]
,
pj
[
3
]
->
v
[
k
]
,
pj
[
2
]
->
v
[
k
]
,
pj
[
1
]
->
v
[
k
]
,
pj
[
0
]
->
v
[
k
]
);
...
...
@@ -596,6 +616,10 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
piPOrho2
.
v
=
_mm_set_ps
(
pi
[
3
]
->
POrho2
,
pi
[
2
]
->
POrho2
,
pi
[
1
]
->
POrho2
,
pi
[
0
]
->
POrho2
);
pjPOrho2
.
v
=
_mm_set_ps
(
pj
[
3
]
->
POrho2
,
pj
[
2
]
->
POrho2
,
pj
[
1
]
->
POrho2
,
pj
[
0
]
->
POrho2
);
pjrho
.
v
=
_mm_set_ps
(
pj
[
3
]
->
rho
,
pj
[
2
]
->
rho
,
pj
[
1
]
->
rho
,
pj
[
0
]
->
rho
);
ci
.
v
=
_mm_set_ps
(
pi
[
3
]
->
c
,
pi
[
2
]
->
c
,
pi
[
1
]
->
c
,
pi
[
0
]
->
c
);
cj
.
v
=
_mm_set_ps
(
pj
[
3
]
->
c
,
pj
[
2
]
->
c
,
pj
[
1
]
->
c
,
pj
[
0
]
->
c
);
vi_sig
.
v
=
_mm_set_ps
(
pi
[
3
]
->
v_sig
,
pi
[
2
]
->
v_sig
,
pi
[
1
]
->
v_sig
,
pi
[
0
]
->
v_sig
);
vj_sig
.
v
=
_mm_set_ps
(
pj
[
3
]
->
v_sig
,
pj
[
2
]
->
v_sig
,
pj
[
1
]
->
v_sig
,
pj
[
0
]
->
v_sig
);
for
(
k
=
0
;
k
<
3
;
k
++
)
{
vi
[
k
].
v
=
_mm_set_ps
(
pi
[
3
]
->
v
[
k
]
,
pi
[
2
]
->
v
[
k
]
,
pi
[
1
]
->
v
[
k
]
,
pi
[
0
]
->
v
[
k
]
);
vj
[
k
].
v
=
_mm_set_ps
(
pj
[
3
]
->
v
[
k
]
,
pj
[
2
]
->
v
[
k
]
,
pj
[
1
]
->
v
[
k
]
,
pj
[
0
]
->
v
[
k
]
);
...
...
@@ -651,10 +675,16 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
/* Get the time derivative for h. */
pih_dt
.
v
=
mj
.
v
/
pjrho
.
v
*
dvdr
.
v
*
wi_dr
.
v
;
/* compute the signal velocity (this is always symmetrical). */
vi_sig
.
v
=
vec_fmax
(
vi_sig
.
v
,
cj
.
v
-
3
.
0
f
*
dvdr
.
v
);
vj_sig
.
v
=
vec_fmax
(
vj_sig
.
v
,
ci
.
v
-
3
.
0
f
*
dvdr
.
v
);
/* Store the forces back on the particles. */
for
(
k
=
0
;
k
<
VEC_SIZE
;
k
++
)
{
pi
[
k
]
->
u_dt
+=
piu_dt
.
f
[
k
];
pi
[
k
]
->
h_dt
-=
pih_dt
.
f
[
k
];
pi
[
k
]
->
v_sig
=
vi_sig
.
f
[
k
];
pj
[
k
]
->
v_sig
=
vj_sig
.
f
[
k
];
for
(
j
=
0
;
j
<
3
;
j
++
)
pi
[
k
]
->
a
[
j
]
-=
pia
[
j
].
f
[
k
];
}
...
...
src/vector.h
View file @
d86fc798
...
...
@@ -27,7 +27,7 @@
#define VEC_MACRO(elcount, type) __attribute__((vector_size((elcount)*sizeof(type)))) type
/* So what will the vector size be? */
#ifdef
NO
__AVX__
#ifdef __AVX__
#define VECTORIZE
#define VEC_SIZE 8
#define VEC_FLOAT __m256
...
...
@@ -39,7 +39,8 @@
#define vec_rsqrt(a) _mm256_rsqrt_ps(a)
#define vec_ftoi(a) _mm256_cvttps_epi32(a)
#define vec_fmin(a,b) _mm256_min_ps(a,b)
#elif defined( NO__SSE2__ )
#define vec_fmax(a,b) _mm256_max_ps(a,b)
#elif defined( __SSE2__ )
#define VECTORIZE
#define VEC_SIZE 4
#define VEC_FLOAT __m128
...
...
@@ -51,6 +52,7 @@
#define vec_rsqrt(a) _mm_rsqrt_ps(a)
#define vec_ftoi(a) _mm_cvttps_epi32(a)
#define vec_fmin(a,b) _mm_min_ps(a,b)
#define vec_fmax(a,b) _mm_max_ps(a,b)
#else
#define VEC_SIZE 4
#endif
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment