Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
bebc572a
Commit
bebc572a
authored
Oct 24, 2017
by
James Willis
Browse files
Created an inline function to perform vector loads.
parent
ea3ac791
Changes
3
Hide whitespace changes
Inline
Side-by-side
src/hydro/Gadget2/hydro_iact.h
View file @
bebc572a
...
...
@@ -170,17 +170,15 @@ runner_iact_nonsym_1_vec_density(vector *r2, vector *dx, vector *dy, vector *dz,
mask_t
mask
)
{
vector
r
,
ri
,
ui
,
wi
,
wi_dx
;
vector
mj
;
vector
dvx
,
dvy
,
dvz
;
vector
vjx
,
vjy
,
vjz
;
vector
dvdr
;
vector
curlvrx
,
curlvry
,
curlvrz
;
/* Fill the vectors. */
mj
.
v
=
vec_load
(
Mj
);
vjx
.
v
=
vec_load
(
Vjx
);
vjy
.
v
=
vec_load
(
Vjy
);
vjz
.
v
=
vec_load
(
Vjz
);
const
vector
mj
=
vec
tor
_load
(
Mj
);
const
vector
vjx
=
vec
tor
_load
(
Vjx
);
const
vector
vjy
=
vec
tor
_load
(
Vjy
);
const
vector
vjz
=
vec
tor
_load
(
Vjz
);
/* Get the radius and inverse radius. */
ri
=
vec_reciprocal_sqrt
(
*
r2
);
...
...
@@ -245,38 +243,34 @@ runner_iact_nonsym_2_vec_density(float *R2, float *Dx, float *Dy, float *Dz,
vector
*
curlvySum
,
vector
*
curlvzSum
,
mask_t
mask
,
mask_t
mask2
,
short
mask_cond
)
{
vector
r
,
ri
,
r2
,
ui
,
wi
,
wi_dx
;
vector
mj
;
vector
dx
,
dy
,
dz
,
dvx
,
dvy
,
dvz
;
vector
vjx
,
vjy
,
vjz
;
vector
r
,
ri
,
ui
,
wi
,
wi_dx
;
vector
dvx
,
dvy
,
dvz
;
vector
dvdr
;
vector
curlvrx
,
curlvry
,
curlvrz
;
vector
r_2
,
ri2
,
r2_2
,
ui2
,
wi2
,
wi_dx2
;
vector
mj2
;
vector
dx2
,
dy2
,
dz2
,
dvx2
,
dvy2
,
dvz2
;
vector
vjx2
,
vjy2
,
vjz2
;
vector
r_2
,
ri2
,
ui2
,
wi2
,
wi_dx2
;
vector
dvx2
,
dvy2
,
dvz2
;
vector
dvdr2
;
vector
curlvrx2
,
curlvry2
,
curlvrz2
;
/* Fill the vectors. */
mj
.
v
=
vec_load
(
Mj
);
mj2
.
v
=
vec_load
(
&
Mj
[
VEC_SIZE
]);
vjx
.
v
=
vec_load
(
Vjx
);
vjx2
.
v
=
vec_load
(
&
Vjx
[
VEC_SIZE
]);
vjy
.
v
=
vec_load
(
Vjy
);
vjy2
.
v
=
vec_load
(
&
Vjy
[
VEC_SIZE
]);
vjz
.
v
=
vec_load
(
Vjz
);
vjz2
.
v
=
vec_load
(
&
Vjz
[
VEC_SIZE
]);
dx
.
v
=
vec_load
(
Dx
);
dx2
.
v
=
vec_load
(
&
Dx
[
VEC_SIZE
]);
dy
.
v
=
vec_load
(
Dy
);
dy2
.
v
=
vec_load
(
&
Dy
[
VEC_SIZE
]);
dz
.
v
=
vec_load
(
Dz
);
dz2
.
v
=
vec_load
(
&
Dz
[
VEC_SIZE
]);
const
vector
mj
=
vec
tor
_load
(
Mj
);
const
vector
mj2
=
vec
tor
_load
(
&
Mj
[
VEC_SIZE
]);
const
vector
vjx
=
vec
tor
_load
(
Vjx
);
const
vector
vjx2
=
vec
tor
_load
(
&
Vjx
[
VEC_SIZE
]);
const
vector
vjy
=
vec
tor
_load
(
Vjy
);
const
vector
vjy2
=
vec
tor
_load
(
&
Vjy
[
VEC_SIZE
]);
const
vector
vjz
=
vec
tor
_load
(
Vjz
);
const
vector
vjz2
=
vec
tor
_load
(
&
Vjz
[
VEC_SIZE
]);
const
vector
dx
=
vec
tor
_load
(
Dx
);
const
vector
dx2
=
vec
tor
_load
(
&
Dx
[
VEC_SIZE
]);
const
vector
dy
=
vec
tor
_load
(
Dy
);
const
vector
dy2
=
vec
tor
_load
(
&
Dy
[
VEC_SIZE
]);
const
vector
dz
=
vec
tor
_load
(
Dz
);
const
vector
dz2
=
vec
tor
_load
(
&
Dz
[
VEC_SIZE
]);
/* Get the radius and inverse radius. */
r2
.
v
=
vec_load
(
R2
);
r2_2
.
v
=
vec_load
(
&
R2
[
VEC_SIZE
]);
const
vector
r2
=
vec
tor
_load
(
R2
);
const
vector
r2_2
=
vec
tor
_load
(
&
R2
[
VEC_SIZE
]);
ri
=
vec_reciprocal_sqrt
(
r2
);
ri2
=
vec_reciprocal_sqrt
(
r2_2
);
r
.
v
=
vec_mul
(
r2
.
v
,
ri
.
v
);
...
...
@@ -592,8 +586,7 @@ runner_iact_nonsym_1_vec_force(
#ifdef WITH_VECTORIZATION
vector
r
,
ri
;
vector
vjx
,
vjy
,
vjz
,
dvx
,
dvy
,
dvz
;
vector
pjrho
,
grad_hj
,
pjPOrho2
,
balsara_j
,
cj
,
mj
;
vector
dvx
,
dvy
,
dvz
;
vector
xi
,
xj
;
vector
hid_inv
,
hjd_inv
;
vector
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
dvdr
;
...
...
@@ -604,16 +597,15 @@ runner_iact_nonsym_1_vec_force(
vector
rho_ij
,
visc
,
visc_term
,
sph_term
,
acc
,
entropy_dt
;
/* Fill vectors. */
vjx
.
v
=
vec_load
(
Vjx
);
vjy
.
v
=
vec_load
(
Vjy
);
vjz
.
v
=
vec_load
(
Vjz
);
mj
.
v
=
vec_load
(
Mj
);
pjrho
.
v
=
vec_load
(
Pjrho
);
grad_hj
.
v
=
vec_load
(
Grad_hj
);
pjPOrho2
.
v
=
vec_load
(
PjPOrho2
);
balsara_j
.
v
=
vec_load
(
Balsara_j
);
cj
.
v
=
vec_load
(
Cj
);
const
vector
vjx
=
vector_load
(
Vjx
);
const
vector
vjy
=
vector_load
(
Vjy
);
const
vector
vjz
=
vector_load
(
Vjz
);
const
vector
mj
=
vector_load
(
Mj
);
const
vector
pjrho
=
vector_load
(
Pjrho
);
const
vector
grad_hj
=
vector_load
(
Grad_hj
);
const
vector
pjPOrho2
=
vector_load
(
PjPOrho2
);
const
vector
balsara_j
=
vector_load
(
Balsara_j
);
const
vector
cj
=
vector_load
(
Cj
);
fac_mu
.
v
=
vec_set1
(
1
.
f
);
/* Will change with cosmological integration */
...
...
@@ -720,10 +712,8 @@ runner_iact_nonsym_2_vec_force(
#ifdef WITH_VECTORIZATION
vector
r
,
r2
,
ri
;
vector
dx
,
dy
,
dz
,
dvx
,
dvy
,
dvz
;
vector
vjx
,
vjy
,
vjz
;
vector
pjrho
,
grad_hj
,
pjPOrho2
,
balsara_j
,
cj
,
mj
,
hj_inv
;
vector
r
,
ri
;
vector
dvx
,
dvy
,
dvz
;
vector
ui
,
uj
;
vector
hid_inv
,
hjd_inv
;
vector
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
dvdr
;
...
...
@@ -733,10 +723,8 @@ runner_iact_nonsym_2_vec_force(
vector
omega_ij
,
mu_ij
,
fac_mu
,
balsara
;
vector
rho_ij
,
visc
,
visc_term
,
sph_term
,
acc
,
entropy_dt
;
vector
r_2
,
r2_2
,
ri_2
;
vector
dx_2
,
dy_2
,
dz_2
,
dvx_2
,
dvy_2
,
dvz_2
;
vector
vjx_2
,
vjy_2
,
vjz_2
;
vector
pjrho_2
,
grad_hj_2
,
pjPOrho2_2
,
balsara_j_2
,
cj_2
,
mj_2
,
hj_inv_2
;
vector
r_2
,
ri_2
;
vector
dvx_2
,
dvy_2
,
dvz_2
;
vector
ui_2
,
uj_2
;
vector
hjd_inv_2
;
vector
wi_dx_2
,
wj_dx_2
,
wi_dr_2
,
wj_dr_2
,
dvdr_2
;
...
...
@@ -747,42 +735,42 @@ runner_iact_nonsym_2_vec_force(
vector
rho_ij_2
,
visc_2
,
visc_term_2
,
sph_term_2
,
acc_2
,
entropy_dt_2
;
/* Fill vectors. */
mj
.
v
=
vec_load
(
Mj
);
mj_2
.
v
=
vec_load
(
&
Mj
[
VEC_SIZE
]);
vjx
.
v
=
vec_load
(
Vjx
);
vjx_2
.
v
=
vec_load
(
&
Vjx
[
VEC_SIZE
]);
vjy
.
v
=
vec_load
(
Vjy
);
vjy_2
.
v
=
vec_load
(
&
Vjy
[
VEC_SIZE
]);
vjz
.
v
=
vec_load
(
Vjz
);
vjz_2
.
v
=
vec_load
(
&
Vjz
[
VEC_SIZE
]);
dx
.
v
=
vec_load
(
Dx
);
dx_2
.
v
=
vec_load
(
&
Dx
[
VEC_SIZE
]);
dy
.
v
=
vec_load
(
Dy
);
dy_2
.
v
=
vec_load
(
&
Dy
[
VEC_SIZE
]);
dz
.
v
=
vec_load
(
Dz
);
dz_2
.
v
=
vec_load
(
&
Dz
[
VEC_SIZE
]);
const
vector
mj
=
vec
tor
_load
(
Mj
);
const
vector
mj_2
=
vec
tor
_load
(
&
Mj
[
VEC_SIZE
]);
const
vector
vjx
=
vec
tor
_load
(
Vjx
);
const
vector
vjx_2
=
vec
tor
_load
(
&
Vjx
[
VEC_SIZE
]);
const
vector
vjy
=
vec
tor
_load
(
Vjy
);
const
vector
vjy_2
=
vec
tor
_load
(
&
Vjy
[
VEC_SIZE
]);
const
vector
vjz
=
vec
tor
_load
(
Vjz
);
const
vector
vjz_2
=
vec
tor
_load
(
&
Vjz
[
VEC_SIZE
]);
const
vector
dx
=
vec
tor
_load
(
Dx
);
const
vector
dx_2
=
vec
tor
_load
(
&
Dx
[
VEC_SIZE
]);
const
vector
dy
=
vec
tor
_load
(
Dy
);
const
vector
dy_2
=
vec
tor
_load
(
&
Dy
[
VEC_SIZE
]);
const
vector
dz
=
vec
tor
_load
(
Dz
);
const
vector
dz_2
=
vec
tor
_load
(
&
Dz
[
VEC_SIZE
]);
/* Get the radius and inverse radius. */
r2
.
v
=
vec_load
(
R2
);
r2_2
.
v
=
vec_load
(
&
R2
[
VEC_SIZE
]);
const
vector
r2
=
vec
tor
_load
(
R2
);
const
vector
r2_2
=
vec
tor
_load
(
&
R2
[
VEC_SIZE
]);
ri
=
vec_reciprocal_sqrt
(
r2
);
ri_2
=
vec_reciprocal_sqrt
(
r2_2
);
r
.
v
=
vec_mul
(
r2
.
v
,
ri
.
v
);
r_2
.
v
=
vec_mul
(
r2_2
.
v
,
ri_2
.
v
);
/* Get remaining properties. */
pjrho
.
v
=
vec_load
(
Pjrho
);
pjrho_2
.
v
=
vec_load
(
&
Pjrho
[
VEC_SIZE
]);
grad_hj
.
v
=
vec_load
(
Grad_hj
);
grad_hj_2
.
v
=
vec_load
(
&
Grad_hj
[
VEC_SIZE
]);
pjPOrho2
.
v
=
vec_load
(
PjPOrho2
);
pjPOrho2_2
.
v
=
vec_load
(
&
PjPOrho2
[
VEC_SIZE
]);
balsara_j
.
v
=
vec_load
(
Balsara_j
);
balsara_j_2
.
v
=
vec_load
(
&
Balsara_j
[
VEC_SIZE
]);
c
j
.
v
=
vec_load
(
Cj
);
cj_2
.
v
=
vec_load
(
&
Cj
[
VEC_SIZE
]);
hj_inv
.
v
=
vec_load
(
Hj_inv
);
hj_inv_2
.
v
=
vec_load
(
&
Hj_inv
[
VEC_SIZE
]);
const
vector
pjrho
=
vec
tor
_load
(
Pjrho
);
const
vector
pjrho_2
=
vec
tor
_load
(
&
Pjrho
[
VEC_SIZE
]);
const
vector
grad_hj
=
vec
tor
_load
(
Grad_hj
);
const
vector
grad_hj_2
=
vec
tor
_load
(
&
Grad_hj
[
VEC_SIZE
]);
const
vector
pjPOrho2
=
vec
tor
_load
(
PjPOrho2
);
const
vector
pjPOrho2_2
=
vec
tor
_load
(
&
PjPOrho2
[
VEC_SIZE
]);
const
vector
balsara_j
=
vec
tor
_load
(
Balsara_j
);
const
vector
balsara_j_2
=
vec
tor
_load
(
&
Balsara_j
[
VEC_SIZE
]);
c
onst
vector
cj
=
vec
tor
_load
(
Cj
);
const
vector
cj_2
=
vec
tor
_load
(
&
Cj
[
VEC_SIZE
]);
const
vector
hj_inv
=
vec
tor
_load
(
Hj_inv
);
const
vector
hj_inv_2
=
vec
tor
_load
(
&
Hj_inv
[
VEC_SIZE
]);
fac_mu
.
v
=
vec_set1
(
1
.
f
);
/* Will change with cosmological integration */
...
...
src/runner_doiact_vec.c
View file @
bebc572a
...
...
@@ -615,21 +615,18 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
}
}
vector
v_pjx
,
v_pjy
,
v_pjz
;
vector
v_pjx2
,
v_pjy2
,
v_pjz2
;
/* Find all of particle pi's interacions and store needed values in the
* secondary cache.*/
for
(
int
pjd
=
0
;
pjd
<
count_align
;
pjd
+=
(
num_vec_proc
*
VEC_SIZE
))
{
/* Load 2 sets of vectors from the particle cache. */
v_pjx
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
]);
v_pjy
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
]);
v_pjz
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
]);
const
vector
v_pjx
=
vec
tor
_load
(
&
cell_cache
->
x
[
pjd
]);
const
vector
v_pjy
=
vec
tor
_load
(
&
cell_cache
->
y
[
pjd
]);
const
vector
v_pjz
=
vec
tor
_load
(
&
cell_cache
->
z
[
pjd
]);
v_pjx2
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
+
VEC_SIZE
]);
v_pjy2
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
+
VEC_SIZE
]);
v_pjz2
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
+
VEC_SIZE
]);
const
vector
v_pjx2
=
vec
tor
_load
(
&
cell_cache
->
x
[
pjd
+
VEC_SIZE
]);
const
vector
v_pjy2
=
vec
tor
_load
(
&
cell_cache
->
y
[
pjd
+
VEC_SIZE
]);
const
vector
v_pjz2
=
vec
tor
_load
(
&
cell_cache
->
z
[
pjd
+
VEC_SIZE
]);
/* Compute the pairwise distance. */
vector
v_dx
,
v_dy
,
v_dz
;
...
...
@@ -848,11 +845,11 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
for
(
int
pjd
=
0
;
pjd
<
count_align
;
pjd
+=
(
num_vec_proc
*
VEC_SIZE
))
{
/* Load 1 set of vectors from the particle cache. */
vector
v_pjx
,
v_pjy
,
v_pjz
,
hj
,
hjg2
;
v_pjx
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
]);
v_pjy
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
]);
v_pjz
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
]);
hj
.
v
=
vec_load
(
&
cell_cache
->
h
[
pjd
]);
vector
hjg2
;
const
vector
v_pjx
=
vec
tor
_load
(
&
cell_cache
->
x
[
pjd
]);
const
vector
v_pjy
=
vec
tor
_load
(
&
cell_cache
->
y
[
pjd
]);
const
vector
v_pjz
=
vec
tor
_load
(
&
cell_cache
->
z
[
pjd
]);
const
vector
hj
=
vec
tor
_load
(
&
cell_cache
->
h
[
pjd
]);
hjg2
.
v
=
vec_mul
(
vec_mul
(
hj
.
v
,
hj
.
v
),
kernel_gamma2_vec
.
v
);
/* Compute the pairwise distance. */
...
...
@@ -1094,7 +1091,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
/* Get the cache index to the jth particle. */
const
int
cj_cache_idx
=
pjd
;
vector
v_pjx
,
v_pjy
,
v_pjz
;
vector
v_dx
,
v_dy
,
v_dz
,
v_r2
;
#ifdef SWIFT_DEBUG_CHECKS
...
...
@@ -1106,9 +1102,9 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
#endif
/* Load 2 sets of vectors from the particle cache. */
v_pjx
.
v
=
vec_load
(
&
cj_cache
->
x
[
cj_cache_idx
]);
v_pjy
.
v
=
vec_load
(
&
cj_cache
->
y
[
cj_cache_idx
]);
v_pjz
.
v
=
vec_load
(
&
cj_cache
->
z
[
cj_cache_idx
]);
const
vector
v_pjx
=
vec
tor
_load
(
&
cj_cache
->
x
[
cj_cache_idx
]);
const
vector
v_pjy
=
vec
tor
_load
(
&
cj_cache
->
y
[
cj_cache_idx
]);
const
vector
v_pjz
=
vec
tor
_load
(
&
cj_cache
->
z
[
cj_cache_idx
]);
/* Compute the pairwise distance. */
v_dx
.
v
=
vec_sub
(
v_pix
.
v
,
v_pjx
.
v
);
...
...
@@ -1224,13 +1220,12 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
}
#endif
vector
v_pix
,
v_piy
,
v_piz
;
vector
v_dx
,
v_dy
,
v_dz
,
v_r2
;
/* Load 2 sets of vectors from the particle cache. */
v_pix
.
v
=
vec_load
(
&
ci_cache
->
x
[
ci_cache_idx
]);
v_piy
.
v
=
vec_load
(
&
ci_cache
->
y
[
ci_cache_idx
]);
v_piz
.
v
=
vec_load
(
&
ci_cache
->
z
[
ci_cache_idx
]);
const
vector
v_pix
=
vec
tor
_load
(
&
ci_cache
->
x
[
ci_cache_idx
]);
const
vector
v_piy
=
vec
tor
_load
(
&
ci_cache
->
y
[
ci_cache_idx
]);
const
vector
v_piz
=
vec
tor
_load
(
&
ci_cache
->
z
[
ci_cache_idx
]);
/* Compute the pairwise distance. */
v_dx
.
v
=
vec_sub
(
v_pjx
.
v
,
v_pix
.
v
);
...
...
@@ -1469,8 +1464,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
/* Get the cache index to the jth particle. */
const
int
cj_cache_idx
=
pjd
;
vector
v_dx
,
v_dy
,
v_dz
;
vector
v_
pjx
,
v_pjy
,
v_pjz
,
v_hj
,
v_hjg2
,
v_r
2
;
vector
v_dx
,
v_dy
,
v_dz
,
v_r2
;
vector
v_
hjg
2
;
#ifdef SWIFT_DEBUG_CHECKS
if
(
cj_cache_idx
%
VEC_SIZE
!=
0
||
cj_cache_idx
<
0
||
...
...
@@ -1481,10 +1476,10 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
#endif
/* Load 2 sets of vectors from the particle cache. */
v_pjx
.
v
=
vec_load
(
&
cj_cache
->
x
[
cj_cache_idx
]);
v_pjy
.
v
=
vec_load
(
&
cj_cache
->
y
[
cj_cache_idx
]);
v_pjz
.
v
=
vec_load
(
&
cj_cache
->
z
[
cj_cache_idx
]);
v_hj
.
v
=
vec_load
(
&
cj_cache
->
h
[
cj_cache_idx
]);
const
vector
v_pjx
=
vec
tor
_load
(
&
cj_cache
->
x
[
cj_cache_idx
]);
const
vector
v_pjy
=
vec
tor
_load
(
&
cj_cache
->
y
[
cj_cache_idx
]);
const
vector
v_pjz
=
vec
tor
_load
(
&
cj_cache
->
z
[
cj_cache_idx
]);
const
vector
v_hj
=
vec
tor
_load
(
&
cj_cache
->
h
[
cj_cache_idx
]);
v_hjg2
.
v
=
vec_mul
(
vec_mul
(
v_hj
.
v
,
v_hj
.
v
),
kernel_gamma2_vec
.
v
);
/* Compute the pairwise distance. */
...
...
@@ -1610,14 +1605,14 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
}
#endif
vector
v_pix
,
v_piy
,
v_piz
,
v_hi
,
v_hig2
;
vector
v_hig2
;
vector
v_dx
,
v_dy
,
v_dz
,
v_r2
;
/* Load 2 sets of vectors from the particle cache. */
v_pix
.
v
=
vec_load
(
&
ci_cache
->
x
[
ci_cache_idx
]);
v_piy
.
v
=
vec_load
(
&
ci_cache
->
y
[
ci_cache_idx
]);
v_piz
.
v
=
vec_load
(
&
ci_cache
->
z
[
ci_cache_idx
]);
v_hi
.
v
=
vec_load
(
&
ci_cache
->
h
[
ci_cache_idx
]);
const
vector
v_pix
=
vec
tor
_load
(
&
ci_cache
->
x
[
ci_cache_idx
]);
const
vector
v_piy
=
vec
tor
_load
(
&
ci_cache
->
y
[
ci_cache_idx
]);
const
vector
v_piz
=
vec
tor
_load
(
&
ci_cache
->
z
[
ci_cache_idx
]);
const
vector
v_hi
=
vec
tor
_load
(
&
ci_cache
->
h
[
ci_cache_idx
]);
v_hig2
.
v
=
vec_mul
(
vec_mul
(
v_hi
.
v
,
v_hi
.
v
),
kernel_gamma2_vec
.
v
);
/* Compute the pairwise distance. */
...
...
src/vector.h
View file @
bebc572a
...
...
@@ -431,6 +431,21 @@ __attribute__((always_inline)) INLINE vector vec_reciprocal_sqrt(vector x) {
return
x_inv
;
}
/**
* @brief Loads a vector from memory.
*
* @param *x memory to load from.
* @return temp loaded #vector.
*/
__attribute__
((
always_inline
))
INLINE
vector
vector_load
(
float
*
const
x
)
{
vector
temp
;
temp
.
v
=
vec_load
(
x
);
return
temp
;
}
#else
/* Needed for cache alignment. */
#define VEC_SIZE 8
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment