Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
072b0d3f
Commit
072b0d3f
authored
Mar 18, 2013
by
Pedro Gonnet
Browse files
tighten up code for integrator kick.
Former-commit-id: 52ee3b0259133472f476542be1b3461d1c9a9e12
parent
58a9668e
Changes
5
Hide whitespace changes
Inline
Side-by-side
src/debug.c
View file @
072b0d3f
...
...
@@ -40,27 +40,28 @@ void printParticle ( struct part *parts , long long int id, int N ) {
/* Look for the particle. */
for
(
i
=
0
;
i
<
N
&&
parts
[
i
].
id
!=
id
;
i
++
);
if
(
i
<
N
)
printf
(
"## Particle[%d]: id=%lld, x=[%
.3
e,%
.3
e,%
.3
e], v=[%.3e,%.3e,%.3e], a=[%.3e,%.3e,%.3e], h=%.3e, h_dt=%.3e, wcount=%.3e, m=%.3e, rho=%.3e, rho_dh=%.3e, div_v=%.3e, u=%.3e, dudt=%.3e, bals=%.3e, POrho2=%.3e, v_sig=%.3e, dt=%.3e
\n
"
,
i
,
parts
[
i
].
id
,
parts
[
i
].
x
[
0
],
parts
[
i
].
x
[
1
],
parts
[
i
].
x
[
2
],
parts
[
i
].
v
[
0
],
parts
[
i
].
v
[
1
],
parts
[
i
].
v
[
2
],
parts
[
i
].
a
[
0
],
parts
[
i
].
a
[
1
],
parts
[
i
].
a
[
2
],
parts
[
i
].
h
,
parts
[
i
].
force
.
h_dt
,
parts
[
i
].
wcount
,
parts
[
i
].
mass
,
parts
[
i
].
rho
,
parts
[
i
].
rho_dh
,
parts
[
i
].
density
.
div_v
,
parts
[
i
].
u
,
parts
[
i
].
force
.
u_dt
,
parts
[
i
].
force
.
balsara
,
parts
[
i
].
force
.
POrho2
,
parts
[
i
].
force
.
v_sig
,
parts
[
i
].
dt
);
if
(
i
<
N
)
printf
(
"## Particle[%d]: id=%lld, x=[%e,%e,%e], v=[%.3e,%.3e,%.3e], a=[%.3e,%.3e,%.3e], h=%.3e, h_dt=%.3e, wcount=%.3e, m=%.3e, rho=%.3e, rho_dh=%.3e, div_v=%.3e, u=%.3e, dudt=%.3e, bals=%.3e, POrho2=%.3e, v_sig=%.3e, dt=%.3e
\n
"
,
i
,
parts
[
i
].
id
,
parts
[
i
].
x
[
0
],
parts
[
i
].
x
[
1
],
parts
[
i
].
x
[
2
],
parts
[
i
].
v
[
0
],
parts
[
i
].
v
[
1
],
parts
[
i
].
v
[
2
],
parts
[
i
].
a
[
0
],
parts
[
i
].
a
[
1
],
parts
[
i
].
a
[
2
],
parts
[
i
].
h
,
parts
[
i
].
force
.
h_dt
,
parts
[
i
].
wcount
,
parts
[
i
].
mass
,
parts
[
i
].
rho
,
parts
[
i
].
rho_dh
,
parts
[
i
].
density
.
div_v
,
parts
[
i
].
u
,
parts
[
i
].
force
.
u_dt
,
parts
[
i
].
force
.
balsara
,
parts
[
i
].
force
.
POrho2
,
parts
[
i
].
force
.
v_sig
,
parts
[
i
].
dt
);
else
printf
(
"## Particles[???] id=%lld not found
\n
"
,
id
);
}
printf
(
"## Particles[???] id=%lld not found
\n
"
,
id
);
}
src/engine.c
View file @
072b0d3f
...
...
@@ -163,8 +163,9 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
int
j
,
k
;
struct
engine
*
e
=
(
struct
engine
*
)
data
;
float
dt_step
=
e
->
dt_step
,
dt
=
e
->
dt
,
hdt
=
0
.
5
f
*
dt
;
float
dt_min
,
dt_max
,
h_max
,
dx_max
;
float
pdt
,
dt_step
=
e
->
dt_step
,
dt
=
e
->
dt
,
hdt
=
0
.
5
f
*
dt
;
float
dt_min
,
dt_max
,
h_max
,
dx_max
,
a
[
3
],
v
[
3
],
u
,
u_dt
,
h
,
h_dt
,
rho
,
v_old
[
3
];
double
x
[
3
],
x_old
[
3
];
struct
part
*
restrict
p
;
struct
xpart
*
restrict
xp
;
struct
cpart
*
restrict
cp
;
...
...
@@ -186,47 +187,59 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
xp
=
p
->
xtras
;
cp
=
&
c
->
cparts
[
k
];
/* Load the data locally. */
a
[
0
]
=
p
->
a
[
0
];
a
[
1
]
=
p
->
a
[
1
];
a
[
2
]
=
p
->
a
[
2
];
v
[
0
]
=
p
->
v
[
0
];
v
[
1
]
=
p
->
v
[
1
];
v
[
2
]
=
p
->
v
[
2
];
x
[
0
]
=
p
->
x
[
0
];
x
[
1
]
=
p
->
x
[
1
];
x
[
2
]
=
p
->
x
[
2
];
x_old
[
0
]
=
xp
->
x_old
[
0
];
x_old
[
1
]
=
xp
->
x_old
[
1
];
x_old
[
2
]
=
xp
->
x_old
[
2
];
h
=
p
->
h
;
u
=
p
->
u
;
h_dt
=
p
->
force
.
h_dt
;
u_dt
=
p
->
force
.
u_dt
;
pdt
=
p
->
dt
;
/* Store the min/max dt. */
dt_min
=
fminf
(
dt_min
,
p
->
dt
);
dt_max
=
fmaxf
(
dt_max
,
p
->
dt
);
dt_min
=
fminf
(
dt_min
,
pdt
);
dt_max
=
fmaxf
(
dt_max
,
pdt
);
/* Step and store the velocity and internal energy. */
xp
->
v_old
[
0
]
=
p
->
v
[
0
]
+
hdt
*
p
->
a
[
0
];
xp
->
v_old
[
1
]
=
p
->
v
[
1
]
+
hdt
*
p
->
a
[
1
];
xp
->
v_old
[
2
]
=
p
->
v
[
2
]
+
hdt
*
p
->
a
[
2
];
xp
->
v_old
[
0
]
=
v_old
[
0
]
=
v
[
0
]
+
hdt
*
a
[
0
];
xp
->
v_old
[
1
]
=
v_old
[
1
]
=
v
[
1
]
+
hdt
*
a
[
1
];
xp
->
v_old
[
2
]
=
v_old
[
2
]
=
v
[
2
]
+
hdt
*
a
[
2
];
xp
->
u_old
=
p
->
u
+
hdt
*
p
->
force
.
u_dt
;
/* Move the particles with the velocitie at the half-step. */
p
->
x
[
0
]
+=
dt
*
xp
->
v_old
[
0
];
p
->
x
[
1
]
+=
dt
*
xp
->
v_old
[
1
];
p
->
x
[
2
]
+=
dt
*
xp
->
v_old
[
2
];
dx_max
=
fmaxf
(
dx_max
,
sqrtf
(
(
p
->
x
[
0
]
-
xp
->
x_old
[
0
])
*
(
p
->
x
[
0
]
-
xp
->
x_old
[
0
])
+
(
p
->
x
[
1
]
-
xp
->
x_old
[
1
])
*
(
p
->
x
[
1
]
-
xp
->
x_old
[
1
])
+
(
p
->
x
[
2
]
-
xp
->
x_old
[
2
])
*
(
p
->
x
[
2
]
-
xp
->
x_old
[
2
])
)
*
2
+
p
->
h
);
p
->
x
[
0
]
=
x
[
0
]
+=
dt
*
v_old
[
0
];
p
->
x
[
1
]
=
x
[
1
]
+=
dt
*
v_old
[
1
];
p
->
x
[
2
]
=
x
[
2
]
+=
dt
*
v_old
[
2
];
dx_max
=
fmaxf
(
dx_max
,
sqrtf
(
(
x
[
0
]
-
x_old
[
0
])
*
(
x
[
0
]
-
x_old
[
0
])
+
(
x
[
1
]
-
x_old
[
1
])
*
(
x
[
1
]
-
x_old
[
1
])
+
(
x
[
2
]
-
x_old
[
2
])
*
(
x
[
2
]
-
x_old
[
2
])
)
*
2
+
h
);
/* Update positions and energies at the half-step. */
p
->
v
[
0
]
+=
dt
*
p
->
a
[
0
];
p
->
v
[
1
]
+=
dt
*
p
->
a
[
1
];
p
->
v
[
2
]
+=
dt
*
p
->
a
[
2
];
p
->
u
*
=
expf
(
p
->
force
.
u_dt
/
p
->
u
*
dt
)
;
p
->
h
*
=
expf
(
p
->
force
.
h_dt
/
p
->
h
*
dt
)
;
h_max
=
fmaxf
(
h_max
,
p
->
h
);
p
->
v
[
0
]
=
v
[
0
]
+=
dt
*
a
[
0
];
p
->
v
[
1
]
=
v
[
1
]
+=
dt
*
a
[
1
];
p
->
v
[
2
]
=
v
[
2
]
+=
dt
*
a
[
2
];
p
->
u
=
u
+=
u_dt
*
dt
;
p
->
h
=
h
+=
h_dt
*
dt
;
h_max
=
fmaxf
(
h_max
,
h
);
/* Integrate other values if this particle will not be updated. */
if
(
p
->
dt
>
dt_step
)
{
p
->
rho
*=
expf
(
-
3
.
0
f
*
p
->
force
.
h_dt
/
p
->
h
*
dt
);
p
->
force
.
POrho2
=
p
->
u
*
(
const_gamma
-
1
.
0
f
)
/
(
p
->
rho
+
p
->
h
*
p
->
rho_dh
/
3
.
0
f
);
}
/* Fill the cpart. */
cp
->
x
[
0
]
=
p
->
x
[
0
];
cp
->
x
[
1
]
=
p
->
x
[
1
];
cp
->
x
[
2
]
=
p
->
x
[
2
];
cp
->
h
=
p
->
h
;
cp
->
dt
=
p
->
dt
;
cp
->
x
[
0
]
=
x
[
0
];
cp
->
x
[
1
]
=
x
[
1
];
cp
->
x
[
2
]
=
x
[
2
];
cp
->
h
=
h
;
cp
->
dt
=
pdt
;
/* Integrate other values if this particle will not be updated. */
/* Init fields for density calculation. */
if
(
p
->
dt
<=
dt_step
)
{
if
(
pdt
>
dt_step
)
{
rho
=
p
->
rho
*=
expf
(
-
3
.
0
f
*
h_dt
/
h
*
dt
);
// rho = p->rho += cbrt( h_dt ) * dt;
p
->
force
.
POrho2
=
u
*
(
const_gamma
-
1
.
0
f
)
/
(
rho
+
h
*
p
->
rho_dh
/
3
.
0
f
);
}
else
{
p
->
wcount
=
0
.
0
f
;
p
->
density
.
wcount_dh
=
0
.
0
f
;
p
->
rho
=
0
.
0
f
;
...
...
@@ -276,7 +289,11 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
}
void
engine_single2
(
double
*
dim
,
long
long
int
pid
,
struct
part
*
__restrict__
parts
,
int
N
,
int
periodic
)
{
/**
* @brief Compute the force on a single particle brute-force.
*/
void
engine_single
(
double
*
dim
,
long
long
int
pid
,
struct
part
*
__restrict__
parts
,
int
N
,
int
periodic
)
{
int
i
,
k
;
double
r2
,
dx
[
3
];
...
...
@@ -342,9 +359,11 @@ void engine_step ( struct engine *e , int sort_queues ) {
struct
part
*
restrict
parts
=
e
->
s
->
parts
,
*
restrict
p
;
struct
xpart
*
restrict
xp
;
float
dt
=
e
->
dt
,
hdt
=
0
.
5
*
dt
,
dt_step
,
dt_max
,
dt_min
,
ldt_min
,
ldt_max
;
float
pdt
,
h
,
m
,
v
[
3
],
u
;
double
x
[
3
];
double
epot
=
0
.
0
,
ekin
=
0
.
0
,
lepot
,
lekin
,
lmom
[
3
],
mom
[
3
]
=
{
0
.
0
,
0
.
0
,
0
.
0
};
double
lang
[
3
],
ang
[
3
]
=
{
0
.
0
,
0
.
0
,
0
.
0
};
double
lent
,
ent
=
0
.
0
;
//
double lent, ent = 0.0;
int
threadID
,
nthreads
,
count
=
0
,
lcount
;
/* Get the maximum dt. */
...
...
@@ -400,62 +419,68 @@ void engine_step ( struct engine *e , int sort_queues ) {
// for(k=0; k<10; ++k)
// printParticle(parts, k);
// engine_single
2
( e->s->dim , 432626 , e->s->parts , e->s->nr_parts , e->s->periodic );
// engine_single( e->s->dim , 432626 , e->s->parts , e->s->nr_parts , e->s->periodic );
// printParticle( parts , 432626 );
// printParticle( parts , 432628 );
/* Second kick. */
TIMER_TIC_ND
dt_min
=
FLT_MAX
;
dt_max
=
0
.
0
f
;
#pragma omp parallel private(p,xp,k,ldt_min,lcount,ldt_max,lmom,lang,
lent,
lekin,lepot,threadID,nthreads)
#pragma omp parallel private(p,xp,k,ldt_min,lcount,ldt_max,lmom,lang,lekin,lepot,threadID,nthreads
,pdt,v,h,m,x,u
)
{
threadID
=
omp_get_thread_num
();
nthreads
=
omp_get_num_threads
();
ldt_min
=
FLT_MAX
;
ldt_max
=
0
.
0
f
;
lmom
[
0
]
=
0
.
0
;
lmom
[
1
]
=
0
.
0
;
lmom
[
2
]
=
0
.
0
;
lang
[
0
]
=
0
.
0
;
lang
[
1
]
=
0
.
0
;
lang
[
2
]
=
0
.
0
;
lekin
=
0
.
0
;
lepot
=
0
.
0
;
lent
=
0
.
0
;
lcount
=
0
;
lekin
=
0
.
0
;
lepot
=
0
.
0
;
/*
lent=0.0;
*/
lcount
=
0
;
for
(
k
=
nr_parts
*
threadID
/
nthreads
;
k
<
nr_parts
*
(
threadID
+
1
)
/
nthreads
;
k
++
)
{
/* Get a handle on the part. */
p
=
&
parts
[
k
];
xp
=
p
->
xtras
;
/* Get local copies of particle data. */
pdt
=
p
->
dt
;
h
=
p
->
h
;
m
=
p
->
mass
;
x
[
0
]
=
p
->
x
[
0
];
x
[
1
]
=
p
->
x
[
1
];
x
[
2
]
=
p
->
x
[
2
];
/* Scale the derivatives if they're freshly computed. */
if
(
p
->
dt
<=
dt_step
)
{
p
->
force
.
h_dt
*=
p
->
h
*
0
.
333333333
f
;
if
(
pdt
<=
dt_step
)
{
p
->
force
.
h_dt
*=
h
*
0
.
333333333
f
;
lcount
+=
1
;
}
/* Update the particle's time step. */
p
->
dt
=
const_cfl
*
p
->
h
/
(
p
->
force
.
v_sig
);
p
->
dt
=
pdt
=
const_cfl
*
h
/
(
p
->
force
.
v_sig
);
/* Update positions and energies at the half-step. */
p
->
v
[
0
]
=
xp
->
v_old
[
0
]
+
hdt
*
p
->
a
[
0
];
p
->
v
[
1
]
=
xp
->
v_old
[
1
]
+
hdt
*
p
->
a
[
1
];
p
->
v
[
2
]
=
xp
->
v_old
[
2
]
+
hdt
*
p
->
a
[
2
];
p
->
u
=
xp
->
u_old
+
hdt
*
p
->
force
.
u_dt
;
p
->
v
[
0
]
=
v
[
0
]
=
xp
->
v_old
[
0
]
+
hdt
*
p
->
a
[
0
];
p
->
v
[
1
]
=
v
[
1
]
=
xp
->
v_old
[
1
]
+
hdt
*
p
->
a
[
1
];
p
->
v
[
2
]
=
v
[
2
]
=
xp
->
v_old
[
2
]
+
hdt
*
p
->
a
[
2
];
p
->
u
=
u
=
xp
->
u_old
+
hdt
*
p
->
force
.
u_dt
;
/* Get the smallest/largest dt. */
ldt_min
=
fminf
(
ldt_min
,
p
->
dt
);
ldt_max
=
fmaxf
(
ldt_max
,
p
->
dt
);
ldt_min
=
fminf
(
ldt_min
,
pdt
);
ldt_max
=
fmaxf
(
ldt_max
,
pdt
);
/* Collect total energy. */
lekin
+=
0
.
5
*
p
->
mass
*
(
p
->
v
[
0
]
*
p
->
v
[
0
]
+
p
->
v
[
1
]
*
p
->
v
[
1
]
+
p
->
v
[
2
]
*
p
->
v
[
2
]
);
lepot
+=
p
->
mass
*
p
->
u
;
lekin
+=
0
.
5
*
m
*
(
v
[
0
]
*
v
[
0
]
+
v
[
1
]
*
v
[
1
]
+
v
[
2
]
*
v
[
2
]
);
lepot
+=
m
*
u
;
/* Collect momentum */
lmom
[
0
]
+=
p
->
mass
*
p
->
v
[
0
];
lmom
[
1
]
+=
p
->
mass
*
p
->
v
[
1
];
lmom
[
2
]
+=
p
->
mass
*
p
->
v
[
2
];
lmom
[
0
]
+=
m
*
p
->
v
[
0
];
lmom
[
1
]
+=
m
*
p
->
v
[
1
];
lmom
[
2
]
+=
m
*
p
->
v
[
2
];
/* Collect angular momentum */
lang
[
0
]
+=
p
->
mass
*
(
p
->
x
[
1
]
*
p
->
v
[
2
]
-
p
->
v
[
2
]
*
p
->
x
[
1
]
);
lang
[
1
]
+=
p
->
mass
*
(
p
->
x
[
2
]
*
p
->
v
[
0
]
-
p
->
v
[
0
]
*
p
->
x
[
2
]
);
lang
[
2
]
+=
p
->
mass
*
(
p
->
x
[
0
]
*
p
->
v
[
1
]
-
p
->
v
[
1
]
*
p
->
x
[
0
]
);
lang
[
0
]
+=
m
*
(
x
[
1
]
*
v
[
2
]
-
v
[
2
]
*
x
[
1
]
);
lang
[
1
]
+=
m
*
(
x
[
2
]
*
v
[
0
]
-
v
[
0
]
*
x
[
2
]
);
lang
[
2
]
+=
m
*
(
x
[
0
]
*
v
[
1
]
-
v
[
1
]
*
x
[
0
]
);
/* Collect entropic function */
lent
+=
p
->
u
*
pow
(
p
->
rho
,
1
.
f
-
const_gamma
);
//
lent += u * pow( p->rho, 1.f-const_gamma );
}
#pragma omp critical
...
...
@@ -468,7 +493,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
ang
[
0
]
+=
lang
[
0
];
ang
[
1
]
+=
lang
[
1
];
ang
[
2
]
+=
lang
[
2
];
ent
+=
(
const_gamma
-
1
.)
*
lent
;
//
ent += (const_gamma -1.) * lent;
epot
+=
lepot
;
ekin
+=
lekin
;
count
+=
lcount
;
...
...
@@ -481,7 +506,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
printf
(
"engine_step: etot is %e (ekin=%e, epot=%e).
\n
"
,
ekin
+
epot
,
ekin
,
epot
);
fflush
(
stdout
);
printf
(
"engine_step: total momentum is [ %e , %e , %e ].
\n
"
,
mom
[
0
]
,
mom
[
1
]
,
mom
[
2
]
);
fflush
(
stdout
);
printf
(
"engine_step: total angular momentum is [ %e , %e , %e ].
\n
"
,
ang
[
0
]
,
ang
[
1
]
,
ang
[
2
]
);
fflush
(
stdout
);
printf
(
"engine_step: total entropic function is %e .
\n
"
,
ent
);
fflush
(
stdout
);
//
printf( "engine_step: total entropic function is %e .\n", ent ); fflush(stdout);
printf
(
"engine_step: updated %i parts (dt_step=%.3e).
\n
"
,
count
,
dt_step
);
fflush
(
stdout
);
/* Increase the step counter. */
...
...
src/part.h
View file @
072b0d3f
...
...
@@ -70,7 +70,7 @@ struct part {
float
rho_dh
;
/* Store density/force specific stuff. */
//
union {
union
{
struct
{
...
...
@@ -107,7 +107,7 @@ struct part {
}
force
;
//
};
};
/* Particle pressure. */
// float P;
...
...
src/runner.c
View file @
072b0d3f
...
...
@@ -411,9 +411,6 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
continue
;
}
if
(
p
->
id
==
432626
)
printf
(
"runner_doghost: updating particle %lli.
\n
"
,
p
->
id
);
/* Pre-compute some stuff for the balsara switch. */
normDiv_v
=
fabs
(
p
->
density
.
div_v
/
p
->
rho
*
ihg4
);
normCurl_v
=
sqrtf
(
p
->
density
.
curl_v
[
0
]
*
p
->
density
.
curl_v
[
0
]
+
p
->
density
.
curl_v
[
1
]
*
p
->
density
.
curl_v
[
1
]
+
p
->
density
.
curl_v
[
2
]
*
p
->
density
.
curl_v
[
2
]
)
/
p
->
rho
*
ihg4
;
...
...
src/vector.h
View file @
072b0d3f
...
...
@@ -27,32 +27,60 @@
#define VEC_MACRO(elcount, type) __attribute__((vector_size((elcount)*sizeof(type)))) type
/* So what will the vector size be? */
#ifdef
NO
__AVX__
#ifdef __AVX__
#define VECTORIZE
#define VEC_SIZE 8
#define VEC_FLOAT __m256
#define VEC_DBL __m256d
#define VEC_INT __m256i
#define vec_load(a) _mm256_load_ps(a)
#define vec_set1(a) _mm256_set1_ps(a)
#define vec_set(a,b,c,d,e,f,g,h) _mm256_set_ps(h,g,f,e,d,c,b,a)
#define vec_dbl_set(a,b,c,d) _mm256_set_pd(d,c,b,a)
#define vec_sqrt(a) _mm256_sqrt_ps(a)
#define vec_rcp(a) _mm256_rcp_ps(a)
#define vec_rsqrt(a) _mm256_rsqrt_ps(a)
#define vec_ftoi(a) _mm256_cvttps_epi32(a)
#define vec_fmin(a,b) _mm256_min_ps(a,b)
#define vec_fmax(a,b) _mm256_max_ps(a,b)
#elif defined( NO__SSE2__ )
#define vec_todbl_lo(a) _mm256_cvtps_pd(_mm256_extract128_ps(a,0))
#define vec_todbl_hi(a) _mm256_cvtps_pd(_mm256_extract128_ps(a,1))
#define vec_dbl_tofloat(a,b) _mm256_insertf128( _mm256_castps128_ps256(a) , b , 1 )
#define vec_dbl_load(a) _mm256_load_pd(a)
#define vec_dbl_set1(a) _mm256_set1_pd(a)
#define vec_dbl_sqrt(a) _mm256_sqrt_pd(a)
#define vec_dbl_rcp(a) _mm256_rcp_pd(a)
#define vec_dbl_rsqrt(a) _mm256_rsqrt_pd(a)
#define vec_dbl_ftoi(a) _mm256_cvttpd_epi32(a)
#define vec_dbl_fmin(a,b) _mm256_min_pd(a,b)
#define vec_dbl_fmax(a,b) _mm256_max_pd(a,b)
#elif defined( __SSE2__ )
#define VECTORIZE
#define VEC_SIZE 4
#define VEC_FLOAT __m128
#define VEC_DBL __m128d
#define VEC_INT __m128i
#define vec_load(a) _mm_load_ps(a)
#define vec_set1(a) _mm_set1_ps(a)
#define vec_set(a,b,c,d) _mm_set_ps(d,c,b,a)
#define vec_dbl_set(a,b) _mm_set_pd(b,a)
#define vec_sqrt(a) _mm_sqrt_ps(a)
#define vec_rcp(a) _mm_rcp_ps(a)
#define vec_rsqrt(a) _mm_rsqrt_ps(a)
#define vec_ftoi(a) _mm_cvttps_epi32(a)
#define vec_fmin(a,b) _mm_min_ps(a,b)
#define vec_fmax(a,b) _mm_max_ps(a,b)
#define vec_todbl_lo(a) _mm_cvtps_pd(a)
#define vec_todbl_hi(a) _mm_cvtps_pd(_mm_movehl_ps(a,a))
#define vec_dbl_tofloat(a,b) _mm_movelh_ps( _mm_cvtpd_ps(a) , _mm_cvtpd_ps(b) )
#define vec_dbl_load(a) _mm_load_pd(a)
#define vec_dbl_set1(a) _mm_set1_pd(a)
#define vec_dbl_sqrt(a) _mm_sqrt_pd(a)
#define vec_dbl_rcp(a) _mm_rcp_pd(a)
#define vec_dbl_rsqrt(a) _mm_rsqrt_pd(a)
#define vec_dbl_ftoi(a) _mm_cvttpd_epi32(a)
#define vec_dbl_fmin(a,b) _mm_min_pd(a,b)
#define vec_dbl_fmax(a,b) _mm_max_pd(a,b)
#else
#define VEC_SIZE 4
#endif
...
...
@@ -74,8 +102,10 @@
// VEC_MACRO(VEC_SIZE,float) v;
// VEC_MACRO(VEC_SIZE,int) m;
VEC_FLOAT
v
;
VEC_DBL
vd
;
VEC_INT
m
;
float
f
[
VEC_SIZE
];
double
d
[
VEC_SIZE
/
2
];
int
i
[
VEC_SIZE
];
}
vector
;
#endif
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment