Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
e33d87f3
Commit
e33d87f3
authored
Aug 11, 2017
by
Matthieu Schaller
Browse files
No need to zero the acceleration caches before use.
parent
95cb0e07
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/gravity_cache.h
View file @
e33d87f3
...
...
@@ -127,12 +127,11 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) {
* @param gcount The number of particles to read.
* @param gcount_padded The number of particles to read, padded to the next
* multiple of the vector length.
* @param zero_output Do we need to zero the output caches ?
* @param shift A shift to apply to all the particles.
*/
__attribute__
((
always_inline
))
INLINE
void
gravity_cache_populate
(
struct
gravity_cache
*
c
,
const
struct
gpart
*
restrict
gparts
,
int
gcount
,
int
gcount_padded
,
int
zero_output
,
double
shift
[
3
])
{
int
gcount_padded
,
double
shift
[
3
])
{
/* Make the compiler understand we are in happy vectorization land */
float
*
restrict
x
=
c
->
x
;
...
...
@@ -140,17 +139,11 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate(
float
*
restrict
z
=
c
->
z
;
float
*
restrict
m
=
c
->
m
;
float
*
restrict
epsilon
=
c
->
epsilon
;
float
*
restrict
a_x
=
c
->
a_x
;
float
*
restrict
a_y
=
c
->
a_y
;
float
*
restrict
a_z
=
c
->
a_z
;
swift_align_information
(
x
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_information
(
y
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_information
(
z
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_information
(
epsilon
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_information
(
m
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_information
(
a_x
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_information
(
a_y
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_information
(
a_z
,
SWIFT_CACHE_ALIGNMENT
);
swift_assume_size
(
gcount_padded
,
VEC_SIZE
);
/* Fill the input caches */
...
...
@@ -174,13 +167,6 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate(
epsilon
[
i
]
=
0
.
f
;
m
[
i
]
=
0
.
f
;
}
/* Zero the output caches */
if
(
zero_output
)
{
bzero
(
a_x
,
gcount_padded
*
sizeof
(
float
));
bzero
(
a_y
,
gcount_padded
*
sizeof
(
float
));
bzero
(
a_z
,
gcount_padded
*
sizeof
(
float
));
}
}
/**
...
...
@@ -191,11 +177,10 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate(
* @param gcount The number of particles to read.
* @param gcount_padded The number of particles to read, padded to the next
* multiple of the vector length.
* @param zero_output Do we need to zero the output caches ?
*/
__attribute__
((
always_inline
))
INLINE
void
gravity_cache_populate_no_shift
(
struct
gravity_cache
*
c
,
const
struct
gpart
*
restrict
gparts
,
int
gcount
,
int
gcount_padded
,
int
zero_output
)
{
int
gcount_padded
)
{
/* Make the compiler understand we are in happy vectorization land */
float
*
restrict
x
=
c
->
x
;
...
...
@@ -203,17 +188,12 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift(
float
*
restrict
z
=
c
->
z
;
float
*
restrict
m
=
c
->
m
;
float
*
restrict
epsilon
=
c
->
epsilon
;
float
*
restrict
a_x
=
c
->
a_x
;
float
*
restrict
a_y
=
c
->
a_y
;
float
*
restrict
a_z
=
c
->
a_z
;
swift_align_information
(
x
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_information
(
y
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_information
(
z
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_information
(
epsilon
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_information
(
m
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_information
(
a_x
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_information
(
a_y
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_information
(
a_z
,
SWIFT_CACHE_ALIGNMENT
);
swift_assume_size
(
gcount_padded
,
VEC_SIZE
);
/* Fill the input caches */
for
(
int
i
=
0
;
i
<
gcount
;
++
i
)
{
...
...
@@ -236,13 +216,6 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift(
epsilon
[
i
]
=
0
.
f
;
m
[
i
]
=
0
.
f
;
}
/* Zero the output caches */
if
(
zero_output
)
{
bzero
(
a_x
,
gcount_padded
*
sizeof
(
float
));
bzero
(
a_y
,
gcount_padded
*
sizeof
(
float
));
bzero
(
a_z
,
gcount_padded
*
sizeof
(
float
));
}
}
/**
...
...
src/runner_doiact_grav.h
View file @
e33d87f3
...
...
@@ -188,10 +188,9 @@ void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci,
const
int
gcount_padded_j
=
gcount_j
-
(
gcount_j
%
VEC_SIZE
)
+
VEC_SIZE
;
/* Fill the caches */
gravity_cache_populate
(
ci_cache
,
gparts_i
,
gcount_i
,
gcount_padded_i
,
ci_active
,
shift
);
gravity_cache_populate_no_shift
(
cj_cache
,
gparts_j
,
gcount_j
,
gcount_padded_j
,
cj_active
);
gravity_cache_populate
(
ci_cache
,
gparts_i
,
gcount_i
,
gcount_padded_i
,
shift
);
gravity_cache_populate_no_shift
(
cj_cache
,
gparts_j
,
gcount_j
,
gcount_padded_j
);
/* Ok... Here we go ! */
...
...
@@ -281,9 +280,9 @@ void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci,
}
/* Store everything back in cache */
ci_cache
->
a_x
[
pid
]
+
=
a_x
;
ci_cache
->
a_y
[
pid
]
+
=
a_y
;
ci_cache
->
a_z
[
pid
]
+
=
a_z
;
ci_cache
->
a_x
[
pid
]
=
a_x
;
ci_cache
->
a_y
[
pid
]
=
a_y
;
ci_cache
->
a_z
[
pid
]
=
a_z
;
}
}
...
...
@@ -374,9 +373,9 @@ void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci,
}
/* Store everything back in cache */
cj_cache
->
a_x
[
pjd
]
+
=
a_x
;
cj_cache
->
a_y
[
pjd
]
+
=
a_y
;
cj_cache
->
a_z
[
pjd
]
+
=
a_z
;
cj_cache
->
a_x
[
pjd
]
=
a_x
;
cj_cache
->
a_y
[
pjd
]
=
a_y
;
cj_cache
->
a_z
[
pjd
]
=
a_z
;
}
}
...
...
@@ -531,10 +530,9 @@ void runner_dopair_grav_pp_truncated(struct runner *r, struct cell *ci,
const
int
gcount_padded_j
=
gcount_j
-
(
gcount_j
%
VEC_SIZE
)
+
VEC_SIZE
;
/* Fill the caches */
gravity_cache_populate
(
ci_cache
,
gparts_i
,
gcount_i
,
gcount_padded_i
,
ci_active
,
shift
);
gravity_cache_populate_no_shift
(
cj_cache
,
gparts_j
,
gcount_j
,
gcount_padded_j
,
cj_active
);
gravity_cache_populate
(
ci_cache
,
gparts_i
,
gcount_i
,
gcount_padded_i
,
shift
);
gravity_cache_populate_no_shift
(
cj_cache
,
gparts_j
,
gcount_j
,
gcount_padded_j
);
/* Ok... Here we go ! */
...
...
@@ -629,9 +627,9 @@ void runner_dopair_grav_pp_truncated(struct runner *r, struct cell *ci,
}
/* Store everything back in cache */
ci_cache
->
a_x
[
pid
]
+
=
a_x
;
ci_cache
->
a_y
[
pid
]
+
=
a_y
;
ci_cache
->
a_z
[
pid
]
+
=
a_z
;
ci_cache
->
a_x
[
pid
]
=
a_x
;
ci_cache
->
a_y
[
pid
]
=
a_y
;
ci_cache
->
a_z
[
pid
]
=
a_z
;
}
}
...
...
@@ -727,9 +725,9 @@ void runner_dopair_grav_pp_truncated(struct runner *r, struct cell *ci,
}
/* Store everything back in cache */
cj_cache
->
a_x
[
pjd
]
+
=
a_x
;
cj_cache
->
a_y
[
pjd
]
+
=
a_y
;
cj_cache
->
a_z
[
pjd
]
+
=
a_z
;
cj_cache
->
a_x
[
pjd
]
=
a_x
;
cj_cache
->
a_y
[
pjd
]
=
a_y
;
cj_cache
->
a_z
[
pjd
]
=
a_z
;
}
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment