Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SWIFTsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
SWIFTsim
Commits
280a6c95
Commit
280a6c95
authored
7 years ago
by
James Willis
Browse files
Options
Downloads
Patches
Plain Diff
Use generic SWIFT alignment and macros to allow auto-vectorisation of cache reads.
parent
ec54107b
No related branches found
No related tags found
1 merge request
!404
Cache auto vec
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/cache.h
+131
-97
131 additions, 97 deletions
src/cache.h
with
131 additions
and
97 deletions
src/cache.h
+
131
−
97
View file @
280a6c95
...
@@ -23,6 +23,7 @@
...
@@ -23,6 +23,7 @@
#include
"../config.h"
#include
"../config.h"
/* Local headers */
/* Local headers */
#include
"align.h"
#include
"cell.h"
#include
"cell.h"
#include
"error.h"
#include
"error.h"
#include
"part.h"
#include
"part.h"
...
@@ -30,9 +31,7 @@
...
@@ -30,9 +31,7 @@
#include
"vector.h"
#include
"vector.h"
#define NUM_VEC_PROC 2
#define NUM_VEC_PROC 2
#define CACHE_ALIGN 64
#define C2_CACHE_SIZE (NUM_VEC_PROC * VEC_SIZE * 6) + (NUM_VEC_PROC * VEC_SIZE)
#define C2_CACHE_SIZE (NUM_VEC_PROC * VEC_SIZE * 6) + (NUM_VEC_PROC * VEC_SIZE)
#define C2_CACHE_ALIGN sizeof(float) * VEC_SIZE
#ifdef WITH_VECTORIZATION
#ifdef WITH_VECTORIZATION
/* Cache struct to hold a local copy of a cells' particle
/* Cache struct to hold a local copy of a cells' particle
...
@@ -40,31 +39,31 @@
...
@@ -40,31 +39,31 @@
struct
cache
{
struct
cache
{
/* Particle x position. */
/* Particle x position. */
float
*
restrict
x
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
float
*
restrict
x
SWIFT_
CACHE_ALIGN
;
/* Particle y position. */
/* Particle y position. */
float
*
restrict
y
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
float
*
restrict
y
SWIFT_
CACHE_ALIGN
;
/* Particle z position. */
/* Particle z position. */
float
*
restrict
z
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
float
*
restrict
z
SWIFT_
CACHE_ALIGN
;
/* Particle smoothing length. */
/* Particle smoothing length. */
float
*
restrict
h
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
float
*
restrict
h
SWIFT_
CACHE_ALIGN
;
/* Particle mass. */
/* Particle mass. */
float
*
restrict
m
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
float
*
restrict
m
SWIFT_
CACHE_ALIGN
;
/* Particle x velocity. */
/* Particle x velocity. */
float
*
restrict
vx
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
float
*
restrict
vx
SWIFT_
CACHE_ALIGN
;
/* Particle y velocity. */
/* Particle y velocity. */
float
*
restrict
vy
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
float
*
restrict
vy
SWIFT_
CACHE_ALIGN
;
/* Particle z velocity. */
/* Particle z velocity. */
float
*
restrict
vz
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
float
*
restrict
vz
SWIFT_
CACHE_ALIGN
;
/* Maximum index into neighbouring cell for particles that are in range. */
/* Maximum index into neighbouring cell for particles that are in range. */
int
*
restrict
max_index
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
int
*
restrict
max_index
SWIFT_
CACHE_ALIGN
;
/* Cache size. */
/* Cache size. */
int
count
;
int
count
;
...
@@ -75,28 +74,28 @@ struct cache {
...
@@ -75,28 +74,28 @@ struct cache {
struct
c2_cache
{
struct
c2_cache
{
/* Separation between two particles squared. */
/* Separation between two particles squared. */
float
r2q
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2
_CACHE_ALIGN
)))
;
float
r2q
[
C2_CACHE_SIZE
]
SWIFT
_CACHE_ALIGN
;
/* x separation between two particles. */
/* x separation between two particles. */
float
dxq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2
_CACHE_ALIGN
)))
;
float
dxq
[
C2_CACHE_SIZE
]
SWIFT
_CACHE_ALIGN
;
/* y separation between two particles. */
/* y separation between two particles. */
float
dyq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2
_CACHE_ALIGN
)))
;
float
dyq
[
C2_CACHE_SIZE
]
SWIFT
_CACHE_ALIGN
;
/* z separation between two particles. */
/* z separation between two particles. */
float
dzq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2
_CACHE_ALIGN
)))
;
float
dzq
[
C2_CACHE_SIZE
]
SWIFT
_CACHE_ALIGN
;
/* Mass of particle pj. */
/* Mass of particle pj. */
float
mq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2
_CACHE_ALIGN
)))
;
float
mq
[
C2_CACHE_SIZE
]
SWIFT
_CACHE_ALIGN
;
/* x velocity of particle pj. */
/* x velocity of particle pj. */
float
vxq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2
_CACHE_ALIGN
)))
;
float
vxq
[
C2_CACHE_SIZE
]
SWIFT
_CACHE_ALIGN
;
/* y velocity of particle pj. */
/* y velocity of particle pj. */
float
vyq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2
_CACHE_ALIGN
)))
;
float
vyq
[
C2_CACHE_SIZE
]
SWIFT
_CACHE_ALIGN
;
/* z velocity of particle pj. */
/* z velocity of particle pj. */
float
vzq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2
_CACHE_ALIGN
)))
;
float
vzq
[
C2_CACHE_SIZE
]
SWIFT
_CACHE_ALIGN
;
};
};
/**
/**
...
@@ -130,15 +129,15 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c,
...
@@ -130,15 +129,15 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c,
free
(
c
->
max_index
);
free
(
c
->
max_index
);
}
}
error
+=
posix_memalign
((
void
**
)
&
c
->
x
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
x
,
SWIFT_
CACHE_ALIGN
MENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
y
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
y
,
SWIFT_
CACHE_ALIGN
MENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
z
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
z
,
SWIFT_
CACHE_ALIGN
MENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
m
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
m
,
SWIFT_
CACHE_ALIGN
MENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vx
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vx
,
SWIFT_
CACHE_ALIGN
MENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vy
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vy
,
SWIFT_
CACHE_ALIGN
MENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vz
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vz
,
SWIFT_
CACHE_ALIGN
MENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
h
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
h
,
SWIFT_
CACHE_ALIGN
MENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
max_index
,
CACHE_ALIGN
,
sizeIntBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
max_index
,
SWIFT_
CACHE_ALIGN
MENT
,
sizeIntBytes
);
if
(
error
!=
0
)
if
(
error
!=
0
)
error
(
"Couldn't allocate cache, no. of particles: %d"
,
(
int
)
count
);
error
(
"Couldn't allocate cache, no. of particles: %d"
,
(
int
)
count
);
...
@@ -152,25 +151,39 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c,
...
@@ -152,25 +151,39 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c,
* @param ci_cache The cache.
* @param ci_cache The cache.
*/
*/
__attribute__
((
always_inline
))
INLINE
void
cache_read_particles
(
__attribute__
((
always_inline
))
INLINE
void
cache_read_particles
(
const
struct
cell
*
const
ci
,
struct
cache
*
const
ci_cache
)
{
const
struct
cell
*
restrict
const
ci
,
struct
cache
*
restrict
const
ci_cache
)
{
#if defined(GADGET2_SPH)
#if defined(GADGET2_SPH)
/* Let the compiler know that the data is aligned and create pointers to the
* arrays inside the cache. */
swift_align_and_restrict_information
(
x
,
ci_cache
->
x
,
float
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_and_restrict_information
(
y
,
ci_cache
->
y
,
float
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_and_restrict_information
(
z
,
ci_cache
->
z
,
float
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_and_restrict_information
(
h
,
ci_cache
->
h
,
float
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_and_restrict_information
(
m
,
ci_cache
->
m
,
float
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_and_restrict_information
(
vx
,
ci_cache
->
vx
,
float
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_and_restrict_information
(
vy
,
ci_cache
->
vy
,
float
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_and_restrict_information
(
vz
,
ci_cache
->
vz
,
float
,
SWIFT_CACHE_ALIGNMENT
);
const
struct
part
*
restrict
parts
=
ci
->
parts
;
double
loc
[
3
];
loc
[
0
]
=
ci
->
loc
[
0
];
loc
[
1
]
=
ci
->
loc
[
1
];
loc
[
2
]
=
ci
->
loc
[
2
];
/* Shift the particles positions to a local frame so single precision can be
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
* used instead of double precision. */
#if defined(WITH_VECTORIZATION) && defined(__ICC)
#pragma vector aligned
#endif
for
(
int
i
=
0
;
i
<
ci
->
count
;
i
++
)
{
for
(
int
i
=
0
;
i
<
ci
->
count
;
i
++
)
{
ci_cache
->
x
[
i
]
=
ci
->
parts
[
i
].
x
[
0
]
-
ci
->
loc
[
0
];
x
[
i
]
=
(
float
)(
parts
[
i
].
x
[
0
]
-
loc
[
0
]
)
;
ci_cache
->
y
[
i
]
=
ci
->
parts
[
i
].
x
[
1
]
-
ci
->
loc
[
1
];
y
[
i
]
=
(
float
)(
parts
[
i
].
x
[
1
]
-
loc
[
1
]
)
;
ci_cache
->
z
[
i
]
=
ci
->
parts
[
i
].
x
[
2
]
-
ci
->
loc
[
2
];
z
[
i
]
=
(
float
)(
parts
[
i
].
x
[
2
]
-
loc
[
2
]
)
;
ci_cache
->
h
[
i
]
=
ci
->
parts
[
i
].
h
;
h
[
i
]
=
parts
[
i
].
h
;
ci_cache
->
m
[
i
]
=
ci
->
parts
[
i
].
mass
;
m
[
i
]
=
parts
[
i
].
mass
;
ci_cache
->
vx
[
i
]
=
ci
->
parts
[
i
].
v
[
0
];
vx
[
i
]
=
parts
[
i
].
v
[
0
];
ci_cache
->
vy
[
i
]
=
ci
->
parts
[
i
].
v
[
1
];
vy
[
i
]
=
parts
[
i
].
v
[
1
];
ci_cache
->
vz
[
i
]
=
ci
->
parts
[
i
].
v
[
2
];
vz
[
i
]
=
parts
[
i
].
v
[
2
];
}
}
#endif
#endif
...
@@ -322,13 +335,13 @@ __attribute__((always_inline)) INLINE void cache_read_two_cells_sorted(
...
@@ -322,13 +335,13 @@ __attribute__((always_inline)) INLINE void cache_read_two_cells_sorted(
* interaction.
* interaction.
*/
*/
__attribute__
((
always_inline
))
INLINE
void
cache_read_two_partial_cells_sorted
(
__attribute__
((
always_inline
))
INLINE
void
cache_read_two_partial_cells_sorted
(
const
struct
cell
*
const
ci
,
const
struct
cell
*
const
cj
,
const
struct
cell
*
restrict
const
ci
,
const
struct
cell
*
restrict
const
cj
,
struct
cache
*
const
ci_cache
,
struct
cache
*
const
cj_cache
,
struct
cache
*
restrict
const
ci_cache
,
struct
cache
*
restrict
const
cj_cache
,
const
struct
entry
*
restrict
sort_i
,
const
struct
entry
*
restrict
sort_j
,
const
struct
entry
*
restrict
sort_i
,
const
struct
entry
*
restrict
sort_j
,
const
double
*
const
shift
,
int
*
first_pi
,
int
*
last_pj
,
const
double
*
restrict
const
shift
,
int
*
first_pi
,
int
*
last_pj
,
const
int
num_vec_proc
)
{
const
int
num_vec_proc
)
{
int
idx
,
ci_cache_
idx
;
int
idx
;
/* Pad number of particles read to the vector size. */
/* Pad number of particles read to the vector size. */
int
rem
=
(
ci
->
count
-
*
first_pi
)
%
(
num_vec_proc
*
VEC_SIZE
);
int
rem
=
(
ci
->
count
-
*
first_pi
)
%
(
num_vec_proc
*
VEC_SIZE
);
if
(
rem
!=
0
)
{
if
(
rem
!=
0
)
{
...
@@ -346,74 +359,95 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
...
@@ -346,74 +359,95 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
int
first_pi_align
=
*
first_pi
;
int
first_pi_align
=
*
first_pi
;
int
last_pj_align
=
*
last_pj
;
int
last_pj_align
=
*
last_pj
;
const
struct
part
*
restrict
parts_i
=
ci
->
parts
;
/* Shift the particles positions to a local frame (ci frame) so single precision
const
struct
part
*
restrict
parts_j
=
cj
->
parts
;
* can be
double
loc
[
3
];
* used instead of double precision. Also shift the cell ci, particles positions
loc
[
0
]
=
ci
->
loc
[
0
];
* due to BCs but leave cell cj. */
loc
[
1
]
=
ci
->
loc
[
1
];
#if defined(WITH_VECTORIZATION) && defined(__ICC)
loc
[
2
]
=
ci
->
loc
[
2
];
#pragma vector aligned
#endif
/* Let the compiler know that the data is aligned and create pointers to the
for
(
int
i
=
first_pi_align
;
i
<
ci
->
count
;
i
++
)
{
* arrays inside the cache. */
/* Make sure ci_cache is filled from the first element. */
swift_align_and_restrict_information
(
x
,
ci_cache
->
x
,
float
,
SWIFT_CACHE_ALIGNMENT
);
ci_cache_idx
=
i
-
first_pi_align
;
swift_align_and_restrict_information
(
y
,
ci_cache
->
y
,
float
,
SWIFT_CACHE_ALIGNMENT
);
idx
=
sort_i
[
i
].
i
;
swift_align_and_restrict_information
(
z
,
ci_cache
->
z
,
float
,
SWIFT_CACHE_ALIGNMENT
);
ci_cache
->
x
[
ci_cache_idx
]
=
ci
->
parts
[
idx
].
x
[
0
]
-
ci
->
loc
[
0
]
-
shift
[
0
];
swift_align_and_restrict_information
(
h
,
ci_cache
->
h
,
float
,
SWIFT_CACHE_ALIGNMENT
);
ci_cache
->
y
[
ci_cache_idx
]
=
ci
->
parts
[
idx
].
x
[
1
]
-
ci
->
loc
[
1
]
-
shift
[
1
];
swift_align_and_restrict_information
(
m
,
ci_cache
->
m
,
float
,
SWIFT_CACHE_ALIGNMENT
);
ci_cache
->
z
[
ci_cache_idx
]
=
ci
->
parts
[
idx
].
x
[
2
]
-
ci
->
loc
[
2
]
-
shift
[
2
];
swift_align_and_restrict_information
(
vx
,
ci_cache
->
vx
,
float
,
SWIFT_CACHE_ALIGNMENT
);
ci_cache
->
h
[
ci_cache_idx
]
=
ci
->
parts
[
idx
].
h
;
swift_align_and_restrict_information
(
vy
,
ci_cache
->
vy
,
float
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_and_restrict_information
(
vz
,
ci_cache
->
vz
,
float
,
SWIFT_CACHE_ALIGNMENT
);
ci_cache
->
m
[
ci_cache_idx
]
=
ci
->
parts
[
idx
].
mass
;
ci_cache
->
vx
[
ci_cache_idx
]
=
ci
->
parts
[
idx
].
v
[
0
];
int
ci_cache_count
=
ci
->
count
-
first_pi_align
;
ci_cache
->
vy
[
ci_cache_idx
]
=
ci
->
parts
[
idx
].
v
[
1
];
/* Shift the particles positions to a local frame (ci frame) so single precision
ci_cache
->
vz
[
ci_cache_idx
]
=
ci
->
parts
[
idx
].
v
[
2
];
* can be
* used instead of double precision. Also shift the cell ci, particles positions
* due to BCs but leave cell cj. */
for
(
int
i
=
0
;
i
<
ci_cache_count
;
i
++
)
{
idx
=
sort_i
[
i
+
first_pi_align
].
i
;
x
[
i
]
=
(
float
)(
parts_i
[
idx
].
x
[
0
]
-
loc
[
0
]
-
shift
[
0
]);
y
[
i
]
=
(
float
)(
parts_i
[
idx
].
x
[
1
]
-
loc
[
1
]
-
shift
[
1
]);
z
[
i
]
=
(
float
)(
parts_i
[
idx
].
x
[
2
]
-
loc
[
2
]
-
shift
[
2
]);
h
[
i
]
=
parts_i
[
idx
].
h
;
m
[
i
]
=
parts_i
[
idx
].
mass
;
vx
[
i
]
=
parts_i
[
idx
].
v
[
0
];
vy
[
i
]
=
parts_i
[
idx
].
v
[
1
];
vz
[
i
]
=
parts_i
[
idx
].
v
[
2
];
}
}
/* Pad cache with fake particles that exist outside the cell so will not
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
* interact.*/
float
fake_pix
=
2
.
0
f
*
ci
->
parts
[
sort_i
[
ci
->
count
-
1
].
i
].
x
[
0
];
float
fake_pix
=
2
.
0
f
*
parts
_i
[
sort_i
[
ci
->
count
-
1
].
i
].
x
[
0
];
for
(
int
i
=
ci
->
count
-
first_pi_align
;
for
(
int
i
=
ci
->
count
-
first_pi_align
;
i
<
ci
->
count
-
first_pi_align
+
VEC_SIZE
;
i
++
)
{
i
<
ci
->
count
-
first_pi_align
+
VEC_SIZE
;
i
++
)
{
ci_cache
->
x
[
i
]
=
fake_pix
;
x
[
i
]
=
fake_pix
;
ci_cache
->
y
[
i
]
=
1
.
f
;
y
[
i
]
=
1
.
f
;
ci_cache
->
z
[
i
]
=
1
.
f
;
z
[
i
]
=
1
.
f
;
ci_cache
->
h
[
i
]
=
1
.
f
;
h
[
i
]
=
1
.
f
;
ci_cache
->
m
[
i
]
=
1
.
f
;
m
[
i
]
=
1
.
f
;
ci_cache
->
vx
[
i
]
=
1
.
f
;
vx
[
i
]
=
1
.
f
;
ci_cache
->
vy
[
i
]
=
1
.
f
;
vy
[
i
]
=
1
.
f
;
ci_cache
->
vz
[
i
]
=
1
.
f
;
vz
[
i
]
=
1
.
f
;
}
}
/* Let the compiler know that the data is aligned and create pointers to the
* arrays inside the cache. */
swift_align_and_restrict_information
(
xj
,
cj_cache
->
x
,
float
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_and_restrict_information
(
yj
,
cj_cache
->
y
,
float
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_and_restrict_information
(
zj
,
cj_cache
->
z
,
float
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_and_restrict_information
(
hj
,
cj_cache
->
h
,
float
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_and_restrict_information
(
mj
,
cj_cache
->
m
,
float
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_and_restrict_information
(
vxj
,
cj_cache
->
vx
,
float
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_and_restrict_information
(
vyj
,
cj_cache
->
vy
,
float
,
SWIFT_CACHE_ALIGNMENT
);
swift_align_and_restrict_information
(
vzj
,
cj_cache
->
vz
,
float
,
SWIFT_CACHE_ALIGNMENT
);
#if defined(WITH_VECTORIZATION) && defined(__ICC)
#pragma vector aligned
#endif
for
(
int
i
=
0
;
i
<=
last_pj_align
;
i
++
)
{
for
(
int
i
=
0
;
i
<=
last_pj_align
;
i
++
)
{
idx
=
sort_j
[
i
].
i
;
idx
=
sort_j
[
i
].
i
;
cj_cache
->
x
[
i
]
=
cj
->
parts
[
idx
].
x
[
0
]
-
ci
->
loc
[
0
];
xj
[
i
]
=
(
float
)(
parts
_j
[
idx
].
x
[
0
]
-
loc
[
0
]
)
;
cj_cache
->
y
[
i
]
=
cj
->
parts
[
idx
].
x
[
1
]
-
ci
->
loc
[
1
];
yj
[
i
]
=
(
float
)(
parts
_j
[
idx
].
x
[
1
]
-
loc
[
1
]
)
;
cj_cache
->
z
[
i
]
=
cj
->
parts
[
idx
].
x
[
2
]
-
ci
->
loc
[
2
];
zj
[
i
]
=
(
float
)(
parts
_j
[
idx
].
x
[
2
]
-
loc
[
2
]
)
;
cj_cache
->
h
[
i
]
=
cj
->
parts
[
idx
].
h
;
h
j
[
i
]
=
parts
_j
[
idx
].
h
;
cj_cache
->
m
[
i
]
=
cj
->
parts
[
idx
].
mass
;
m
j
[
i
]
=
parts
_j
[
idx
].
mass
;
cj_cache
->
vx
[
i
]
=
cj
->
parts
[
idx
].
v
[
0
];
vx
j
[
i
]
=
parts
_j
[
idx
].
v
[
0
];
cj_cache
->
vy
[
i
]
=
cj
->
parts
[
idx
].
v
[
1
];
vy
j
[
i
]
=
parts
_j
[
idx
].
v
[
1
];
cj_cache
->
vz
[
i
]
=
cj
->
parts
[
idx
].
v
[
2
];
vz
j
[
i
]
=
parts
_j
[
idx
].
v
[
2
];
}
}
/* Pad cache with fake particles that exist outside the cell so will not
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
* interact.*/
float
fake_pjx
=
2
.
0
f
*
cj
->
parts
[
sort_j
[
cj
->
count
-
1
].
i
].
x
[
0
];
float
fake_pjx
=
2
.
0
f
*
cj
->
parts
[
sort_j
[
cj
->
count
-
1
].
i
].
x
[
0
];
for
(
int
i
=
last_pj_align
+
1
;
i
<
last_pj_align
+
1
+
VEC_SIZE
;
i
++
)
{
for
(
int
i
=
last_pj_align
+
1
;
i
<
last_pj_align
+
1
+
VEC_SIZE
;
i
++
)
{
cj_cache
->
x
[
i
]
=
fake_pjx
;
x
j
[
i
]
=
fake_pjx
;
cj_cache
->
y
[
i
]
=
1
.
f
;
y
j
[
i
]
=
1
.
f
;
cj_cache
->
z
[
i
]
=
1
.
f
;
z
j
[
i
]
=
1
.
f
;
cj_cache
->
h
[
i
]
=
1
.
f
;
h
j
[
i
]
=
1
.
f
;
cj_cache
->
m
[
i
]
=
1
.
f
;
m
j
[
i
]
=
1
.
f
;
cj_cache
->
vx
[
i
]
=
1
.
f
;
vx
j
[
i
]
=
1
.
f
;
cj_cache
->
vy
[
i
]
=
1
.
f
;
vy
j
[
i
]
=
1
.
f
;
cj_cache
->
vz
[
i
]
=
1
.
f
;
vz
j
[
i
]
=
1
.
f
;
}
}
}
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment