Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SWIFTsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
SWIFTsim
Commits
70021c5b
Commit
70021c5b
authored
8 years ago
by
James Willis
Browse files
Options
Downloads
Patches
Plain Diff
Added vectorised version of DOPAIR1.
parent
954a7902
No related branches found
No related tags found
1 merge request
!320
Dopair1 vectorisation merge
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/runner_doiact_vec.c
+194
-0
194 additions, 0 deletions
src/runner_doiact_vec.c
src/runner_doiact_vec.h
+3
-0
3 additions, 0 deletions
src/runner_doiact_vec.h
with
197 additions
and
0 deletions
src/runner_doiact_vec.c
+
194
−
0
View file @
70021c5b
...
...
@@ -865,3 +865,197 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec_2(
TIMER_TOC
(
timer_doself_density
);
#endif
/* WITH_VECTORIZATION */
}
/**
 * @brief Compute the interactions between a cell pair (non-symmetric).
 *
 * Accepted particle pairs are queued and handed to
 * runner_iact_nonsym_vec_density() in batches of VEC_SIZE; whatever is left
 * in the queue at the end is processed one pair at a time with
 * runner_iact_nonsym_density().
 *
 * The function performs two sweeps over the sorted particle lists: the first
 * queues pairs with a particle of ci as the first argument of the
 * interaction, the second with a particle of cj as the first argument.
 *
 * The whole body is compiled only when WITH_VECTORIZATION is defined;
 * otherwise the function does nothing. It also returns immediately when
 * neither cell is active.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */
void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
                                struct cell *cj) {

#ifdef WITH_VECTORIZATION
  const struct engine *restrict e = r->e;

  /* Interaction queue: one slot per lane of the vectorised kernel.
   * NOTE(review): the alignment is hard-coded to 16 bytes while the arrays
   * are sized by VEC_SIZE -- confirm this satisfies the load requirements of
   * runner_iact_nonsym_vec_density() for every supported vector width. */
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];

  TIMER_TIC;

  /* Anything to do here? */
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;

#ifdef SWIFT_DEBUG_CHECKS
  cell_is_drifted(ci, e);
  cell_is_drifted(cj, e);
#endif

  /* Get the sort ID. space_getsid() receives the addresses of ci and cj and
   * fills in shift; presumably it may reorder the two cells -- verify against
   * its definition. */
  double shift[3] = {0.0, 0.0, 0.0};
  const int sid = space_getsid(e->s, &ci, &cj, shift);

  /* Have the cells been sorted? */
  if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid)))
    error("Trying to interact unsorted cells.");

  /* Get the cutoff shift. */
  double rshift = 0.0;
  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];

  /* Pick-out the sorted lists. */
  const struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)];
  const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];

  /* Get some other useful values.
   * NOTE(review): di_max reads sort_i[count_i - 1]; confirm callers never
   * pass a cell with count == 0. */
  const double hi_max = ci->h_max * kernel_gamma - rshift;
  const double hj_max = cj->h_max * kernel_gamma;
  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;
  const double di_max = sort_i[count_i - 1].d - rshift;
  const double dj_min = sort_j[0].d;
  const float dx_max = (ci->dx_max + cj->dx_max);

  /* First sweep: loop over the parts in ci, starting from the last sorted
   * entry, for as long as they can still reach the first entry of cj. */
  for (int pid = count_i - 1;
       pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {

    /* Get a hold of the ith part in ci. */
    struct part *restrict pi = &parts_i[sort_i[pid].i];
    if (!part_is_active(pi, e)) continue;

    /* Skip this particle if its own reach falls short of cj. */
    const float hi = pi->h;
    const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
    if (di < dj_min) continue;

    /* Position of pi with the pair shift removed. */
    double pix[3];
    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
    const float hig2 = hi * hi * kernel_gamma2;

    /* Loop over the parts in cj. */
    for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {

      /* Get a pointer to the jth particle. */
      struct part *restrict pj = &parts_j[sort_j[pjd].i];

      /* Compute the pairwise distance. */
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 < hig2) {

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          runner_iact_nonsym_vec_density(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */

  /* Second sweep: loop over the parts in cj, starting from the first sorted
   * entry, for as long as they can still reach the last entry of ci. */
  for (int pjd = 0;
       pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max; pjd++) {

    /* Get a hold of the jth part in cj. */
    struct part *restrict pj = &parts_j[sort_j[pjd].i];
    if (!part_is_active(pj, e)) continue;

    /* Skip this particle if its own reach falls short of ci. */
    const float hj = pj->h;
    const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
    if (dj > di_max) continue;

    /* Position of pj with the pair shift applied. */
    double pjx[3];
    for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
    const float hjg2 = hj * hj * kernel_gamma2;

    /* Loop over the parts in ci. */
    for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {

      /* Get a pointer to the ith particle. */
      struct part *restrict pi = &parts_i[sort_i[pid].i];

      /* Compute the pairwise distance. */
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
        dx[k] = pjx[k] - pi->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 < hjg2) {

        /* Add this interaction to the queue. The roles are swapped with
         * respect to the first sweep: pj and hj go in the first slots. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hj;
        hjq[icount] = pi->h;
        piq[icount] = pj;
        pjq[icount] = pi;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          runner_iact_nonsym_vec_density(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }
      }

    } /* loop over the parts in ci. */

  } /* loop over the parts in cj. */

  /* Pick up any leftovers: a partially filled queue is processed one pair at
   * a time with the scalar kernel. */
  for (int k = 0; k < icount; k++)
    runner_iact_nonsym_density(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k],
                               pjq[k]);

  TIMER_TOC(timer_dopair_density);

#endif /* WITH_VECTORIZATION */
}
This diff is collapsed.
Click to expand it.
src/runner_doiact_vec.h
+
3
−
0
View file @
70021c5b
...
...
@@ -31,9 +31,12 @@
#include
"runner.h"
#include
"timers.h"
#include
"vector.h"
#include
"active.h"
#include
"runner.h"
/* Function prototypes. */
/* Vectorised density self-interaction on a single cell `c`.
 * NOTE(review): the definition is not visible in this diff -- confirm the
 * exact semantics against runner_doiact_vec.c. */
void
runner_doself1_density_vec
(
struct
runner
*
r
,
struct
cell
*
restrict
c
);
/* Second variant of the vectorised density self-interaction on cell `c`.
 * NOTE(review): presumably differs from the function above only in how it
 * processes the particles -- verify against its definition. */
void
runner_doself1_density_vec_2
(
struct
runner
*
r
,
struct
cell
*
restrict
c
);
/* Non-symmetric density interactions between the cell pair `ci` / `cj`,
 * queued in batches of VEC_SIZE for the vectorised kernel. The body is only
 * compiled when WITH_VECTORIZATION is defined. */
void
runner_dopair1_density_vec
(
struct
runner
*
r
,
struct
cell
*
restrict
ci
,
struct
cell
*
restrict
cj
);
#endif
/* SWIFT_RUNNER_VEC_H */
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment