Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SWIFTsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
SWIFTsim
Commits
2c149eb6
Commit
2c149eb6
authored
12 years ago
by
Pedro Gonnet
Browse files
Options
Downloads
Patches
Plain Diff
better density self-interaction.
Former-commit-id: c5fa53714af80cac2803940a719eb73fc5120e18
parent
2468664e
No related branches found
No related tags found
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
src/cell.c
+1
-1
1 addition, 1 deletion
src/cell.c
src/runner_doiact.h
+215
-80
215 additions, 80 deletions
src/runner_doiact.h
src/space.h
+1
-1
1 addition, 1 deletion
src/space.h
src/vector.h
+2
-2
2 additions, 2 deletions
src/vector.h
with
219 additions
and
84 deletions
src/cell.c
+
1
−
1
View file @
2c149eb6
...
@@ -167,7 +167,7 @@ void cell_unlocktree( struct cell *c ) {
...
@@ -167,7 +167,7 @@ void cell_unlocktree( struct cell *c ) {
void
cell_split
(
struct
cell
*
c
)
{
void
cell_split
(
struct
cell
*
c
)
{
int
i
,
j
,
k
,
kk
;
int
i
,
j
,
k
;
struct
part
temp
,
*
parts
=
c
->
parts
;
struct
part
temp
,
*
parts
=
c
->
parts
;
int
left
[
8
],
right
[
8
];
int
left
[
8
],
right
[
8
];
double
pivot
[
3
];
double
pivot
[
3
];
...
...
This diff is collapsed.
Click to expand it.
src/runner_doiact.h
+
215
−
80
View file @
2c149eb6
...
@@ -940,6 +940,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
...
@@ -940,6 +940,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
/* Get a pointer to the jth particle. */
/* Get a pointer to the jth particle. */
cpj
=
&
cparts_j
[
sortdt_j
[
pjd
].
i
];
cpj
=
&
cparts_j
[
sortdt_j
[
pjd
].
i
];
hj
=
cpj
->
h
;
/* Compute the pairwise distance. */
/* Compute the pairwise distance. */
r2
=
0
.
0
f
;
r2
=
0
.
0
f
;
...
@@ -953,7 +954,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
...
@@ -953,7 +954,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
#ifndef VECTORIZE
#ifndef VECTORIZE
IACT_NONSYM
(
r2
,
dx
,
cpj
->
h
,
hi
,
&
parts_j
[
sortdt_j
[
pjd
].
i
]
,
pi
);
IACT_NONSYM
(
r2
,
dx
,
h
j
,
hi
,
&
parts_j
[
sortdt_j
[
pjd
].
i
]
,
pi
);
#else
#else
...
@@ -962,7 +963,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
...
@@ -962,7 +963,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
dxq1
[
3
*
icount1
+
0
]
=
dx
[
0
];
dxq1
[
3
*
icount1
+
0
]
=
dx
[
0
];
dxq1
[
3
*
icount1
+
1
]
=
dx
[
1
];
dxq1
[
3
*
icount1
+
1
]
=
dx
[
1
];
dxq1
[
3
*
icount1
+
2
]
=
dx
[
2
];
dxq1
[
3
*
icount1
+
2
]
=
dx
[
2
];
hiq1
[
icount1
]
=
cpj
->
h
;
hiq1
[
icount1
]
=
h
j
;
hjq1
[
icount1
]
=
hi
;
hjq1
[
icount1
]
=
hi
;
piq1
[
icount1
]
=
&
parts_j
[
sortdt_j
[
pjd
].
i
];
piq1
[
icount1
]
=
&
parts_j
[
sortdt_j
[
pjd
].
i
];
pjq1
[
icount1
]
=
pi
;
pjq1
[
icount1
]
=
pi
;
...
@@ -990,6 +991,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
...
@@ -990,6 +991,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
/* Get a pointer to the jth particle. */
/* Get a pointer to the jth particle. */
cpj
=
&
cparts_j
[
sort_j
[
pjd
].
i
];
cpj
=
&
cparts_j
[
sort_j
[
pjd
].
i
];
hj
=
cpj
->
h
;
/* Compute the pairwise distance. */
/* Compute the pairwise distance. */
r2
=
0
.
0
f
;
r2
=
0
.
0
f
;
...
@@ -1005,9 +1007,9 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
...
@@ -1005,9 +1007,9 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
/* Does pj need to be updated too? */
/* Does pj need to be updated too? */
if
(
cpj
->
dt
<=
dt_step
)
if
(
cpj
->
dt
<=
dt_step
)
IACT
(
r2
,
dx
,
hi
,
cpj
->
h
,
pi
,
&
parts_j
[
sort_j
[
pjd
].
i
]
);
IACT
(
r2
,
dx
,
hi
,
h
j
,
pi
,
&
parts_j
[
sort_j
[
pjd
].
i
]
);
else
else
IACT_NONSYM
(
r2
,
dx
,
hi
,
cpj
->
h
,
pi
,
&
parts_j
[
sort_j
[
pjd
].
i
]
);
IACT_NONSYM
(
r2
,
dx
,
hi
,
h
j
,
pi
,
&
parts_j
[
sort_j
[
pjd
].
i
]
);
#else
#else
...
@@ -1020,7 +1022,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
...
@@ -1020,7 +1022,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
dxq2
[
3
*
icount2
+
1
]
=
dx
[
1
];
dxq2
[
3
*
icount2
+
1
]
=
dx
[
1
];
dxq2
[
3
*
icount2
+
2
]
=
dx
[
2
];
dxq2
[
3
*
icount2
+
2
]
=
dx
[
2
];
hiq2
[
icount2
]
=
hi
;
hiq2
[
icount2
]
=
hi
;
hjq2
[
icount2
]
=
cpj
->
h
;
hjq2
[
icount2
]
=
h
j
;
piq2
[
icount2
]
=
pi
;
piq2
[
icount2
]
=
pi
;
pjq2
[
icount2
]
=
&
parts_j
[
sort_j
[
pjd
].
i
];
pjq2
[
icount2
]
=
&
parts_j
[
sort_j
[
pjd
].
i
];
icount2
+=
1
;
icount2
+=
1
;
...
@@ -1041,7 +1043,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
...
@@ -1041,7 +1043,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
dxq1
[
3
*
icount1
+
1
]
=
dx
[
1
];
dxq1
[
3
*
icount1
+
1
]
=
dx
[
1
];
dxq1
[
3
*
icount1
+
2
]
=
dx
[
2
];
dxq1
[
3
*
icount1
+
2
]
=
dx
[
2
];
hiq1
[
icount1
]
=
hi
;
hiq1
[
icount1
]
=
hi
;
hjq1
[
icount1
]
=
cpj
->
h
;
hjq1
[
icount1
]
=
h
j
;
piq1
[
icount1
]
=
pi
;
piq1
[
icount1
]
=
pi
;
pjq1
[
icount1
]
=
&
parts_j
[
sort_j
[
pjd
].
i
];
pjq1
[
icount1
]
=
&
parts_j
[
sort_j
[
pjd
].
i
];
icount1
+=
1
;
icount1
+=
1
;
...
@@ -1090,6 +1092,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
...
@@ -1090,6 +1092,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
/* Get a pointer to the jth particle. */
/* Get a pointer to the jth particle. */
cpi
=
&
cparts_i
[
sortdt_i
[
pid
].
i
];
cpi
=
&
cparts_i
[
sortdt_i
[
pid
].
i
];
hi
=
cpi
->
h
;
/* Compute the pairwise distance. */
/* Compute the pairwise distance. */
r2
=
0
.
0
f
;
r2
=
0
.
0
f
;
...
@@ -1099,11 +1102,11 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
...
@@ -1099,11 +1102,11 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
}
}
/* Hit or miss? */
/* Hit or miss? */
if
(
r2
<
hjg2
&&
r2
>
cpi
->
h
*
cpi
->
h
*
kernel_gamma2
)
{
if
(
r2
<
hjg2
&&
r2
>
hi
*
hi
*
kernel_gamma2
)
{
#ifndef VECTORIZE
#ifndef VECTORIZE
IACT_NONSYM
(
r2
,
dx
,
cpi
->
h
,
hj
,
&
parts_i
[
sortdt_i
[
pid
].
i
]
,
pj
);
IACT_NONSYM
(
r2
,
dx
,
h
i
,
hj
,
&
parts_i
[
sortdt_i
[
pid
].
i
]
,
pj
);
#else
#else
...
@@ -1112,7 +1115,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
...
@@ -1112,7 +1115,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
dxq1
[
3
*
icount1
+
0
]
=
dx
[
0
];
dxq1
[
3
*
icount1
+
0
]
=
dx
[
0
];
dxq1
[
3
*
icount1
+
1
]
=
dx
[
1
];
dxq1
[
3
*
icount1
+
1
]
=
dx
[
1
];
dxq1
[
3
*
icount1
+
2
]
=
dx
[
2
];
dxq1
[
3
*
icount1
+
2
]
=
dx
[
2
];
hiq1
[
icount1
]
=
cpi
->
h
;
hiq1
[
icount1
]
=
h
i
;
hjq1
[
icount1
]
=
hj
;
hjq1
[
icount1
]
=
hj
;
piq1
[
icount1
]
=
&
parts_i
[
sortdt_i
[
pid
].
i
];
piq1
[
icount1
]
=
&
parts_i
[
sortdt_i
[
pid
].
i
];
pjq1
[
icount1
]
=
pj
;
pjq1
[
icount1
]
=
pj
;
...
@@ -1139,6 +1142,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
...
@@ -1139,6 +1142,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
/* Get a pointer to the jth particle. */
/* Get a pointer to the jth particle. */
cpi
=
&
cparts_i
[
sort_i
[
pid
].
i
];
cpi
=
&
cparts_i
[
sort_i
[
pid
].
i
];
hi
=
cpi
->
h
;
/* Compute the pairwise distance. */
/* Compute the pairwise distance. */
r2
=
0
.
0
f
;
r2
=
0
.
0
f
;
...
@@ -1148,15 +1152,15 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
...
@@ -1148,15 +1152,15 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
}
}
/* Hit or miss? */
/* Hit or miss? */
if
(
r2
<
hjg2
&&
r2
>
cpi
->
h
*
cpi
->
h
*
kernel_gamma2
)
{
if
(
r2
<
hjg2
&&
r2
>
hi
*
hi
*
kernel_gamma2
)
{
#ifndef VECTORIZE
#ifndef VECTORIZE
/* Does pi need to be updated too? */
/* Does pi need to be updated too? */
if
(
cpi
->
dt
<=
dt_step
)
if
(
cpi
->
dt
<=
dt_step
)
IACT
(
r2
,
dx
,
hj
,
cpi
->
h
,
pj
,
&
parts_i
[
sort_i
[
pid
].
i
]
);
IACT
(
r2
,
dx
,
hj
,
h
i
,
pj
,
&
parts_i
[
sort_i
[
pid
].
i
]
);
else
else
IACT_NONSYM
(
r2
,
dx
,
hj
,
cpi
->
h
,
pj
,
&
parts_i
[
sort_i
[
pid
].
i
]
);
IACT_NONSYM
(
r2
,
dx
,
hj
,
h
i
,
pj
,
&
parts_i
[
sort_i
[
pid
].
i
]
);
#else
#else
...
@@ -1169,7 +1173,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
...
@@ -1169,7 +1173,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
dxq2
[
3
*
icount2
+
1
]
=
dx
[
1
];
dxq2
[
3
*
icount2
+
1
]
=
dx
[
1
];
dxq2
[
3
*
icount2
+
2
]
=
dx
[
2
];
dxq2
[
3
*
icount2
+
2
]
=
dx
[
2
];
hiq2
[
icount2
]
=
hj
;
hiq2
[
icount2
]
=
hj
;
hjq2
[
icount2
]
=
cpi
->
h
;
hjq2
[
icount2
]
=
h
i
;
piq2
[
icount2
]
=
pj
;
piq2
[
icount2
]
=
pj
;
pjq2
[
icount2
]
=
&
parts_i
[
sort_i
[
pid
].
i
];
pjq2
[
icount2
]
=
&
parts_i
[
sort_i
[
pid
].
i
];
icount2
+=
1
;
icount2
+=
1
;
...
@@ -1190,7 +1194,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
...
@@ -1190,7 +1194,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
dxq1
[
3
*
icount1
+
1
]
=
dx
[
1
];
dxq1
[
3
*
icount1
+
1
]
=
dx
[
1
];
dxq1
[
3
*
icount1
+
2
]
=
dx
[
2
];
dxq1
[
3
*
icount1
+
2
]
=
dx
[
2
];
hiq1
[
icount1
]
=
hj
;
hiq1
[
icount1
]
=
hj
;
hjq1
[
icount1
]
=
cpi
->
h
;
hjq1
[
icount1
]
=
h
i
;
piq1
[
icount1
]
=
pj
;
piq1
[
icount1
]
=
pj
;
pjq1
[
icount1
]
=
&
parts_i
[
sort_i
[
pid
].
i
];
pjq1
[
icount1
]
=
&
parts_i
[
sort_i
[
pid
].
i
];
icount1
+=
1
;
icount1
+=
1
;
...
@@ -1243,97 +1247,226 @@ void DOSELF1 ( struct runner *r , struct cell *restrict c ) {
...
@@ -1243,97 +1247,226 @@ void DOSELF1 ( struct runner *r , struct cell *restrict c ) {
int
k
,
pid
,
pjd
,
count
=
c
->
count
;
int
k
,
pid
,
pjd
,
count
=
c
->
count
;
double
pix
[
3
];
double
pix
[
3
];
float
dx
[
3
],
hi
,
hig2
,
r2
;
float
dx
[
3
],
hi
,
hj
,
hig2
,
r2
;
struct
part
*
restrict
parts
=
c
->
parts
,
*
restrict
pi
;
struct
part
*
restrict
parts
=
c
->
parts
,
*
restrict
pi
;
struct
cpart
*
restrict
cpi
,
*
restrict
cpj
,
*
restrict
cparts
=
c
->
cparts
;
struct
cpart
*
restrict
cpi
,
*
restrict
cpj
,
*
restrict
cparts
=
c
->
cparts
;
float
dt_step
=
r
->
e
->
dt_step
;
float
dt_step
=
r
->
e
->
dt_step
;
int
firstdt
=
0
,
countdt
=
0
,
*
indt
=
NULL
,
doj
;
#ifdef VECTORIZE
#ifdef VECTORIZE
int
icount
=
0
;
int
icount1
=
0
;
float
r2q
[
VEC_SIZE
]
__attribute__
((
aligned
(
16
)));
float
r2q1
[
VEC_SIZE
]
__attribute__
((
aligned
(
16
)));
float
hiq
[
VEC_SIZE
]
__attribute__
((
aligned
(
16
)));
float
hiq1
[
VEC_SIZE
]
__attribute__
((
aligned
(
16
)));
float
hjq
[
VEC_SIZE
]
__attribute__
((
aligned
(
16
)));
float
hjq1
[
VEC_SIZE
]
__attribute__
((
aligned
(
16
)));
float
dxq
[
3
*
VEC_SIZE
]
__attribute__
((
aligned
(
16
)));
float
dxq1
[
3
*
VEC_SIZE
]
__attribute__
((
aligned
(
16
)));
struct
part
*
piq
[
VEC_SIZE
],
*
pjq
[
VEC_SIZE
];
struct
part
*
piq1
[
VEC_SIZE
],
*
pjq1
[
VEC_SIZE
];
int
icount2
=
0
;
float
r2q2
[
VEC_SIZE
]
__attribute__
((
aligned
(
16
)));
float
hiq2
[
VEC_SIZE
]
__attribute__
((
aligned
(
16
)));
float
hjq2
[
VEC_SIZE
]
__attribute__
((
aligned
(
16
)));
float
dxq2
[
3
*
VEC_SIZE
]
__attribute__
((
aligned
(
16
)));
struct
part
*
piq2
[
VEC_SIZE
],
*
pjq2
[
VEC_SIZE
];
#endif
#endif
TIMER_TIC
TIMER_TIC
/*
Any work here?
*/
/*
Set up indt if needed.
*/
if
(
c
->
dt_min
>
dt_step
)
if
(
c
->
dt_min
>
dt_step
)
return
;
return
;
else
if
(
c
->
dt_max
>
dt_step
)
{
if
(
(
indt
=
(
int
*
)
alloca
(
sizeof
(
int
)
*
count
)
)
==
NULL
)
error
(
"Failed to allocate indt."
);
for
(
k
=
0
;
k
<
count
;
k
++
)
if
(
cparts
[
k
].
dt
<=
dt_step
)
{
indt
[
countdt
]
=
k
;
countdt
+=
1
;
}
}
/* Loop over the particles in the cell. */
/* Loop over the particles in the cell. */
for
(
pid
=
0
;
pid
<
count
;
pid
++
)
{
for
(
pid
=
0
;
pid
<
count
;
pid
++
)
{
/* Get a pointer to the ith particle. */
/* Get a pointer to the ith particle. */
cpi
=
&
cparts
[
pid
];
pi
=
&
parts
[
pid
];
pi
=
&
parts
[
pid
];
if
(
cpi
->
dt
>
dt_step
)
cpi
=
&
cparts
[
pid
];
continue
;
/* Get the particle position and radius. */
/* Get the particle position and radius. */
for
(
k
=
0
;
k
<
3
;
k
++
)
for
(
k
=
0
;
k
<
3
;
k
++
)
pix
[
k
]
=
cpi
->
x
[
k
];
pix
[
k
]
=
cpi
->
x
[
k
];
hi
=
cpi
->
h
;
hi
=
cpi
->
h
;
hig2
=
hi
*
hi
*
kernel_gamma2
;
hig2
=
hi
*
hi
*
kernel_gamma2
;
/* Loop over the other particles .*/
for
(
pjd
=
0
;
pjd
<
count
;
pjd
++
)
{
/* Skip self-interactions. */
/* Is the ith particle inactive? */
if
(
pid
==
pjd
)
if
(
cpi
->
dt
>
dt_step
)
{
continue
;
/* Get a pointer to the jth particle. */
cpj
=
&
cparts
[
pjd
];
/* Compute the pairwise distance. */
/* Loop over the other particles .*/
r2
=
0
.
0
f
;
for
(
pjd
=
firstdt
;
pjd
<
countdt
;
pjd
++
)
{
for
(
k
=
0
;
k
<
3
;
k
++
)
{
dx
[
k
]
=
pix
[
k
]
-
cpj
->
x
[
k
];
/* Get a pointer to the jth particle. */
r2
+=
dx
[
k
]
*
dx
[
k
];
cpj
=
&
cparts
[
indt
[
pjd
]
];
}
hj
=
cpj
->
h
;
/* Compute the pairwise distance. */
r2
=
0
.
0
f
;
for
(
k
=
0
;
k
<
3
;
k
++
)
{
dx
[
k
]
=
cpj
->
x
[
k
]
-
pix
[
k
];
r2
+=
dx
[
k
]
*
dx
[
k
];
}
/* Hit or miss? */
if
(
r2
<
hj
*
hj
*
kernel_gamma2
)
{
#ifndef VECTORIZE
IACT_NONSYM
(
r2
,
dx
,
hj
,
hi
,
&
parts
[
indt
[
pjd
]]
,
pi
);
#else
/* Add this interaction to the queue. */
r2q1
[
icount1
]
=
r2
;
dxq1
[
3
*
icount1
+
0
]
=
dx
[
0
];
dxq1
[
3
*
icount1
+
1
]
=
dx
[
1
];
dxq1
[
3
*
icount1
+
2
]
=
dx
[
2
];
hiq1
[
icount1
]
=
cpj
->
h
;
hjq1
[
icount1
]
=
hi
;
piq1
[
icount1
]
=
&
parts
[
indt
[
pjd
]];
pjq1
[
icount1
]
=
pi
;
icount1
+=
1
;
/* Flush? */
if
(
icount1
==
VEC_SIZE
)
{
IACT_NONSYM_VEC
(
r2q1
,
dxq1
,
hiq1
,
hjq1
,
piq1
,
pjq1
);
icount1
=
0
;
}
#endif
}
}
/* loop over all other particles. */
/* Hit or miss? */
}
if
(
r2
<
hig2
)
{
#ifndef VECTORIZE
/* Otherwise, interact with all candidates. */
else
{
/* We caught a live one! */
firstdt
+=
1
;
/* Loop over the other particles .*/
for
(
pjd
=
pid
+
1
;
pjd
<
count
;
pjd
++
)
{
/* Get a pointer to the jth particle. */
cpj
=
&
cparts
[
pjd
];
hj
=
cpj
->
h
;
/* Compute the pairwise distance. */
r2
=
0
.
0
f
;
for
(
k
=
0
;
k
<
3
;
k
++
)
{
dx
[
k
]
=
pix
[
k
]
-
cpj
->
x
[
k
];
r2
+=
dx
[
k
]
*
dx
[
k
];
}
doj
=
(
cpj
->
dt
<=
dt_step
)
&&
(
r2
<
hj
*
hj
*
kernel_gamma2
);
/* Hit or miss? */
if
(
r2
<
hig2
||
doj
)
{
#ifndef VECTORIZE
/* Which parts need to be updated? */
if
(
r2
<
hig2
&&
doj
)
IACT
(
r2
,
dx
,
hi
,
hj
,
pi
,
&
parts
[
pjd
]
);
else
if
(
!
doj
)
IACT_NONSYM
(
r2
,
dx
,
hi
,
hj
,
pi
,
&
parts
[
pjd
]
);
else
{
dx
[
0
]
=
-
dx
[
0
];
dx
[
1
]
=
-
dx
[
1
];
dx
[
2
]
=
-
dx
[
2
];
IACT_NONSYM
(
r2
,
dx
,
hi
,
hj
,
pi
,
&
parts
[
pjd
]
);
}
#else
/* Does pj need to be updated too? */
if
(
r2
<
hig2
&&
doj
)
{
IACT_NONSYM
(
r2
,
dx
,
hi
,
cpj
->
h
,
pi
,
&
parts
[
pjd
]
);
/* Add this interaction to the symmetric queue. */
r2q2
[
icount2
]
=
r2
;
#else
dxq2
[
3
*
icount2
+
0
]
=
dx
[
0
];
dxq2
[
3
*
icount2
+
1
]
=
dx
[
1
];
dxq2
[
3
*
icount2
+
2
]
=
dx
[
2
];
hiq2
[
icount2
]
=
hi
;
hjq2
[
icount2
]
=
hj
;
piq2
[
icount2
]
=
pi
;
pjq2
[
icount2
]
=
&
parts
[
pjd
];
icount2
+=
1
;
/* Add this interaction to the queue. */
/* Flush? */
r2q
[
icount
]
=
r2
;
if
(
icount2
==
VEC_SIZE
)
{
dxq
[
3
*
icount
+
0
]
=
dx
[
0
];
IACT_VEC
(
r2q2
,
dxq2
,
hiq2
,
hjq2
,
piq2
,
pjq2
);
dxq
[
3
*
icount
+
1
]
=
dx
[
1
];
icount2
=
0
;
dxq
[
3
*
icount
+
2
]
=
dx
[
2
];
}
hiq
[
icount
]
=
hi
;
hjq
[
icount
]
=
cpj
->
h
;
}
piq
[
icount
]
=
pi
;
pjq
[
icount
]
=
&
parts
[
pjd
];
else
if
(
!
doj
)
{
icount
+=
1
;
/* Add this interaction to the non-symmetric queue. */
r2q1
[
icount1
]
=
r2
;
dxq1
[
3
*
icount1
+
0
]
=
dx
[
0
];
dxq1
[
3
*
icount1
+
1
]
=
dx
[
1
];
dxq1
[
3
*
icount1
+
2
]
=
dx
[
2
];
hiq1
[
icount1
]
=
hi
;
hjq1
[
icount1
]
=
hj
;
piq1
[
icount1
]
=
pi
;
pjq1
[
icount1
]
=
&
parts
[
pjd
];
icount1
+=
1
;
/* Flush? */
/* Flush? */
if
(
icount
==
VEC_SIZE
)
{
if
(
icount1
==
VEC_SIZE
)
{
IACT_NONSYM_VEC
(
r2q
,
dxq
,
hiq
,
hjq
,
piq
,
pjq
);
IACT_NONSYM_VEC
(
r2q1
,
dxq1
,
hiq1
,
hjq1
,
piq1
,
pjq1
);
icount
=
0
;
icount1
=
0
;
}
}
}
#endif
else
{
}
/* Add this interaction to the non-symmetric queue. */
r2q1
[
icount1
]
=
r2
;
}
/* loop over all other particles. */
dxq1
[
3
*
icount1
+
0
]
=
-
dx
[
0
];
dxq1
[
3
*
icount1
+
1
]
=
-
dx
[
1
];
dxq1
[
3
*
icount1
+
2
]
=
-
dx
[
2
];
hiq1
[
icount1
]
=
hj
;
hjq1
[
icount1
]
=
hi
;
piq1
[
icount1
]
=
&
parts
[
pjd
];
pjq1
[
icount1
]
=
pi
;
icount1
+=
1
;
/* Flush? */
if
(
icount1
==
VEC_SIZE
)
{
IACT_NONSYM_VEC
(
r2q1
,
dxq1
,
hiq1
,
hjq1
,
piq1
,
pjq1
);
icount1
=
0
;
}
}
#endif
}
}
/* loop over all other particles. */
}
}
/* loop over all particles. */
}
/* loop over all particles. */
#ifdef VECTORIZE
#ifdef VECTORIZE
/* Pick up any leftovers. */
/* Pick up any leftovers. */
if
(
icount
>
0
)
if
(
icount1
>
0
)
for
(
k
=
0
;
k
<
icount
;
k
++
)
for
(
k
=
0
;
k
<
icount1
;
k
++
)
IACT_NONSYM
(
r2q
[
k
]
,
&
dxq
[
3
*
k
]
,
hiq
[
k
]
,
hjq
[
k
]
,
piq
[
k
]
,
pjq
[
k
]
);
IACT_NONSYM
(
r2q1
[
k
]
,
&
dxq1
[
3
*
k
]
,
hiq1
[
k
]
,
hjq1
[
k
]
,
piq1
[
k
]
,
pjq1
[
k
]
);
if
(
icount2
>
0
)
for
(
k
=
0
;
k
<
icount2
;
k
++
)
IACT
(
r2q2
[
k
]
,
&
dxq2
[
3
*
k
]
,
hiq2
[
k
]
,
hjq2
[
k
]
,
piq2
[
k
]
,
pjq2
[
k
]
);
#endif
#endif
#ifdef TIMER_VERBOSE
#ifdef TIMER_VERBOSE
printf
(
"runner_doself1[%02i]: %i parts at depth %i took %.3f ms.
\n
"
,
r
->
id
,
count
,
c
->
depth
,
((
double
)
TIMER_TOC
(
TIMER_DOSELF
))
/
CPU_TPS
*
1000
);
printf
(
"runner_doself1[%02i]: %i parts at depth %i took %.3f ms.
\n
"
,
r
->
id
,
count
,
c
->
depth
,
((
double
)
TIMER_TOC
(
TIMER_DOSELF
))
/
CPU_TPS
*
1000
);
#else
#else
...
@@ -1347,7 +1480,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
...
@@ -1347,7 +1480,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
int
k
,
pid
,
pjd
,
count
=
c
->
count
;
int
k
,
pid
,
pjd
,
count
=
c
->
count
;
double
pix
[
3
];
double
pix
[
3
];
float
dx
[
3
],
hi
,
hig2
,
r2
;
float
dx
[
3
],
hi
,
hj
,
hig2
,
r2
;
struct
part
*
restrict
parts
=
c
->
parts
,
*
restrict
pi
;
struct
part
*
restrict
parts
=
c
->
parts
,
*
restrict
pi
;
struct
cpart
*
restrict
cpi
,
*
restrict
cpj
,
*
restrict
cparts
=
c
->
cparts
;
struct
cpart
*
restrict
cpi
,
*
restrict
cpj
,
*
restrict
cparts
=
c
->
cparts
;
float
dt_step
=
r
->
e
->
dt_step
;
float
dt_step
=
r
->
e
->
dt_step
;
...
@@ -1402,6 +1535,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
...
@@ -1402,6 +1535,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
/* Get a pointer to the jth particle. */
/* Get a pointer to the jth particle. */
cpj
=
&
cparts
[
indt
[
pjd
]
];
cpj
=
&
cparts
[
indt
[
pjd
]
];
hj
=
cpj
->
h
;
/* Compute the pairwise distance. */
/* Compute the pairwise distance. */
r2
=
0
.
0
f
;
r2
=
0
.
0
f
;
...
@@ -1411,11 +1545,11 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
...
@@ -1411,11 +1545,11 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
}
}
/* Hit or miss? */
/* Hit or miss? */
if
(
r2
<
hig2
||
r2
<
cpj
->
h
*
cpj
->
h
*
kernel_gamma2
)
{
if
(
r2
<
hig2
||
r2
<
hj
*
hj
*
kernel_gamma2
)
{
#ifndef VECTORIZE
#ifndef VECTORIZE
IACT_NONSYM
(
r2
,
dx
,
cpj
->
h
,
hi
,
&
parts
[
indt
[
pjd
]]
,
pi
);
IACT_NONSYM
(
r2
,
dx
,
h
j
,
hi
,
&
parts
[
indt
[
pjd
]]
,
pi
);
#else
#else
...
@@ -1424,7 +1558,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
...
@@ -1424,7 +1558,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
dxq1
[
3
*
icount1
+
0
]
=
dx
[
0
];
dxq1
[
3
*
icount1
+
0
]
=
dx
[
0
];
dxq1
[
3
*
icount1
+
1
]
=
dx
[
1
];
dxq1
[
3
*
icount1
+
1
]
=
dx
[
1
];
dxq1
[
3
*
icount1
+
2
]
=
dx
[
2
];
dxq1
[
3
*
icount1
+
2
]
=
dx
[
2
];
hiq1
[
icount1
]
=
cpj
->
h
;
hiq1
[
icount1
]
=
h
j
;
hjq1
[
icount1
]
=
hi
;
hjq1
[
icount1
]
=
hi
;
piq1
[
icount1
]
=
&
parts
[
indt
[
pjd
]];
piq1
[
icount1
]
=
&
parts
[
indt
[
pjd
]];
pjq1
[
icount1
]
=
pi
;
pjq1
[
icount1
]
=
pi
;
...
@@ -1455,6 +1589,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
...
@@ -1455,6 +1589,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
/* Get a pointer to the jth particle. */
/* Get a pointer to the jth particle. */
cpj
=
&
cparts
[
pjd
];
cpj
=
&
cparts
[
pjd
];
hj
=
cpj
->
h
;
/* Compute the pairwise distance. */
/* Compute the pairwise distance. */
r2
=
0
.
0
f
;
r2
=
0
.
0
f
;
...
@@ -1464,15 +1599,15 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
...
@@ -1464,15 +1599,15 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
}
}
/* Hit or miss? */
/* Hit or miss? */
if
(
r2
<
hig2
||
r2
<
cpj
->
h
*
cpj
->
h
*
kernel_gamma2
)
{
if
(
r2
<
hig2
||
r2
<
hj
*
hj
*
kernel_gamma2
)
{
#ifndef VECTORIZE
#ifndef VECTORIZE
/* Does pj need to be updated too? */
/* Does pj need to be updated too? */
if
(
cpj
->
dt
<=
dt_step
)
if
(
cpj
->
dt
<=
dt_step
)
IACT
(
r2
,
dx
,
hi
,
cpj
->
h
,
pi
,
&
parts
[
pjd
]
);
IACT
(
r2
,
dx
,
hi
,
h
j
,
pi
,
&
parts
[
pjd
]
);
else
else
IACT_NONSYM
(
r2
,
dx
,
hi
,
cpj
->
h
,
pi
,
&
parts
[
pjd
]
);
IACT_NONSYM
(
r2
,
dx
,
hi
,
h
j
,
pi
,
&
parts
[
pjd
]
);
#else
#else
...
@@ -1485,7 +1620,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
...
@@ -1485,7 +1620,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
dxq2
[
3
*
icount2
+
1
]
=
dx
[
1
];
dxq2
[
3
*
icount2
+
1
]
=
dx
[
1
];
dxq2
[
3
*
icount2
+
2
]
=
dx
[
2
];
dxq2
[
3
*
icount2
+
2
]
=
dx
[
2
];
hiq2
[
icount2
]
=
hi
;
hiq2
[
icount2
]
=
hi
;
hjq2
[
icount2
]
=
cpj
->
h
;
hjq2
[
icount2
]
=
h
j
;
piq2
[
icount2
]
=
pi
;
piq2
[
icount2
]
=
pi
;
pjq2
[
icount2
]
=
&
parts
[
pjd
];
pjq2
[
icount2
]
=
&
parts
[
pjd
];
icount2
+=
1
;
icount2
+=
1
;
...
@@ -1506,7 +1641,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
...
@@ -1506,7 +1641,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
dxq1
[
3
*
icount1
+
1
]
=
dx
[
1
];
dxq1
[
3
*
icount1
+
1
]
=
dx
[
1
];
dxq1
[
3
*
icount1
+
2
]
=
dx
[
2
];
dxq1
[
3
*
icount1
+
2
]
=
dx
[
2
];
hiq1
[
icount1
]
=
hi
;
hiq1
[
icount1
]
=
hi
;
hjq1
[
icount1
]
=
cpj
->
h
;
hjq1
[
icount1
]
=
h
j
;
piq1
[
icount1
]
=
pi
;
piq1
[
icount1
]
=
pi
;
pjq1
[
icount1
]
=
&
parts
[
pjd
];
pjq1
[
icount1
]
=
&
parts
[
pjd
];
icount1
+=
1
;
icount1
+=
1
;
...
...
This diff is collapsed.
Click to expand it.
src/space.h
+
1
−
1
View file @
2c149eb6
...
@@ -26,7 +26,7 @@
...
@@ -26,7 +26,7 @@
#define space_splitratio 0.875f
#define space_splitratio 0.875f
#define space_splitsize_default 400
#define space_splitsize_default 400
#define space_subsize_default 5000
#define space_subsize_default 5000
#define space_dosub
1
#define space_dosub
0
#define space_stretch 1.05f
#define space_stretch 1.05f
#define space_maxtaskspercell 31
#define space_maxtaskspercell 31
#define space_maxreldx 0.2f
#define space_maxreldx 0.2f
...
...
This diff is collapsed.
Click to expand it.
src/vector.h
+
2
−
2
View file @
2c149eb6
...
@@ -27,7 +27,7 @@
...
@@ -27,7 +27,7 @@
#define VEC_MACRO(elcount, type) __attribute__((vector_size((elcount)*sizeof(type)))) type
#define VEC_MACRO(elcount, type) __attribute__((vector_size((elcount)*sizeof(type)))) type
/* So what will the vector size be? */
/* So what will the vector size be? */
#ifdef
NO
__AVX__
#ifdef __AVX__
#define VECTORIZE
#define VECTORIZE
#define VEC_SIZE 8
#define VEC_SIZE 8
#define VEC_FLOAT __m256
#define VEC_FLOAT __m256
...
@@ -54,7 +54,7 @@
...
@@ -54,7 +54,7 @@
#define vec_dbl_ftoi(a) _mm256_cvttpd_epi32(a)
#define vec_dbl_ftoi(a) _mm256_cvttpd_epi32(a)
#define vec_dbl_fmin(a,b) _mm256_min_pd(a,b)
#define vec_dbl_fmin(a,b) _mm256_min_pd(a,b)
#define vec_dbl_fmax(a,b) _mm256_max_pd(a,b)
#define vec_dbl_fmax(a,b) _mm256_max_pd(a,b)
#elif defined(
NO
__SSE2__ )
#elif defined( __SSE2__ )
#define VECTORIZE
#define VECTORIZE
#define VEC_SIZE 4
#define VEC_SIZE 4
#define VEC_FLOAT __m128
#define VEC_FLOAT __m128
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment