Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
9470da94
Commit
9470da94
authored
Sep 28, 2018
by
Loic Hausammann
Browse files
Update main code with modification in cell.h
parent
c7ee1e42
Changes
29
Expand all
Hide whitespace changes
Inline
Side-by-side
examples/main.c
View file @
9470da94
...
@@ -1084,21 +1084,24 @@ int main(int argc, char *argv[]) {
...
@@ -1084,21 +1084,24 @@ int main(int argc, char *argv[]) {
int
count
=
0
;
int
count
=
0
;
for
(
int
l
=
0
;
l
<
e
.
sched
.
nr_tasks
;
l
++
)
{
for
(
int
l
=
0
;
l
<
e
.
sched
.
nr_tasks
;
l
++
)
{
if
(
!
e
.
sched
.
tasks
[
l
].
implicit
&&
e
.
sched
.
tasks
[
l
].
toc
!=
0
)
{
if
(
!
e
.
sched
.
tasks
[
l
].
implicit
&&
e
.
sched
.
tasks
[
l
].
toc
!=
0
)
{
fprintf
(
fprintf
(
file_thread
,
file_thread
,
" %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i
\n
"
,
" %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i
\n
"
,
myrank
,
myrank
,
e
.
sched
.
tasks
[
l
].
rid
,
e
.
sched
.
tasks
[
l
].
type
,
e
.
sched
.
tasks
[
l
].
rid
,
e
.
sched
.
tasks
[
l
].
type
,
e
.
sched
.
tasks
[
l
].
subtype
,
(
e
.
sched
.
tasks
[
l
].
cj
==
NULL
),
e
.
sched
.
tasks
[
l
].
subtype
,
(
e
.
sched
.
tasks
[
l
].
cj
==
NULL
),
e
.
sched
.
tasks
[
l
].
tic
,
e
.
sched
.
tasks
[
l
].
toc
,
e
.
sched
.
tasks
[
l
].
tic
,
e
.
sched
.
tasks
[
l
].
toc
,
(
e
.
sched
.
tasks
[
l
].
ci
!=
NULL
)
(
e
.
sched
.
tasks
[
l
].
ci
!=
NULL
)
?
e
.
sched
.
tasks
[
l
].
ci
->
count
?
e
.
sched
.
tasks
[
l
].
ci
->
hydro
.
count
:
0
,
:
0
,
(
e
.
sched
.
tasks
[
l
].
cj
!=
NULL
)
?
e
.
sched
.
tasks
[
l
].
cj
->
count
(
e
.
sched
.
tasks
[
l
].
cj
!=
NULL
)
:
0
,
?
e
.
sched
.
tasks
[
l
].
cj
->
hydro
.
count
(
e
.
sched
.
tasks
[
l
].
ci
!=
NULL
)
?
e
.
sched
.
tasks
[
l
].
ci
->
gcount
:
0
,
:
0
,
(
e
.
sched
.
tasks
[
l
].
ci
!=
NULL
)
(
e
.
sched
.
tasks
[
l
].
cj
!=
NULL
)
?
e
.
sched
.
tasks
[
l
].
cj
->
gcount
?
e
.
sched
.
tasks
[
l
].
ci
->
grav
.
gcount
:
0
,
:
0
,
e
.
sched
.
tasks
[
l
].
flags
,
e
.
sched
.
tasks
[
l
].
sid
);
(
e
.
sched
.
tasks
[
l
].
cj
!=
NULL
)
?
e
.
sched
.
tasks
[
l
].
cj
->
grav
.
gcount
:
0
,
e
.
sched
.
tasks
[
l
].
flags
,
e
.
sched
.
tasks
[
l
].
sid
);
}
}
fflush
(
stdout
);
fflush
(
stdout
);
count
++
;
count
++
;
...
@@ -1126,10 +1129,14 @@ int main(int argc, char *argv[]) {
...
@@ -1126,10 +1129,14 @@ int main(int argc, char *argv[]) {
e
.
sched
.
tasks
[
l
].
rid
,
e
.
sched
.
tasks
[
l
].
type
,
e
.
sched
.
tasks
[
l
].
rid
,
e
.
sched
.
tasks
[
l
].
type
,
e
.
sched
.
tasks
[
l
].
subtype
,
(
e
.
sched
.
tasks
[
l
].
cj
==
NULL
),
e
.
sched
.
tasks
[
l
].
subtype
,
(
e
.
sched
.
tasks
[
l
].
cj
==
NULL
),
e
.
sched
.
tasks
[
l
].
tic
,
e
.
sched
.
tasks
[
l
].
toc
,
e
.
sched
.
tasks
[
l
].
tic
,
e
.
sched
.
tasks
[
l
].
toc
,
(
e
.
sched
.
tasks
[
l
].
ci
==
NULL
)
?
0
:
e
.
sched
.
tasks
[
l
].
ci
->
count
,
(
e
.
sched
.
tasks
[
l
].
ci
==
NULL
)
?
0
(
e
.
sched
.
tasks
[
l
].
cj
==
NULL
)
?
0
:
e
.
sched
.
tasks
[
l
].
cj
->
count
,
:
e
.
sched
.
tasks
[
l
].
ci
->
hydro
.
count
,
(
e
.
sched
.
tasks
[
l
].
ci
==
NULL
)
?
0
:
e
.
sched
.
tasks
[
l
].
ci
->
gcount
,
(
e
.
sched
.
tasks
[
l
].
cj
==
NULL
)
?
0
(
e
.
sched
.
tasks
[
l
].
cj
==
NULL
)
?
0
:
e
.
sched
.
tasks
[
l
].
cj
->
gcount
,
:
e
.
sched
.
tasks
[
l
].
cj
->
hydro
.
count
,
(
e
.
sched
.
tasks
[
l
].
ci
==
NULL
)
?
0
:
e
.
sched
.
tasks
[
l
].
ci
->
grav
.
gcount
,
(
e
.
sched
.
tasks
[
l
].
cj
==
NULL
)
?
0
:
e
.
sched
.
tasks
[
l
].
cj
->
grav
.
gcount
,
e
.
sched
.
tasks
[
l
].
sid
);
e
.
sched
.
tasks
[
l
].
sid
);
}
}
}
}
...
...
src/active.h
View file @
9470da94
...
@@ -39,15 +39,15 @@ __attribute__((always_inline)) INLINE static int cell_are_part_drifted(
...
@@ -39,15 +39,15 @@ __attribute__((always_inline)) INLINE static int cell_are_part_drifted(
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
#ifdef SWIFT_DEBUG_CHECKS
#ifdef SWIFT_DEBUG_CHECKS
if
(
c
->
ti_old
_part
>
e
->
ti_current
)
if
(
c
->
hydro
.
ti_old
>
e
->
ti_current
)
error
(
error
(
"Cell has been drifted too far forward in time! c->ti_old=%lld (t=%e) "
"Cell has been drifted too far forward in time! c->ti_old=%lld (t=%e) "
"and e->ti_current=%lld (t=%e, a=%e)"
,
"and e->ti_current=%lld (t=%e, a=%e)"
,
c
->
ti_old_part
,
c
->
ti_old
_part
*
e
->
time_base
,
e
->
ti_current
,
c
->
hydro
.
ti_old
,
c
->
hydro
.
ti_old
*
e
->
time_base
,
e
->
ti_current
,
e
->
ti_current
*
e
->
time_base
,
e
->
cosmology
->
a
);
e
->
ti_current
*
e
->
time_base
,
e
->
cosmology
->
a
);
#endif
#endif
return
(
c
->
ti_old
_part
==
e
->
ti_current
);
return
(
c
->
hydro
.
ti_old
==
e
->
ti_current
);
}
}
/**
/**
...
@@ -62,15 +62,15 @@ __attribute__((always_inline)) INLINE static int cell_are_gpart_drifted(
...
@@ -62,15 +62,15 @@ __attribute__((always_inline)) INLINE static int cell_are_gpart_drifted(
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
#ifdef SWIFT_DEBUG_CHECKS
#ifdef SWIFT_DEBUG_CHECKS
if
(
c
->
ti_old_gpart
>
e
->
ti_current
)
if
(
c
->
grav
.
ti_old_gpart
>
e
->
ti_current
)
error
(
error
(
"Cell has been drifted too far forward in time! c->ti_old=%lld (t=%e) "
"Cell has been drifted too far forward in time! c->ti_old=%lld (t=%e) "
"and e->ti_current=%lld (t=%e)"
,
"and e->ti_current=%lld (t=%e)"
,
c
->
ti_old_gpart
,
c
->
ti_old_gpart
*
e
->
time_base
,
e
->
ti_current
,
c
->
grav
.
ti_old_gpart
,
c
->
grav
.
ti_old_gpart
*
e
->
time_base
,
e
->
ti_current
*
e
->
time_base
);
e
->
ti_current
,
e
->
ti_current
*
e
->
time_base
);
#endif
#endif
return
(
c
->
ti_old_gpart
==
e
->
ti_current
);
return
(
c
->
grav
.
ti_old_gpart
==
e
->
ti_current
);
}
}
/* Are cells / particles active for regular tasks ? */
/* Are cells / particles active for regular tasks ? */
...
@@ -86,15 +86,15 @@ __attribute__((always_inline)) INLINE static int cell_is_active_hydro(
...
@@ -86,15 +86,15 @@ __attribute__((always_inline)) INLINE static int cell_is_active_hydro(
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
#ifdef SWIFT_DEBUG_CHECKS
#ifdef SWIFT_DEBUG_CHECKS
if
(
c
->
ti_
hydro_end_min
<
e
->
ti_current
)
if
(
c
->
hydro
.
ti
_end_min
<
e
->
ti_current
)
error
(
error
(
"cell in an impossible time-zone! c->ti_end_min=%lld (t=%e) and "
"cell in an impossible time-zone! c->ti_end_min=%lld (t=%e) and "
"e->ti_current=%lld (t=%e, a=%e)"
,
"e->ti_current=%lld (t=%e, a=%e)"
,
c
->
ti_
hydro_end_min
,
c
->
ti_
hydro_end_min
*
e
->
time_base
,
e
->
ti_current
,
c
->
hydro
.
ti
_end_min
,
c
->
hydro
.
ti
_end_min
*
e
->
time_base
,
e
->
ti_current
,
e
->
ti_current
*
e
->
time_base
,
e
->
cosmology
->
a
);
e
->
ti_current
*
e
->
time_base
,
e
->
cosmology
->
a
);
#endif
#endif
return
(
c
->
ti_
hydro_end_min
==
e
->
ti_current
);
return
(
c
->
hydro
.
ti
_end_min
==
e
->
ti_current
);
}
}
/**
/**
...
@@ -108,14 +108,14 @@ __attribute__((always_inline)) INLINE static int cell_is_all_active_hydro(
...
@@ -108,14 +108,14 @@ __attribute__((always_inline)) INLINE static int cell_is_all_active_hydro(
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
#ifdef SWIFT_DEBUG_CHECKS
#ifdef SWIFT_DEBUG_CHECKS
if
(
c
->
ti_
hydro_end_max
<
e
->
ti_current
)
if
(
c
->
hydro
.
ti
_end_max
<
e
->
ti_current
)
error
(
error
(
"cell in an impossible time-zone! c->ti_end_max=%lld "
"cell in an impossible time-zone! c->ti_end_max=%lld "
"e->ti_current=%lld"
,
"e->ti_current=%lld"
,
c
->
ti_
hydro_end_max
,
e
->
ti_current
);
c
->
hydro
.
ti
_end_max
,
e
->
ti_current
);
#endif
#endif
return
(
c
->
ti_
hydro_end_max
==
e
->
ti_current
);
return
(
c
->
hydro
.
ti
_end_max
==
e
->
ti_current
);
}
}
/**
/**
...
@@ -129,15 +129,15 @@ __attribute__((always_inline)) INLINE static int cell_is_active_gravity(
...
@@ -129,15 +129,15 @@ __attribute__((always_inline)) INLINE static int cell_is_active_gravity(
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
#ifdef SWIFT_DEBUG_CHECKS
#ifdef SWIFT_DEBUG_CHECKS
if
(
c
->
ti_
grav
ity
_end_min
<
e
->
ti_current
)
if
(
c
->
grav
.
ti
_end_min
<
e
->
ti_current
)
error
(
error
(
"cell in an impossible time-zone! c->ti_end_min=%lld (t=%e) and "
"cell in an impossible time-zone! c->ti_end_min=%lld (t=%e) and "
"e->ti_current=%lld (t=%e, a=%e)"
,
"e->ti_current=%lld (t=%e, a=%e)"
,
c
->
ti_
grav
ity
_end_min
,
c
->
ti_
grav
ity
_end_min
*
e
->
time_base
,
c
->
grav
.
ti
_end_min
,
c
->
grav
.
ti
_end_min
*
e
->
time_base
,
e
->
ti_current
,
e
->
ti_current
,
e
->
ti_current
*
e
->
time_base
,
e
->
cosmology
->
a
);
e
->
ti_current
*
e
->
time_base
,
e
->
cosmology
->
a
);
#endif
#endif
return
(
c
->
ti_
grav
ity
_end_min
==
e
->
ti_current
);
return
(
c
->
grav
.
ti
_end_min
==
e
->
ti_current
);
}
}
/**
/**
...
@@ -150,7 +150,7 @@ __attribute__((always_inline)) INLINE static int cell_is_active_gravity(
...
@@ -150,7 +150,7 @@ __attribute__((always_inline)) INLINE static int cell_is_active_gravity(
__attribute__
((
always_inline
))
INLINE
static
int
cell_is_active_gravity_mm
(
__attribute__
((
always_inline
))
INLINE
static
int
cell_is_active_gravity_mm
(
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
return
(
c
->
ti_
grav
ity
_end_min
==
e
->
ti_current
);
return
(
c
->
grav
.
ti
_end_min
==
e
->
ti_current
);
}
}
/**
/**
...
@@ -164,14 +164,14 @@ __attribute__((always_inline)) INLINE static int cell_is_all_active_gravity(
...
@@ -164,14 +164,14 @@ __attribute__((always_inline)) INLINE static int cell_is_all_active_gravity(
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
#ifdef SWIFT_DEBUG_CHECKS
#ifdef SWIFT_DEBUG_CHECKS
if
(
c
->
ti_
grav
ity
_end_max
<
e
->
ti_current
)
if
(
c
->
grav
.
ti
_end_max
<
e
->
ti_current
)
error
(
error
(
"cell in an impossible time-zone! c->ti_end_max=%lld "
"cell in an impossible time-zone! c->ti_end_max=%lld "
"e->ti_current=%lld"
,
"e->ti_current=%lld"
,
c
->
ti_
grav
ity
_end_max
,
e
->
ti_current
);
c
->
grav
.
ti
_end_max
,
e
->
ti_current
);
#endif
#endif
return
(
c
->
ti_
grav
ity
_end_max
==
e
->
ti_current
);
return
(
c
->
grav
.
ti
_end_max
==
e
->
ti_current
);
}
}
/**
/**
...
@@ -290,15 +290,15 @@ __attribute__((always_inline)) INLINE static int cell_is_starting_hydro(
...
@@ -290,15 +290,15 @@ __attribute__((always_inline)) INLINE static int cell_is_starting_hydro(
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
#ifdef SWIFT_DEBUG_CHECKS
#ifdef SWIFT_DEBUG_CHECKS
if
(
c
->
ti_
hydro_beg_max
>
e
->
ti_current
)
if
(
c
->
hydro
.
ti
_beg_max
>
e
->
ti_current
)
error
(
error
(
"cell in an impossible time-zone! c->ti_beg_max=%lld (t=%e) and "
"cell in an impossible time-zone! c->ti_beg_max=%lld (t=%e) and "
"e->ti_current=%lld (t=%e, a=%e)"
,
"e->ti_current=%lld (t=%e, a=%e)"
,
c
->
ti_
hydro_beg_max
,
c
->
ti_
hydro_beg_max
*
e
->
time_base
,
e
->
ti_current
,
c
->
hydro
.
ti
_beg_max
,
c
->
hydro
.
ti
_beg_max
*
e
->
time_base
,
e
->
ti_current
,
e
->
ti_current
*
e
->
time_base
,
e
->
cosmology
->
a
);
e
->
ti_current
*
e
->
time_base
,
e
->
cosmology
->
a
);
#endif
#endif
return
(
c
->
ti_
hydro_beg_max
==
e
->
ti_current
);
return
(
c
->
hydro
.
ti
_beg_max
==
e
->
ti_current
);
}
}
/**
/**
...
@@ -312,15 +312,15 @@ __attribute__((always_inline)) INLINE static int cell_is_starting_gravity(
...
@@ -312,15 +312,15 @@ __attribute__((always_inline)) INLINE static int cell_is_starting_gravity(
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
const
struct
cell
*
c
,
const
struct
engine
*
e
)
{
#ifdef SWIFT_DEBUG_CHECKS
#ifdef SWIFT_DEBUG_CHECKS
if
(
c
->
ti_
grav
ity
_beg_max
>
e
->
ti_current
)
if
(
c
->
grav
.
ti
_beg_max
>
e
->
ti_current
)
error
(
error
(
"cell in an impossible time-zone! c->ti_beg_max=%lld (t=%e) and "
"cell in an impossible time-zone! c->ti_beg_max=%lld (t=%e) and "
"e->ti_current=%lld (t=%e, a=%e)"
,
"e->ti_current=%lld (t=%e, a=%e)"
,
c
->
ti_
grav
ity
_beg_max
,
c
->
ti_
grav
ity
_beg_max
*
e
->
time_base
,
c
->
grav
.
ti
_beg_max
,
c
->
grav
.
ti
_beg_max
*
e
->
time_base
,
e
->
ti_current
,
e
->
ti_current
,
e
->
ti_current
*
e
->
time_base
,
e
->
cosmology
->
a
);
e
->
ti_current
*
e
->
time_base
,
e
->
cosmology
->
a
);
#endif
#endif
return
(
c
->
ti_
grav
ity
_beg_max
==
e
->
ti_current
);
return
(
c
->
grav
.
ti
_beg_max
==
e
->
ti_current
);
}
}
/**
/**
...
...
src/cache.h
View file @
9470da94
...
@@ -197,12 +197,12 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
...
@@ -197,12 +197,12 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
swift_declare_aligned_ptr
(
float
,
vy
,
ci_cache
->
vy
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
vy
,
ci_cache
->
vy
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
vz
,
ci_cache
->
vz
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
vz
,
ci_cache
->
vz
,
SWIFT_CACHE_ALIGNMENT
);
const
struct
part
*
restrict
parts
=
ci
->
parts
;
const
struct
part
*
restrict
parts
=
ci
->
hydro
.
parts
;
const
double
loc
[
3
]
=
{
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
]};
const
double
loc
[
3
]
=
{
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
]};
/* Shift the particles positions to a local frame so single precision can be
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
* used instead of double precision. */
for
(
int
i
=
0
;
i
<
ci
->
count
;
i
++
)
{
for
(
int
i
=
0
;
i
<
ci
->
hydro
.
count
;
i
++
)
{
x
[
i
]
=
(
float
)(
parts
[
i
].
x
[
0
]
-
loc
[
0
]);
x
[
i
]
=
(
float
)(
parts
[
i
].
x
[
0
]
-
loc
[
0
]);
y
[
i
]
=
(
float
)(
parts
[
i
].
x
[
1
]
-
loc
[
1
]);
y
[
i
]
=
(
float
)(
parts
[
i
].
x
[
1
]
-
loc
[
1
]);
z
[
i
]
=
(
float
)(
parts
[
i
].
x
[
2
]
-
loc
[
2
]);
z
[
i
]
=
(
float
)(
parts
[
i
].
x
[
2
]
-
loc
[
2
]);
...
@@ -248,7 +248,7 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
...
@@ -248,7 +248,7 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
swift_declare_aligned_ptr
(
float
,
vy
,
ci_cache
->
vy
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
vy
,
ci_cache
->
vy
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
vz
,
ci_cache
->
vz
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
vz
,
ci_cache
->
vz
,
SWIFT_CACHE_ALIGNMENT
);
const
struct
part
*
restrict
parts
=
ci
->
parts
;
const
struct
part
*
restrict
parts
=
ci
->
hydro
.
parts
;
/* The cell is on the right so read the particles
/* The cell is on the right so read the particles
* into the cache from the start of the cell. */
* into the cache from the start of the cell. */
...
@@ -258,7 +258,7 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
...
@@ -258,7 +258,7 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
const
int
pad
=
VEC_SIZE
-
rem
;
const
int
pad
=
VEC_SIZE
-
rem
;
/* Increase last_pi if there are particles in the cell left to read. */
/* Increase last_pi if there are particles in the cell left to read. */
if
(
*
last_pi
+
pad
<
ci
->
count
)
*
last_pi
+=
pad
;
if
(
*
last_pi
+
pad
<
ci
->
hydro
.
count
)
*
last_pi
+=
pad
;
}
}
/* Shift the particles positions to a local frame so single precision can be
/* Shift the particles positions to a local frame so single precision can be
...
@@ -278,11 +278,11 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
...
@@ -278,11 +278,11 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
/* Pad cache with fake particles that exist outside the cell so will not
/* Pad cache with fake particles that exist outside the cell so will not
* interact. We use values of the same magnitude (but negative!) as the real
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
* particles to avoid overflow problems. */
const
double
max_dx
=
ci
->
dx_max
_part
;
const
double
max_dx
=
ci
->
hydro
.
dx_max
;
const
float
pos_padded
[
3
]
=
{
-
(
2
.
*
ci
->
width
[
0
]
+
max_dx
),
const
float
pos_padded
[
3
]
=
{
-
(
2
.
*
ci
->
width
[
0
]
+
max_dx
),
-
(
2
.
*
ci
->
width
[
1
]
+
max_dx
),
-
(
2
.
*
ci
->
width
[
1
]
+
max_dx
),
-
(
2
.
*
ci
->
width
[
2
]
+
max_dx
)};
-
(
2
.
*
ci
->
width
[
2
]
+
max_dx
)};
const
float
h_padded
=
ci
->
parts
[
0
].
h
;
const
float
h_padded
=
ci
->
hydro
.
parts
[
0
].
h
;
for
(
int
i
=
*
last_pi
;
i
<
*
last_pi
+
VEC_SIZE
;
i
++
)
{
for
(
int
i
=
*
last_pi
;
i
<
*
last_pi
+
VEC_SIZE
;
i
++
)
{
x
[
i
]
=
pos_padded
[
0
];
x
[
i
]
=
pos_padded
[
0
];
...
@@ -299,7 +299,7 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
...
@@ -299,7 +299,7 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
/* The cell is on the left so read the particles
/* The cell is on the left so read the particles
* into the cache from the end of the cell. */
* into the cache from the end of the cell. */
else
{
else
{
const
int
rem
=
(
ci
->
count
-
*
first_pi
)
%
VEC_SIZE
;
const
int
rem
=
(
ci
->
hydro
.
count
-
*
first_pi
)
%
VEC_SIZE
;
if
(
rem
!=
0
)
{
if
(
rem
!=
0
)
{
const
int
pad
=
VEC_SIZE
-
rem
;
const
int
pad
=
VEC_SIZE
-
rem
;
...
@@ -307,7 +307,7 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
...
@@ -307,7 +307,7 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
if
(
*
first_pi
-
pad
>=
0
)
*
first_pi
-=
pad
;
if
(
*
first_pi
-
pad
>=
0
)
*
first_pi
-=
pad
;
}
}
const
int
ci_cache_count
=
ci
->
count
-
*
first_pi
;
const
int
ci_cache_count
=
ci
->
hydro
.
count
-
*
first_pi
;
/* Shift the particles positions to a local frame so single precision can be
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
* used instead of double precision. */
...
@@ -326,14 +326,14 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
...
@@ -326,14 +326,14 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
/* Pad cache with fake particles that exist outside the cell so will not
/* Pad cache with fake particles that exist outside the cell so will not
* interact. We use values of the same magnitude (but negative!) as the real
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
* particles to avoid overflow problems. */
const
double
max_dx
=
ci
->
dx_max
_part
;
const
double
max_dx
=
ci
->
hydro
.
dx_max
;
const
float
pos_padded
[
3
]
=
{
-
(
2
.
*
ci
->
width
[
0
]
+
max_dx
),
const
float
pos_padded
[
3
]
=
{
-
(
2
.
*
ci
->
width
[
0
]
+
max_dx
),
-
(
2
.
*
ci
->
width
[
1
]
+
max_dx
),
-
(
2
.
*
ci
->
width
[
1
]
+
max_dx
),
-
(
2
.
*
ci
->
width
[
2
]
+
max_dx
)};
-
(
2
.
*
ci
->
width
[
2
]
+
max_dx
)};
const
float
h_padded
=
ci
->
parts
[
0
].
h
;
const
float
h_padded
=
ci
->
hydro
.
parts
[
0
].
h
;
for
(
int
i
=
ci
->
count
-
*
first_pi
;
i
<
ci
->
count
-
*
first_pi
+
VEC_SIZE
;
for
(
int
i
=
ci
->
hydro
.
count
-
*
first_pi
;
i
++
)
{
i
<
ci
->
hydro
.
count
-
*
first_pi
+
VEC_SIZE
;
i
++
)
{
x
[
i
]
=
pos_padded
[
0
];
x
[
i
]
=
pos_padded
[
0
];
y
[
i
]
=
pos_padded
[
1
];
y
[
i
]
=
pos_padded
[
1
];
z
[
i
]
=
pos_padded
[
2
];
z
[
i
]
=
pos_padded
[
2
];
...
@@ -382,12 +382,12 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
...
@@ -382,12 +382,12 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
swift_declare_aligned_ptr
(
float
,
soundspeed
,
ci_cache
->
soundspeed
,
swift_declare_aligned_ptr
(
float
,
soundspeed
,
ci_cache
->
soundspeed
,
SWIFT_CACHE_ALIGNMENT
);
SWIFT_CACHE_ALIGNMENT
);
const
struct
part
*
restrict
parts
=
ci
->
parts
;
const
struct
part
*
restrict
parts
=
ci
->
hydro
.
parts
;
const
double
loc
[
3
]
=
{
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
]};
const
double
loc
[
3
]
=
{
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
]};
/* Shift the particles positions to a local frame so single precision can be
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
* used instead of double precision. */
for
(
int
i
=
0
;
i
<
ci
->
count
;
i
++
)
{
for
(
int
i
=
0
;
i
<
ci
->
hydro
.
count
;
i
++
)
{
x
[
i
]
=
(
float
)(
parts
[
i
].
x
[
0
]
-
loc
[
0
]);
x
[
i
]
=
(
float
)(
parts
[
i
].
x
[
0
]
-
loc
[
0
]);
y
[
i
]
=
(
float
)(
parts
[
i
].
x
[
1
]
-
loc
[
1
]);
y
[
i
]
=
(
float
)(
parts
[
i
].
x
[
1
]
-
loc
[
1
]);
z
[
i
]
=
(
float
)(
parts
[
i
].
x
[
2
]
-
loc
[
2
]);
z
[
i
]
=
(
float
)(
parts
[
i
].
x
[
2
]
-
loc
[
2
]);
...
@@ -433,7 +433,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
...
@@ -433,7 +433,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
* cache. */
* cache. */
/* Is the number of particles to read a multiple of the vector size? */
/* Is the number of particles to read a multiple of the vector size? */
int
rem
=
(
ci
->
count
-
*
first_pi
)
%
VEC_SIZE
;
int
rem
=
(
ci
->
hydro
.
count
-
*
first_pi
)
%
VEC_SIZE
;
if
(
rem
!=
0
)
{
if
(
rem
!=
0
)
{
int
pad
=
VEC_SIZE
-
rem
;
int
pad
=
VEC_SIZE
-
rem
;
...
@@ -446,14 +446,14 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
...
@@ -446,14 +446,14 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
int
pad
=
VEC_SIZE
-
rem
;
int
pad
=
VEC_SIZE
-
rem
;
/* Increase last_pj if there are particles in the cell left to read. */
/* Increase last_pj if there are particles in the cell left to read. */
if
(
*
last_pj
+
pad
<
cj
->
count
)
*
last_pj
+=
pad
;
if
(
*
last_pj
+
pad
<
cj
->
hydro
.
count
)
*
last_pj
+=
pad
;
}
}
/* Get some local pointers */
/* Get some local pointers */
const
int
first_pi_align
=
*
first_pi
;
const
int
first_pi_align
=
*
first_pi
;
const
int
last_pj_align
=
*
last_pj
;
const
int
last_pj_align
=
*
last_pj
;
const
struct
part
*
restrict
parts_i
=
ci
->
parts
;
const
struct
part
*
restrict
parts_i
=
ci
->
hydro
.
parts
;
const
struct
part
*
restrict
parts_j
=
cj
->
parts
;
const
struct
part
*
restrict
parts_j
=
cj
->
hydro
.
parts
;
/* Shift particles to the local frame and account for boundary conditions.*/
/* Shift particles to the local frame and account for boundary conditions.*/
const
double
total_ci_shift
[
3
]
=
{
const
double
total_ci_shift
[
3
]
=
{
...
@@ -471,7 +471,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
...
@@ -471,7 +471,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
swift_declare_aligned_ptr
(
float
,
vy
,
ci_cache
->
vy
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
vy
,
ci_cache
->
vy
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
vz
,
ci_cache
->
vz
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
vz
,
ci_cache
->
vz
,
SWIFT_CACHE_ALIGNMENT
);
int
ci_cache_count
=
ci
->
count
-
first_pi_align
;
int
ci_cache_count
=
ci
->
hydro
.
count
-
first_pi_align
;
/* Shift the particles positions to a local frame (ci frame) so single
/* Shift the particles positions to a local frame (ci frame) so single
* precision can be used instead of double precision. */
* precision can be used instead of double precision. */
...
@@ -491,11 +491,11 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
...
@@ -491,11 +491,11 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
#ifdef SWIFT_DEBUG_CHECKS
#ifdef SWIFT_DEBUG_CHECKS
const
float
shift_threshold_x
=
const
float
shift_threshold_x
=
2
.
*
ci
->
width
[
0
]
+
2
.
*
max
(
ci
->
dx_max
_part
,
cj
->
dx_max
_part
);
2
.
*
ci
->
width
[
0
]
+
2
.
*
max
(
ci
->
hydro
.
dx_max
,
cj
->
hydro
.
dx_max
);
const
float
shift_threshold_y
=
const
float
shift_threshold_y
=
2
.
*
ci
->
width
[
1
]
+
2
.
*
max
(
ci
->
dx_max
_part
,
cj
->
dx_max
_part
);
2
.
*
ci
->
width
[
1
]
+
2
.
*
max
(
ci
->
hydro
.
dx_max
,
cj
->
hydro
.
dx_max
);
const
float
shift_threshold_z
=
const
float
shift_threshold_z
=
2
.
*
ci
->
width
[
2
]
+
2
.
*
max
(
ci
->
dx_max
_part
,
cj
->
dx_max
_part
);
2
.
*
ci
->
width
[
2
]
+
2
.
*
max
(
ci
->
hydro
.
dx_max
,
cj
->
hydro
.
dx_max
);
/* Make sure that particle positions have been shifted correctly. */
/* Make sure that particle positions have been shifted correctly. */
for
(
int
i
=
0
;
i
<
ci_cache_count
;
i
++
)
{
for
(
int
i
=
0
;
i
<
ci_cache_count
;
i
++
)
{
...
@@ -529,14 +529,14 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
...
@@ -529,14 +529,14 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
/* Pad cache with fake particles that exist outside the cell so will not
/* Pad cache with fake particles that exist outside the cell so will not
* interact. We use values of the same magnitude (but negative!) as the real
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
* particles to avoid overflow problems. */
const
double
max_dx
=
max
(
ci
->
dx_max
_part
,
cj
->
dx_max
_part
);
const
double
max_dx
=
max
(
ci
->
hydro
.
dx_max
,
cj
->
hydro
.
dx_max
);
const
float
pos_padded
[
3
]
=
{
-
(
2
.
*
ci
->
width
[
0
]
+
max_dx
),
const
float
pos_padded
[
3
]
=
{
-
(
2
.
*
ci
->
width
[
0
]
+
max_dx
),
-
(
2
.
*
ci
->
width
[
1
]
+
max_dx
),
-
(
2
.
*
ci
->
width
[
1
]
+
max_dx
),
-
(
2
.
*
ci
->
width
[
2
]
+
max_dx
)};
-
(
2
.
*
ci
->
width
[
2
]
+
max_dx
)};
const
float
h_padded
=
ci
->
parts
[
0
].
h
;
const
float
h_padded
=
ci
->
hydro
.
parts
[
0
].
h
;
for
(
int
i
=
ci
->
count
-
first_pi_align
;
for
(
int
i
=
ci
->
hydro
.
count
-
first_pi_align
;
i
<
ci
->
count
-
first_pi_align
+
VEC_SIZE
;
i
++
)
{
i
<
ci
->
hydro
.
count
-
first_pi_align
+
VEC_SIZE
;
i
++
)
{
x
[
i
]
=
pos_padded
[
0
];
x
[
i
]
=
pos_padded
[
0
];
y
[
i
]
=
pos_padded
[
1
];
y
[
i
]
=
pos_padded
[
1
];
z
[
i
]
=
pos_padded
[
2
];
z
[
i
]
=
pos_padded
[
2
];
...
@@ -609,7 +609,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
...
@@ -609,7 +609,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
const
float
pos_padded_j
[
3
]
=
{
-
(
2
.
*
cj
->
width
[
0
]
+
max_dx
),
const
float
pos_padded_j
[
3
]
=
{
-
(
2
.
*
cj
->
width
[
0
]
+
max_dx
),
-
(
2
.
*
cj
->
width
[
1
]
+
max_dx
),
-
(
2
.
*
cj
->
width
[
1
]
+
max_dx
),
-
(
2
.
*
cj
->
width
[
2
]
+
max_dx
)};
-
(
2
.
*
cj
->
width
[
2
]
+
max_dx
)};
const
float
h_padded_j
=
cj
->
parts
[
0
].
h
;
const
float
h_padded_j
=
cj
->
hydro
.
parts
[
0
].
h
;
for
(
int
i
=
last_pj_align
+
1
;
i
<
last_pj_align
+
1
+
VEC_SIZE
;
i
++
)
{
for
(
int
i
=
last_pj_align
+
1
;
i
<
last_pj_align
+
1
+
VEC_SIZE
;
i
++
)
{
xj
[
i
]
=
pos_padded_j
[
0
];
xj
[
i
]
=
pos_padded_j
[
0
];
...
@@ -650,7 +650,7 @@ cache_read_two_partial_cells_sorted_force(
...
@@ -650,7 +650,7 @@ cache_read_two_partial_cells_sorted_force(
* cache. */
* cache. */
/* Is the number of particles to read a multiple of the vector size? */
/* Is the number of particles to read a multiple of the vector size? */
int
rem
=
(
ci
->
count
-
*
first_pi
)
%
VEC_SIZE
;
int
rem
=
(
ci
->
hydro
.
count
-
*
first_pi
)
%
VEC_SIZE
;
if
(
rem
!=
0
)
{
if
(
rem
!=
0
)
{
int
pad
=
VEC_SIZE
-
rem
;
int
pad
=
VEC_SIZE
-
rem
;
...
@@ -663,14 +663,14 @@ cache_read_two_partial_cells_sorted_force(
...
@@ -663,14 +663,14 @@ cache_read_two_partial_cells_sorted_force(
int
pad
=
VEC_SIZE
-
rem
;
int
pad
=
VEC_SIZE
-
rem
;
/* Increase last_pj if there are particles in the cell left to read. */
/* Increase last_pj if there are particles in the cell left to read. */
if
(
*
last_pj
+
pad
<
cj
->
count
)
*
last_pj
+=
pad
;
if
(
*
last_pj
+
pad
<
cj
->
hydro
.
count
)
*
last_pj
+=
pad
;
}
}
/* Get some local pointers */
/* Get some local pointers */
const
int
first_pi_align
=
*
first_pi
;
const
int
first_pi_align
=
*
first_pi
;
const
int
last_pj_align
=
*
last_pj
;
const
int
last_pj_align
=
*
last_pj
;
const
struct
part
*
restrict
parts_i
=
ci
->
parts
;
const
struct
part
*
restrict
parts_i
=
ci
->
hydro
.
parts
;
const
struct
part
*
restrict
parts_j
=
cj
->
parts
;
const
struct
part
*
restrict
parts_j
=
cj
->
hydro
.
parts
;
/* Shift particles to the local frame and account for boundary conditions.*/
/* Shift particles to the local frame and account for boundary conditions.*/
const
double
total_ci_shift
[
3
]
=
{
const
double
total_ci_shift
[
3
]
=
{
...
@@ -697,7 +697,7 @@ cache_read_two_partial_cells_sorted_force(
...
@@ -697,7 +697,7 @@ cache_read_two_partial_cells_sorted_force(