Commit fb28100d authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge remote-tracking branch 'origin/master' into repart-by-ticks-with-means

parents 4adcd862 ec378b32
......@@ -7,8 +7,8 @@ General information for adding new schemes
==========================================
The following steps are required for any new options (such as new
:ref:`hydro`, :ref:`chemistry`, :ref:`cooling`,
:ref:`equation_of_state`, :ref:`stars` or :ref:`gravity`)
:ref:`hydro`, chemistry, cooling,
:ref:`equation_of_state`, stars, or gravity)
In order to add a new scheme, you will need to:
......
This diff is collapsed.
This diff is collapsed.
......@@ -50,7 +50,9 @@ HDF5 library, not a parallel build.
Compiling SWIFT
---------------
The next part is compiling SWIFT with VELOCIraptor and assumes you already
downloaded SWIFT from the GitLab_, this can be done by running::
downloaded SWIFT from the GitLab_, this can be done by running
.. code:: bash
./autogen.sh
./configure --with-velociraptor=/path/to/VELOCIraptor-STF/src
......@@ -60,14 +62,14 @@ In which ``./autogen.sh`` only needs to be run once after the code is cloned
from the GitLab_, and ``/path/to/`` is the path to the ``VELOCIraptor-STF``
directory on your machine. In general ``./configure`` can be run with other
options as desired. After this we can run SWIFT with VELOCIraptor, but for this
we first need to add several lines to the yaml file of our simulation::
we first need to add several lines to the yaml file of our simulation
.. code:: YAML
#structure finding options
StructureFinding:
config_file_name: stf_input_6dfof_dmonly_sub.cfg
basename: ./stf
output_time_format: 1
scale_factor_first: 0.02
delta_time: 1.02
......
......@@ -87,7 +87,7 @@ html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['.static']
# html_static_path = ['.static']
# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
......
......@@ -10,10 +10,8 @@ InternalUnitSystem:
StructureFinding:
config_file_name: stf_input.cfg # Name of the STF config file.
basename: ./stf # Common part of the name of output files.
output_time_format: 0 # Specifies the frequency format of structure finding. 0 for simulation steps (delta_step) and 1 for simulation time intervals (delta_time).
scale_factor_first: 0.92 # Scale-factor of the first snapshot (cosmological run)
time_first: 0.01 # Time of the first structure finding output (in internal units).
delta_step: 1000 # Time difference between consecutive structure finding outputs (in internal units) in simulation steps.
delta_time: 1.10 # Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals.
# Cosmological parameters
......
......@@ -10,10 +10,8 @@ InternalUnitSystem:
StructureFinding:
config_file_name: stf_input.cfg # Name of the STF config file.
basename: ./stf # Common part of the name of output files.
output_time_format: 0 # Specifies the frequency format of structure finding. 0 for simulation steps (delta_step) and 1 for simulation time intervals (delta_time).
scale_factor_first: 0.92 # Scale-factor of the first snapshot (cosmological run)
time_first: 0.01 # Time of the first structure finding output (in internal units).
delta_step: 1000 # Time difference between consecutive structure finding outputs (in internal units) in simulation steps.
delta_time: 1.10 # Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals.
# Cosmological parameters
......
......@@ -10,10 +10,8 @@ InternalUnitSystem:
StructureFinding:
config_file_name: stf_input.cfg # Name of the STF config file.
basename: ./stf # Common part of the name of output files.
output_time_format: 0 # Specifies the frequency format of structure finding. 0 for simulation steps (delta_step) and 1 for simulation time intervals (delta_time).
scale_factor_first: 0.92 # Scale-factor of the first snapshot (cosmological run)
time_first: 0.01 # Time of the first structure finding output (in internal units).
delta_step: 1000 # Time difference between consecutive structure finding outputs (in internal units) in simulation steps.
delta_time: 1.10 # Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals.
# Cosmological parameters
......
......@@ -10,7 +10,6 @@ InternalUnitSystem:
StructureFinding:
config_file_name: stf_input_6dfof_dmonly_sub.cfg
basename: ./stf
output_time_format: 1
scale_factor_first: 0.02
delta_time: 1.02
......
......@@ -37,8 +37,9 @@ SPH:
# Parameters governing the snapshots
Snapshots:
basename: snap
delta_time: 1.02
delta_time: 1.05
scale_factor_first: 0.02
invoke_stf: 1
# Parameters governing the conserved quantities statistics
Statistics:
......@@ -52,6 +53,7 @@ Scheduler:
# Parameters related to the initial conditions
InitialConditions:
file_name: small_cosmo_volume.hdf5
periodic: 1
cleanup_h_factors: 1
cleanup_velocity_factors: 1
generate_gas_in_ics: 1 # Generate gas particles from the DM-only ICs
......@@ -61,7 +63,6 @@ InitialConditions:
StructureFinding:
config_file_name: stfconfig_input.cfg
basename: ./stf
output_time_format: 1
scale_factor_first: 0.02
delta_time: 1.02
......@@ -923,6 +923,10 @@ int main(int argc, char *argv[]) {
fflush(stdout);
}
#ifdef HAVE_VELOCIRAPTOR
if (with_structure_finding) velociraptor_init(&e);
#endif
/* Get some info to the user. */
if (myrank == 0) {
long long N_DM = N_total[1] - N_total[2] - N_total[0];
......@@ -1123,14 +1127,6 @@ int main(int argc, char *argv[]) {
#endif
// write a final snapshot with logger, in order to facilitate a restart
engine_dump_snapshot(&e);
#ifdef HAVE_VELOCIRAPTOR
/* Call VELOCIraptor at the end of the run to find groups. */
if (e.policy & engine_policy_structure_finding) {
velociraptor_init(&e);
velociraptor_invoke(&e);
}
#endif
}
#ifdef WITH_MPI
......
......@@ -85,6 +85,7 @@ Snapshots:
scale_factor_first: 0.1 # (Optional) Scale-factor of the first snapshot if cosmological time-integration.
time_first: 0. # (Optional) Time of the first output if non-cosmological time-integration (in internal units)
delta_time: 0.01 # Time difference between consecutive outputs (in internal units)
invoke_stf: 0 # (Optional) Call VELOCIraptor every time a snapshot is written irrespective of the VELOCIraptor output strategy.
compression: 0 # (Optional) Set the level of compression of the HDF5 datasets [0-9]. 0 does no compression.
int_time_label_on: 0 # (Optional) Enable to label the snapshots using the time rounded to an integer (in internal units)
UnitMass_in_cgs: 1 # (Optional) Unit system for the outputs (Grams)
......@@ -158,6 +159,16 @@ DomainDecomposition:
# task weights in first repartition, if 0 only use task timings, if > 1 only use
# fixed costs, unless none are available.
# Structure finding options (requires velociraptor)
StructureFinding:
config_file_name: stf_input.cfg # Name of the STF config file.
basename: ./stf # Common part of the name of output files.
scale_factor_first: 0.92 # (Optional) Scale-factor of the first snapshot (cosmological run)
time_first: 0.01 # (Optional) Time of the first structure finding output (in internal units).
delta_time: 1.10 # (Optional) Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals.
output_list_on: 0 # (Optional) Enable the output list
output_list: stflist.txt # (Optional) File containing the output times (see documentation in "Parameter File" section)
# Parameters related to the equation of state ------------------------------------------
EoS:
......@@ -288,15 +299,3 @@ EAGLEChemistry:
init_abundance_Magnesium: 0.000 # Initial fraction of particle mass in Magnesium
init_abundance_Silicon: 0.000 # Initial fraction of particle mass in Silicon
init_abundance_Iron: 0.000 # Initial fraction of particle mass in Iron
# Structure finding options (requires velociraptor)
StructureFinding:
config_file_name: stf_input.cfg # Name of the STF config file.
basename: ./stf # Common part of the name of output files.
output_time_format: 0 # Specifies the frequency format of structure finding. 0 for simulation steps (delta_step) and 1 for simulation time intervals (delta_time).
scale_factor_first: 0.92 # Scale-factor of the first snapshot (cosmological run)
time_first: 0.01 # Time of the first structure finding output (in internal units).
delta_step: 1000 # Time difference between consecutive structure finding outputs (in internal units) in simulation steps.
delta_time: 1.10 # Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals.
output_list_on: 0 # (Optional) Enable the output list
output_list: stflist.txt # (Optional) File containing the output times (see documentation in "Parameter File" section)
......@@ -49,7 +49,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
gravity_softened_derivatives.h vector_power.h collectgroup.h hydro_space.h sort_part.h \
chemistry.h chemistry_io.h chemistry_struct.h cosmology.h restart.h space_getsid.h utilities.h \
mesh_gravity.h cbrt.h exp10.h velociraptor_interface.h swift_velociraptor_part.h outputlist.h \
logger_io.h tracers_io.h tracers.h tracers_struct.h
logger_io.h tracers_io.h tracers.h tracers_struct.h velociraptor_struct.h velociraptor_io.h
# source files for EAGLE cooling
EAGLE_COOLING_SOURCES =
......
......@@ -179,8 +179,9 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c,
*
* @param ci The #cell.
* @param ci_cache The cache.
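* @return The number of cache entries filled, i.e. the cell's particle count
* (inhibited particles included, as padding) rounded up to a multiple of
* NUM_VEC_PROC * VEC_SIZE.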
*/
__attribute__((always_inline)) INLINE void cache_read_particles(
__attribute__((always_inline)) INLINE int cache_read_particles(
const struct cell *restrict const ci,
struct cache *restrict const ci_cache) {
......@@ -197,12 +198,29 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
swift_declare_aligned_ptr(float, vy, ci_cache->vy, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
const int count = ci->hydro.count;
const struct part *restrict parts = ci->hydro.parts;
const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
const double max_dx = ci->hydro.dx_max_part;
const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)};
const float h_padded = ci->hydro.h_max / 4.;
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
for (int i = 0; i < ci->hydro.count; i++) {
for (int i = 0; i < count; i++) {
/* Pad inhibited particles. */
if (parts[i].time_bin >= time_bin_inhibited) {
x[i] = pos_padded[0];
y[i] = pos_padded[1];
z[i] = pos_padded[2];
h[i] = h_padded;
continue;
}
x[i] = (float)(parts[i].x[0] - loc[0]);
y[i] = (float)(parts[i].x[1] - loc[1]);
z[i] = (float)(parts[i].x[2] - loc[2]);
......@@ -213,6 +231,26 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
vz[i] = parts[i].v[2];
}
/* Pad cache if the no. of particles is not a multiple of double the vector
* length. */
int count_align = count;
const int rem = count % (NUM_VEC_PROC * VEC_SIZE);
if (rem != 0) {
count_align += (NUM_VEC_PROC * VEC_SIZE) - rem;
/* Set positions to something outside of the range of any particle */
for (int i = count; i < count_align; i++) {
x[i] = pos_padded[0];
y[i] = pos_padded[1];
z[i] = pos_padded[2];
}
}
return count_align;
#else
error("Can't call the cache reading function with this flavour of SPH!");
return 0;
#endif
}
......@@ -261,10 +299,32 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
if (*last_pi + pad < ci->hydro.count) *last_pi += pad;
}
const double max_dx = ci->hydro.dx_max_part;
const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)};
const float h_padded = ci->hydro.h_max / 4.;
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
for (int i = 0; i < *last_pi; i++) {
const int idx = sort_i[i].i;
/* Put inhibited particles out of range. */
if (parts[idx].time_bin >= time_bin_inhibited) {
x[i] = pos_padded[0];
y[i] = pos_padded[1];
z[i] = pos_padded[2];
h[i] = h_padded;
m[i] = 1.f;
vx[i] = 1.f;
vy[i] = 1.f;
vz[i] = 1.f;
continue;
}
x[i] = (float)(parts[idx].x[0] - loc[0]);
y[i] = (float)(parts[idx].x[1] - loc[1]);
z[i] = (float)(parts[idx].x[2] - loc[2]);
......@@ -278,12 +338,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
/* Pad cache with fake particles that exist outside the cell so will not
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
const double max_dx = ci->hydro.dx_max_part;
const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)};
const float h_padded = ci->hydro.parts[0].h;
for (int i = *last_pi; i < *last_pi + VEC_SIZE; i++) {
x[i] = pos_padded[0];
y[i] = pos_padded[1];
......@@ -308,11 +362,32 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
}
const int ci_cache_count = ci->hydro.count - *first_pi;
const double max_dx = ci->hydro.dx_max_part;
const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)};
const float h_padded = ci->hydro.h_max / 4.;
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
for (int i = 0; i < ci_cache_count; i++) {
const int idx = sort_i[i + *first_pi].i;
/* Put inhibited particles out of range. */
if (parts[idx].time_bin >= time_bin_inhibited) {
x[i] = pos_padded[0];
y[i] = pos_padded[1];
z[i] = pos_padded[2];
h[i] = h_padded;
m[i] = 1.f;
vx[i] = 1.f;
vy[i] = 1.f;
vz[i] = 1.f;
continue;
}
x[i] = (float)(parts[idx].x[0] - loc[0]);
y[i] = (float)(parts[idx].x[1] - loc[1]);
z[i] = (float)(parts[idx].x[2] - loc[2]);
......@@ -326,12 +401,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
/* Pad cache with fake particles that exist outside the cell so will not
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
const double max_dx = ci->hydro.dx_max_part;
const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)};
const float h_padded = ci->hydro.parts[0].h;
for (int i = ci->hydro.count - *first_pi;
i < ci->hydro.count - *first_pi + VEC_SIZE; i++) {
x[i] = pos_padded[0];
......@@ -355,8 +424,9 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subset(
*
* @param ci The #cell.
* @param ci_cache The cache.
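* @return The number of cache entries filled, i.e. the cell's particle count
* (inhibited particles included, as padding) rounded up to a multiple of
* VEC_SIZE.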
*/
__attribute__((always_inline)) INLINE void cache_read_force_particles(
__attribute__((always_inline)) INLINE int cache_read_force_particles(
const struct cell *restrict const ci,
struct cache *restrict const ci_cache) {
......@@ -382,12 +452,34 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
swift_declare_aligned_ptr(float, soundspeed, ci_cache->soundspeed,
SWIFT_CACHE_ALIGNMENT);
const int count = ci->hydro.count;
const struct part *restrict parts = ci->hydro.parts;
const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
const double max_dx = ci->hydro.dx_max_part;
const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)};
const float h_padded = ci->hydro.h_max / 4.;
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
for (int i = 0; i < ci->hydro.count; i++) {
for (int i = 0; i < count; i++) {
/* Skip inhibited particles. */
if (parts[i].time_bin >= time_bin_inhibited) {
x[i] = pos_padded[0];
y[i] = pos_padded[1];
z[i] = pos_padded[2];
h[i] = h_padded;
rho[i] = 1.f;
grad_h[i] = 1.f;
pOrho2[i] = 1.f;
balsara[i] = 1.f;
soundspeed[i] = 1.f;
continue;
}
x[i] = (float)(parts[i].x[0] - loc[0]);
y[i] = (float)(parts[i].x[1] - loc[1]);
z[i] = (float)(parts[i].x[2] - loc[2]);
......@@ -403,6 +495,32 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
soundspeed[i] = parts[i].force.soundspeed;
}
/* Pad cache if there is a serial remainder. */
int count_align = count;
const int rem = count % VEC_SIZE;
if (rem != 0) {
count_align += VEC_SIZE - rem;
/* Pad with fake particles placed at the out-of-cell padding position
* (negative coordinates), using neutral values for the other fields. */
for (int i = count; i < count_align; i++) {
x[i] = pos_padded[0];
y[i] = pos_padded[1];
z[i] = pos_padded[2];
h[i] = h_padded;
rho[i] = 1.f;
grad_h[i] = 1.f;
pOrho2[i] = 1.f;
balsara[i] = 1.f;
soundspeed[i] = 1.f;
}
}
return count_align;
#else
error("Can't call the cache reading function with this flavour of SPH!");
return 0;
#endif
}
......@@ -472,11 +590,32 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
int ci_cache_count = ci->hydro.count - first_pi_align;
const double max_dx = max(ci->hydro.dx_max_part, cj->hydro.dx_max_part);
const float pos_padded_i[3] = {-(2. * ci->width[0] + max_dx),
-(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)};
const float h_padded_i = ci->hydro.h_max / 4.;
/* Shift the particles positions to a local frame (ci frame) so single
* precision can be used instead of double precision. */
for (int i = 0; i < ci_cache_count; i++) {
const int idx = sort_i[i + first_pi_align].i;
/* Put inhibited particles out of range. */
if (parts_i[idx].time_bin >= time_bin_inhibited) {
x[i] = pos_padded_i[0];
y[i] = pos_padded_i[1];
z[i] = pos_padded_i[2];
h[i] = h_padded_i;
m[i] = 1.f;
vx[i] = 1.f;
vy[i] = 1.f;
vz[i] = 1.f;
continue;
}
x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
......@@ -532,18 +671,12 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
/* Pad cache with fake particles that exist outside the cell so will not
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
const double max_dx = max(ci->hydro.dx_max_part, cj->hydro.dx_max_part);
const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)};
const float h_padded = ci->hydro.parts[0].h;
for (int i = ci->hydro.count - first_pi_align;
i < ci->hydro.count - first_pi_align + VEC_SIZE; i++) {
x[i] = pos_padded[0];
y[i] = pos_padded[1];
z[i] = pos_padded[2];
h[i] = h_padded;
x[i] = pos_padded_i[0];
y[i] = pos_padded_i[1];
z[i] = pos_padded_i[2];
h[i] = h_padded_i;
m[i] = 1.f;
vx[i] = 1.f;
......@@ -562,8 +695,29 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
swift_declare_aligned_ptr(float, vyj, cj_cache->vy, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, vzj, cj_cache->vz, SWIFT_CACHE_ALIGNMENT);
const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
-(2. * cj->width[1] + max_dx),
-(2. * cj->width[2] + max_dx)};
const float h_padded_j = cj->hydro.h_max / 4.;
for (int i = 0; i <= last_pj_align; i++) {
const int idx = sort_j[i].i;
/* Put inhibited particles out of range. */
if (parts_j[idx].time_bin >= time_bin_inhibited) {
xj[i] = pos_padded_j[0];
yj[i] = pos_padded_j[1];
zj[i] = pos_padded_j[2];
hj[i] = h_padded_j;
mj[i] = 1.f;
vxj[i] = 1.f;
vyj[i] = 1.f;
vzj[i] = 1.f;
continue;
}
xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]);
yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
......@@ -609,11 +763,6 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
/* Pad cache with fake particles that exist outside the cell so will not
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
-(2. * cj->width[1] + max_dx),
-(2. * cj->width[2] + max_dx)};
const float h_padded_j = cj->hydro.parts[0].h;
for (int i = last_pj_align + 1; i < last_pj_align + 1 + VEC_SIZE; i++) {
xj[i] = pos_padded_j[0];
yj[i] = pos_padded_j[1];
......@@ -701,11 +850,37 @@ cache_read_two_partial_cells_sorted_force(
SWIFT_CACHE_ALIGNMENT);
int ci_cache_count = ci->hydro.count - first_pi_align;
const double max_dx = max(ci->hydro.dx_max_part, cj->hydro.dx_max_part);
const float pos_padded_i[3] = {-(2. * ci->width[0] + max_dx),
-(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)};
const float h_padded_i = ci->hydro.h_max / 4.;
/* Shift the particles positions to a local frame (ci frame) so single
* precision can be used instead of double precision. */
for (int i = 0; i < ci_cache_count; i++) {
const int idx = sort_i[i + first_pi_align].i;
/* Put inhibited particles out of range. */
if (parts_i[idx].time_bin >= time_bin_inhibited) {
x[i] = pos_padded_i[0];
y[i] = pos_padded_i[1];
z[i] = pos_padded_i[2];
h[i] = h_padded_i;
m[i] = 1.f;
vx[i] = 1.f;
vy[i] = 1.f;
vz[i] = 1.f;
rho[i] = 1.f;
grad_h[i] = 1.f;
pOrho2[i] = 1.f;
balsara[i] = 1.f;
soundspeed[i] = 1.f;
continue;
}
x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
......@@ -726,18 +901,12 @@ cache_read_two_partial_cells_sorted_force(
/* Pad cache with fake particles that exist outside the cell so will not
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
const double max_dx = max(ci->hydro.dx_max_part, cj->hydro.dx_max_part);
const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
-(2. * ci->width[1] + max_dx),
-(2. * ci->width[2] + max_dx)};
const float h_padded = ci->hydro.parts[0].h;
for (int i = ci->hydro.count - first_pi_align;
i < ci->hydro.count - first_pi_align + VEC_SIZE; i++) {
x[i] = pos_padded[0];
y[i] = pos_padded[1];
z[i] = pos_padded[2];
h[i] = h_padded;
x[i] = pos_padded_i[0];
y[i] = pos_padded_i[1];
z[i] = pos_padded_i[2];
h[i] = h_padded_i;
m[i] = 1.f;
vx[i] = 1.f;
vy[i] = 1.f;
......@@ -769,8 +938,33 @@ cache_read_two_partial_cells_sorted_force(
swift_declare_aligned_ptr(float, soundspeedj, cj_cache->soundspeed,
SWIFT_CACHE_ALIGNMENT);
const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
-(2. * cj->width[1] + max_dx),
-(2. * cj->width[2] + max_dx)};
const float h_padded_j = cj->hydro.h_max / 4.;