diff --git a/configure.ac b/configure.ac index c33b52e83f724508c42a4ea1a1ce2235e1cbd90e..9ec78c8b8b8168191df002a01ed5de6fb1580243 100644 --- a/configure.ac +++ b/configure.ac @@ -627,7 +627,7 @@ fi # Hydro scheme. AC_ARG_WITH([hydro], [AS_HELP_STRING([--with-hydro=<scheme>], - [Hydro dynamics to use @<:@gadget2, minimal, hopkins, default, gizmo, shadowfax default: gadget2@:>@] + [Hydro dynamics to use @<:@gadget2, minimal, hopkins, default, gizmo, shadowfax debug default: gadget2@:>@] )], [with_hydro="$withval"], [with_hydro="gadget2"] @@ -639,6 +639,9 @@ case "$with_hydro" in minimal) AC_DEFINE([MINIMAL_SPH], [1], [Minimal SPH]) ;; + debug) + AC_DEFINE([DEBUG_INTERACTIONS_SPH], [1], [Debug SPH]) + ;; hopkins) AC_DEFINE([HOPKINS_PE_SPH], [1], [Pressure-Entropy SPH]) ;; diff --git a/examples/SedovBlast_3D/sedov.yml b/examples/SedovBlast_3D/sedov.yml index 1cc4aced9af3314cadde44f768016225426addf6..865755743b3b73af29a10811f6571d80ca933f15 100644 --- a/examples/SedovBlast_3D/sedov.yml +++ b/examples/SedovBlast_3D/sedov.yml @@ -32,4 +32,5 @@ SPH: # Parameters related to the initial conditions InitialConditions: file_name: ./sedov.hdf5 # The file to read + h_scaling: 3.33 diff --git a/src/debug.c b/src/debug.c index c9f12370f0e59f812afc72eb55b0bdaa032edfe1..ac886fc8d75a03ac67b213e0639cd384f0980529 100644 --- a/src/debug.c +++ b/src/debug.c @@ -21,16 +21,15 @@ ******************************************************************************/ /* Config parameters. */ -#include "../config.h" + +/* This object's header. */ +#include "debug.h" /* Some standard headers. */ #include <float.h> #include <stdio.h> #include <unistd.h> -/* This object's header. */ -#include "debug.h" - /* Local includes. */ #include "active.h" #include "cell.h" @@ -41,7 +40,9 @@ #include "space.h" /* Import the right hydro definition */ -#if defined(MINIMAL_SPH) +#if defined(DEBUG_INTERACTIONS_SPH) +#include "./hydro/DebugInteractions/hydro_debug.h" +#elif defined(MINIMAL_SPH) #include "./hydro/Minimal/hydro_debug.h" #elif defined(GADGET2_SPH) #include "./hydro/Gadget2/hydro_debug.h" diff --git a/src/debug.h b/src/debug.h index 7bea11de6ac3fe82ae9b3f1be84249f75742ecaf..20ae8fc2c20fa458b7be82f75b2fbb3be9acd32f 100644 --- a/src/debug.h +++ b/src/debug.h @@ -19,6 +19,9 @@ #ifndef SWIFT_DEBUG_H #define SWIFT_DEBUG_H +/* Config parameters. */ +#include "../config.h" + /* Includes. */ #include "cell.h" #include "part.h" diff --git a/src/engine.c b/src/engine.c index cf4d00a1959b609cb6e7c103f3c5138fce5e53e4..65d541bab1c428aa6e0c719a847554563aa0b489 100644 --- a/src/engine.c +++ b/src/engine.c @@ -2086,7 +2086,7 @@ static inline void engine_make_external_gravity_dependencies( struct scheduler *sched, struct task *gravity, struct cell *c) { /* init --> external gravity --> kick */ - scheduler_addunlock(sched, c->drift_gpart, gravity); + scheduler_addunlock(sched, c->super->drift_gpart, gravity); scheduler_addunlock(sched, gravity, c->super->kick2); } diff --git a/src/hydro.h b/src/hydro.h index abb49d35b204bbaf986f502d796883e7eb778e7f..cbe26761e2470bea73028d1e2ed12204ac730d0e 100644 --- a/src/hydro.h +++ b/src/hydro.h @@ -27,7 +27,11 @@ #include "kernel_hydro.h" /* Import the right functions */ -#if defined(MINIMAL_SPH) +#if defined(DEBUG_INTERACTIONS_SPH) +#include "./hydro/DebugInteractions/hydro.h" +#include "./hydro/DebugInteractions/hydro_iact.h" +#define SPH_IMPLEMENTATION "Debug SELF/PAIR" +#elif defined(MINIMAL_SPH) #include "./hydro/Minimal/hydro.h" #include "./hydro/Minimal/hydro_iact.h" #define SPH_IMPLEMENTATION "Minimal version of SPH (e.g. Price 2010)" diff --git a/src/hydro/DebugInteractions/hydro.h b/src/hydro/DebugInteractions/hydro.h new file mode 100644 index 0000000000000000000000000000000000000000..dfffab642ae99e3bbb1928984e8daaf44d76c8eb --- /dev/null +++ b/src/hydro/DebugInteractions/hydro.h @@ -0,0 +1,313 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Coypright (c) 2017 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_DEBUG_INTERACTIONS_HYDRO_H +#define SWIFT_DEBUG_INTERACTIONS_HYDRO_H + +/** + * @file DebugInteractions/hydro.h + * @brief Empty SPH implementation used solely to test the SELF/PAIR routines. + */ + +#include "hydro_properties.h" +#include "hydro_space.h" +#include "part.h" + +#include <float.h> + +/** + * @brief Returns the internal energy of a particle + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float hydro_get_internal_energy( + const struct part *restrict p) { + + return 0.f; +} + +/** + * @brief Returns the pressure of a particle + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float hydro_get_pressure( + const struct part *restrict p) { + + return 0.f; +} + +/** + * @brief Returns the entropy of a particle + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float hydro_get_entropy( + const struct part *restrict p) { + + return 0.f; +} + +/** + * @brief Returns the sound speed of a particle + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float hydro_get_soundspeed( + const struct part *restrict p) { + + return 0.f; +} + +/** + * @brief Returns the density of a particle + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float hydro_get_density( + const struct part *restrict p) { + + return 0.f; +} + +/** + * @brief Returns the mass of a particle + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float hydro_get_mass( + const struct part *restrict p) { + return 0.f; +} + +/** + * @brief Returns the time derivative of internal energy of a particle + * + * We assume a constant density. + * + * @param p The particle of interest + */ +__attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( + const struct part *restrict p) { + return 0.f; +} + +/** + * @brief Returns the time derivative of internal energy of a particle + * + * We assume a constant density. + * + * @param p The particle of interest. + * @param du_dt The new time derivative of the internal energy. + */ +__attribute__((always_inline)) INLINE static void hydro_set_internal_energy_dt( + struct part *restrict p, float du_dt) {} + +/** + * @brief Computes the hydro time-step of a given particle + * + * @param p Pointer to the particle data + * @param xp Pointer to the extended particle data + * + */ +__attribute__((always_inline)) INLINE static float hydro_compute_timestep( + const struct part *restrict p, const struct xpart *restrict xp, + const struct hydro_props *restrict hydro_properties) { + + return FLT_MAX; +} + +/** + * @brief Does some extra hydro operations once the actual physical time step + * for the particle is known. + * + * @param p The particle to act upon. + * @param dt Physical time step of the particle during the next step. + */ +__attribute__((always_inline)) INLINE static void hydro_timestep_extra( + struct part *p, float dt) {} + +/** + * @brief Prepares a particle for the density calculation. + * + * Zeroes all the relevant arrays in preparation for the sums taking place in + * the variaous density tasks + * + * @param p The particle to act upon + * @param hs #hydro_space containing hydro specific space information. + */ +__attribute__((always_inline)) INLINE static void hydro_init_part( + struct part *restrict p, const struct hydro_space *hs) { + + for (int i = 0; i < 256; ++i) p->ids_ngbs_density[i] = -1; + p->num_ngb_density = 0; + + p->density.wcount = 0.f; + p->density.wcount_dh = 0.f; +} + +/** + * @brief Finishes the density calculation. + * + * Multiplies the density and number of neighbours by the appropiate constants + * and add the self-contribution term. + * + * @param p The particle to act upon + */ +__attribute__((always_inline)) INLINE static void hydro_end_density( + struct part *restrict p) { + + /* Some smoothing length multiples. */ + const float h = p->h; + const float h_inv = 1.0f / h; /* 1/h */ + const float h_inv_dim = pow_dimension(h_inv); /* 1/h^d */ + const float h_inv_dim_plus_one = h_inv_dim * h_inv; /* 1/h^(d+1) */ + + /* Final operation on the density (add self-contribution). */ + p->density.wcount += kernel_root; + p->density.wcount_dh -= hydro_dimension * kernel_root; + + /* Finish the calculation by inserting the missing h-factors */ + p->density.wcount *= h_inv_dim; + p->density.wcount_dh *= h_inv_dim_plus_one; +} + +/** + * @brief Sets all particle fields to sensible values when the #part has 0 ngbs. + * + * @param p The particle to act upon + * @param xp The extended particle data to act upon + */ +__attribute__((always_inline)) INLINE static void hydro_part_has_no_neighbours( + struct part *restrict p, struct xpart *restrict xp) { + + /* Some smoothing length multiples. */ + const float h = p->h; + const float h_inv = 1.0f / h; /* 1/h */ + const float h_inv_dim = pow_dimension(h_inv); /* 1/h^d */ + + /* Re-set problematic values */ + p->density.wcount = kernel_root * kernel_norm * h_inv_dim; + p->density.wcount_dh = 0.f; +} + +/** + * @brief Prepare a particle for the force calculation. + * + * Computes viscosity term, conduction term and smoothing length gradient terms. + * + * @param p The particle to act upon + * @param xp The extended particle data to act upon + * @param ti_current The current time (on the timeline) + * @param timeBase The minimal time-step size + */ +__attribute__((always_inline)) INLINE static void hydro_prepare_force( + struct part *restrict p, struct xpart *restrict xp) {} + +/** + * @brief Reset acceleration fields of a particle + * + * Resets all hydro acceleration and time derivative fields in preparation + * for the sums taking place in the variaous force tasks + * + * @param p The particle to act upon + */ +__attribute__((always_inline)) INLINE static void hydro_reset_acceleration( + struct part *restrict p) { + + for (int i = 0; i < 256; ++i) p->ids_ngbs_force[i] = -1; + p->num_ngb_force = 0; + + p->force.h_dt = 0.f; +} + +/** + * @brief Sets the values to be predicted in the drifts to their values at a + * kick time + * + * @param p The particle. + * @param xp The extended data of this particle. + */ +__attribute__((always_inline)) INLINE static void hydro_reset_predicted_values( + struct part *restrict p, const struct xpart *restrict xp) {} + +/** + * @brief Predict additional particle fields forward in time when drifting + * + * @param p The particle + * @param xp The extended data of the particle + * @param dt The drift time-step. + * @param t0 The time at the start of the drift + * @param t1 The time at the end of the drift + * @param timeBase The minimal time-step size + */ +__attribute__((always_inline)) INLINE static void hydro_predict_extra( + struct part *restrict p, const struct xpart *restrict xp, float dt) {} + +/** + * @brief Finishes the force calculation. + * + * Multiplies the forces and accelerationsby the appropiate constants + * + * @param p The particle to act upon + */ +__attribute__((always_inline)) INLINE static void hydro_end_force( + struct part *restrict p) {} + +/** + * @brief Kick the additional variables + * + * @param p The particle to act upon + * @param xp The particle extended data to act upon + * @param dt The time-step for this kick + */ +__attribute__((always_inline)) INLINE static void hydro_kick_extra( + struct part *restrict p, struct xpart *restrict xp, float dt) {} + +/** + * @brief Converts hydro quantity of a particle at the start of a run + * + * Requires the density to be known + * + * @param p The particle to act upon + */ +__attribute__((always_inline)) INLINE static void hydro_convert_quantities( + struct part *restrict p, struct xpart *restrict xp) {} + +/** + * @brief Initialises the particles for the first time + * + * This function is called only once just after the ICs have been + * read in to do some conversions. + * + * @param p The particle to act upon + * @param xp The extended particle data to act upon + */ +__attribute__((always_inline)) INLINE static void hydro_first_init_part( + struct part *restrict p, struct xpart *restrict xp) { + + p->time_bin = 0; + xp->v_full[0] = p->v[0]; + xp->v_full[1] = p->v[1]; + xp->v_full[2] = p->v[2]; + + hydro_reset_acceleration(p); + hydro_init_part(p, NULL); +} + +#endif /* SWIFT_DEBUG_INTERACTIONS_HYDRO_H */ diff --git a/src/hydro/DebugInteractions/hydro_debug.h b/src/hydro/DebugInteractions/hydro_debug.h new file mode 100644 index 0000000000000000000000000000000000000000..5420f9cece2415fb5e2438b551eb605187741c13 --- /dev/null +++ b/src/hydro/DebugInteractions/hydro_debug.h @@ -0,0 +1,39 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Coypright (c) 2017 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_DEBUG_INTERACTIONS_HYDRO_DEBUG_H +#define SWIFT_DEBUG_INTERACTIONS_HYDRO_DEBUG_H + +/** + * @file DebugInteractions/hydro_debug.h + * @brief Empty SPH implementation used solely to test the SELF/PAIR routines. + */ + +__attribute__((always_inline)) INLINE static void hydro_debug_particle( + const struct part* p, const struct xpart* xp) { + printf( + "x=[%.3e,%.3e,%.3e], " + "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " + "h=%.3e, wcount=%.3f, wcount_dh=%.3e, dh/dt=%.3e time_bin=%d\n", + p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], + xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], + p->h, p->density.wcount, p->density.wcount_dh, p->force.h_dt, + p->time_bin); +} + +#endif /* SWIFT_DEBUG_INTERACTIONS_HYDRO_DEBUG_H */ diff --git a/src/hydro/DebugInteractions/hydro_iact.h b/src/hydro/DebugInteractions/hydro_iact.h new file mode 100644 index 0000000000000000000000000000000000000000..89613f4902f0f9ab476110a7532664135b7b74ed --- /dev/null +++ b/src/hydro/DebugInteractions/hydro_iact.h @@ -0,0 +1,113 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2017 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_DEBUG_INTERACTIONS_HYDRO_IACT_H +#define SWIFT_DEBUG_INTERACTIONS_HYDRO_IACT_H + +/** + * @file DebugInteractions/hydro_iact.h + * @brief Empty SPH implementation used solely to test the SELF/PAIR routines. + */ + +/** + * @brief Density loop + */ +__attribute__((always_inline)) INLINE static void runner_iact_density( + float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { + + float wi, wi_dx; + float wj, wj_dx; + + /* Get r and r inverse. */ + const float r = sqrtf(r2); + + /* Compute the kernel function for pi */ + const float hi_inv = 1.f / hi; + const float ui = r * hi_inv; + kernel_deval(ui, &wi, &wi_dx); + + /* Compute contribution to the number of neighbours */ + pi->density.wcount += wi; + pi->density.wcount_dh -= (hydro_dimension * wi + ui * wi_dx); + + /* Compute the kernel function for pj */ + const float hj_inv = 1.f / hj; + const float uj = r * hj_inv; + kernel_deval(uj, &wj, &wj_dx); + + /* Compute contribution to the number of neighbours */ + pj->density.wcount += wj; + pj->density.wcount_dh -= (hydro_dimension * wj + uj * wj_dx); + + /* Update ngb counters */ + pi->ids_ngbs_density[pi->num_ngb_density] = pj->id; + ++pi->num_ngb_density; + pj->ids_ngbs_density[pj->num_ngb_density] = pi->id; + ++pj->num_ngb_density; +} + +/** + * @brief Density loop (non-symmetric version) + */ +__attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( + float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { + + float wi, wi_dx; + + /* Get r and r inverse. */ + const float r = sqrtf(r2); + + /* Compute the kernel function */ + const float hi_inv = 1.0f / hi; + const float ui = r * hi_inv; + kernel_deval(ui, &wi, &wi_dx); + + /* Compute contribution to the number of neighbours */ + pi->density.wcount += wi; + pi->density.wcount_dh -= (hydro_dimension * wi + ui * wi_dx); + + /* Update ngb counters */ + pi->ids_ngbs_density[pi->num_ngb_density] = pj->id; + ++pi->num_ngb_density; +} + +/** + * @brief Force loop + */ +__attribute__((always_inline)) INLINE static void runner_iact_force( + float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { + + /* Update ngb counters */ + pi->ids_ngbs_force[pi->num_ngb_force] = pj->id; + ++pi->num_ngb_force; + pj->ids_ngbs_force[pj->num_ngb_force] = pi->id; + ++pj->num_ngb_force; +} + +/** + * @brief Force loop (non-symmetric version) + */ +__attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( + float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { + + /* Update ngb counters */ + pi->ids_ngbs_force[pi->num_ngb_force] = pj->id; + ++pi->num_ngb_force; +} + +#endif /* SWIFT_DEBUG_INTERACTIONS_HYDRO_IACT_H */ diff --git a/src/hydro/DebugInteractions/hydro_io.h b/src/hydro/DebugInteractions/hydro_io.h new file mode 100644 index 0000000000000000000000000000000000000000..61b391a0c711edd5ad8acdbb6b914c070470992a --- /dev/null +++ b/src/hydro/DebugInteractions/hydro_io.h @@ -0,0 +1,114 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Coypright (c) 2017 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_DEBUG_INTERACTIONS_HYDRO_IO_H +#define SWIFT_DEBUG_INTERACTIONS_HYDRO_IO_H + +/** + * @file DebugInteractions/hydro_io.h + * @brief Empty SPH implementation used solely to test the SELF/PAIR routines. + */ + +#include "adiabatic_index.h" +#include "hydro.h" +#include "io_properties.h" +#include "kernel_hydro.h" + +/** + * @brief Specifies which particle fields to read from a dataset + * + * @param parts The particle array. + * @param list The list of i/o properties to read. + * @param num_fields The number of i/o fields to read. + */ +void hydro_read_particles(struct part* parts, struct io_props* list, + int* num_fields) { + + *num_fields = 4; + + /* List what we want to read */ + list[0] = io_make_input_field("Coordinates", DOUBLE, 3, COMPULSORY, + UNIT_CONV_LENGTH, parts, x); + list[1] = io_make_input_field("Velocities", FLOAT, 3, COMPULSORY, + UNIT_CONV_SPEED, parts, v); + list[2] = io_make_input_field("SmoothingLength", FLOAT, 1, COMPULSORY, + UNIT_CONV_LENGTH, parts, h); + list[3] = io_make_input_field("ParticleIDs", ULONGLONG, 1, COMPULSORY, + UNIT_CONV_NO_UNITS, parts, id); +} + +float convert_u(struct engine* e, struct part* p) { + + return hydro_get_internal_energy(p); +} + +float convert_P(struct engine* e, struct part* p) { + + return hydro_get_pressure(p); +} + +/** + * @brief Specifies which particle fields to write to a dataset + * + * @param parts The particle array. + * @param list The list of i/o properties to write. + * @param num_fields The number of i/o fields to write. + */ +void hydro_write_particles(struct part* parts, struct io_props* list, + int* num_fields) { + + *num_fields = 8; + + /* List what we want to write */ + list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, + parts, x); + list[1] = + io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, parts, v); + list[2] = io_make_output_field("SmoothingLength", FLOAT, 1, UNIT_CONV_LENGTH, + parts, h); + list[3] = io_make_output_field("ParticleIDs", ULONGLONG, 1, + UNIT_CONV_NO_UNITS, parts, id); + list[4] = io_make_output_field("Num_ngb_density", INT, 1, UNIT_CONV_NO_UNITS, + parts, num_ngb_density); + list[5] = io_make_output_field("Num_ngb_force", INT, 1, UNIT_CONV_NO_UNITS, + parts, num_ngb_force); + list[6] = io_make_output_field("Ids_ngb_density", LONGLONG, 256, + UNIT_CONV_NO_UNITS, parts, ids_ngbs_density); + list[7] = io_make_output_field("Ids_ngb_force", LONGLONG, 256, + UNIT_CONV_NO_UNITS, parts, ids_ngbs_force); +} + +/** + * @brief Writes the current model of SPH to the file + * @param h_grpsph The HDF5 group in which to write + */ +void writeSPHflavour(hid_t h_grpsph) { + + /* Viscosity and thermal conduction */ + io_write_attribute_s(h_grpsph, "Thermal Conductivity Model", "None"); + io_write_attribute_s(h_grpsph, "Viscosity Model", "None"); +} + +/** + * @brief Are we writing entropy in the internal energy field ? + * + * @return 1 if entropy is in 'internal energy', 0 otherwise. + */ +int writeEntropyFlag() { return 0; } + +#endif /* SWIFT_DEBUG_INTERACTIONS_HYDRO_IO_H */ diff --git a/src/hydro/DebugInteractions/hydro_part.h b/src/hydro/DebugInteractions/hydro_part.h new file mode 100644 index 0000000000000000000000000000000000000000..85410b2f3f9f11409398aeebc7bfe824c71cf217 --- /dev/null +++ b/src/hydro/DebugInteractions/hydro_part.h @@ -0,0 +1,110 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2017 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_DEBUG_INTERACTIONS_HYDRO_PART_H +#define SWIFT_DEBUG_INTERACTIONS_HYDRO_PART_H + +/** + * @file DebugInteractions/hydro_part.h + * @brief Empty SPH implementation used solely to test the SELF/PAIR routines. + */ + +#include "cooling_struct.h" + +/* Extra particle data not needed during the SPH loops over neighbours. */ +struct xpart { + + /* Offset between current position and position at last tree rebuild. */ + float x_diff[3]; + + /* Offset between the current position and position at the last sort. */ + float x_diff_sort[3]; + + /* Velocity at the last full step. */ + float v_full[3]; + + /* Additional data used to record cooling information */ + struct cooling_xpart_data cooling_data; + +} SWIFT_STRUCT_ALIGN; + +/* Data of a single particle. */ +struct part { + + /* Particle ID. */ + long long id; + + /* Pointer to corresponding gravity part. */ + struct gpart* gpart; + + /* Particle position. */ + double x[3]; + + /* Particle predicted velocity. */ + float v[3]; + + /* Particle acceleration. */ + float a_hydro[3]; + + /* Particle cutoff radius. */ + float h; + + struct { + + /* Number of neighbours. */ + float wcount; + + /* Number of neighbours spatial derivative. */ + float wcount_dh; + + } density; + + struct { + + float h_dt; + + } force; + + /* Time-step length */ + timebin_t time_bin; + +#ifdef SWIFT_DEBUG_CHECKS + + /* Time of the last drift */ + integertime_t ti_drift; + + /* Time of the last kick */ + integertime_t ti_kick; + +#endif + + /*! List of interacting particles in the density SELF and PAIR */ + long long ids_ngbs_density[256]; + + /*! List of interacting particles in the force SELF and PAIR */ + long long ids_ngbs_force[256]; + + /*! Number of interactions in the density SELF and PAIR */ + int num_ngb_density; + + /*! Number of interactions in the force SELF and PAIR */ + int num_ngb_force; + +} SWIFT_STRUCT_ALIGN; + +#endif /* SWIFT_DEBUG_INTERACTIONS_HYDRO_PART_H */ diff --git a/src/hydro/Default/hydro_iact.h b/src/hydro/Default/hydro_iact.h index 7b1c8c3b91ce917af46efc28f6001a4d47747e2a..89b90be22b831066c8b9211bfe50208b4c4af67e 100644 --- a/src/hydro/Default/hydro_iact.h +++ b/src/hydro/Default/hydro_iact.h @@ -96,120 +96,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( for (k = 0; k < 3; k++) pj->density.rot_v[k] += mi * curlvr[k] * wj_dx; } -/** - * @brief Density loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_density( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef WITH_VECTORIZATION - - vector r, ri, r2, xi, xj, hi, hj, hi_inv, hj_inv, wi, wj, wi_dx, wj_dx; - vector rhoi, rhoj, rhoi_dh, rhoj_dh, wcounti, wcountj, wcounti_dh, wcountj_dh; - vector mi, mj; - vector dx[3], dv[3]; - vector vi[3], vj[3]; - vector dvdr, div_vi, div_vj; - vector curlvr[3], rot_vi[3], rot_vj[3]; - int k, j; - -#if VEC_SIZE == 8 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, - pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); -#elif VEC_SIZE == 4 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); - r.v = r2.v * ri.v; - - hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f)); - xi.v = r.v * hi_inv.v; - - hj.v = vec_load(Hj); - hj_inv.v = vec_rcp(hj.v); - hj_inv.v = hj_inv.v - hj_inv.v * (hj_inv.v * hj.v - vec_set1(1.0f)); - xj.v = r.v * hj_inv.v; - - kernel_deval_vec(&xi, &wi, &wi_dx); - kernel_deval_vec(&xj, &wj, &wj_dx); - - /* Compute dv. */ - dv[0].v = vi[0].v - vj[0].v; - dv[1].v = vi[1].v - vj[1].v; - dv[2].v = vi[2].v - vj[2].v; - - /* Compute dv dot r */ - dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Compute dv cross r */ - curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v; - curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v; - curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v; - for (k = 0; k < 3; k++) curlvr[k].v *= ri.v; - - rhoi.v = mj.v * wi.v; - rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + xi.v * wi_dx.v); - wcounti.v = wi.v; - wcounti_dh.v = xi.v * wi_dx.v; - div_vi.v = mj.v * dvdr.v * wi_dx.v; - for (k = 0; k < 3; k++) rot_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; - - rhoj.v = mi.v * wj.v; - rhoj_dh.v = mi.v * (vec_set1(hydro_dimension) * wj.v + xj.v * wj_dx.v); - wcountj.v = wj.v; - wcountj_dh.v = xj.v * wj_dx.v; - div_vj.v = mi.v * dvdr.v * wj_dx.v; - for (k = 0; k < 3; k++) rot_vj[k].v = mi.v * curlvr[k].v * wj_dx.v; - - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->rho += rhoi.f[k]; - pi[k]->rho_dh -= rhoi_dh.f[k]; - pi[k]->density.wcount += wcounti.f[k]; - pi[k]->density.wcount_dh -= wcounti_dh.f[k]; - pi[k]->density.div_v -= div_vi.f[k]; - for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += rot_vi[j].f[k]; - pj[k]->rho += rhoj.f[k]; - pj[k]->rho_dh -= rhoj_dh.f[k]; - pj[k]->density.wcount += wcountj.f[k]; - pj[k]->density.wcount_dh -= wcountj_dh.f[k]; - pj[k]->density.div_v -= div_vj.f[k]; - for (j = 0; j < 3; j++) pj[k]->density.rot_v[j] += rot_vj[j].f[k]; - } - -#else - - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_density(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]); - -#endif -} - /** * @brief Density loop (non-symmetric version) */ @@ -258,98 +144,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( for (k = 0; k < 3; k++) pi->density.rot_v[k] += mj * curlvr[k] * wi_dx; } -/** - * @brief Density loop (non-symmetric vectorized version) - */ -__attribute__((always_inline)) INLINE static void -runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, - struct part **pi, struct part **pj) { - -#ifdef WITH_VECTORIZATION - - vector r, ri, r2, xi, hi, hi_inv, wi, wi_dx; - vector rhoi, rhoi_dh, wcounti, wcounti_dh, div_vi; - vector mj; - vector dx[3], dv[3]; - vector vi[3], vj[3]; - vector dvdr; - vector curlvr[3], rot_vi[3]; - int k, j; - -#if VEC_SIZE == 8 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); -#elif VEC_SIZE == 4 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); - r.v = r2.v * ri.v; - - hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f)); - xi.v = r.v * hi_inv.v; - - kernel_deval_vec(&xi, &wi, &wi_dx); - - /* Compute dv. */ - dv[0].v = vi[0].v - vj[0].v; - dv[1].v = vi[1].v - vj[1].v; - dv[2].v = vi[2].v - vj[2].v; - - /* Compute dv dot r */ - dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Compute dv cross r */ - curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v; - curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v; - curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v; - for (k = 0; k < 3; k++) curlvr[k].v *= ri.v; - - rhoi.v = mj.v * wi.v; - rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + xi.v * wi_dx.v); - wcounti.v = wi.v; - wcounti_dh.v = xi.v * wi_dx.v; - div_vi.v = mj.v * dvdr.v * wi_dx.v; - for (k = 0; k < 3; k++) rot_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; - - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->rho += rhoi.f[k]; - pi[k]->rho_dh -= rhoi_dh.f[k]; - pi[k]->density.wcount += wcounti.f[k]; - pi[k]->density.wcount_dh -= wcounti_dh.f[k]; - pi[k]->density.div_v -= div_vi.f[k]; - for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += rot_vi[j].f[k]; - } - -#else - - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_nonsym_density(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]); - -#endif -} - /** * @brief Force loop */ @@ -445,212 +239,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( pj->force.v_sig = max(pj->force.v_sig, v_sig); } -/** - * @brief Force loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef WITH_VECTORIZATION - - vector r, r2, ri; - vector xi, xj; - vector hi, hj, hi_inv, hj_inv; - vector hid_inv, hjd_inv; - vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; - vector w; - vector piPOrho2, pjPOrho2, pirho, pjrho, piu, pju; - vector mi, mj; - vector f; - vector dx[3]; - vector vi[3], vj[3]; - vector pia[3], pja[3]; - vector piu_dt, pju_dt; - vector pih_dt, pjh_dt; - vector ci, cj, v_sig; - vector omega_ij, Pi_ij, balsara; - vector pialpha, pjalpha, alpha_ij, v_sig_u, tc; - int j, k; - -/* Load stuff. */ -#if VEC_SIZE == 8 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, - pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, - pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, - pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); - pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, - pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, - pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, - pi[5]->rho, pi[6]->rho, pi[7]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, - pj[5]->rho, pj[6]->rho, pj[7]->rho); - piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u, pi[4]->u, pi[5]->u, - pi[6]->u, pi[7]->u); - pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u, - pj[6]->u, pj[7]->u); - ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, - pi[2]->force.soundspeed, pi[3]->force.soundspeed, - pi[4]->force.soundspeed, pi[5]->force.soundspeed, - pi[6]->force.soundspeed, pi[7]->force.soundspeed); - cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, - pj[2]->force.soundspeed, pj[3]->force.soundspeed, - pj[4]->force.soundspeed, pj[5]->force.soundspeed, - pj[6]->force.soundspeed, pj[7]->force.soundspeed); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); - balsara.v = - vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara, - pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara, - pi[6]->force.balsara, pi[7]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara, - pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara, - pj[6]->force.balsara, pj[7]->force.balsara); - pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha, - pi[4]->alpha, pi[5]->alpha, pi[6]->alpha, pi[7]->alpha); - pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha, - pj[4]->alpha, pj[5]->alpha, pj[6]->alpha, pj[7]->alpha); -#elif VEC_SIZE == 4 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); - pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); - piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u); - pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u); - ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, - pi[2]->force.soundspeed, pi[3]->force.soundspeed); - cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, - pj[2]->force.soundspeed, pj[3]->force.soundspeed); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); - balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara, - pi[2]->force.balsara, pi[3]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, - pj[2]->force.balsara, pj[3]->force.balsara); - pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha); - pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha); -#else -#error -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); - r.v = r2.v * ri.v; - - /* Get the kernel for hi. */ - hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f)); - hid_inv = pow_dimension_plus_one_vec(hi_inv); - xi.v = r.v * hi_inv.v; - kernel_deval_vec(&xi, &wi, &wi_dx); - wi_dr.v = hid_inv.v * wi_dx.v; - - /* Get the kernel for hj. */ - hj.v = vec_load(Hj); - hj_inv.v = vec_rcp(hj.v); - hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f)); - hjd_inv = pow_dimension_plus_one_vec(hj_inv); - xj.v = r.v * hj_inv.v; - kernel_deval_vec(&xj, &wj, &wj_dx); - wj_dr.v = hjd_inv.v * wj_dx.v; - - /* Compute dv dot r. */ - dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + - ((vi[2].v - vj[2].v) * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Get the time derivative for h. */ - pih_dt.v = mj.v / pjrho.v * dvdr.v * wi_dr.v; - pjh_dt.v = mi.v / pirho.v * dvdr.v * wj_dr.v; - - /* Compute the relative velocity. (This is 0 if the particles move away from - * each other and negative otherwise) */ - omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); - - /* Compute signal velocity */ - v_sig.v = ci.v + cj.v - vec_set1(2.0f) * omega_ij.v; - - /* Compute viscosity parameter */ - alpha_ij.v = vec_set1(-0.5f) * (pialpha.v + pjalpha.v); - - /* Compute viscosity tensor */ - Pi_ij.v = balsara.v * alpha_ij.v * v_sig.v * omega_ij.v / (pirho.v + pjrho.v); - Pi_ij.v *= (wi_dr.v + wj_dr.v); - - /* Thermal conductivity */ - v_sig_u.v = vec_sqrt(vec_set1(2.f * hydro_gamma_minus_one) * - vec_fabs(pirho.v * piu.v - pjrho.v * pju.v) / - (pirho.v + pjrho.v)); - tc.v = vec_set1(const_conductivity_alpha) * v_sig_u.v / (pirho.v + pjrho.v); - tc.v *= (wi_dr.v + wj_dr.v); - - /* Get the common factor out. */ - w.v = ri.v * ((piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v) + - vec_set1(0.25f) * Pi_ij.v); - - /* Use the force, Luke! */ - for (k = 0; k < 3; k++) { - f.v = dx[k].v * w.v; - pia[k].v = mj.v * f.v; - pja[k].v = mi.v * f.v; - } - - /* Get the time derivative for u. */ - piu_dt.v = - mj.v * dvdr.v * (piPOrho2.v * wi_dr.v + vec_set1(0.125f) * Pi_ij.v); - pju_dt.v = - mi.v * dvdr.v * (pjPOrho2.v * wj_dr.v + vec_set1(0.125f) * Pi_ij.v); - - /* Add the thermal conductivity */ - piu_dt.v += mj.v * tc.v * (piu.v - pju.v); - pju_dt.v += mi.v * tc.v * (pju.v - piu.v); - - /* Store the forces back on the particles. */ - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->force.u_dt += piu_dt.f[k]; - pj[k]->force.u_dt += pju_dt.f[k]; - pi[k]->force.h_dt -= pih_dt.f[k]; - pj[k]->force.h_dt -= pjh_dt.f[k]; - pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]); - pj[k]->force.v_sig = max(pj[k]->force.v_sig, v_sig.f[k]); - for (j = 0; j < 3; j++) { - pi[k]->a_hydro[j] -= pia[j].f[k]; - pj[k]->a_hydro[j] += pja[j].f[k]; - } - } - -#else - - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_force(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]); - -#endif -} - /** * @brief Force loop (non-symmetric version) */ @@ -740,196 +328,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->force.v_sig = max(pi->force.v_sig, v_sig); } -/** - * @brief Force loop (Vectorized non-symmetric version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef WITH_VECTORIZATION - - vector r, r2, ri; - vector xi, xj; - vector hi, hj, hi_inv, hj_inv; - vector hid_inv, hjd_inv; - vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; - vector w; - vector piPOrho2, pjPOrho2, pirho, pjrho, piu, pju; - vector mj; - vector f; - vector dx[3]; - vector vi[3], vj[3]; - vector pia[3]; - vector piu_dt; - vector pih_dt; - vector ci, cj, v_sig; - vector omega_ij, Pi_ij, balsara; - vector pialpha, pjalpha, alpha_ij, v_sig_u, tc; - int j, k; - -/* Load stuff. */ -#if VEC_SIZE == 8 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, - pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, - pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); - pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, - pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, - pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, - pi[5]->rho, pi[6]->rho, pi[7]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, - pj[5]->rho, pj[6]->rho, pj[7]->rho); - piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u, pi[4]->u, pi[5]->u, - pi[6]->u, pi[7]->u); - pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u, pj[4]->u, pj[5]->u, - pj[6]->u, pj[7]->u); - ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, - pi[2]->force.soundspeed, pi[3]->force.soundspeed, - pi[4]->force.soundspeed, pi[5]->force.soundspeed, - pi[6]->force.soundspeed, pi[7]->force.soundspeed); - cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, - pj[2]->force.soundspeed, pj[3]->force.soundspeed, - pj[4]->force.soundspeed, pj[5]->force.soundspeed, - pj[6]->force.soundspeed, pj[7]->force.soundspeed); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); - balsara.v = - vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara, - pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara, - pi[6]->force.balsara, pi[7]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara, - pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara, - pj[6]->force.balsara, pj[7]->force.balsara); - pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha, - pi[4]->alpha, pi[5]->alpha, pi[6]->alpha, pi[7]->alpha); - pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha, - pj[4]->alpha, pj[5]->alpha, pj[6]->alpha, pj[7]->alpha); -#elif VEC_SIZE == 4 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); - pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); - piu.v = vec_set(pi[0]->u, pi[1]->u, pi[2]->u, pi[3]->u); - pju.v = vec_set(pj[0]->u, pj[1]->u, pj[2]->u, pj[3]->u); - ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, - pi[2]->force.soundspeed, pi[3]->force.soundspeed); - cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, - pj[2]->force.soundspeed, pj[3]->force.soundspeed); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); - balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara, - pi[2]->force.balsara, pi[3]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, - pj[2]->force.balsara, pj[3]->force.balsara); - pialpha.v = vec_set(pi[0]->alpha, pi[1]->alpha, pi[2]->alpha, pi[3]->alpha); - pjalpha.v = vec_set(pj[0]->alpha, pj[1]->alpha, pj[2]->alpha, pj[3]->alpha); -#else -#error -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); - r.v = r2.v * ri.v; - - /* Get the kernel for hi. */ - hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f)); - hid_inv = pow_dimension_plus_one_vec(hi_inv); - xi.v = r.v * hi_inv.v; - kernel_deval_vec(&xi, &wi, &wi_dx); - wi_dr.v = hid_inv.v * wi_dx.v; - - /* Get the kernel for hj. */ - hj.v = vec_load(Hj); - hj_inv.v = vec_rcp(hj.v); - hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f)); - hjd_inv = pow_dimension_plus_one_vec(hj_inv); - xj.v = r.v * hj_inv.v; - kernel_deval_vec(&xj, &wj, &wj_dx); - wj_dr.v = hjd_inv.v * wj_dx.v; - - /* Compute dv dot r. */ - dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + - ((vi[2].v - vj[2].v) * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Get the time derivative for h. */ - pih_dt.v = mj.v / pjrho.v * dvdr.v * wi_dr.v; - - /* Compute the relative velocity. (This is 0 if the particles move away from - * each other and negative otherwise) */ - omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); - - /* Compute signal velocity */ - v_sig.v = ci.v + cj.v - vec_set1(2.0f) * omega_ij.v; - - /* Compute viscosity parameter */ - alpha_ij.v = vec_set1(-0.5f) * (pialpha.v + pjalpha.v); - - /* Compute viscosity tensor */ - Pi_ij.v = balsara.v * alpha_ij.v * v_sig.v * omega_ij.v / (pirho.v + pjrho.v); - Pi_ij.v *= (wi_dr.v + wj_dr.v); - - /* Thermal conductivity */ - v_sig_u.v = vec_sqrt(vec_set1(2.f * hydro_gamma_minus_one) * - vec_fabs(pirho.v * piu.v - pjrho.v * pju.v) / - (pirho.v + pjrho.v)); - tc.v = vec_set1(const_conductivity_alpha) * v_sig_u.v / (pirho.v + pjrho.v); - tc.v *= (wi_dr.v + wj_dr.v); - - /* Get the common factor out. */ - w.v = ri.v * ((piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v) + - vec_set1(0.25f) * Pi_ij.v); - - /* Use the force, Luke! */ - for (k = 0; k < 3; k++) { - f.v = dx[k].v * w.v; - pia[k].v = mj.v * f.v; - } - - /* Get the time derivative for u. */ - piu_dt.v = - mj.v * dvdr.v * (piPOrho2.v * wi_dr.v + vec_set1(0.125f) * Pi_ij.v); - - /* Add the thermal conductivity */ - piu_dt.v += mj.v * tc.v * (piu.v - pju.v); - - /* Store the forces back on the particles. */ - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->force.u_dt += piu_dt.f[k]; - pi[k]->force.h_dt -= pih_dt.f[k]; - pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]); - for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k]; - } - -#else - - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_nonsym_force(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]); - -#endif -} - #endif /* SWIFT_DEFAULT_HYDRO_IACT_H */ diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index cce4fb03ee405fbdf2232184778a0f426e63014a..3851dd6e894eafff29d35942ab0f364b5919e029 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -105,128 +105,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( pj->density.rot_v[2] += facj * curlvr[2]; } -/** - * @brief Density loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_density( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef WITH_OLD_VECTORIZATION - - vector r, ri, r2, ui, uj, hi, hj, hi_inv, hj_inv, wi, wj, wi_dx, wj_dx; - vector rhoi, rhoj, rhoi_dh, rhoj_dh, wcounti, wcountj, wcounti_dh, wcountj_dh; - vector mi, mj; - vector dx[3], dv[3]; - vector vi[3], vj[3]; - vector dvdr, div_vi, div_vj; - vector curlvr[3], curl_vi[3], curl_vj[3]; - int k, j; - -#if VEC_SIZE == 8 - /* Get the masses. */ - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, - pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - /* Get each velocity component. */ - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - /* Get each component of particle separation. - * (Dx={dx1,dy1,dz1,dx2,dy2,dz2,...,dxn,dyn,dzn})*/ - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); -#elif VEC_SIZE == 4 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); -#else - error("Unknown vector size."); -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri = vec_reciprocal_sqrt(r2); - r.v = r2.v * ri.v; - - hi.v = vec_load(Hi); - hi_inv = vec_reciprocal(hi); - ui.v = r.v * hi_inv.v; - - hj.v = vec_load(Hj); - hj_inv = vec_reciprocal(hj); - uj.v = r.v * hj_inv.v; - - /* Compute the kernel function. */ - kernel_deval_vec(&ui, &wi, &wi_dx); - kernel_deval_vec(&uj, &wj, &wj_dx); - - /* Compute dv. */ - dv[0].v = vi[0].v - vj[0].v; - dv[1].v = vi[1].v - vj[1].v; - dv[2].v = vi[2].v - vj[2].v; - - /* Compute dv dot r */ - dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Compute dv cross r */ - curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v; - curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v; - curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v; - for (k = 0; k < 3; k++) curlvr[k].v *= ri.v; - - /* Compute density of pi. */ - rhoi.v = mj.v * wi.v; - rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + ui.v * wi_dx.v); - wcounti.v = wi.v; - wcounti_dh.v = (vec_set1(hydro_dimension) * wi.v + ui.v * wi_dx.v); - div_vi.v = mj.v * dvdr.v * wi_dx.v; - for (k = 0; k < 3; k++) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; - - /* Compute density of pj. */ - rhoj.v = mi.v * wj.v; - rhoj_dh.v = mi.v * (vec_set1(hydro_dimension) * wj.v + uj.v * wj_dx.v); - wcountj.v = wj.v; - wcountj_dh.v = (vec_set1(hydro_dimension) * wj.v + uj.v * wj_dx.v); - div_vj.v = mi.v * dvdr.v * wj_dx.v; - for (k = 0; k < 3; k++) curl_vj[k].v = mi.v * curlvr[k].v * wj_dx.v; - - /* Update particles. */ - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->rho += rhoi.f[k]; - pi[k]->density.rho_dh -= rhoi_dh.f[k]; - pi[k]->density.wcount += wcounti.f[k]; - pi[k]->density.wcount_dh -= wcounti_dh.f[k]; - pi[k]->density.div_v -= div_vi.f[k]; - for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += curl_vi[j].f[k]; - pj[k]->rho += rhoj.f[k]; - pj[k]->density.rho_dh -= rhoj_dh.f[k]; - pj[k]->density.wcount += wcountj.f[k]; - pj[k]->density.wcount_dh -= wcountj_dh.f[k]; - pj[k]->density.div_v -= div_vj.f[k]; - for (j = 0; j < 3; j++) pj[k]->density.rot_v[j] += curl_vj[j].f[k]; - } - -#else - - error( - "The Gadget2 serial version of runner_iact_density was called when the " - "vectorised version should have been used."); - -#endif -} - /** * @brief Density loop (non-symmetric version) */ @@ -275,105 +153,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( pi->density.rot_v[2] += fac * curlvr[2]; } -/** - * @brief Density loop (non-symmetric vectorized version) - */ -__attribute__((always_inline)) INLINE static void -runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, - struct part **pi, struct part **pj) { - -#ifdef WITH_OLD_VECTORIZATION - - vector r, ri, r2, ui, hi, hi_inv, wi, wi_dx; - vector rhoi, rhoi_dh, wcounti, wcounti_dh, div_vi; - vector mj; - vector dx[3], dv[3]; - vector vi[3], vj[3]; - vector dvdr; - vector curlvr[3], curl_vi[3]; - int k, j; - -#if VEC_SIZE == 8 - /* Get the masses. */ - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - /* Get each velocity component. */ - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - /* Get each component of particle separation. - * (Dx={dx1,dy1,dz1,dx2,dy2,dz2,...,dxn,dyn,dzn})*/ - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); -#elif VEC_SIZE == 4 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); -#else - error("Unknown vector size."); -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri = vec_reciprocal_sqrt(r2); - r.v = r2.v * ri.v; - - hi.v = vec_load(Hi); - hi_inv = vec_reciprocal(hi); - ui.v = r.v * hi_inv.v; - - kernel_deval_vec(&ui, &wi, &wi_dx); - - /* Compute dv. */ - dv[0].v = vi[0].v - vj[0].v; - dv[1].v = vi[1].v - vj[1].v; - dv[2].v = vi[2].v - vj[2].v; - - /* Compute dv dot r */ - dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Compute dv cross r */ - curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v; - curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v; - curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v; - for (k = 0; k < 3; k++) curlvr[k].v *= ri.v; - - /* Compute density of pi. */ - rhoi.v = mj.v * wi.v; - rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + ui.v * wi_dx.v); - wcounti.v = wi.v; - wcounti_dh.v = (vec_set1(hydro_dimension) * wi.v + ui.v * wi_dx.v); - div_vi.v = mj.v * dvdr.v * wi_dx.v; - for (k = 0; k < 3; k++) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; - - /* Update particles. */ - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->rho += rhoi.f[k]; - pi[k]->density.rho_dh -= rhoi_dh.f[k]; - pi[k]->density.wcount += wcounti.f[k]; - pi[k]->density.wcount_dh -= wcounti_dh.f[k]; - pi[k]->density.div_v -= div_vi.f[k]; - for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += curl_vi[j].f[k]; - } - -#else - - error( - "The Gadget2 serial version of runner_iact_nonsym_density was called " - "when the vectorised version should have been used."); - -#endif -} - #ifdef WITH_VECTORIZATION /** @@ -703,199 +482,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( pj->entropy_dt += mi * visc_term * dvdr; } -/** - * @brief Force loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef WITH_OLD_VECTORIZATION - - vector r, r2, ri; - vector xi, xj; - vector hi, hj, hi_inv, hj_inv; - vector hid_inv, hjd_inv; - vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; - vector piPOrho2, pjPOrho2, pirho, pjrho; - vector mi, mj; - vector f; - vector grad_hi, grad_hj; - vector dx[3]; - vector vi[3], vj[3]; - vector pia[3], pja[3]; - vector pih_dt, pjh_dt; - vector ci, cj, v_sig; - vector omega_ij, mu_ij, fac_mu, balsara; - vector rho_ij, visc, visc_term, sph_term, acc, entropy_dt; - int j, k; - - fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */ - -/* Load stuff. */ -#if VEC_SIZE == 8 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, - pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, - pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, - pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); - pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, - pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, - pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); - grad_hi.v = - vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f, - pi[4]->force.f, pi[5]->force.f, pi[6]->force.f, pi[7]->force.f); - grad_hj.v = - vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f, - pj[4]->force.f, pj[5]->force.f, pj[6]->force.f, pj[7]->force.f); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, - pi[5]->rho, pi[6]->rho, pi[7]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, - pj[5]->rho, pj[6]->rho, pj[7]->rho); - ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, - pi[2]->force.soundspeed, pi[3]->force.soundspeed, - pi[4]->force.soundspeed, pi[5]->force.soundspeed, - pi[6]->force.soundspeed, pi[7]->force.soundspeed); - cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, - pj[2]->force.soundspeed, pj[3]->force.soundspeed, - pj[4]->force.soundspeed, pj[5]->force.soundspeed, - pj[6]->force.soundspeed, pj[7]->force.soundspeed); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); - balsara.v = - vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara, - pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara, - pi[6]->force.balsara, pi[7]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara, - pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara, - pj[6]->force.balsara, pj[7]->force.balsara); -#elif VEC_SIZE == 4 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); - pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); - grad_hi.v = - vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f); - grad_hj.v = - vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); - ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, - pi[2]->force.soundspeed, pi[3]->force.soundspeed); - cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, - pj[2]->force.soundspeed, pj[3]->force.soundspeed); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); - balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara, - pi[2]->force.balsara, pi[3]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, - pj[2]->force.balsara, pj[3]->force.balsara); -#else - error("Unknown vector size."); -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri = vec_reciprocal_sqrt(r2); - r.v = r2.v * ri.v; - - /* Get the kernel for hi. */ - hi.v = vec_load(Hi); - hi_inv = vec_reciprocal(hi); - hid_inv = pow_dimension_plus_one_vec(hi_inv); /* 1/h^(d+1) */ - xi.v = r.v * hi_inv.v; - kernel_deval_vec(&xi, &wi, &wi_dx); - wi_dr.v = hid_inv.v * wi_dx.v; - - /* Get the kernel for hj. */ - hj.v = vec_load(Hj); - hj_inv = vec_reciprocal(hj); - hjd_inv = pow_dimension_plus_one_vec(hj_inv); /* 1/h^(d+1) */ - xj.v = r.v * hj_inv.v; - kernel_deval_vec(&xj, &wj, &wj_dx); - wj_dr.v = hjd_inv.v * wj_dx.v; - - /* Compute dv dot r. */ - dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + - ((vi[2].v - vj[2].v) * dx[2].v); - // dvdr.v = dvdr.v * ri.v; - - /* Compute the relative velocity. (This is 0 if the particles move away from - * each other and negative otherwise) */ - omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); - mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */ - - /* Compute signal velocity */ - v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v; - - /* Now construct the full viscosity term */ - rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v); - visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v * - mu_ij.v * balsara.v / rho_ij.v; - - /* Now, convolve with the kernel */ - visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v; - sph_term.v = - (grad_hi.v * piPOrho2.v * wi_dr.v + grad_hj.v * pjPOrho2.v * wj_dr.v) * - ri.v; - - /* Eventually get the acceleration */ - acc.v = visc_term.v + sph_term.v; - - /* Use the force, Luke! */ - for (k = 0; k < 3; k++) { - f.v = dx[k].v * acc.v; - pia[k].v = mj.v * f.v; - pja[k].v = mi.v * f.v; - } - - /* Get the time derivative for h. */ - pih_dt.v = mj.v * dvdr.v * ri.v / pjrho.v * wi_dr.v; - pjh_dt.v = mi.v * dvdr.v * ri.v / pirho.v * wj_dr.v; - - /* Change in entropy */ - entropy_dt.v = visc_term.v * dvdr.v; - - /* Store the forces back on the particles. */ - for (k = 0; k < VEC_SIZE; k++) { - for (j = 0; j < 3; j++) { - pi[k]->a_hydro[j] -= pia[j].f[k]; - pj[k]->a_hydro[j] += pja[j].f[k]; - } - pi[k]->force.h_dt -= pih_dt.f[k]; - pj[k]->force.h_dt -= pjh_dt.f[k]; - pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]); - pj[k]->force.v_sig = max(pj[k]->force.v_sig, v_sig.f[k]); - pi[k]->entropy_dt += entropy_dt.f[k] * mj.f[k]; - pj[k]->entropy_dt += entropy_dt.f[k] * mi.f[k]; - } - -#else - - error( - "The Gadget2 serial version of runner_iact_nonsym_force was called when " - "the vectorised version should have been used."); - -#endif -} - /** * @brief Force loop (non-symmetric version) */ @@ -985,188 +571,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->entropy_dt += mj * visc_term * dvdr; } -/** - * @brief Force loop (Vectorized non-symmetric version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef WITH_OLD_VECTORIZATION - - vector r, r2, ri; - vector xi, xj; - vector hi, hj, hi_inv, hj_inv; - vector hid_inv, hjd_inv; - vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; - vector piPOrho2, pjPOrho2, pirho, pjrho; - vector mj; - vector f; - vector grad_hi, grad_hj; - vector dx[3]; - vector vi[3], vj[3]; - vector pia[3]; - vector pih_dt; - vector ci, cj, v_sig; - vector omega_ij, mu_ij, fac_mu, balsara; - vector rho_ij, visc, visc_term, sph_term, acc, entropy_dt; - int j, k; - - fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */ - -/* Load stuff. */ -#if VEC_SIZE == 8 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, - pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, - pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); - pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, - pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, - pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); - grad_hi.v = - vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f, - pi[4]->force.f, pi[5]->force.f, pi[6]->force.f, pi[7]->force.f); - grad_hj.v = - vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f, - pj[4]->force.f, pj[5]->force.f, pj[6]->force.f, pj[7]->force.f); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, - pi[5]->rho, pi[6]->rho, pi[7]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, - pj[5]->rho, pj[6]->rho, pj[7]->rho); - ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, - pi[2]->force.soundspeed, pi[3]->force.soundspeed, - pi[4]->force.soundspeed, pi[5]->force.soundspeed, - pi[6]->force.soundspeed, pi[7]->force.soundspeed); - cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, - pj[2]->force.soundspeed, pj[3]->force.soundspeed, - pj[4]->force.soundspeed, pj[5]->force.soundspeed, - pj[6]->force.soundspeed, pj[7]->force.soundspeed); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); - balsara.v = - vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara, - pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara, - pi[6]->force.balsara, pi[7]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara, - pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara, - pj[6]->force.balsara, pj[7]->force.balsara); -#elif VEC_SIZE == 4 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); - pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); - grad_hi.v = - vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f); - grad_hj.v = - vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); - ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, - pi[2]->force.soundspeed, pi[3]->force.soundspeed); - cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed, - pj[2]->force.soundspeed, pj[3]->force.soundspeed); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); - balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara, - pi[2]->force.balsara, pi[3]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, - pj[2]->force.balsara, pj[3]->force.balsara); -#else - error("Unknown vector size."); -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri = vec_reciprocal_sqrt(r2); - r.v = r2.v * ri.v; - - /* Get the kernel for hi. */ - hi.v = vec_load(Hi); - hi_inv = vec_reciprocal(hi); - hid_inv = pow_dimension_plus_one_vec(hi_inv); - xi.v = r.v * hi_inv.v; - kernel_deval_vec(&xi, &wi, &wi_dx); - wi_dr.v = hid_inv.v * wi_dx.v; - - /* Get the kernel for hj. */ - hj.v = vec_load(Hj); - hj_inv = vec_reciprocal(hj); - hjd_inv = pow_dimension_plus_one_vec(hj_inv); - xj.v = r.v * hj_inv.v; - kernel_deval_vec(&xj, &wj, &wj_dx); - wj_dr.v = hjd_inv.v * wj_dx.v; - - /* Compute dv dot r. */ - dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + - ((vi[2].v - vj[2].v) * dx[2].v); - // dvdr.v = dvdr.v * ri.v; - - /* Compute the relative velocity. (This is 0 if the particles move away from - * each other and negative otherwise) */ - omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); - mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */ - - /* Compute signal velocity */ - v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v; - - /* Now construct the full viscosity term */ - rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v); - visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v * - mu_ij.v * balsara.v / rho_ij.v; - - /* Now, convolve with the kernel */ - visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v; - sph_term.v = - (grad_hi.v * piPOrho2.v * wi_dr.v + grad_hj.v * pjPOrho2.v * wj_dr.v) * - ri.v; - - /* Eventually get the acceleration */ - acc.v = visc_term.v + sph_term.v; - - /* Use the force, Luke! */ - for (k = 0; k < 3; k++) { - f.v = dx[k].v * acc.v; - pia[k].v = mj.v * f.v; - } - - /* Get the time derivative for h. */ - pih_dt.v = mj.v * dvdr.v * ri.v / pjrho.v * wi_dr.v; - - /* Change in entropy */ - entropy_dt.v = mj.v * visc_term.v * dvdr.v; - - /* Store the forces back on the particles. */ - for (k = 0; k < VEC_SIZE; k++) { - for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k]; - pi[k]->force.h_dt -= pih_dt.f[k]; - pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]); - pi[k]->entropy_dt += entropy_dt.f[k]; - } - -#else - - error( - "The Gadget2 serial version of runner_iact_nonsym_force was called when " - "the vectorised version should have been used."); - -#endif -} - #ifdef WITH_VECTORIZATION static const vector const_viscosity_alpha_fac = FILL_VEC(-0.25f * const_viscosity_alpha); diff --git a/src/hydro/Gizmo/hydro_iact.h b/src/hydro/Gizmo/hydro_iact.h index 0c7c8251b7d1c105dfc0c4b1637724accadaa4ae..3dd66eecb408213fef5c03fae56c33b7d4de34cb 100644 --- a/src/hydro/Gizmo/hydro_iact.h +++ b/src/hydro/Gizmo/hydro_iact.h @@ -594,37 +594,3 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0); } - -//// EMPTY VECTORIZED VERSIONS (gradients methods are missing...) - -__attribute__((always_inline)) INLINE static void runner_iact_vec_density( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - error( - "Vectorised versions of the Gizmo interaction functions do not exist " - "yet!"); -} - -__attribute__((always_inline)) INLINE static void -runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, - struct part **pi, struct part **pj) { - error( - "Vectorised versions of the Gizmo interaction functions do not exist " - "yet!"); -} - -__attribute__((always_inline)) INLINE static void runner_iact_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - error( - "Vectorised versions of the Gizmo interaction functions do not exist " - "yet!"); -} - -__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - error( - "Vectorised versions of the Gizmo interaction functions do not exist " - "yet!"); -} diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h index 621177a3363e651e12dd728ad96ddadce3812f0e..0d4db1123d23e59146a2b026700c7b44a9e99f0c 100644 --- a/src/hydro/Minimal/hydro_iact.h +++ b/src/hydro/Minimal/hydro_iact.h @@ -70,17 +70,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( pj->density.wcount_dh -= (hydro_dimension * wj + uj * wj_dx); } -/** - * @brief Density loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_density( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - error( - "A vectorised version of the Minimal density interaction function does " - "not exist yet!"); -} - /** * @brief Density loop (non-symmetric version) */ @@ -105,17 +94,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( pi->density.wcount_dh -= (hydro_dimension * wi + ui * wi_dx); } -/** - * @brief Density loop (non-symmetric vectorized version) - */ -__attribute__((always_inline)) INLINE static void -runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, - struct part **pi, struct part **pj) { - error( - "A vectorised version of the Minimal non-symmetric density interaction " - "function does not exist yet!"); -} - /** * @brief Force loop */ @@ -216,17 +194,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( pj->force.v_sig = max(pj->force.v_sig, v_sig); } -/** - * @brief Force loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - error( - "A vectorised version of the Minimal force interaction function does not " - "exist yet!"); -} - /** * @brief Force loop (non-symmetric version) */ @@ -318,15 +285,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->force.v_sig = max(pi->force.v_sig, v_sig); } -/** - * @brief Force loop (Vectorized non-symmetric version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - error( - "A vectorised version of the Minimal non-symmetric force interaction " - "function does not exist yet!"); -} - #endif /* SWIFT_MINIMAL_HYDRO_IACT_H */ diff --git a/src/hydro/PressureEntropy/hydro_iact.h b/src/hydro/PressureEntropy/hydro_iact.h index 37a9f2b01af16fe598b414a9f67123849bee1442..171f7911a31b53fce74713e6d7e244bc79c59828 100644 --- a/src/hydro/PressureEntropy/hydro_iact.h +++ b/src/hydro/PressureEntropy/hydro_iact.h @@ -110,16 +110,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( pj->density.rot_v[2] += facj * curlvr[2]; } -/** - * @brief Density loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_density( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - - error("Vectorized version of Pressure-Entropy SPH routine not existant yet."); -} - /** * @brief Density loop (non-symmetric version) */ @@ -173,16 +163,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( pi->density.rot_v[2] += fac * curlvr[2]; } -/** - * @brief Density loop (non-symmetric vectorized version) - */ -__attribute__((always_inline)) INLINE static void -runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, - struct part **pi, struct part **pj) { - - error("Vectorized version of Pressure-Entropy SPH routine not existant yet."); -} - /** * @brief Force loop */ @@ -284,16 +264,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( pj->entropy_dt += mi * visc_term * r_inv * dvdr; } -/** - * @brief Force loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - - error("Vectorized version of Pressure-Entropy SPH routine not existant yet."); -} - /** * @brief Force loop (non-symmetric version) */ @@ -388,14 +358,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->entropy_dt += mj * visc_term * r_inv * dvdr; } -/** - * @brief Force loop (Vectorized non-symmetric version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - - error("Vectorized version of Pressure-Entropy SPH routine not existant yet."); -} - #endif /* SWIFT_PRESSURE_ENTROPY_HYDRO_IACT_H */ diff --git a/src/hydro/Shadowswift/hydro_iact.h b/src/hydro/Shadowswift/hydro_iact.h index 63f7bdc6900c8fe9887879f3ff7e6c5eaff1b281..28c3ce5521c8f15a971d47dbb3e48cdb2a57e77d 100644 --- a/src/hydro/Shadowswift/hydro_iact.h +++ b/src/hydro/Shadowswift/hydro_iact.h @@ -346,20 +346,3 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0); } -//// EMPTY VECTORIZED VERSIONS (gradients methods are missing...) - -__attribute__((always_inline)) INLINE static void runner_iact_vec_density( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) {} - -__attribute__((always_inline)) INLINE static void -runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, - struct part **pi, struct part **pj) {} - -__attribute__((always_inline)) INLINE static void runner_iact_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) {} - -__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) {} diff --git a/src/hydro_io.h b/src/hydro_io.h index 202d724f821b570b427210bb48b7070563513458..29044de30960d1e80590d63609063c9552c79308 100644 --- a/src/hydro_io.h +++ b/src/hydro_io.h @@ -19,10 +19,13 @@ #ifndef SWIFT_HYDRO_IO_H #define SWIFT_HYDRO_IO_H -#include "./const.h" +/* Config parameters. */ +#include "../config.h" /* Import the right functions */ -#if defined(MINIMAL_SPH) +#if defined(DEBUG_INTERACTIONS_SPH) +#include "./hydro/DebugInteractions/hydro_io.h" +#elif defined(MINIMAL_SPH) #include "./hydro/Minimal/hydro_io.h" #elif defined(GADGET2_SPH) #include "./hydro/Gadget2/hydro_io.h" diff --git a/src/part.h b/src/part.h index 1b40aee0db3deb4790e07e3da9807060900d0c55..ebe0ae41c6a292e5ec140dcaf32c4620c7f8586f 100644 --- a/src/part.h +++ b/src/part.h @@ -42,7 +42,10 @@ #define gpart_align 128 /* Import the right hydro particle definition */ -#if defined(MINIMAL_SPH) +#if defined(DEBUG_INTERACTIONS_SPH) +#include "./hydro/DebugInteractions/hydro_part.h" +#define hydro_need_extra_init_loop 0 +#elif defined(MINIMAL_SPH) #include "./hydro/Minimal/hydro_part.h" #define hydro_need_extra_init_loop 0 #elif defined(GADGET2_SPH) diff --git a/src/runner.c b/src/runner.c index 0ceba7b04e1f76080deb14f2de3e69c41f8d174c..87e517073586f7b67eb63450f6f7af54a2813a4c 100644 --- a/src/runner.c +++ b/src/runner.c @@ -669,8 +669,8 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { for (int i = 0; i < count; i++) { /* Get a direct pointer on the part. */ - struct part *restrict p = &parts[pid[i]]; - struct xpart *restrict xp = &xparts[pid[i]]; + struct part *p = &parts[pid[i]]; + struct xpart *xp = &xparts[pid[i]]; #ifdef SWIFT_DEBUG_CHECKS /* Is this part within the timestep? */ diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 14eecaa2c0c126b721a400e63298e1d55a9c98da..5812b9641dbb371eff74c1b9a1c4c68b8f1d3f79 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -125,14 +125,6 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, error("Don't use in actual runs ! Slow code !"); #endif -#ifdef WITH_VECTORIZATION - int icount = 0; - float r2q[VEC_SIZE] __attribute__((aligned(16))); - float hiq[VEC_SIZE] __attribute__((aligned(16))); - float hjq[VEC_SIZE] __attribute__((aligned(16))); - float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; -#endif TIMER_TIC; /* Anything to do here? */ @@ -157,93 +149,44 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts_i[pid]; + const int pi_active = part_is_active(pi, e); const float hi = pi->h; - - double pix[3]; - for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; const float hig2 = hi * hi * kernel_gamma2; + const float pix[3] = {pi->x[0] - (cj->loc[0] + shift[0]), + pi->x[1] - (cj->loc[1] + shift[1]), + pi->x[2] - (cj->loc[2] + shift[2])}; /* Loop over the parts in cj. */ for (int pjd = 0; pjd < count_j; pjd++) { /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[pjd]; + const float hj = pj->h; + const float hjg2 = hj * hj * kernel_gamma2; + const int pj_active = part_is_active(pj, e); /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; - } + const float pjx[3] = {pj->x[0] - cj->loc[0], pj->x[1] - cj->loc[1], + pj->x[2] - cj->loc[2]}; + float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; /* Hit or miss? */ - if (r2 < hig2) { + if (r2 < hig2 && pi_active) { -#ifndef WITH_VECTORIZATION - - IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); - -#else - - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = dx[0]; - dxq[3 * icount + 1] = dx[1]; - dxq[3 * icount + 2] = dx[2]; - hiq[icount] = hi; - hjq[icount] = pj->h; - piq[icount] = pi; - pjq[icount] = pj; - icount += 1; - - /* Flush? */ - if (icount == VEC_SIZE) { - IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); - icount = 0; - } - -#endif + IACT_NONSYM(r2, dx, hi, hj, pi, pj); } - if (r2 < pj->h * pj->h * kernel_gamma2) { - -#ifndef WITH_VECTORIZATION - - for (int k = 0; k < 3; k++) dx[k] = -dx[k]; - IACT_NONSYM(r2, dx, pj->h, hi, pj, pi); + if (r2 < hjg2 && pj_active) { -#else - - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = -dx[0]; - dxq[3 * icount + 1] = -dx[1]; - dxq[3 * icount + 2] = -dx[2]; - hiq[icount] = pj->h; - hjq[icount] = hi; - piq[icount] = pj; - pjq[icount] = pi; - icount += 1; - - /* Flush? */ - if (icount == VEC_SIZE) { - IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); - icount = 0; - } + dx[0] = -dx[0]; + dx[1] = -dx[1]; + dx[2] = -dx[2]; -#endif + IACT_NONSYM(r2, dx, hj, hi, pj, pi); } } /* loop over the parts in cj. */ - - } /* loop over the parts in ci. */ - -#ifdef WITH_VECTORIZATION - /* Pick up any leftovers. */ - if (icount > 0) - for (int k = 0; k < icount; k++) - IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); -#endif + } /* loop over the parts in ci. */ TIMER_TOC(TIMER_DOPAIR); } @@ -266,14 +209,6 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, error("Don't use in actual runs ! Slow code !"); #endif -#ifdef WITH_OLD_VECTORIZATION - int icount = 0; - float r2q[VEC_SIZE] __attribute__((aligned(16))); - float hiq[VEC_SIZE] __attribute__((aligned(16))); - float hjq[VEC_SIZE] __attribute__((aligned(16))); - float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; -#endif TIMER_TIC; /* Anything to do here? */ @@ -298,65 +233,48 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts_i[pid]; + const int pi_active = part_is_active(pi, e); const float hi = pi->h; - - double pix[3]; - for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; const float hig2 = hi * hi * kernel_gamma2; + const float pix[3] = {pi->x[0] - (cj->loc[0] + shift[0]), + pi->x[1] - (cj->loc[1] + shift[1]), + pi->x[2] - (cj->loc[2] + shift[2])}; /* Loop over the parts in cj. */ for (int pjd = 0; pjd < count_j; pjd++) { /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[pjd]; + const int pj_active = part_is_active(pj, e); + const float hj = pj->h; + const float hjg2 = hj * hj * kernel_gamma2; /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; - } + const float pjx[3] = {pj->x[0] - cj->loc[0], pj->x[1] - cj->loc[1], + pj->x[2] - cj->loc[2]}; + float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; /* Hit or miss? */ - if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) { + if (r2 < hig2 || r2 < hjg2) { -#ifndef WITH_OLD_VECTORIZATION + if (pi_active && pj_active) { - IACT(r2, dx, hi, pj->h, pi, pj); + IACT(r2, dx, hi, hj, pi, pj); + } else if (pi_active) { -#else + IACT_NONSYM(r2, dx, hi, hj, pi, pj); + } else if (pj_active) { - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = dx[0]; - dxq[3 * icount + 1] = dx[1]; - dxq[3 * icount + 2] = dx[2]; - hiq[icount] = hi; - hjq[icount] = pj->h; - piq[icount] = pi; - pjq[icount] = pj; - icount += 1; - - /* Flush? */ - if (icount == VEC_SIZE) { - IACT_VEC(r2q, dxq, hiq, hjq, piq, pjq); - icount = 0; - } + dx[0] = -dx[0]; + dx[1] = -dx[1]; + dx[2] = -dx[2]; -#endif + IACT_NONSYM(r2, dx, hj, hi, pj, pi); + } } - } /* loop over the parts in cj. */ - - } /* loop over the parts in ci. */ - -#ifdef WITH_OLD_VECTORIZATION - /* Pick up any leftovers. */ - if (icount > 0) - for (int k = 0; k < icount; k++) - IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); -#endif + } /* loop over the parts in ci. */ TIMER_TOC(TIMER_DOPAIR); } @@ -377,15 +295,6 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { error("Don't use in actual runs ! Slow code !"); #endif -#ifdef WITH_OLD_VECTORIZATION - int icount = 0; - float r2q[VEC_SIZE] __attribute__((aligned(16))); - float hiq[VEC_SIZE] __attribute__((aligned(16))); - float hjq[VEC_SIZE] __attribute__((aligned(16))); - float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; -#endif - TIMER_TIC; /* Anything to do here? */ @@ -402,12 +311,15 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]}; const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; + const int pi_active = part_is_active(pi, e); /* Loop over the parts in cj. */ for (int pjd = pid + 1; pjd < count; pjd++) { /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts[pjd]; + const float hj = pj->h; + const int pj_active = part_is_active(pj, e); /* Compute the pairwise distance. */ float r2 = 0.0f; @@ -416,47 +328,31 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { dx[k] = pix[k] - pj->x[k]; r2 += dx[k] * dx[k]; } + const float hjg2 = hj * hj * kernel_gamma2; /* Hit or miss? */ - if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) { + if (r2 < hig2 || r2 < hjg2) { -#ifndef WITH_OLD_VECTORIZATION + if (pi_active && pj_active) { - IACT(r2, dx, hi, pj->h, pi, pj); + IACT(r2, dx, hi, hj, pi, pj); + } else if (pi_active) { -#else + IACT_NONSYM(r2, dx, hi, hj, pi, pj); + } else if (pj_active) { - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = dx[0]; - dxq[3 * icount + 1] = dx[1]; - dxq[3 * icount + 2] = dx[2]; - hiq[icount] = hi; - hjq[icount] = pj->h; - piq[icount] = pi; - pjq[icount] = pj; - icount += 1; - - /* Flush? */ - if (icount == VEC_SIZE) { - IACT_VEC(r2q, dxq, hiq, hjq, piq, pjq); - icount = 0; - } + dx[0] = -dx[0]; + dx[1] = -dx[1]; + dx[2] = -dx[2]; -#endif + IACT_NONSYM(r2, dx, hj, hi, pj, pi); + } } } /* loop over the parts in cj. */ } /* loop over the parts in ci. */ -#ifdef WITH_OLD_VECTORIZATION - /* Pick up any leftovers. */ - if (icount > 0) - for (int k = 0; k < icount; k++) - IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); -#endif - TIMER_TOC(TIMER_DOSELF); } @@ -479,13 +375,8 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, const struct engine *e = r->e; -#ifdef WITH_OLD_VECTORIZATION - int icount = 0; - float r2q[VEC_SIZE] __attribute__((aligned(16))); - float hiq[VEC_SIZE] __attribute__((aligned(16))); - float hjq[VEC_SIZE] __attribute__((aligned(16))); - float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; +#ifndef SWIFT_DEBUG_CHECKS + error("Don't use in actual runs ! Slow code !"); #endif TIMER_TIC; @@ -543,42 +434,10 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_OLD_VECTORIZATION - IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); - -#else - - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = dx[0]; - dxq[3 * icount + 1] = dx[1]; - dxq[3 * icount + 2] = dx[2]; - hiq[icount] = hi; - hjq[icount] = pj->h; - piq[icount] = pi; - pjq[icount] = pj; - icount += 1; - - /* Flush? */ - if (icount == VEC_SIZE) { - IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); - icount = 0; - } - -#endif } - } /* loop over the parts in cj. */ - - } /* loop over the parts in ci. */ - -#ifdef WITH_OLD_VECTORIZATION - /* Pick up any leftovers. */ - if (icount > 0) - for (int k = 0; k < icount; k++) - IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); -#endif + } /* loop over the parts in ci. */ TIMER_TOC(timer_dopair_subset_naive); } @@ -598,16 +457,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, struct part *restrict parts_i, int *restrict ind, int count, struct cell *restrict cj) { - struct engine *e = r->e; - -#ifdef WITH_OLD_VECTORIZATION - int icount = 0; - float r2q[VEC_SIZE] __attribute__((aligned(16))); - float hiq[VEC_SIZE] __attribute__((aligned(16))); - float hjq[VEC_SIZE] __attribute__((aligned(16))); - float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; -#endif + const struct engine *e = r->e; TIMER_TIC; @@ -651,62 +501,35 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts_i[ind[pid]]; - double pix[3]; - for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; - + const double pix = pi->x[0] - (shift[0]); + const double piy = pi->x[1] - (shift[1]); + const double piz = pi->x[2] - (shift[2]); const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; - const float di = hi * kernel_gamma + dxj + pix[0] * runner_shift[sid][0] + - pix[1] * runner_shift[sid][1] + - pix[2] * runner_shift[sid][2]; + const double di = hi * kernel_gamma + dxj + pix * runner_shift[sid][0] + + piy * runner_shift[sid][1] + piz * runner_shift[sid][2]; /* Loop over the parts in cj. */ for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[sort_j[pjd].i]; + const float hj = pj->h; + const double pjx = pj->x[0]; + const double pjy = pj->x[1]; + const double pjz = pj->x[2]; /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; - } + float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_OLD_VECTORIZATION - - IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); - -#else - - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = dx[0]; - dxq[3 * icount + 1] = dx[1]; - dxq[3 * icount + 2] = dx[2]; - hiq[icount] = hi; - hjq[icount] = pj->h; - piq[icount] = pi; - pjq[icount] = pj; - icount += 1; - - /* Flush? */ - if (icount == VEC_SIZE) { - IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); - icount = 0; - } - -#endif + IACT_NONSYM(r2, dx, hi, hj, pi, pj); } - } /* loop over the parts in cj. */ - - } /* loop over the parts in ci. */ - + } /* loop over the parts in ci. */ } /* Parts are on the right. */ @@ -717,69 +540,37 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts_i[ind[pid]]; - double pix[3]; - for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const double pix = pi->x[0] - (shift[0]); + const double piy = pi->x[1] - (shift[1]); + const double piz = pi->x[2] - (shift[2]); const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; - const float di = - -hi * kernel_gamma - dxj + pix[0] * runner_shift[sid][0] + - pix[1] * runner_shift[sid][1] + pix[2] * runner_shift[sid][2]; + const double di = -hi * kernel_gamma - dxj + pix * runner_shift[sid][0] + + piy * runner_shift[sid][1] + piz * runner_shift[sid][2]; /* Loop over the parts in cj. */ for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) { /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[sort_j[pjd].i]; + const float hj = pj->h; + const double pjx = pj->x[0]; + const double pjy = pj->x[1]; + const double pjz = pj->x[2]; /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; - } + float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_OLD_VECTORIZATION - - IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); - -#else - - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = dx[0]; - dxq[3 * icount + 1] = dx[1]; - dxq[3 * icount + 2] = dx[2]; - hiq[icount] = hi; - hjq[icount] = pj->h; - piq[icount] = pi; - pjq[icount] = pj; - icount += 1; - - /* Flush? */ - if (icount == VEC_SIZE) { - IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); - icount = 0; - } - -#endif + IACT_NONSYM(r2, dx, hi, hj, pi, pj); } - } /* loop over the parts in cj. */ - - } /* loop over the parts in ci. */ + } /* loop over the parts in ci. */ } -#ifdef WITH_OLD_VECTORIZATION - /* Pick up any leftovers. */ - if (icount > 0) - for (int k = 0; k < icount; k++) - IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); -#endif - TIMER_TOC(timer_dopair_subset); } @@ -796,13 +587,8 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, struct part *restrict parts, int *restrict ind, int count) { -#ifdef WITH_OLD_VECTORIZATION - int icount = 0; - float r2q[VEC_SIZE] __attribute__((aligned(16))); - float hiq[VEC_SIZE] __attribute__((aligned(16))); - float hjq[VEC_SIZE] __attribute__((aligned(16))); - float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; +#ifdef SWIFT_DEBUG_CHECKS + const struct engine *e = r->e; #endif TIMER_TIC; @@ -814,11 +600,16 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, for (int pid = 0; pid < count; pid++) { /* Get a hold of the ith part in ci. */ - struct part *restrict pi = &parts[ind[pid]]; - const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]}; + struct part *pi = &parts[ind[pid]]; + const float pix[3] = {pi->x[0] - ci->loc[0], pi->x[1] - ci->loc[1], + pi->x[2] - ci->loc[2]}; const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; +#ifdef SWIFT_DEBUG_CHECKS + if (!part_is_active(pi, e)) error("Inactive particle in subset function!"); +#endif + /* Loop over the parts in cj. */ for (int pjd = 0; pjd < count_i; pjd++) { @@ -826,52 +617,18 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, struct part *restrict pj = &parts_j[pjd]; /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; - } + const float pjx[3] = {pj->x[0] - ci->loc[0], pj->x[1] - ci->loc[1], + pj->x[2] - ci->loc[2]}; + float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; /* Hit or miss? */ - if (r2 > 0.0f && r2 < hig2) { - -#ifndef WITH_OLD_VECTORIZATION + if (r2 > 0.f && r2 < hig2) { IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); - -#else - - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = dx[0]; - dxq[3 * icount + 1] = dx[1]; - dxq[3 * icount + 2] = dx[2]; - hiq[icount] = hi; - hjq[icount] = pj->h; - piq[icount] = pi; - pjq[icount] = pj; - icount += 1; - - /* Flush? */ - if (icount == VEC_SIZE) { - IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); - icount = 0; - } - -#endif } - } /* loop over the parts in cj. */ - - } /* loop over the parts in ci. */ - -#ifdef WITH_OLD_VECTORIZATION - /* Pick up any leftovers. */ - if (icount > 0) - for (int k = 0; k < icount; k++) - IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); -#endif + } /* loop over the parts in ci. */ TIMER_TOC(timer_doself_subset); } @@ -890,14 +647,8 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const struct engine *restrict e = r->e; -#ifdef WITH_OLD_VECTORIZATION - int icount = 0; - float r2q[VEC_SIZE] __attribute__((aligned(16))); - float hiq[VEC_SIZE] __attribute__((aligned(16))); - float hjq[VEC_SIZE] __attribute__((aligned(16))); - float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; -#endif + // DOPAIR1_NAIVE(r, ci, cj); + // return; TIMER_TIC; @@ -961,28 +712,34 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts_i[sort_i[pid].i]; - if (!part_is_active(pi, e)) continue; const float hi = pi->h; + + /* Skip inactive particles */ + if (!part_is_active(pi, e)) continue; + + /* Is there anything we need to interact with ? */ const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; if (di < dj_min) continue; - double pix[3]; - for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + /* Get some additional information about pi */ const float hig2 = hi * hi * kernel_gamma2; + const float pix = pi->x[0] - (cj->loc[0] + shift[0]); + const float piy = pi->x[1] - (cj->loc[1] + shift[1]); + const float piz = pi->x[2] - (cj->loc[2] + shift[2]); /* Loop over the parts in cj. */ for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[sort_j[pjd].i]; + /* Recover pj */ + struct part *pj = &parts_j[sort_j[pjd].i]; + const float hj = pj->h; + const float pjx = pj->x[0] - cj->loc[0]; + const float pjy = pj->x[1] - cj->loc[1]; + const float pjz = pj->x[2] - cj->loc[2]; /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; - } + float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ @@ -995,37 +752,11 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_OLD_VECTORIZATION - - IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); - -#else - - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = dx[0]; - dxq[3 * icount + 1] = dx[1]; - dxq[3 * icount + 2] = dx[2]; - hiq[icount] = hi; - hjq[icount] = pj->h; - piq[icount] = pi; - pjq[icount] = pj; - icount += 1; - - /* Flush? */ - if (icount == VEC_SIZE) { - IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); - icount = 0; - } - -#endif + IACT_NONSYM(r2, dx, hi, hj, pi, pj); } - } /* loop over the parts in cj. */ - - } /* loop over the parts in ci. */ - - } /* Cell ci is active */ + } /* loop over the parts in ci. */ + } /* Cell ci is active */ if (cell_is_active(cj, e)) { @@ -1034,29 +765,35 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, pjd++) { /* Get a hold of the jth part in cj. */ - struct part *restrict pj = &parts_j[sort_j[pjd].i]; - if (!part_is_active(pj, e)) continue; + struct part *pj = &parts_j[sort_j[pjd].i]; const float hj = pj->h; + + /* Skip inactive particles */ + if (!part_is_active(pj, e)) continue; + + /* Is there anything we need to interact with ? */ const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max + rshift; if (dj - rshift > di_max) continue; - double pjx[3]; - for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k]; + /* Get some additional information about pj */ const float hjg2 = hj * hj * kernel_gamma2; + const float pjx = pj->x[0] - cj->loc[0]; + const float pjy = pj->x[1] - cj->loc[1]; + const float pjz = pj->x[2] - cj->loc[2]; /* Loop over the parts in ci. */ for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) { - /* Get a pointer to the jth particle. */ - struct part *restrict pi = &parts_i[sort_i[pid].i]; + /* Recover pi */ + struct part *pi = &parts_i[sort_i[pid].i]; + const float hi = pi->h; + const float pix = pi->x[0] - (cj->loc[0] + shift[0]); + const float piy = pi->x[1] - (cj->loc[1] + shift[1]); + const float piz = pi->x[2] - (cj->loc[2] + shift[2]); /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pjx[k] - pi->x[k]; - r2 += dx[k] * dx[k]; - } + float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ @@ -1069,44 +806,11 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Hit or miss? */ if (r2 < hjg2) { -#ifndef WITH_OLD_VECTORIZATION - - IACT_NONSYM(r2, dx, hj, pi->h, pj, pi); - -#else - - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = dx[0]; - dxq[3 * icount + 1] = dx[1]; - dxq[3 * icount + 2] = dx[2]; - hiq[icount] = hj; - hjq[icount] = pi->h; - piq[icount] = pj; - pjq[icount] = pi; - icount += 1; - - /* Flush? */ - if (icount == VEC_SIZE) { - IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); - icount = 0; - } - -#endif + IACT_NONSYM(r2, dx, hj, hi, pj, pi); } - } /* loop over the parts in ci. */ - - } /* loop over the parts in cj. */ - - } /* Cell cj is active */ - -#ifdef WITH_OLD_VECTORIZATION - /* Pick up any leftovers. */ - if (icount > 0) - for (int k = 0; k < icount; k++) - IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); -#endif + } /* loop over the parts in cj. */ + } /* Cell cj is active */ TIMER_TOC(TIMER_DOPAIR); } @@ -1165,20 +869,8 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { struct engine *restrict e = r->e; -#ifdef WITH_OLD_VECTORIZATION - int icount1 = 0; - float r2q1[VEC_SIZE] __attribute__((aligned(16))); - float hiq1[VEC_SIZE] __attribute__((aligned(16))); - float hjq1[VEC_SIZE] __attribute__((aligned(16))); - float dxq1[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq1[VEC_SIZE], *pjq1[VEC_SIZE]; - int icount2 = 0; - float r2q2[VEC_SIZE] __attribute__((aligned(16))); - float hiq2[VEC_SIZE] __attribute__((aligned(16))); - float hjq2[VEC_SIZE] __attribute__((aligned(16))); - float dxq2[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq2[VEC_SIZE], *pjq2[VEC_SIZE]; -#endif + // DOPAIR2_NAIVE(r, ci, cj); + // return; TIMER_TIC; @@ -1205,8 +897,8 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; /* Pick-out the sorted lists. */ - struct entry *restrict sort_i = ci->sort[sid]; - struct entry *restrict sort_j = cj->sort[sid]; + const struct entry *restrict sort_i = ci->sort[sid]; + const struct entry *restrict sort_j = cj->sort[sid]; #ifdef SWIFT_DEBUG_CHECKS /* Check that the dx_max_sort values in the cell are indeed an upper @@ -1242,378 +934,123 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { #endif /* SWIFT_DEBUG_CHECKS */ /* Get some other useful values. */ - const double hi_max = ci->h_max * kernel_gamma - rshift; - const double hj_max = cj->h_max * kernel_gamma; + const double hi_max = ci->h_max; + const double hj_max = cj->h_max; const int count_i = ci->count; const int count_j = cj->count; struct part *restrict parts_i = ci->parts; struct part *restrict parts_j = cj->parts; - const double di_max = sort_i[count_i - 1].d - rshift; + const double di_max = sort_i[count_i - 1].d; const double dj_min = sort_j[0].d; const double dx_max = (ci->dx_max_sort + cj->dx_max_sort); - /* Collect the number of parts left and right below dt. */ - int countdt_i = 0, countdt_j = 0; - struct entry *restrict sortdt_i = NULL, *restrict sortdt_j = NULL; - if (cell_is_all_active(ci, e)) { - sortdt_i = sort_i; - countdt_i = count_i; - } else if (cell_is_active(ci, e)) { - if (posix_memalign((void *)&sortdt_i, VEC_SIZE * sizeof(float), - sizeof(struct entry) * count_i) != 0) - error("Failed to allocate dt sortlists."); - for (int k = 0; k < count_i; k++) - if (part_is_active(&parts_i[sort_i[k].i], e)) { - sortdt_i[countdt_i] = sort_i[k]; - countdt_i += 1; - } - } - if (cell_is_all_active(cj, e)) { - sortdt_j = sort_j; - countdt_j = count_j; - } else if (cell_is_active(cj, e)) { - if (posix_memalign((void *)&sortdt_j, VEC_SIZE * sizeof(float), - sizeof(struct entry) * count_j) != 0) - error("Failed to allocate dt sortlists."); - for (int k = 0; k < count_j; k++) - if (part_is_active(&parts_j[sort_j[k].i], e)) { - sortdt_j[countdt_j] = sort_j[k]; - countdt_j += 1; - } - } - - /* Loop over the parts in ci. */ - for (int pid = count_i - 1; - pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { - - /* Get a hold of the ith part in ci. */ - struct part *restrict pi = &parts_i[sort_i[pid].i]; - const float hi = pi->h; - const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; - if (di < dj_min) continue; - - double pix[3]; - for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; - const float hig2 = hi * hi * kernel_gamma2; - - /* Look at valid dt parts only? */ - if (!part_is_active(pi, e)) { - - /* Loop over the parts in cj within dt. */ - for (int pjd = 0; pjd < countdt_j && sortdt_j[pjd].d < di; pjd++) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[sortdt_j[pjd].i]; - const float hj = pj->h; - - /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pj->x[k] - pix[k]; - r2 += dx[k] * dx[k]; - } - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) - error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hig2) { - -#ifndef WITH_OLD_VECTORIZATION + if (cell_is_active(ci, e)) { - IACT_NONSYM(r2, dx, hj, hi, pj, pi); + /* Loop over the parts in ci until nothing is within range in cj. + * We start from the centre and move outwards. */ + for (int pid = count_i - 1; pid >= 0; pid--) { -#else + /* Get a hold of the ith part in ci. */ + struct part *pi = &parts_i[sort_i[pid].i]; + const float hi = pi->h; - /* Add this interaction to the queue. */ - r2q1[icount1] = r2; - dxq1[3 * icount1 + 0] = dx[0]; - dxq1[3 * icount1 + 1] = dx[1]; - dxq1[3 * icount1 + 2] = dx[2]; - hiq1[icount1] = hj; - hjq1[icount1] = hi; - piq1[icount1] = pj; - pjq1[icount1] = pi; - icount1 += 1; - - /* Flush? */ - if (icount1 == VEC_SIZE) { - IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1); - icount1 = 0; - } + /* Skip inactive particles */ + if (!part_is_active(pi, e)) continue; -#endif - } + /* Is there anything we need to interact with ? */ + const double di = + sort_i[pid].d + max(hi, hj_max) * kernel_gamma + dx_max - rshift; + if (di < dj_min) continue; - } /* loop over the parts in cj. */ + /* Where do we have to stop when looping over cell j? */ + int last_pj = count_j - 1; + while (sort_j[last_pj].d > di && last_pj > 0) last_pj--; - } + last_pj += 1; + if (last_pj >= count_j) last_pj = count_j - 1; - /* Otherwise, look at all parts. */ - else { + /* Get some additional information about pi */ + const float hig2 = hi * hi * kernel_gamma2; + const float pix = pi->x[0] - (cj->loc[0] + shift[0]); + const float piy = pi->x[1] - (cj->loc[1] + shift[1]); + const float piz = pi->x[2] - (cj->loc[2] + shift[2]); - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { + /* Now loop over the relevant particles in cj */ + for (int pjd = 0; pjd <= last_pj; ++pjd) { - /* Get a pointer to the jth particle. */ - struct part *restrict pj = &parts_j[sort_j[pjd].i]; + /* Recover pj */ + struct part *pj = &parts_j[sort_j[pjd].i]; const float hj = pj->h; + const float hjg2 = hj * hj * kernel_gamma2; + const float pjx = pj->x[0] - cj->loc[0]; + const float pjy = pj->x[1] - cj->loc[1]; + const float pjz = pj->x[2] - cj->loc[2]; /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; - } - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) - error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hig2) { - -#ifndef WITH_OLD_VECTORIZATION - - /* Does pj need to be updated too? */ - if (part_is_active(pj, e)) - IACT(r2, dx, hi, hj, pi, pj); - else - IACT_NONSYM(r2, dx, hi, hj, pi, pj); - -#else - - /* Does pj need to be updated too? */ - if (part_is_active(pj, e)) { - - /* Add this interaction to the symmetric queue. */ - r2q2[icount2] = r2; - dxq2[3 * icount2 + 0] = dx[0]; - dxq2[3 * icount2 + 1] = dx[1]; - dxq2[3 * icount2 + 2] = dx[2]; - hiq2[icount2] = hi; - hjq2[icount2] = hj; - piq2[icount2] = pi; - pjq2[icount2] = pj; - icount2 += 1; - - /* Flush? */ - if (icount2 == VEC_SIZE) { - IACT_VEC(r2q2, dxq2, hiq2, hjq2, piq2, pjq2); - icount2 = 0; - } - - } else { - - /* Add this interaction to the non-symmetric queue. */ - r2q1[icount1] = r2; - dxq1[3 * icount1 + 0] = dx[0]; - dxq1[3 * icount1 + 1] = dx[1]; - dxq1[3 * icount1 + 2] = dx[2]; - hiq1[icount1] = hi; - hjq1[icount1] = hj; - piq1[icount1] = pi; - pjq1[icount1] = pj; - icount1 += 1; - - /* Flush? */ - if (icount1 == VEC_SIZE) { - IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1); - icount1 = 0; - } - } - -#endif - } - - } /* loop over the parts in cj. */ - } - - } /* loop over the parts in ci. */ - - /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max; - pjd++) { - - /* Get a hold of the jth part in cj. */ - struct part *restrict pj = &parts_j[sort_j[pjd].i]; - const float hj = pj->h; - const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max + rshift; - if (dj - rshift > di_max) continue; - - double pjx[3]; - for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k]; - const float hjg2 = hj * hj * kernel_gamma2; + float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - /* Is this particle outside the dt? */ - if (!part_is_active(pj, e)) { + if (r2 < hig2 || r2 < hjg2) { - /* Loop over the parts in ci. */ - for (int pid = countdt_i - 1; pid >= 0 && sortdt_i[pid].d > dj; pid--) { - - /* Get a pointer to the jth particle. */ - struct part *restrict pi = &parts_i[sortdt_i[pid].i]; - const float hi = pi->h; - - /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pi->x[k] - pjx[k]; - r2 += dx[k] * dx[k]; + IACT_NONSYM(r2, dx, hi, hj, pi, pj); } + } /* Loop over the parts in cj */ + } /* Loop over the parts in ci */ + } /* Cell ci is active */ -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) - error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) { + if (cell_is_active(cj, e)) { -#ifndef WITH_OLD_VECTORIZATION + /* Loop over the parts in cj until nothing is within range in ci. + * We start from the centre and move outwards. */ + for (int pjd = 0; pjd < count_j; pjd++) { - IACT_NONSYM(r2, dx, hi, hj, pi, pj); + /* Get a hold of the jth part in cj. */ + struct part *pj = &parts_j[sort_j[pjd].i]; + const float hj = pj->h; -#else + /* Skip inactive particles */ + if (!part_is_active(pj, e)) continue; - /* Add this interaction to the queue. */ - r2q1[icount1] = r2; - dxq1[3 * icount1 + 0] = dx[0]; - dxq1[3 * icount1 + 1] = dx[1]; - dxq1[3 * icount1 + 2] = dx[2]; - hiq1[icount1] = hi; - hjq1[icount1] = hj; - piq1[icount1] = pi; - pjq1[icount1] = pj; - icount1 += 1; - - /* Flush? */ - if (icount1 == VEC_SIZE) { - IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1); - icount1 = 0; - } + /* Is there anything we need to interact with ? */ + const double dj = sort_j[pjd].d - max(hj, hi_max) * kernel_gamma - dx_max; + if (dj > di_max - rshift) continue; -#endif - } + /* Where do we have to stop when looping over cell i? */ + int first_pi = 0; + while (sort_i[first_pi].d - rshift < dj && first_pi < count_i - 1) + first_pi++; - } /* loop over the parts in cj. */ - } + first_pi -= 1; + if (first_pi < 0) first_pi = 0; - /* Otherwise, interact with all particles in cj. */ - else { + /* Get some additional information about pj */ + const float hjg2 = hj * hj * kernel_gamma2; + const float pjx = pj->x[0] - cj->loc[0]; + const float pjy = pj->x[1] - cj->loc[1]; + const float pjz = pj->x[2] - cj->loc[2]; - /* Loop over the parts in ci. */ - for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) { + /* Now loop over the relevant particles in ci */ + for (int pid = count_i - 1; pid >= first_pi; --pid) { - /* Get a pointer to the jth particle. */ - struct part *restrict pi = &parts_i[sort_i[pid].i]; + /* Recover pi */ + struct part *pi = &parts_i[sort_i[pid].i]; const float hi = pi->h; + const float hig2 = hi * hi * kernel_gamma2; + const float pix = pi->x[0] - (cj->loc[0] + shift[0]); + const float piy = pi->x[1] - (cj->loc[1] + shift[1]); + const float piz = pi->x[2] - (cj->loc[2] + shift[2]); /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pjx[k] - pi->x[k]; - r2 += dx[k] * dx[k]; - } - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (pi->ti_drift != e->ti_current) - error("Particle pi not drifted to current time"); - if (pj->ti_drift != e->ti_current) - error("Particle pj not drifted to current time"); -#endif - - /* Hit or miss? */ - if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) { + float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; -#ifndef WITH_OLD_VECTORIZATION + if (r2 < hjg2 || r2 < hig2) { - /* Does pi need to be updated too? */ - if (part_is_active(pi, e)) - IACT(r2, dx, hj, hi, pj, pi); - else - IACT_NONSYM(r2, dx, hj, hi, pj, pi); - -#else - - /* Does pi need to be updated too? */ - if (part_is_active(pi, e)) { - - /* Add this interaction to the symmetric queue. */ - r2q2[icount2] = r2; - dxq2[3 * icount2 + 0] = dx[0]; - dxq2[3 * icount2 + 1] = dx[1]; - dxq2[3 * icount2 + 2] = dx[2]; - hiq2[icount2] = hj; - hjq2[icount2] = hi; - piq2[icount2] = pj; - pjq2[icount2] = pi; - icount2 += 1; - - /* Flush? */ - if (icount2 == VEC_SIZE) { - IACT_VEC(r2q2, dxq2, hiq2, hjq2, piq2, pjq2); - icount2 = 0; - } - - } else { - - /* Add this interaction to the non-symmetric queue. */ - r2q1[icount1] = r2; - dxq1[3 * icount1 + 0] = dx[0]; - dxq1[3 * icount1 + 1] = dx[1]; - dxq1[3 * icount1 + 2] = dx[2]; - hiq1[icount1] = hj; - hjq1[icount1] = hi; - piq1[icount1] = pj; - pjq1[icount1] = pi; - icount1 += 1; - - /* Flush? */ - if (icount1 == VEC_SIZE) { - IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1); - icount1 = 0; - } - } - -#endif + IACT_NONSYM(r2, dx, hj, hi, pj, pi); } - - } /* loop over the parts in cj. */ - } - - } /* loop over the parts in ci. */ - -#ifdef WITH_OLD_VECTORIZATION - /* Pick up any leftovers. */ - if (icount1 > 0) - for (int k = 0; k < icount1; k++) - IACT_NONSYM(r2q1[k], &dxq1[3 * k], hiq1[k], hjq1[k], piq1[k], pjq1[k]); - if (icount2 > 0) - for (int k = 0; k < icount2; k++) - IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]); -#endif - - /* Clean-up if necessary */ - if (cell_is_active(ci, e) && !cell_is_all_active(ci, e)) free(sortdt_i); - if (cell_is_active(cj, e) && !cell_is_all_active(cj, e)) free(sortdt_j); + } /* Loop over the parts in ci */ + } /* Loop over the parts in cj */ + } /* Cell cj is active */ TIMER_TOC(TIMER_DOPAIR); } @@ -1628,21 +1065,6 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { const struct engine *e = r->e; -#ifdef WITH_OLD_VECTORIZATION - int icount1 = 0; - float r2q1[VEC_SIZE] __attribute__((aligned(16))); - float hiq1[VEC_SIZE] __attribute__((aligned(16))); - float hjq1[VEC_SIZE] __attribute__((aligned(16))); - float dxq1[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq1[VEC_SIZE], *pjq1[VEC_SIZE]; - int icount2 = 0; - float r2q2[VEC_SIZE] __attribute__((aligned(16))); - float hiq2[VEC_SIZE] __attribute__((aligned(16))); - float hjq2[VEC_SIZE] __attribute__((aligned(16))); - float dxq2[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq2[VEC_SIZE], *pjq2[VEC_SIZE]; -#endif - TIMER_TIC; if (!cell_is_active(c, e)) return; @@ -1705,34 +1127,9 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hj * hj * kernel_gamma2) { -#ifndef WITH_OLD_VECTORIZATION - IACT_NONSYM(r2, dx, hj, hi, pj, pi); - -#else - - /* Add this interaction to the queue. */ - r2q1[icount1] = r2; - dxq1[3 * icount1 + 0] = dx[0]; - dxq1[3 * icount1 + 1] = dx[1]; - dxq1[3 * icount1 + 2] = dx[2]; - hiq1[icount1] = hj; - hjq1[icount1] = hi; - piq1[icount1] = pj; - pjq1[icount1] = pi; - icount1 += 1; - - /* Flush? */ - if (icount1 == VEC_SIZE) { - IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1); - icount1 = 0; - } - -#endif } - } /* loop over all other particles. */ - } /* Otherwise, interact with all candidates. */ @@ -1769,8 +1166,6 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hig2 || doj) { -#ifndef WITH_OLD_VECTORIZATION - /* Which parts need to be updated? */ if (r2 < hig2 && doj) IACT(r2, dx, hi, hj, pi, pj); @@ -1782,86 +1177,11 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { dx[2] = -dx[2]; IACT_NONSYM(r2, dx, hj, hi, pj, pi); } - -#else - - /* Does pj need to be updated too? */ - if (r2 < hig2 && doj) { - - /* Add this interaction to the symmetric queue. */ - r2q2[icount2] = r2; - dxq2[3 * icount2 + 0] = dx[0]; - dxq2[3 * icount2 + 1] = dx[1]; - dxq2[3 * icount2 + 2] = dx[2]; - hiq2[icount2] = hi; - hjq2[icount2] = hj; - piq2[icount2] = pi; - pjq2[icount2] = pj; - icount2 += 1; - - /* Flush? */ - if (icount2 == VEC_SIZE) { - IACT_VEC(r2q2, dxq2, hiq2, hjq2, piq2, pjq2); - icount2 = 0; - } - - } else if (!doj) { - - /* Add this interaction to the non-symmetric queue. */ - r2q1[icount1] = r2; - dxq1[3 * icount1 + 0] = dx[0]; - dxq1[3 * icount1 + 1] = dx[1]; - dxq1[3 * icount1 + 2] = dx[2]; - hiq1[icount1] = hi; - hjq1[icount1] = hj; - piq1[icount1] = pi; - pjq1[icount1] = pj; - icount1 += 1; - - /* Flush? */ - if (icount1 == VEC_SIZE) { - IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1); - icount1 = 0; - } - - } else { - - /* Add this interaction to the non-symmetric queue. */ - r2q1[icount1] = r2; - dxq1[3 * icount1 + 0] = -dx[0]; - dxq1[3 * icount1 + 1] = -dx[1]; - dxq1[3 * icount1 + 2] = -dx[2]; - hiq1[icount1] = hj; - hjq1[icount1] = hi; - piq1[icount1] = pj; - pjq1[icount1] = pi; - icount1 += 1; - - /* Flush? */ - if (icount1 == VEC_SIZE) { - IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1); - icount1 = 0; - } - } - -#endif } - } /* loop over all other particles. */ } - } /* loop over all particles. */ -#ifdef WITH_OLD_VECTORIZATION - /* Pick up any leftovers. */ - if (icount1 > 0) - for (int k = 0; k < icount1; k++) - IACT_NONSYM(r2q1[k], &dxq1[3 * k], hiq1[k], hjq1[k], piq1[k], pjq1[k]); - if (icount2 > 0) - for (int k = 0; k < icount2; k++) - IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]); -#endif - free(indt); TIMER_TOC(TIMER_DOSELF); @@ -1877,25 +1197,9 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { const struct engine *e = r->e; -#ifdef WITH_OLD_VECTORIZATION - int icount1 = 0; - float r2q1[VEC_SIZE] __attribute__((aligned(16))); - float hiq1[VEC_SIZE] __attribute__((aligned(16))); - float hjq1[VEC_SIZE] __attribute__((aligned(16))); - float dxq1[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq1[VEC_SIZE], *pjq1[VEC_SIZE]; - int icount2 = 0; - float r2q2[VEC_SIZE] __attribute__((aligned(16))); - float hiq2[VEC_SIZE] __attribute__((aligned(16))); - float hjq2[VEC_SIZE] __attribute__((aligned(16))); - float dxq2[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq2[VEC_SIZE], *pjq2[VEC_SIZE]; -#endif - TIMER_TIC; if (!cell_is_active(c, e)) return; - if (!cell_are_part_drifted(c, e)) error("Cell is not drifted"); struct part *restrict parts = c->parts; @@ -1954,34 +1258,9 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) { -#ifndef WITH_OLD_VECTORIZATION - IACT_NONSYM(r2, dx, hj, hi, pj, pi); - -#else - - /* Add this interaction to the queue. */ - r2q1[icount1] = r2; - dxq1[3 * icount1 + 0] = dx[0]; - dxq1[3 * icount1 + 1] = dx[1]; - dxq1[3 * icount1 + 2] = dx[2]; - hiq1[icount1] = hj; - hjq1[icount1] = hi; - piq1[icount1] = pj; - pjq1[icount1] = pi; - icount1 += 1; - - /* Flush? */ - if (icount1 == VEC_SIZE) { - IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1); - icount1 = 0; - } - -#endif } - } /* loop over all other particles. */ - } /* Otherwise, interact with all candidates. */ @@ -2016,74 +1295,16 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) { -#ifndef WITH_OLD_VECTORIZATION - /* Does pj need to be updated too? */ if (part_is_active(pj, e)) IACT(r2, dx, hi, hj, pi, pj); else IACT_NONSYM(r2, dx, hi, hj, pi, pj); - -#else - - /* Does pj need to be updated too? */ - if (part_is_active(pj, e)) { - - /* Add this interaction to the symmetric queue. */ - r2q2[icount2] = r2; - dxq2[3 * icount2 + 0] = dx[0]; - dxq2[3 * icount2 + 1] = dx[1]; - dxq2[3 * icount2 + 2] = dx[2]; - hiq2[icount2] = hi; - hjq2[icount2] = hj; - piq2[icount2] = pi; - pjq2[icount2] = pj; - icount2 += 1; - - /* Flush? */ - if (icount2 == VEC_SIZE) { - IACT_VEC(r2q2, dxq2, hiq2, hjq2, piq2, pjq2); - icount2 = 0; - } - - } else { - - /* Add this interaction to the non-symmetric queue. */ - r2q1[icount1] = r2; - dxq1[3 * icount1 + 0] = dx[0]; - dxq1[3 * icount1 + 1] = dx[1]; - dxq1[3 * icount1 + 2] = dx[2]; - hiq1[icount1] = hi; - hjq1[icount1] = hj; - piq1[icount1] = pi; - pjq1[icount1] = pj; - icount1 += 1; - - /* Flush? */ - if (icount1 == VEC_SIZE) { - IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1); - icount1 = 0; - } - } - -#endif } - } /* loop over all other particles. */ } - } /* loop over all particles. */ -#ifdef WITH_OLD_VECTORIZATION - /* Pick up any leftovers. */ - if (icount1 > 0) - for (int k = 0; k < icount1; k++) - IACT_NONSYM(r2q1[k], &dxq1[3 * k], hiq1[k], hjq1[k], piq1[k], pjq1[k]); - if (icount2 > 0) - for (int k = 0; k < icount2; k++) - IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]); -#endif - free(indt); TIMER_TOC(TIMER_DOSELF); diff --git a/src/swift.h b/src/swift.h index 1d1a7c7d04b3662c524504c292aa7d9eee2c3d09..a5556730ae965109385257c0c38bfc34277223d4 100644 --- a/src/swift.h +++ b/src/swift.h @@ -35,6 +35,7 @@ #include "engine.h" #include "error.h" #include "gravity.h" +#include "gravity_derivatives.h" #include "gravity_properties.h" #include "hydro.h" #include "hydro_properties.h" diff --git a/tests/testGravityDerivatives.c b/tests/testGravityDerivatives.c index 77240f397ee34753a05a6d18e8855bb226d7bb82..aa9941976cede293d6b8f90e5bfb19b599eb03e9 100644 --- a/tests/testGravityDerivatives.c +++ b/tests/testGravityDerivatives.c @@ -930,11 +930,8 @@ int main() { unsigned long long cpufreq = 0; clocks_set_cpufreq(cpufreq); - /* Choke on FP-exceptions */ - feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); - /* Relative tolerance */ - const double tol = 1e-4; + double tol = 1e-4; /* Get some randomness going */ const int seed = time(NULL); @@ -987,6 +984,8 @@ int main() { #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 + tol *= 2.; + /* 3rd order terms */ test(pot.D_300, D_300(dx, dy, dz, r_inv), tol, min, "D_300"); test(pot.D_030, D_030(dx, dy, dz, r_inv), tol, min, "D_030"); @@ -1021,6 +1020,8 @@ int main() { #if SELF_GRAVITY_MULTIPOLE_ORDER > 4 + tol *= 2.; + /* 5th order terms */ test(pot.D_500, D_500(dx, dy, dz, r_inv), tol, min, "D_500"); test(pot.D_050, D_050(dx, dy, dz, r_inv), tol, min, "D_050"); diff --git a/tests/tolerance_27_perturbed.dat b/tests/tolerance_27_perturbed.dat index aa86962b733e2da73211bceeb30b2345af808bb5..e1fd7518e55811386967938dfff0d62f04325cb0 100644 --- a/tests/tolerance_27_perturbed.dat +++ b/tests/tolerance_27_perturbed.dat @@ -1,4 +1,4 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 1e-4 2e-4 1e-2 1e-5 3e-6 3e-6 7e-6 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-3 1e-5 2e-3 6e-5 2e-3 2e-3 2e-3 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 4e-4 1e-6 1e0 1e-6 2e-6 2e-6 2e-6 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.3e-3 1e-5 2e-3 6e-5 2e-3 2e-3 2e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-3 1e-6 1e0 1e-6 2e-6 2e-6 2e-6