/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2016 Matthieu Schaller (schaller@strw.leidenuniv.nl)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#ifndef SWIFT_GRAVITY_CACHE_H
#define SWIFT_GRAVITY_CACHE_H
/* Config parameters. */
#include <config.h>
/* Local headers */
#include "accumulate.h"
#include "align.h"
#include "error.h"
#include "gravity.h"
#include "multipole_accept.h"
#include "vector.h"
/**
* @brief A SoA object for the #gpart of a cell.
*
* This is used to help vectorize the leaf-leaf gravity interactions.
*/
struct gravity_cache {
/*! #gpart x position. */
float *restrict x SWIFT_CACHE_ALIGN;
/*! #gpart y position. */
float *restrict y SWIFT_CACHE_ALIGN;
/*! #gpart z position. */
float *restrict z SWIFT_CACHE_ALIGN;
/*! #gpart softening length. */
float *restrict epsilon SWIFT_CACHE_ALIGN;
/*! #gpart mass. */
float *restrict m SWIFT_CACHE_ALIGN;
/*! #gpart x acceleration. */
float *restrict a_x SWIFT_CACHE_ALIGN;
/*! #gpart y acceleration. */
float *restrict a_y SWIFT_CACHE_ALIGN;
/*! #gpart z acceleration. */
float *restrict a_z SWIFT_CACHE_ALIGN;
/*! #gpart potential. */
float *restrict pot SWIFT_CACHE_ALIGN;
/*! Is this #gpart active? */
int *restrict active SWIFT_CACHE_ALIGN;
/*! Can this #gpart use an M2P interaction? */
int *restrict use_mpole SWIFT_CACHE_ALIGN;
/*! Cache size */
int count;
};
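/*
 * Illustrative sketch only (not part of the SWIFT API): the point of the
 * SoA layout above is that each field is a contiguous, aligned stream, so a
 * leaf-leaf kernel can be auto-vectorized. The names `ci_cache`, `x_j`,
 * `y_j`, `z_j` and `gcount_padded` below are hypothetical and only show the
 * intended access pattern:
 *
 *   for (int i = 0; i < gcount_padded; i++) {
 *     const float dx = ci_cache->x[i] - x_j;
 *     const float dy = ci_cache->y[i] - y_j;
 *     const float dz = ci_cache->z[i] - z_j;
 *     const float r2 = dx * dx + dy * dy + dz * dz;
 *     // ...evaluate the kernel and accumulate into ci_cache->a_x[i],
 *     // ci_cache->a_y[i], ci_cache->a_z[i] and ci_cache->pot[i]
 *   }
 */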
/**
* @brief Frees the memory allocated in a #gravity_cache
*
* @param c The #gravity_cache to free.
*/
static INLINE void gravity_cache_clean(struct gravity_cache *c) {
if (c->count > 0) {
swift_free("gravity_cache", c->x);
swift_free("gravity_cache", c->y);
swift_free("gravity_cache", c->z);
swift_free("gravity_cache", c->epsilon);
swift_free("gravity_cache", c->m);
swift_free("gravity_cache", c->a_x);
swift_free("gravity_cache", c->a_y);
swift_free("gravity_cache", c->a_z);
swift_free("gravity_cache", c->pot);
swift_free("gravity_cache", c->active);
swift_free("gravity_cache", c->use_mpole);
}
c->count = 0;
}
/**
* @brief Allocates memory for the #gpart caches used in the leaf-leaf
* interactions.
*
* The cache is padded for the vector size and aligned properly.
*
* @param c The #gravity_cache to allocate.
* @param count The number of #gpart to allocate for (space_splitsize is a
* good choice).
*/
static INLINE void gravity_cache_init(struct gravity_cache *c,
const int count) {
/* Size of the gravity cache */
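/* Round count up to the next multiple of the vector length; e.g. with
 * VEC_SIZE == 8, count == 100 gives padded_count == 104. Note that a count
 * already a multiple of VEC_SIZE still gains one full extra vector. */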
const int padded_count = count - (count % VEC_SIZE) + VEC_SIZE;
const size_t sizeBytesF = padded_count * sizeof(float);
const size_t sizeBytesI = padded_count * sizeof(int);
/* Delete old stuff if any */
gravity_cache_clean(c);
int e = 0;
e += swift_memalign("gravity_cache", (void **)&c->x, SWIFT_CACHE_ALIGNMENT,
sizeBytesF);
e += swift_memalign("gravity_cache", (void **)&c->y, SWIFT_CACHE_ALIGNMENT,
sizeBytesF);
e += swift_memalign("gravity_cache", (void **)&c->z, SWIFT_CACHE_ALIGNMENT,
sizeBytesF);
e += swift_memalign("gravity_cache", (void **)&c->epsilon,
SWIFT_CACHE_ALIGNMENT, sizeBytesF);
e += swift_memalign("gravity_cache", (void **)&c->m, SWIFT_CACHE_ALIGNMENT,
sizeBytesF);
e += swift_memalign("gravity_cache", (void **)&c->a_x, SWIFT_CACHE_ALIGNMENT,
sizeBytesF);
e += swift_memalign("gravity_cache", (void **)&c->a_y, SWIFT_CACHE_ALIGNMENT,
sizeBytesF);
e += swift_memalign("gravity_cache", (void **)&c->a_z, SWIFT_CACHE_ALIGNMENT,
sizeBytesF);
e += swift_memalign("gravity_cache", (void **)&c->pot, SWIFT_CACHE_ALIGNMENT,
sizeBytesF);
e += swift_memalign("gravity_cache", (void **)&c->active,
SWIFT_CACHE_ALIGNMENT, sizeBytesI);
e += swift_memalign("gravity_cache", (void **)&c->use_mpole,
SWIFT_CACHE_ALIGNMENT, sizeBytesI);
if (e != 0) error("Couldn't allocate gravity cache, size: %d", padded_count);
c->count = padded_count;
}
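/*
 * Usage sketch (illustrative, with a hypothetical size of 400 particles):
 * a cache is typically allocated once and reused for many leaf-leaf tasks.
 * Note that gravity_cache_init() calls gravity_cache_clean(), so the count
 * field must be zero before the very first initialisation:
 *
 *   struct gravity_cache ci_cache;
 *   ci_cache.count = 0;
 *   gravity_cache_init(&ci_cache, 400);
 *   // ... populate, interact and write back many times ...
 *   gravity_cache_clean(&ci_cache);
 */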
/**
* @brief Zero all the output fields (acceleration and potential) of a
* #gravity_cache.
*
* @param c The #gravity_cache to zero.
* @param gcount_padded The padded size of the cache arrays.
*/
INLINE static void gravity_cache_zero_output(struct gravity_cache *c,
const int gcount_padded) {
#ifdef SWIFT_DEBUG_CHECKS
if (gcount_padded % VEC_SIZE != 0)
error("Padded gcount size not a multiple of the vector length");
#endif
/* Make the compiler understand we are in happy vectorization land */
swift_declare_aligned_ptr(float, a_x, c->a_x, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, a_y, c->a_y, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, a_z, c->a_z, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, pot, c->pot, SWIFT_CACHE_ALIGNMENT);
swift_assume_size(gcount_padded, VEC_SIZE);
/* Zero everything */
bzero(a_x, gcount_padded * sizeof(float));
bzero(a_y, gcount_padded * sizeof(float));
bzero(a_z, gcount_padded * sizeof(float));
bzero(pot, gcount_padded * sizeof(float));
}
/**
* @brief Fills a #gravity_cache structure with some #gpart and shifts them.
*
* Also checks whether each #gpart can use an M2P interaction instead of the
* more expensive P2P.
*
* @param max_active_bin The largest active bin in the current time-step.
* @param allow_mpole Are we allowing the use of multipoles?
* @param periodic Are we using periodic BCs?
* @param dim The size of the simulation volume along each dimension.
* @param c The #gravity_cache to fill.
* @param gparts The #gpart array to read from.
* @param gcount The number of particles to read.
* @param gcount_padded The number of particles to read, padded to the next
* multiple of the vector length.
* @param shift A shift to apply to all the particles.
* @param CoM The position of the multipole.
* @param multipole The multipole to check against.
* @param cell The cell we play with (to get reasonable padding positions).
* @param grav_props The global gravity properties.
*/
INLINE static void gravity_cache_populate(
const timebin_t max_active_bin, const int allow_mpole, const int periodic,
const float dim[3], struct gravity_cache *c,
const struct gpart *restrict gparts, const int gcount,
const int gcount_padded, const double shift[3], const float CoM[3],
const struct gravity_tensors *multipole, const struct cell *cell,
const struct gravity_props *grav_props) {
#ifdef SWIFT_DEBUG_CHECKS
if (gcount_padded < gcount) error("Invalid padded cache size. Too small.");
if (gcount_padded % VEC_SIZE != 0)
error("Padded gravity cache size invalid. Not a multiple of SIMD length.");
#endif
/* Do we need to grow the cache? */
if (c->count < gcount_padded) gravity_cache_init(c, gcount_padded + VEC_SIZE);
/* Make the compiler understand we are in happy vectorization land */
swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, y, c->y, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(int, use_mpole, c->use_mpole,
SWIFT_CACHE_ALIGNMENT);
swift_assume_size(gcount_padded, VEC_SIZE);
/* Fill the input caches */
#if !defined(SWIFT_DEBUG_CHECKS) && _OPENMP >= 201307
#pragma omp simd
#endif
for (int i = 0; i < gcount; ++i) {
x[i] = (float)(gparts[i].x[0] - shift[0]);
y[i] = (float)(gparts[i].x[1] - shift[1]);
z[i] = (float)(gparts[i].x[2] - shift[2]);
epsilon[i] = gravity_get_softening(&gparts[i], grav_props);
#ifdef SWIFT_DEBUG_CHECKS
if (gparts[i].time_bin == time_bin_not_created) {
error("Found an extra gpart in the gravity cache");
}
#endif
/* Make a dummy particle out of the inhibited ones */
if (gparts[i].time_bin == time_bin_inhibited) {
m[i] = 0.f;
active[i] = 0;
} else {
m[i] = gparts[i].mass;
active[i] = (int)(gparts[i].time_bin <= max_active_bin);
}
/* Distance to the CoM of the other cell. */
float dx = x[i] - CoM[0];
float dy = y[i] - CoM[1];
float dz = z[i] - CoM[2];
/* Apply periodic BC */
if (periodic) {
dx = nearestf(dx, dim[0]);
dy = nearestf(dy, dim[1]);
dz = nearestf(dz, dim[2]);
}
const float r2 = dx * dx + dy * dy + dz * dz;
/* Check whether we can use the multipole instead of P-P */
use_mpole[i] = allow_mpole && gravity_M2P_accept(grav_props, &gparts[i],
multipole, r2, periodic);
}
#ifdef SWIFT_DEBUG_CHECKS
if (gcount_padded < gcount) error("Padded counter smaller than counter");
#endif
/* Particles used for padding should get impossible positions
* that have a reasonable magnitude. We use the cell width for this */
const float pos_padded[3] = {-2.f * (float)cell->width[0],
-2.f * (float)cell->width[1],
-2.f * (float)cell->width[2]};
const float eps_padded = epsilon[0];
/* Pad the caches */
for (int i = gcount; i < gcount_padded; ++i) {
x[i] = pos_padded[0];
y[i] = pos_padded[1];
z[i] = pos_padded[2];
epsilon[i] = eps_padded;
m[i] = 0.f;
active[i] = 0;
use_mpole[i] = 0;
}
/* Zero the output as well */
gravity_cache_zero_output(c, gcount_padded);
}
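/*
 * Illustrative sketch (hypothetical names, not the actual SWIFT task code):
 * after populating the cache, a pair interaction would typically branch on
 * the per-particle use_mpole flag, doing the cheaper M2P interaction with
 * the other cell's multipole when the acceptance test passed, and a full
 * P2P sum otherwise:
 *
 *   gravity_cache_populate(max_active_bin, allow_mpole, periodic, dim,
 *                          &ci_cache, gparts_i, gcount_i, gcount_padded_i,
 *                          shift_i, CoM_j, multi_j, ci, grav_props);
 *   for (int i = 0; i < gcount_i; i++) {
 *     if (!ci_cache.active[i]) continue;
 *     if (ci_cache.use_mpole[i]) {
 *       // interact particle i with the multipole of the other cell (M2P)
 *     } else {
 *       // interact particle i with all the particles of the other cell (P2P)
 *     }
 *   }
 */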
/**
* @brief Fills a #gravity_cache structure with some #gpart and shifts them.
*
* @param max_active_bin The largest active bin in the current time-step.
* @param c The #gravity_cache to fill.
* @param gparts The #gpart array to read from.
* @param gcount The number of particles to read.
* @param gcount_padded The number of particles to read, padded to the next
* multiple of the vector length.
* @param shift A shift to apply to all the particles.
* @param cell The cell we play with (to get reasonable padding positions).
* @param grav_props The global gravity properties.
*/
INLINE static void gravity_cache_populate_no_mpole(
const timebin_t max_active_bin, struct gravity_cache *c,
const struct gpart *restrict gparts, const int gcount,
const int gcount_padded, const double shift[3], const struct cell *cell,
const struct gravity_props *grav_props) {
#ifdef SWIFT_DEBUG_CHECKS
if (gcount_padded < gcount) error("Invalid padded cache size. Too small.");
if (gcount_padded % VEC_SIZE != 0)
error("Padded gravity cache size invalid. Not a multiple of SIMD length.");
#endif
/* Do we need to grow the cache? */
if (c->count < gcount_padded) gravity_cache_init(c, gcount_padded + VEC_SIZE);
/* Make the compiler understand we are in happy vectorization land */
swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, y, c->y, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
swift_assume_size(gcount_padded, VEC_SIZE);
/* Fill the input caches */
for (int i = 0; i < gcount; ++i) {
x[i] = (float)(gparts[i].x[0] - shift[0]);
y[i] = (float)(gparts[i].x[1] - shift[1]);
z[i] = (float)(gparts[i].x[2] - shift[2]);
epsilon[i] = gravity_get_softening(&gparts[i], grav_props);
#ifdef SWIFT_DEBUG_CHECKS
if (gparts[i].time_bin == time_bin_not_created) {
error("Found an extra gpart in the gravity cache");
}
#endif
/* Make a dummy particle out of the inhibited ones */
if (gparts[i].time_bin == time_bin_inhibited) {
m[i] = 0.f;
active[i] = 0;
} else {
m[i] = gparts[i].mass;
active[i] = (int)(gparts[i].time_bin <= max_active_bin);
}
}
#ifdef SWIFT_DEBUG_CHECKS
if (gcount_padded < gcount) error("Padded counter smaller than counter");
#endif
/* Particles used for padding should get impossible positions
* that have a reasonable magnitude. We use the cell width for this */
const float pos_padded[3] = {-2.f * (float)cell->width[0],
-2.f * (float)cell->width[1],
-2.f * (float)cell->width[2]};
const float eps_padded = epsilon[0];
/* Pad the caches */
for (int i = gcount; i < gcount_padded; ++i) {
x[i] = pos_padded[0];
y[i] = pos_padded[1];
z[i] = pos_padded[2];
epsilon[i] = eps_padded;
m[i] = 0.f;
active[i] = 0;
}
/* Zero the output as well */
gravity_cache_zero_output(c, gcount_padded);
}
/**
* @brief Fills a #gravity_cache structure with some #gpart and makes them all
* use the multipole.
*
* @param max_active_bin The largest active bin in the current time-step.
* @param periodic Are we using periodic BCs?
* @param dim The size of the simulation volume along each dimension.
* @param c The #gravity_cache to fill.
* @param gparts The #gpart array to read from.
* @param gcount The number of particles to read.
* @param gcount_padded The number of particles to read, padded to the next
* multiple of the vector length.
* @param cell The cell we play with (to get reasonable padding positions).
* @param CoM The position of the multipole.
* @param multipole The multipole to check against.
* @param grav_props The global gravity properties.
*/
INLINE static void gravity_cache_populate_all_mpole(
const timebin_t max_active_bin, const int periodic, const float dim[3],
struct gravity_cache *c, const struct gpart *restrict gparts,
const int gcount, const int gcount_padded, const struct cell *cell,
const float CoM[3], const struct gravity_tensors *multipole,
const struct gravity_props *grav_props) {
#ifdef SWIFT_DEBUG_CHECKS
if (gcount_padded < gcount) error("Invalid padded cache size. Too small.");
if (gcount_padded % VEC_SIZE != 0)
error("Padded gravity cache size invalid. Not a multiple of SIMD length.");
#endif
/* Do we need to grow the cache? */
if (c->count < gcount_padded) gravity_cache_init(c, gcount_padded + VEC_SIZE);
/* Make the compiler understand we are in happy vectorization land */
swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, y, c->y, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(int, use_mpole, c->use_mpole,
SWIFT_CACHE_ALIGNMENT);
swift_assume_size(gcount_padded, VEC_SIZE);
/* Fill the input caches */
for (int i = 0; i < gcount; ++i) {
x[i] = (float)(gparts[i].x[0]);
y[i] = (float)(gparts[i].x[1]);
z[i] = (float)(gparts[i].x[2]);
epsilon[i] = gravity_get_softening(&gparts[i], grav_props);
m[i] = gparts[i].mass;
active[i] = (int)(gparts[i].time_bin <= max_active_bin);
use_mpole[i] = 1;
#ifdef SWIFT_DEBUG_CHECKS
/* Distance to the CoM of the other cell. */
float dx = x[i] - CoM[0];
float dy = y[i] - CoM[1];
float dz = z[i] - CoM[2];
/* Apply periodic BC */
if (periodic) {
dx = nearestf(dx, dim[0]);
dy = nearestf(dy, dim[1]);
dz = nearestf(dz, dim[2]);
}
const float r2 = dx * dx + dy * dy + dz * dz;
if (!gravity_M2P_accept(grav_props, &gparts[i], multipole, r2, periodic))
error("Using m-pole where the test fails");
#endif
}
#ifdef SWIFT_DEBUG_CHECKS
if (gcount_padded < gcount) error("Padded counter smaller than counter");
#endif
/* Particles used for padding should get impossible positions
* that have a reasonable magnitude. We use the cell width for this */
const float pos_padded[3] = {-2.f * (float)cell->width[0],
-2.f * (float)cell->width[1],
-2.f * (float)cell->width[2]};
const float eps_padded = epsilon[0];
/* Pad the caches */
for (int i = gcount; i < gcount_padded; ++i) {
x[i] = pos_padded[0];
y[i] = pos_padded[1];
z[i] = pos_padded[2];
epsilon[i] = eps_padded;
m[i] = 0.f;
active[i] = 0;
use_mpole[i] = 0;
}
/* Zero the output as well */
gravity_cache_zero_output(c, gcount_padded);
}
/**
* @brief Write the output cache values back to the active #gpart.
*
* This function obviously omits the padded values in the cache.
*
* @param c The #gravity_cache to read from.
* @param gparts The #gpart array to write to.
* @param gcount The number of particles to write.
*/
INLINE static void gravity_cache_write_back(const struct gravity_cache *c,
struct gpart *restrict gparts,
const int gcount) {
/* Make the compiler understand we are in happy vectorization land */
swift_declare_aligned_ptr(float, a_x, c->a_x, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, a_y, c->a_y, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, a_z, c->a_z, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, pot, c->pot, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
/* Write stuff back to the particles */
#if !defined(SWIFT_DEBUG_CHECKS) && _OPENMP >= 201307
#pragma omp simd
#endif
for (int i = 0; i < gcount; ++i) {
if (active[i]) {
gparts[i].a_grav[0] += a_x[i];
gparts[i].a_grav[1] += a_y[i];
gparts[i].a_grav[2] += a_z[i];
gravity_add_comoving_potential(&gparts[i], pot[i]);
}
}
}
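/*
 * Illustrative end-of-task sketch (hypothetical names): accelerations and
 * potentials accumulated in the cache are only committed to the real
 * particles once all interactions are done, and only for active particles:
 *
 *   // ... P2P / M2P kernels have filled ci_cache.a_x, a_y, a_z and pot ...
 *   gravity_cache_write_back(&ci_cache, gparts_i, gcount_i);
 */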
#endif /* SWIFT_GRAVITY_CACHE_H */