Commit 4baf1331 authored by James Willis's avatar James Willis
Browse files

Merge branch 'master' into debug_interactions

parents 2e47a130 602118e3
......@@ -776,7 +776,11 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
/* Self-interaction? */
if (l->t->type == task_type_self)
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
runner_doself_subset_density_vec(r, finger, parts, pid, count);
#else
runner_doself_subset_density(r, finger, parts, pid, count);
#endif
/* Otherwise, pair interaction? */
else if (l->t->type == task_type_pair) {
......
......@@ -2424,8 +2424,11 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
/* Otherwise, compute self-interaction. */
else
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
runner_doself_subset_density_vec(r, ci, parts, ind, count);
#else
DOSELF_SUBSET(r, ci, parts, ind, count);
#endif
} /* self-interaction. */
/* Otherwise, it's a pair interaction. */
......
......@@ -528,8 +528,6 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
struct runner *r, struct cell *restrict c) {
#ifdef WITH_VECTORIZATION
const int num_vec_proc = NUM_VEC_PROC;
/* Get some local variables */
const struct engine *e = r->e;
const timebin_t max_active_bin = e->max_active_bin;
......@@ -567,8 +565,6 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
/* Is the ith particle active? */
if (!part_is_active_no_debug(pi, max_active_bin)) continue;
vector v_r2;
const float hi = cell_cache->h[pid];
/* Fill particle pi vectors. */
......@@ -598,9 +594,9 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
/* Pad cache if there is a serial remainder. */
int count_align = count;
const int rem = count % (num_vec_proc * VEC_SIZE);
const int rem = count % (NUM_VEC_PROC * VEC_SIZE);
if (rem != 0) {
count_align += (num_vec_proc * VEC_SIZE) - rem;
count_align += (NUM_VEC_PROC * VEC_SIZE) - rem;
/* Set positions to the same as particle pi so when the r2 > 0 mask is
* applied these extra contributions are masked out.*/
......@@ -613,7 +609,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
/* Find all of particle pi's interacions and store needed values in the
* secondary cache.*/
for (int pjd = 0; pjd < count_align; pjd += (num_vec_proc * VEC_SIZE)) {
for (int pjd = 0; pjd < count_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) {
/* Load 2 sets of vectors from the particle cache. */
const vector v_pjx = vector_load(&cell_cache->x[pjd]);
......@@ -625,7 +621,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
const vector v_pjz2 = vector_load(&cell_cache->z[pjd + VEC_SIZE]);
/* Compute the pairwise distance. */
vector v_dx, v_dy, v_dz;
vector v_dx, v_dy, v_dz, v_r2;
vector v_dx_2, v_dy_2, v_dz_2, v_r2_2;
v_dx.v = vec_sub(v_pix.v, v_pjx.v);
......@@ -704,7 +700,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
vec_init_mask_true(int_mask2);
/* Perform interaction with 2 vectors. */
for (int pjd = 0; pjd < icount_align; pjd += (num_vec_proc * VEC_SIZE)) {
for (int pjd = 0; pjd < icount_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) {
runner_iact_nonsym_2_vec_density(
&int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd],
&int_cache.dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz,
......@@ -733,6 +729,207 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
#endif /* WITH_VECTORIZATION */
}
/**
* @brief Compute the interactions between a cell pair, but only for the
* given indices in ci. (Vectorised)
*
* @param r The #runner.
* @param c The first #cell.
* @param parts The #part to interact.
* @param ind The list of indices of particles in @c c to interact with.
* @param pi_count The number of particles in @c ind.
*/
__attribute__((always_inline)) INLINE void runner_doself_subset_density_vec(
struct runner *r, struct cell *restrict c, struct part *restrict parts,
int *restrict ind, int pi_count) {
#ifdef WITH_VECTORIZATION
const int count = c->count;
TIMER_TIC
/* Get the particle cache from the runner and re-allocate
* the cache if it is not big enough for the cell. */
struct cache *restrict cell_cache = &r->ci_cache;
if (cell_cache->count < count) {
cache_init(cell_cache, count);
}
/* Read the particles from the cell and store them locally in the cache. */
cache_read_particles(c, cell_cache);
/* Create secondary cache to store particle interactions. */
struct c2_cache int_cache;
int icount = 0, icount_align = 0;
/* Loop over the subset of particles in the parts that need updating. */
for (int pid = 0; pid < pi_count; pid++) {
/* Get a pointer to the ith particle. */
struct part *pi = &parts[ind[pid]];
#ifdef SWIFT_DEBUG_CHECKS
const struct engine *e = r->e;
if (!part_is_active(pi, e)) error("Inactive particle in subset function!");
#endif
const float hi = pi->h;
/* Fill particle pi vectors. */
const vector v_pix = vector_set1(pi->x[0] - c->loc[0]);
const vector v_piy = vector_set1(pi->x[1] - c->loc[1]);
const vector v_piz = vector_set1(pi->x[2] - c->loc[2]);
const vector v_hi = vector_set1(hi);
const vector v_vix = vector_set1(pi->v[0]);
const vector v_viy = vector_set1(pi->v[1]);
const vector v_viz = vector_set1(pi->v[2]);
const float hig2 = hi * hi * kernel_gamma2;
const vector v_hig2 = vector_set1(hig2);
/* Get the inverse of hi. */
vector v_hi_inv = vec_reciprocal(v_hi);
/* Reset cumulative sums of update vectors. */
vector v_rhoSum = vector_setzero();
vector v_rho_dhSum = vector_setzero();
vector v_wcountSum = vector_setzero();
vector v_wcount_dhSum = vector_setzero();
vector v_div_vSum = vector_setzero();
vector v_curlvxSum = vector_setzero();
vector v_curlvySum = vector_setzero();
vector v_curlvzSum = vector_setzero();
/* Pad cache if there is a serial remainder. */
int count_align = count;
const int rem = count % (NUM_VEC_PROC * VEC_SIZE);
if (rem != 0) {
const int pad = (NUM_VEC_PROC * VEC_SIZE) - rem;
count_align += pad;
/* Set positions to the same as particle pi so when the r2 > 0 mask is
* applied these extra contributions are masked out.*/
for (int i = count; i < count_align; i++) {
cell_cache->x[i] = v_pix.f[0];
cell_cache->y[i] = v_piy.f[0];
cell_cache->z[i] = v_piz.f[0];
}
}
/* Find all of particle pi's interacions and store needed values in the
* secondary cache.*/
for (int pjd = 0; pjd < count_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) {
/* Load 2 sets of vectors from the particle cache. */
const vector v_pjx = vector_load(&cell_cache->x[pjd]);
const vector v_pjy = vector_load(&cell_cache->y[pjd]);
const vector v_pjz = vector_load(&cell_cache->z[pjd]);
const vector v_pjx2 = vector_load(&cell_cache->x[pjd + VEC_SIZE]);
const vector v_pjy2 = vector_load(&cell_cache->y[pjd + VEC_SIZE]);
const vector v_pjz2 = vector_load(&cell_cache->z[pjd + VEC_SIZE]);
/* Compute the pairwise distance. */
vector v_dx, v_dy, v_dz, v_r2;
vector v_dx_2, v_dy_2, v_dz_2, v_r2_2;
/* p_i - p_j */
v_dx.v = vec_sub(v_pix.v, v_pjx.v);
v_dx_2.v = vec_sub(v_pix.v, v_pjx2.v);
v_dy.v = vec_sub(v_piy.v, v_pjy.v);
v_dy_2.v = vec_sub(v_piy.v, v_pjy2.v);
v_dz.v = vec_sub(v_piz.v, v_pjz.v);
v_dz_2.v = vec_sub(v_piz.v, v_pjz2.v);
/* r2 = dx^2 + dy^2 + dz^2 */
v_r2.v = vec_mul(v_dx.v, v_dx.v);
v_r2_2.v = vec_mul(v_dx_2.v, v_dx_2.v);
v_r2.v = vec_fma(v_dy.v, v_dy.v, v_r2.v);
v_r2_2.v = vec_fma(v_dy_2.v, v_dy_2.v, v_r2_2.v);
v_r2.v = vec_fma(v_dz.v, v_dz.v, v_r2.v);
v_r2_2.v = vec_fma(v_dz_2.v, v_dz_2.v, v_r2_2.v);
/* Form a mask from r2 < hig2 and r2 > 0.*/
mask_t v_doi_mask, v_doi_mask_self_check, v_doi_mask2,
v_doi_mask2_self_check;
/* Form r2 > 0 mask and r2 < hig2 mask. */
vec_create_mask(v_doi_mask_self_check, vec_cmp_gt(v_r2.v, vec_setzero()));
vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_hig2.v));
/* Form r2 > 0 mask and r2 < hig2 mask. */
vec_create_mask(v_doi_mask2_self_check,
vec_cmp_gt(v_r2_2.v, vec_setzero()));
vec_create_mask(v_doi_mask2, vec_cmp_lt(v_r2_2.v, v_hig2.v));
/* Combine two masks and form integer masks. */
const int doi_mask = vec_is_mask_true(v_doi_mask) &
vec_is_mask_true(v_doi_mask_self_check);
const int doi_mask2 = vec_is_mask_true(v_doi_mask2) &
vec_is_mask_true(v_doi_mask2_self_check);
/* If there are any interactions left pack interaction values into c2
* cache. */
if (doi_mask) {
storeInteractions(doi_mask, pjd, &v_r2, &v_dx, &v_dy, &v_dz, cell_cache,
&int_cache, &icount, &v_rhoSum, &v_rho_dhSum,
&v_wcountSum, &v_wcount_dhSum, &v_div_vSum,
&v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_hi_inv,
v_vix, v_viy, v_viz);
}
if (doi_mask2) {
storeInteractions(doi_mask2, pjd + VEC_SIZE, &v_r2_2, &v_dx_2, &v_dy_2,
&v_dz_2, cell_cache, &int_cache, &icount, &v_rhoSum,
&v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
&v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum,
v_hi_inv, v_vix, v_viy, v_viz);
}
}
/* Perform padded vector remainder interactions if any are present. */
calcRemInteractions(&int_cache, icount, &v_rhoSum, &v_rho_dhSum,
&v_wcountSum, &v_wcount_dhSum, &v_div_vSum,
&v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_hi_inv,
v_vix, v_viy, v_viz, &icount_align);
/* Initialise masks to true in case remainder interactions have been
* performed. */
mask_t int_mask, int_mask2;
vec_init_mask_true(int_mask);
vec_init_mask_true(int_mask2);
/* Perform interaction with 2 vectors. */
for (int pjd = 0; pjd < icount_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) {
runner_iact_nonsym_2_vec_density(
&int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd],
&int_cache.dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz,
&int_cache.vxq[pjd], &int_cache.vyq[pjd], &int_cache.vzq[pjd],
&int_cache.mq[pjd], &v_rhoSum, &v_rho_dhSum, &v_wcountSum,
&v_wcount_dhSum, &v_div_vSum, &v_curlvxSum, &v_curlvySum,
&v_curlvzSum, int_mask, int_mask2, 0);
}
/* Perform horizontal adds on vector sums and store result in particle pi.
*/
VEC_HADD(v_rhoSum, pi->rho);
VEC_HADD(v_rho_dhSum, pi->density.rho_dh);
VEC_HADD(v_wcountSum, pi->density.wcount);
VEC_HADD(v_wcount_dhSum, pi->density.wcount_dh);
VEC_HADD(v_div_vSum, pi->density.div_v);
VEC_HADD(v_curlvxSum, pi->density.rot_v[0]);
VEC_HADD(v_curlvySum, pi->density.rot_v[1]);
VEC_HADD(v_curlvzSum, pi->density.rot_v[2]);
/* Reset interaction count. */
icount = 0;
} /* loop over all particles. */
TIMER_TOC(timer_doself_subset);
#endif /* WITH_VECTORIZATION */
}
/**
* @brief Compute the force cell self-interaction (non-symmetric) using vector
* intrinsics with one particle pi at a time.
......@@ -747,7 +944,6 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
const struct engine *e = r->e;
struct part *restrict pi;
int count_align;
const int num_vec_proc = 1;
const timebin_t max_active_bin = e->max_active_bin;
......@@ -822,9 +1018,9 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
/* Pad cache if there is a serial remainder. */
count_align = count;
int rem = count % (num_vec_proc * VEC_SIZE);
int rem = count % VEC_SIZE;
if (rem != 0) {
int pad = (num_vec_proc * VEC_SIZE) - rem;
int pad = VEC_SIZE - rem;
count_align += pad;
......@@ -845,7 +1041,7 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
/* Find all of particle pi's interacions and store needed values in the
* secondary cache.*/
for (int pjd = 0; pjd < count_align; pjd += (num_vec_proc * VEC_SIZE)) {
for (int pjd = 0; pjd < count_align; pjd += VEC_SIZE) {
/* Load 1 set of vectors from the particle cache. */
vector hjg2;
......
......@@ -34,6 +34,10 @@
#include "vector.h"
/* Function prototypes. */
void runner_doself_subset_density_vec(struct runner *r,
struct cell *restrict ci,
struct part *restrict parts,
int *restrict ind, int count);
void runner_doself1_density_vec(struct runner *r, struct cell *restrict c);
void runner_doself2_force_vec(struct runner *r, struct cell *restrict c);
void runner_dopair1_density_vec(struct runner *r, struct cell *restrict ci,
......
......@@ -30,7 +30,7 @@ TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetr
# List of test programs to compile
check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \
testSPHStep testActivePair test27cells test125cells testParser \
testSPHStep testActivePair test27cells test27cells_subset test125cells testParser \
testKernel testFFT testInteractions testMaths \
testSymmetry testThreadpool \
testAdiabaticIndex testRiemannExact testRiemannTRRS \
......@@ -60,6 +60,10 @@ testActivePair_SOURCES = testActivePair.c
test27cells_SOURCES = test27cells.c
test27cells_subset_SOURCES = test27cells.c
test27cells_subset_CFLAGS = $(AM_CFLAGS) -DDOSELF_SUBSET
testPeriodicBC_SOURCES = testPeriodicBC.c
test125cells_SOURCES = test125cells.c
......
......@@ -32,15 +32,25 @@
#if defined(WITH_VECTORIZATION)
#define DOSELF1 runner_doself1_density_vec
#define DOSELF1_SUBSET runner_doself_subset_density_vec
#define DOPAIR1 runner_dopair1_branch_density
#define DOSELF1_NAME "runner_doself1_density_vec"
#ifdef DOSELF_SUBSET
#define DOSELF1_NAME "runner_doself_subset_density_vec"
#else
#define DOSELF1_NAME "runner_doself_density_vec"
#endif
#define DOPAIR1_NAME "runner_dopair1_density_vec"
#endif
#ifndef DOSELF1
#define DOSELF1 runner_doself1_density
#define DOSELF1_SUBSET runner_doself_subset_density
#ifdef DOSELF_SUBSET
#define DOSELF1_NAME "runner_doself1_subset_density"
#else
#define DOSELF1_NAME "runner_doself1_density"
#endif
#endif
#ifndef DOPAIR1
#define DOPAIR1 runner_dopair1_branch_density
......@@ -300,6 +310,13 @@ void runner_doself1_density(struct runner *r, struct cell *ci);
void runner_doself1_density_vec(struct runner *r, struct cell *ci);
void runner_dopair1_branch_density(struct runner *r, struct cell *ci,
struct cell *cj);
void runner_doself_subset_density(struct runner *r, struct cell *restrict ci,
struct part *restrict parts,
int *restrict ind, int count);
void runner_doself_subset_density_vec(struct runner *r,
struct cell *restrict ci,
struct part *restrict parts,
int *restrict ind, int count);
/* And go... */
int main(int argc, char *argv[]) {
......@@ -464,19 +481,32 @@ int main(int argc, char *argv[]) {
DOPAIR1(&runner, main_cell, cells[j]);
const ticks sub_toc = getticks();
timings[j] += sub_toc - sub_tic;
timings[j] += getticks() - sub_tic;
}
}
#ifdef DOSELF_SUBSET
int *pid = NULL;
int count = 0;
if ((pid = malloc(sizeof(int) * main_cell->count)) == NULL)
error("Can't allocate memory for pid.");
for (int k = 0; k < main_cell->count; k++)
if (part_is_active(&main_cell->parts[k], &engine)) {
pid[count] = k;
++count;
}
#endif
/* And now the self-interaction */
const ticks self_tic = getticks();
#ifdef DOSELF_SUBSET
DOSELF1_SUBSET(&runner, main_cell, main_cell->parts, pid, count);
#else
DOSELF1(&runner, main_cell);
#endif
const ticks self_toc = getticks();
timings[13] += self_toc - self_tic;
timings[13] += getticks() - self_tic;
#endif
......
#!/bin/bash
# Test for particles with the same smoothing length
for v in {0..3}
# List each test that should be run
declare -a TEST_LIST=(test27cells test27cells_subset)
# Run same test for each executable
for TEST in "${TEST_LIST[@]}"
do
# Test for particles with the same smoothing length
for v in {0..3}
do
echo ""
rm -f brute_force_27_standard.dat swift_dopair_27_standard.dat
echo "Running ./test27cells -n 6 -r 1 -d 0 -f standard -v $v"
./test27cells -n 6 -r 1 -d 0 -f standard -v $v
echo "Running ./$TEST -n 6 -r 1 -d 0 -f standard -v $v"
./$TEST -n 6 -r 1 -d 0 -f standard -v $v
if [ -e brute_force_27_standard.dat ]
then
if python @srcdir@/difffloat.py brute_force_27_standard.dat swift_dopair_27_standard.dat @srcdir@/tolerance_27_normal.dat 6
then
echo "Accuracy test passed"
else
echo "Accuracy test failed"
exit 1
fi
if python @srcdir@/difffloat.py brute_force_27_standard.dat swift_dopair_27_standard.dat @srcdir@/tolerance_27_normal.dat 6
then
echo "Accuracy test passed"
else
echo "Accuracy test failed"
exit 1
fi
else
echo "Error Missing test output file"
exit 1
echo "Error Missing test output file"
exit 1
fi
echo "------------"
done
# Test for particles with random smoothing lengths
for v in {0..3}
do
done
# Test for particles with random smoothing lengths
for v in {0..3}
do
echo ""
rm -f brute_force_27_standard.dat swift_dopair_27_standard.dat
echo "Running ./test27cells -n 6 -r 1 -d 0 -f standard -v $v -p 1.1"
./test27cells -n 6 -r 1 -d 0 -f standard -v $v -p 1.1
echo "Running ./$TEST -n 6 -r 1 -d 0 -f standard -v $v -p 1.1"
./$TEST -n 6 -r 1 -d 0 -f standard -v $v -p 1.1
if [ -e brute_force_27_standard.dat ]
then
if python @srcdir@/difffloat.py brute_force_27_standard.dat swift_dopair_27_standard.dat @srcdir@/tolerance_27_perturbed_h.dat 6
then
echo "Accuracy test passed"
else
echo "Accuracy test failed"
exit 1
fi
if python @srcdir@/difffloat.py brute_force_27_standard.dat swift_dopair_27_standard.dat @srcdir@/tolerance_27_perturbed_h.dat 6
then
echo "Accuracy test passed"
else
echo "Accuracy test failed"
exit 1
fi
else
echo "Error Missing test output file"
exit 1
echo "Error Missing test output file"
exit 1
fi
echo "------------"
done
# Test for particles with random smoothing lengths
for v in {0..3}
do
done
# Test for particles with random smoothing lengths
for v in {0..3}
do
echo ""
rm -f brute_force_27_standard.dat swift_dopair_27_standard.dat
echo "Running ./test27cells -n 6 -r 1 -d 0 -f standard -v $v -p 1.3"
./test27cells -n 6 -r 1 -d 0 -f standard -v $v -p 1.3
echo "Running ./$TEST -n 6 -r 1 -d 0 -f standard -v $v -p 1.3"
./$TEST -n 6 -r 1 -d 0 -f standard -v $v -p 1.3
if [ -e brute_force_27_standard.dat ]
then
if python @srcdir@/difffloat.py brute_force_27_standard.dat swift_dopair_27_standard.dat @srcdir@/tolerance_27_perturbed_h2.dat 6
then
echo "Accuracy test passed"
else
echo "Accuracy test failed"
exit 1
fi
if python @srcdir@/difffloat.py brute_force_27_standard.dat swift_dopair_27_standard.dat @srcdir@/tolerance_27_perturbed_h2.dat 6
then
echo "Accuracy test passed"
else
echo "Accuracy test failed"
exit 1
fi
else
echo "Error Missing test output file"
exit 1
echo "Error Missing test output file"
exit 1
fi
echo "------------"
done
done
exit $?
# ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 3e-6 1e-4 5e-4 1.5e-2 1.4e-5 3e-6 3e-6 1e-5
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.5e-6 2.5e-2 1e-5 5.86e-3 4.96e-4 3e-3 3.7e-3 3e-3
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.5e-6 2.5e-2 1e-5 5.86e-3 4.96e-4 3e-3 4.5e-3 3e-3
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e0 1e-6 4e-6 4e-6 4e-6
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment