Skip to content
Snippets Groups Projects
Commit b2d2bf64 authored by James Willis's avatar James Willis
Browse files

Added test that benchmarks the interaction functions against their vectorised counterparts.

parent 3264ba07
Branches
Tags
1 merge request!306Added test that benchmarks the interaction functions against their vectorised counterparts.
......@@ -31,7 +31,7 @@ TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetr
check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \
testSPHStep testPair test27cells test125cells testParser \
testKernel testKernelGrav testFFT testInteractions testMaths \
testSymmetry testThreadpool \
testSymmetry testThreadpool benchmarkInteractions \
testAdiabaticIndex testRiemannExact testRiemannTRRS \
testRiemannHLLC testMatrixInversion testDump testLogger
......@@ -66,6 +66,8 @@ testFFT_SOURCES = testFFT.c
testInteractions_SOURCES = testInteractions.c
benchmarkInteractions_SOURCES = benchmarkInteractions.c
testAdiabaticIndex_SOURCES = testAdiabaticIndex.c
testRiemannExact_SOURCES = testRiemannExact.c
......
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#include <fenv.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "swift.h"
#define array_align sizeof(float) * VEC_SIZE
#define ACC_THRESHOLD 1e-5
#ifdef NONSYM_DENSITY
#define IACT runner_iact_nonsym_density
#define IACT_VEC runner_iact_nonsym_2_vec_density
#define IACT_NAME "test_nonsym_density"
#endif
#ifdef SYM_DENSITY
#define IACT runner_iact_density
#define IACT_VEC runner_iact_vec_density
#define IACT_NAME "test_sym_density"
#endif
#ifdef NONSYM_FORCE
#define IACT runner_iact_nonsym_force
#define IACT_VEC runner_iact_nonsym_vec_force
#define IACT_NAME "test_nonsym_force"
#endif
#ifdef SYM_FORCE
#define IACT runner_iact_force
#define IACT_VEC runner_iact_vec_force
#define IACT_NAME "test_sym_force"
#endif
#ifndef IACT
#define IACT runner_iact_nonsym_density
#define IACT_VEC runner_iact_nonsym_2_vec_density
#define IACT_NAME "test_nonsym_density"
#endif
/**
* @brief Constructs an array of particles in a valid state prior to
* a IACT_NONSYM and IACT_NONSYM_VEC call.
*
* @param count No. of particles to create
* @param offset The position of the particle offset from (0,0,0).
* @param spacing Particle spacing.
* @param h The smoothing length of the particles in units of the inter-particle
*separation.
* @param partId The running counter of IDs.
*/
struct part *make_particles(int count, double *offset, double spacing, double h,
long long *partId) {
struct part *particles;
if (posix_memalign((void **)&particles, part_align,
count * sizeof(struct part)) != 0) {
error("couldn't allocate particles, no. of particles: %d", (int)count);
}
bzero(particles, count * sizeof(struct part));
/* Construct the particles */
struct part *p;
/* Set test particle at centre of unit sphere. */
p = &particles[0];
/* Place the test particle at the centre of a unit sphere. */
p->x[0] = 0.0f;
p->x[1] = 0.0f;
p->x[2] = 0.0f;
p->h = h;
p->id = ++(*partId);
p->mass = 1.0f;
/* Place rest of particles around the test particle
* with random position within a unit sphere. */
for (size_t i = 1; i < count; ++i) {
p = &particles[i];
/* Randomise positions within a unit sphere. */
p->x[0] = random_uniform(-1.0, 1.0);
p->x[1] = random_uniform(-1.0, 1.0);
p->x[2] = random_uniform(-1.0, 1.0);
/* Randomise velocities. */
p->v[0] = random_uniform(-0.05, 0.05);
p->v[1] = random_uniform(-0.05, 0.05);
p->v[2] = random_uniform(-0.05, 0.05);
p->h = h;
p->id = ++(*partId);
p->mass = 1.0f;
}
return particles;
}
/**
* @brief Populates particle properties needed for the force calculation.
*/
void prepare_force(struct part *parts, int count) {
struct part *p;
for (size_t i = 0; i < count; ++i) {
p = &parts[i];
p->rho = i + 1;
p->force.balsara = random_uniform(0.0, 1.0);
p->force.P_over_rho2 = i + 1;
p->force.soundspeed = random_uniform(2.0, 3.0);
p->force.v_sig = 0.0f;
p->force.h_dt = 0.0f;
}
}
/**
* @brief Dumps all particle information to a file
*/
void dump_indv_particle_fields(char *fileName, struct part *p) {
FILE *file = fopen(fileName, "a");
fprintf(file,
"%6llu %10f %10f %10f %10f %10f %10f %10e %10e %10e %13e %13e %13e "
"%13e %13e %13e %13e "
"%13e %13e %13e\n",
p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2],
p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->rho,
p->density.rho_dh, p->density.wcount, p->density.wcount_dh,
p->force.h_dt, p->force.v_sig,
#if defined(MINIMAL_SPH)
0., 0., 0., 0.
#else
p->density.div_v, p->density.rot_v[0], p->density.rot_v[1],
p->density.rot_v[2]
#endif
);
fclose(file);
}
/**
* @brief Creates a header for the output file
*/
void write_header(char *fileName) {
FILE *file = fopen(fileName, "w");
/* Write header */
fprintf(file,
"# %4s %10s %10s %10s %10s %10s %10s %10s %10s %10s %13s %13s %13s "
"%13s %13s %13s %13s"
"%13s %13s %13s %13s\n",
"ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "a_x", "a_y",
"a_z", "rho", "rho_dh", "wcount", "wcount_dh", "dh/dt", "v_sig",
"div_v", "curl_vx", "curl_vy", "curl_vz", "dS/dt");
fprintf(file, "\n# PARTICLES BEFORE INTERACTION:\n");
fclose(file);
}
/**
* @brief Compares the vectorised result against
* the serial result of the interaction.
*
* @param serial_test_part Particle that has been updated serially
* @param serial_parts Particle array that has been interacted serially
* @param vec_test_part Particle that has been updated using vectors
* @param vec_parts Particle array to be interacted using vectors
* @param count No. of particles that have been interacted
*
* @return Non-zero value if difference found, 0 otherwise
*/
int check_results(struct part serial_test_part, struct part *serial_parts,
struct part vec_test_part, struct part *vec_parts,
int count) {
int result = 0;
result += compare_particles(serial_test_part, vec_test_part, ACC_THRESHOLD);
for (int i = 0; i < count; i++)
result += compare_particles(serial_parts[i], vec_parts[i], ACC_THRESHOLD);
return result;
}
/*
* @brief Calls the serial and vectorised version of the non-symmetrical density
* interaction.
*
* @param test_part Particle that will be updated
* @param parts Particle array to be interacted
* @param count No. of particles to be interacted
* @param serial_inter_func Serial interaction function to be called
* @param vec_inter_func Vectorised interaction function to be called
* @param runs No. of times to call interactions
*
*/
void test_interactions(struct part test_part, struct part *parts, int count,
char *filePrefix, int runs) {
ticks serial_time = 0;
#ifdef WITH_VECTORIZATION
ticks vec_time = 0;
#endif
FILE *file;
char serial_filename[200] = "";
char vec_filename[200] = "";
strcpy(serial_filename, filePrefix);
strcpy(vec_filename, filePrefix);
sprintf(serial_filename + strlen(serial_filename), "_serial.dat");
sprintf(vec_filename + strlen(vec_filename), "_vec.dat");
write_header(serial_filename);
write_header(vec_filename);
struct part pi_serial, pi_vec;
struct part pj_serial[count], pi_list_vec[count], pj_vec[count];
float r2[count] __attribute__((aligned(array_align)));
float dx[3 * count] __attribute__((aligned(array_align)));
float r2q[count] __attribute__((aligned(array_align)));
float hiq[count] __attribute__((aligned(array_align)));
float hjq[count] __attribute__((aligned(array_align)));
float dxq[count] __attribute__((aligned(array_align)));
struct part *piq[count], *pjq[count];
float dyq[count] __attribute__((aligned(array_align)));
float dzq[count] __attribute__((aligned(array_align)));
float miq[count] __attribute__((aligned(array_align)));
float mjq[count] __attribute__((aligned(array_align)));
float vixq[count] __attribute__((aligned(array_align)));
float viyq[count] __attribute__((aligned(array_align)));
float vizq[count] __attribute__((aligned(array_align)));
float vjxq[count] __attribute__((aligned(array_align)));
float vjyq[count] __attribute__((aligned(array_align)));
float vjzq[count] __attribute__((aligned(array_align)));
/* Call serial interaction a set number of times. */
for (int k = 0; k < runs; k++) {
/* Reset particle to initial setup */
pi_serial = test_part;
for (int i = 0; i < count; i++) pj_serial[i] = parts[i];
/* Only dump data on first run. */
if (k == 0) {
/* Dump state of particles before serial interaction. */
dump_indv_particle_fields(serial_filename, &pi_serial);
for (size_t i = 0; i < count; i++)
dump_indv_particle_fields(serial_filename, &pj_serial[i]);
}
/* Perform serial interaction */
for (int i = 0; i < count; i++) {
/* Compute the pairwise distance. */
r2[i] = 0.0f;
for (int k = 0; k < 3; k++) {
int ind = (3 * i) + k;
dx[ind] = pi_serial.x[k] - pj_serial[i].x[k];
r2[i] += dx[ind] * dx[ind];
}
}
const ticks tic = getticks();
/* Perform serial interaction */
#pragma novector
for (int i = 0; i < count; i++) {
IACT(r2[i], &(dx[3 * i]), pi_serial.h, pj_serial[i].h, &pi_serial,
&pj_serial[i]);
}
serial_time += getticks() - tic;
}
file = fopen(serial_filename, "a");
fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n");
fclose(file);
/* Dump result of serial interaction. */
dump_indv_particle_fields(serial_filename, &pi_serial);
for (size_t i = 0; i < count; i++)
dump_indv_particle_fields(serial_filename, &pj_serial[i]);
/* Call vector interaction a set number of times. */
for (int k = 0; k < runs; k++) {
/* Reset particle to initial setup */
pi_vec = test_part;
for (int i = 0; i < count; i++) pj_vec[i] = parts[i];
for (int i = 0; i < count; i++) pi_list_vec[i] = test_part;
/* Setup arrays for vector interaction. */
for (int i = 0; i < count; i++) {
/* Compute the pairwise distance. */
float r2 = 0.0f;
float dx[3];
for (int k = 0; k < 3; k++) {
dx[k] = pi_vec.x[k] - pj_vec[i].x[k];
r2 += dx[k] * dx[k];
}
r2q[i] = r2;
dxq[i] = dx[0];
hiq[i] = pi_vec.h;
hjq[i] = pj_vec[i].h;
piq[i] = &pi_vec;
pjq[i] = &pj_vec[i];
dyq[i] = dx[1];
dzq[i] = dx[2];
miq[i] = pi_vec.mass;
mjq[i] = pj_vec[i].mass;
vixq[i] = pi_vec.v[0];
viyq[i] = pi_vec.v[1];
vizq[i] = pi_vec.v[2];
vjxq[i] = pj_vec[i].v[0];
vjyq[i] = pj_vec[i].v[1];
vjzq[i] = pj_vec[i].v[2];
}
/* Only dump data on first run. */
if (k == 0) {
/* Dump state of particles before vector interaction. */
dump_indv_particle_fields(vec_filename, piq[0]);
for (size_t i = 0; i < count; i++)
dump_indv_particle_fields(vec_filename, pjq[i]);
}
/* Perform vector interaction. */
#ifdef WITH_VECTORIZATION
vector hi_vec, hi_inv_vec, vix_vec, viy_vec, viz_vec, mask, mask2;
vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum,
curlvySum, curlvzSum;
rhoSum.v = vec_set1(0.f);
rho_dhSum.v = vec_set1(0.f);
wcountSum.v = vec_set1(0.f);
wcount_dhSum.v = vec_set1(0.f);
div_vSum.v = vec_set1(0.f);
curlvxSum.v = vec_set1(0.f);
curlvySum.v = vec_set1(0.f);
curlvzSum.v = vec_set1(0.f);
hi_vec.v = vec_load(&hiq[0]);
vix_vec.v = vec_load(&vixq[0]);
viy_vec.v = vec_load(&viyq[0]);
viz_vec.v = vec_load(&vizq[0]);
hi_inv_vec = vec_reciprocal(hi_vec);
mask.m = vec_setint1(0xFFFFFFFF);
mask2.m = vec_setint1(0xFFFFFFFF);
#ifdef HAVE_AVX512_F
KNL_MASK_16 knl_mask, knl_mask2;
knl_mask = 0xFFFF;
knl_mask2 = 0xFFFF;
#endif
const ticks vec_tic = getticks();
for (int i = 0; i < count; i += 2 * VEC_SIZE) {
IACT_VEC(&(r2q[i]), &(dxq[i]), &(dyq[i]), &(dzq[i]), (hi_inv_vec),
(vix_vec), (viy_vec), (viz_vec), &(vjxq[i]), &(vjyq[i]),
&(vjzq[i]), &(mjq[i]), &rhoSum, &rho_dhSum, &wcountSum,
&wcount_dhSum, &div_vSum, &curlvxSum, &curlvySum, &curlvzSum,
mask, mask2,
#ifdef HAVE_AVX512_F
knl_mask, knl_mask2);
#else
0, 0);
#endif
}
VEC_HADD(rhoSum, piq[0]->rho);
VEC_HADD(rho_dhSum, piq[0]->density.rho_dh);
VEC_HADD(wcountSum, piq[0]->density.wcount);
VEC_HADD(wcount_dhSum, piq[0]->density.wcount_dh);
VEC_HADD(div_vSum, piq[0]->density.div_v);
VEC_HADD(curlvxSum, piq[0]->density.rot_v[0]);
VEC_HADD(curlvySum, piq[0]->density.rot_v[1]);
VEC_HADD(curlvzSum, piq[0]->density.rot_v[2]);
vec_time += getticks() - vec_tic;
#endif
}
file = fopen(vec_filename, "a");
fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n");
fclose(file);
/* Dump result of serial interaction. */
dump_indv_particle_fields(vec_filename, piq[0]);
for (size_t i = 0; i < count; i++)
dump_indv_particle_fields(vec_filename, pjq[i]);
#ifdef WITH_VECTORIZATION
/* Check serial results against the vectorised results. */
if (check_results(pi_serial, pj_serial, pi_vec, pj_vec, count))
message("Differences found...");
#endif
message("The serial interactions took : %15lli ticks.",
serial_time / runs);
#ifdef WITH_VECTORIZATION
message("The vectorised interactions took : %15lli ticks.", vec_time / runs);
message("Speed up: %15fx.", (double)(serial_time) / vec_time);
#endif
}
/* And go... */
int main(int argc, char *argv[]) {
size_t runs = 10000;
double h = 1.0, spacing = 0.5;
double offset[3] = {0.0, 0.0, 0.0};
int count = 256;
/* Get some randomness going */
srand(0);
char c;
while ((c = getopt(argc, argv, "h:s:n:r:")) != -1) {
switch (c) {
case 'h':
sscanf(optarg, "%lf", &h);
break;
case 's':
sscanf(optarg, "%lf", &spacing);
case 'n':
sscanf(optarg, "%d", &count);
break;
case 'r':
sscanf(optarg, "%zu", &runs);
break;
case '?':
error("Unknown option.");
break;
}
}
if (h < 0 || spacing < 0) {
printf(
"\nUsage: %s [OPTIONS...]\n"
"\nGenerates a particle array with equal particle separation."
"\nThese are then interacted using runner_iact_density and "
"runner_iact_vec_density."
"\n\nOptions:"
"\n-h DISTANCE=1.2348 - Smoothing length in units of <x>"
"\n-s SPACING=0.5 - Spacing between particles"
"\n-n NUMBER=9 - No. of particles",
argv[0]);
exit(1);
}
/* Correct count so that VEC_SIZE of particles interact with the test
* particle. */
count = count - (count % VEC_SIZE) + 1;
/* Build the infrastructure */
static long long partId = 0;
struct part test_particle;
struct part *particles = make_particles(count, offset, spacing, h, &partId);
#if defined(NONSYM_FORCE) || defined(SYM_FORCE)
prepare_force(particles, count);
#endif
test_particle = particles[0];
/* Call the non-sym density test. */
message("Testing %s interaction...", IACT_NAME);
test_interactions(test_particle, &particles[1], count - 1, IACT_NAME, runs);
return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment