diff --git a/.gitignore b/.gitignore index 596f144ed1e790d03510c5bdec851adb3fcecc3c..ba0028d163dae64143b0c43b465152e357d042b0 100644 --- a/.gitignore +++ b/.gitignore @@ -78,6 +78,7 @@ tests/testRiemannHLLC tests/testMatrixInversion tests/testDump tests/testLogger +tests/benchmarkInteractions theory/latex/swift.pdf theory/SPH/Kernels/kernels.pdf diff --git a/tests/Makefile.am b/tests/Makefile.am index 966df5af7dd11057fb79cf7cd5b155f713752b1c..0db5c2544433012dcd7f451f535391aa81b1f802 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -31,7 +31,7 @@ TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetr check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ testSPHStep testPair test27cells test125cells testParser \ testKernel testKernelGrav testFFT testInteractions testMaths \ - testSymmetry testThreadpool \ + testSymmetry testThreadpool benchmarkInteractions \ testAdiabaticIndex testRiemannExact testRiemannTRRS \ testRiemannHLLC testMatrixInversion testDump testLogger @@ -66,6 +66,8 @@ testFFT_SOURCES = testFFT.c testInteractions_SOURCES = testInteractions.c +benchmarkInteractions_SOURCES = benchmarkInteractions.c + testAdiabaticIndex_SOURCES = testAdiabaticIndex.c testRiemannExact_SOURCES = testRiemannExact.c diff --git a/tests/benchmarkInteractions.c b/tests/benchmarkInteractions.c new file mode 100644 index 0000000000000000000000000000000000000000..5a6ef60659115f538fc0bd8c6666ab08528f5dbd --- /dev/null +++ b/tests/benchmarkInteractions.c @@ -0,0 +1,488 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#include <fenv.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "swift.h" + +#define array_align sizeof(float) * VEC_SIZE +#define ACC_THRESHOLD 1e-5 + +#ifdef NONSYM_DENSITY +#define IACT runner_iact_nonsym_density +#define IACT_VEC runner_iact_nonsym_2_vec_density +#define IACT_NAME "test_nonsym_density" +#endif + +#ifdef SYM_DENSITY +#define IACT runner_iact_density +#define IACT_VEC runner_iact_vec_density +#define IACT_NAME "test_sym_density" +#endif + +#ifdef NONSYM_FORCE +#define IACT runner_iact_nonsym_force +#define IACT_VEC runner_iact_nonsym_vec_force +#define IACT_NAME "test_nonsym_force" +#endif + +#ifdef SYM_FORCE +#define IACT runner_iact_force +#define IACT_VEC runner_iact_vec_force +#define IACT_NAME "test_sym_force" +#endif + +#ifndef IACT +#define IACT runner_iact_nonsym_density +#define IACT_VEC runner_iact_nonsym_2_vec_density +#define IACT_NAME "test_nonsym_density" +#endif + +/** + * @brief Constructs an array of particles in a valid state prior to + * a IACT_NONSYM and IACT_NONSYM_VEC call. + * + * @param count No. of particles to create + * @param offset The position of the particle offset from (0,0,0). + * @param spacing Particle spacing. + * @param h The smoothing length of the particles in units of the inter-particle + *separation. + * @param partId The running counter of IDs. + */ +struct part *make_particles(size_t count, double *offset, double spacing, double h, + long long *partId) { + + struct part *particles; + if (posix_memalign((void **)&particles, part_align, + count * sizeof(struct part)) != 0) { + error("couldn't allocate particles, no. of particles: %d", (int)count); + } + bzero(particles, count * sizeof(struct part)); + + /* Construct the particles */ + struct part *p; + + /* Set test particle at centre of unit sphere. */ + p = &particles[0]; + + /* Place the test particle at the centre of a unit sphere. */ + p->x[0] = 0.0f; + p->x[1] = 0.0f; + p->x[2] = 0.0f; + + p->h = h; + p->id = ++(*partId); + p->mass = 1.0f; + + /* Place rest of particles around the test particle + * with random position within a unit sphere. */ + for (size_t i = 1; i < count; ++i) { + p = &particles[i]; + + /* Randomise positions within a unit sphere. */ + p->x[0] = random_uniform(-1.0, 1.0); + p->x[1] = random_uniform(-1.0, 1.0); + p->x[2] = random_uniform(-1.0, 1.0); + + /* Randomise velocities. */ + p->v[0] = random_uniform(-0.05, 0.05); + p->v[1] = random_uniform(-0.05, 0.05); + p->v[2] = random_uniform(-0.05, 0.05); + + p->h = h; + p->id = ++(*partId); + p->mass = 1.0f; + } + return particles; +} + +/** + * @brief Populates particle properties needed for the force calculation. + */ +void prepare_force(struct part *parts, size_t count) { + + struct part *p; + for (size_t i = 0; i < count; ++i) { + p = &parts[i]; + p->rho = i + 1; + p->force.balsara = random_uniform(0.0, 1.0); + p->force.P_over_rho2 = i + 1; + p->force.soundspeed = random_uniform(2.0, 3.0); + p->force.v_sig = 0.0f; + p->force.h_dt = 0.0f; + } +} + +/** + * @brief Dumps all particle information to a file + */ +void dump_indv_particle_fields(char *fileName, struct part *p) { + + FILE *file = fopen(fileName, "a"); + + fprintf(file, + "%6llu %10f %10f %10f %10f %10f %10f %10e %10e %10e %13e %13e %13e " + "%13e %13e %13e %13e " + "%13e %13e %13e\n", + p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], + p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->rho, + p->density.rho_dh, p->density.wcount, p->density.wcount_dh, + p->force.h_dt, p->force.v_sig, +#if defined(MINIMAL_SPH) + 0., 0., 0., 0. +#else + p->density.div_v, p->density.rot_v[0], p->density.rot_v[1], + p->density.rot_v[2] +#endif + ); + fclose(file); +} + +/** + * @brief Creates a header for the output file + */ +void write_header(char *fileName) { + + FILE *file = fopen(fileName, "w"); + /* Write header */ + fprintf(file, + "# %4s %10s %10s %10s %10s %10s %10s %10s %10s %10s %13s %13s %13s " + "%13s %13s %13s %13s" + "%13s %13s %13s %13s\n", + "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "a_x", "a_y", + "a_z", "rho", "rho_dh", "wcount", "wcount_dh", "dh/dt", "v_sig", + "div_v", "curl_vx", "curl_vy", "curl_vz", "dS/dt"); + fprintf(file, "\n# PARTICLES BEFORE INTERACTION:\n"); + fclose(file); +} + +/** + * @brief Compares the vectorised result against + * the serial result of the interaction. + * + * @param serial_test_part Particle that has been updated serially + * @param serial_parts Particle array that has been interacted serially + * @param vec_test_part Particle that has been updated using vectors + * @param vec_parts Particle array to be interacted using vectors + * @param count No. of particles that have been interacted + * + * @return Non-zero value if difference found, 0 otherwise + */ +int check_results(struct part serial_test_part, struct part *serial_parts, + struct part vec_test_part, struct part *vec_parts, + int count) { + int result = 0; + result += compare_particles(serial_test_part, vec_test_part, ACC_THRESHOLD); + + for (int i = 0; i < count; i++) + result += compare_particles(serial_parts[i], vec_parts[i], ACC_THRESHOLD); + + return result; +} + +/* + * @brief Calls the serial and vectorised version of the non-symmetrical density + * interaction. + * + * @param test_part Particle that will be updated + * @param parts Particle array to be interacted + * @param count No. of particles to be interacted + * @param serial_inter_func Serial interaction function to be called + * @param vec_inter_func Vectorised interaction function to be called + * @param runs No. of times to call interactions + * + */ +void test_interactions(struct part test_part, struct part *parts, size_t count, + char *filePrefix, int runs) { + + ticks serial_time = 0; +#ifdef WITH_VECTORIZATION + ticks vec_time = 0; +#endif + + FILE *file; + char serial_filename[200] = ""; + char vec_filename[200] = ""; + + strcpy(serial_filename, filePrefix); + strcpy(vec_filename, filePrefix); + sprintf(serial_filename + strlen(serial_filename), "_serial.dat"); + sprintf(vec_filename + strlen(vec_filename), "_vec.dat"); + + write_header(serial_filename); + write_header(vec_filename); + + struct part pi_serial, pi_vec; + struct part pj_serial[count], pj_vec[count]; + + float r2[count] __attribute__((aligned(array_align))); + float dx[3 * count] __attribute__((aligned(array_align))); + + float r2q[count] __attribute__((aligned(array_align))); + float hiq[count] __attribute__((aligned(array_align))); + float dxq[count] __attribute__((aligned(array_align))); + + struct part *piq[count], *pjq[count]; + + float dyq[count] __attribute__((aligned(array_align))); + float dzq[count] __attribute__((aligned(array_align))); + float mjq[count] __attribute__((aligned(array_align))); + float vixq[count] __attribute__((aligned(array_align))); + float viyq[count] __attribute__((aligned(array_align))); + float vizq[count] __attribute__((aligned(array_align))); + float vjxq[count] __attribute__((aligned(array_align))); + float vjyq[count] __attribute__((aligned(array_align))); + float vjzq[count] __attribute__((aligned(array_align))); + + /* Call serial interaction a set number of times. */ + for (int k = 0; k < runs; k++) { + /* Reset particle to initial setup */ + pi_serial = test_part; + for (size_t i = 0; i < count; i++) pj_serial[i] = parts[i]; + + /* Only dump data on first run. */ + if (k == 0) { + /* Dump state of particles before serial interaction. */ + dump_indv_particle_fields(serial_filename, &pi_serial); + for (size_t i = 0; i < count; i++) + dump_indv_particle_fields(serial_filename, &pj_serial[i]); + } + + /* Perform serial interaction */ + for (size_t i = 0; i < count; i++) { + /* Compute the pairwise distance. */ + r2[i] = 0.0f; + for (int k = 0; k < 3; k++) { + int ind = (3 * i) + k; + dx[ind] = pi_serial.x[k] - pj_serial[i].x[k]; + r2[i] += dx[ind] * dx[ind]; + } + } + + const ticks tic = getticks(); + /* Perform serial interaction */ +#ifdef __ICC +#pragma novector +#endif + for (size_t i = 0; i < count; i++) { + IACT(r2[i], &(dx[3 * i]), pi_serial.h, pj_serial[i].h, &pi_serial, + &pj_serial[i]); + } + serial_time += getticks() - tic; + } + + file = fopen(serial_filename, "a"); + fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n"); + fclose(file); + + /* Dump result of serial interaction. */ + dump_indv_particle_fields(serial_filename, &pi_serial); + for (size_t i = 0; i < count; i++) + dump_indv_particle_fields(serial_filename, &pj_serial[i]); + + /* Call vector interaction a set number of times. */ + for (int k = 0; k < runs; k++) { + /* Reset particle to initial setup */ + pi_vec = test_part; + for (size_t i = 0; i < count; i++) pj_vec[i] = parts[i]; + + /* Setup arrays for vector interaction. */ + for (size_t i = 0; i < count; i++) { + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pi_vec.x[k] - pj_vec[i].x[k]; + r2 += dx[k] * dx[k]; + } + + r2q[i] = r2; + dxq[i] = dx[0]; + hiq[i] = pi_vec.h; + piq[i] = &pi_vec; + pjq[i] = &pj_vec[i]; + + dyq[i] = dx[1]; + dzq[i] = dx[2]; + mjq[i] = pj_vec[i].mass; + vixq[i] = pi_vec.v[0]; + viyq[i] = pi_vec.v[1]; + vizq[i] = pi_vec.v[2]; + vjxq[i] = pj_vec[i].v[0]; + vjyq[i] = pj_vec[i].v[1]; + vjzq[i] = pj_vec[i].v[2]; + } + + /* Only dump data on first run. */ + if (k == 0) { + /* Dump state of particles before vector interaction. */ + dump_indv_particle_fields(vec_filename, piq[0]); + for (size_t i = 0; i < count; i++) + dump_indv_particle_fields(vec_filename, pjq[i]); + } + +/* Perform vector interaction. */ +#ifdef WITH_VECTORIZATION + vector hi_vec, hi_inv_vec, vix_vec, viy_vec, viz_vec, mask, mask2; + vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum, + curlvySum, curlvzSum; + + rhoSum.v = vec_set1(0.f); + rho_dhSum.v = vec_set1(0.f); + wcountSum.v = vec_set1(0.f); + wcount_dhSum.v = vec_set1(0.f); + div_vSum.v = vec_set1(0.f); + curlvxSum.v = vec_set1(0.f); + curlvySum.v = vec_set1(0.f); + curlvzSum.v = vec_set1(0.f); + + hi_vec.v = vec_load(&hiq[0]); + vix_vec.v = vec_load(&vixq[0]); + viy_vec.v = vec_load(&viyq[0]); + viz_vec.v = vec_load(&vizq[0]); + + hi_inv_vec = vec_reciprocal(hi_vec); + mask.m = vec_setint1(0xFFFFFFFF); + mask2.m = vec_setint1(0xFFFFFFFF); + +#ifdef HAVE_AVX512_F + KNL_MASK_16 knl_mask, knl_mask2; + knl_mask = 0xFFFF; + knl_mask2 = 0xFFFF; +#endif + + const ticks vec_tic = getticks(); + + for (size_t i = 0; i < count; i += 2 * VEC_SIZE) { + + IACT_VEC(&(r2q[i]), &(dxq[i]), &(dyq[i]), &(dzq[i]), (hi_inv_vec), + (vix_vec), (viy_vec), (viz_vec), &(vjxq[i]), &(vjyq[i]), + &(vjzq[i]), &(mjq[i]), &rhoSum, &rho_dhSum, &wcountSum, + &wcount_dhSum, &div_vSum, &curlvxSum, &curlvySum, &curlvzSum, + mask, mask2, +#ifdef HAVE_AVX512_F + knl_mask, knl_mask2); +#else + 0, 0); +#endif + } + + VEC_HADD(rhoSum, piq[0]->rho); + VEC_HADD(rho_dhSum, piq[0]->density.rho_dh); + VEC_HADD(wcountSum, piq[0]->density.wcount); + VEC_HADD(wcount_dhSum, piq[0]->density.wcount_dh); + VEC_HADD(div_vSum, piq[0]->density.div_v); + VEC_HADD(curlvxSum, piq[0]->density.rot_v[0]); + VEC_HADD(curlvySum, piq[0]->density.rot_v[1]); + VEC_HADD(curlvzSum, piq[0]->density.rot_v[2]); + + vec_time += getticks() - vec_tic; +#endif + } + + file = fopen(vec_filename, "a"); + fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n"); + fclose(file); + + /* Dump result of serial interaction. */ + dump_indv_particle_fields(vec_filename, piq[0]); + for (size_t i = 0; i < count; i++) + dump_indv_particle_fields(vec_filename, pjq[i]); + +#ifdef WITH_VECTORIZATION + /* Check serial results against the vectorised results. */ + if (check_results(pi_serial, pj_serial, pi_vec, pj_vec, count)) + message("Differences found..."); +#endif + + message("The serial interactions took : %15lli ticks.", + serial_time / runs); +#ifdef WITH_VECTORIZATION + message("The vectorised interactions took : %15lli ticks.", vec_time / runs); + message("Speed up: %15fx.", (double)(serial_time) / vec_time); +#endif +} + +/* And go... */ +int main(int argc, char *argv[]) { + size_t runs = 10000; + double h = 1.0, spacing = 0.5; + double offset[3] = {0.0, 0.0, 0.0}; + size_t count = 256; + + /* Get some randomness going */ + srand(0); + + char c; + while ((c = getopt(argc, argv, "h:s:n:r:")) != -1) { + switch (c) { + case 'h': + sscanf(optarg, "%lf", &h); + break; + case 's': + sscanf(optarg, "%lf", &spacing); + case 'n': + sscanf(optarg, "%zu", &count); + break; + case 'r': + sscanf(optarg, "%zu", &runs); + break; + case '?': + error("Unknown option."); + break; + } + } + + if (h < 0 || spacing < 0) { + printf( + "\nUsage: %s [OPTIONS...]\n" + "\nGenerates a particle array with equal particle separation." + "\nThese are then interacted using runner_iact_density and " + "runner_iact_vec_density." + "\n\nOptions:" + "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>" + "\n-s SPACING=0.5 - Spacing between particles" + "\n-n NUMBER=9 - No. of particles", + argv[0]); + exit(1); + } + + /* Correct count so that VEC_SIZE of particles interact with the test + * particle. */ + count = count - (count % VEC_SIZE) + 1; + + /* Build the infrastructure */ + static long long partId = 0; + struct part test_particle; + struct part *particles = make_particles(count, offset, spacing, h, &partId); + +#if defined(NONSYM_FORCE) || defined(SYM_FORCE) + prepare_force(particles, count); +#endif + + test_particle = particles[0]; + /* Call the non-sym density test. */ + message("Testing %s interaction...", IACT_NAME); + test_interactions(test_particle, &particles[1], count - 1, IACT_NAME, runs); + + return 0; +}