Commit 0edc7d11 authored by Matthieu Schaller
Browse files

Merge branch 'avx2-optimized-checks' into 'master'

Fixes for checks on optimized AVX2 architectures

See merge request !997
parents 9bde0118 f8c88733
...@@ -50,10 +50,10 @@ __attribute__((always_inline, const)) INLINE static float approx_erfcf( ...@@ -50,10 +50,10 @@ __attribute__((always_inline, const)) INLINE static float approx_erfcf(
* @brief Approximate version of expf(x) using a 4th order Taylor expansion * @brief Approximate version of expf(x) using a 4th order Taylor expansion
* *
* The absolute error is smaller than 3 * 10^-6 for -0.2 < x < 0.2. * The absolute error is smaller than 3 * 10^-6 for -0.2 < x < 0.2.
* The absolute error is smaller than 2 * 10^-7 for -0.1 < x < 0.1. * The absolute error is smaller than 3 * 10^-7 for -0.1 < x < 0.1.
* The relative error is smaller than 1 * 10^-6 for -0.2 < x < 0.2. * The relative error is smaller than 1 * 10^-6 for -0.2 < x < 0.2.
* The relative error is smaller than 4 * 10^-8 for -0.1 < x < 0.1. * The relative error is smaller than 3 * 10^-7 for -0.1 < x < 0.1.
* *
* @param x The number to take the exponential of. * @param x The number to take the exponential of.
*/ */
......
...@@ -253,7 +253,8 @@ void end_calculation_density(struct cell *c, const struct cosmology *cosmo) { ...@@ -253,7 +253,8 @@ void end_calculation_density(struct cell *c, const struct cosmology *cosmo) {
*/ */
void end_calculation_force(struct cell *c, const struct cosmology *cosmo) { void end_calculation_force(struct cell *c, const struct cosmology *cosmo) {
for (int pid = 0; pid < c->hydro.count; pid++) { for (int pid = 0; pid < c->hydro.count; pid++) {
hydro_end_force(&c->hydro.parts[pid], cosmo); struct part *volatile part = &c->hydro.parts[pid];
hydro_end_force(part, cosmo);
} }
} }
......
...@@ -50,7 +50,8 @@ int main(int argc, char *argv[]) { ...@@ -50,7 +50,8 @@ int main(int argc, char *argv[]) {
message("executing %i runs of each command.", num_vals); message("executing %i runs of each command.", num_vals);
/* Create and fill an array of floats. */ /* Create and fill an array of floats. */
float *data = (float *)malloc(sizeof(float) * num_vals); float *data;
posix_memalign((void **)&data, 64, num_vals*sizeof(float));
for (int k = 0; k < num_vals; k++) { for (int k = 0; k < num_vals; k++) {
data[k] = (float)rand() / RAND_MAX; data[k] = (float)rand() / RAND_MAX;
data[k] = (1.0f - data[k]) * range_min + data[k] * range_max; data[k] = (1.0f - data[k]) * range_min + data[k] * range_max;
......
...@@ -45,23 +45,23 @@ int main(int argc, char *argv[]) { ...@@ -45,23 +45,23 @@ int main(int argc, char *argv[]) {
printf("Absolute difference too large !\n"); printf("Absolute difference too large !\n");
error = 1; error = 1;
} }
if (abs > 1.2e-7 && fabsf(x) <= 0.1) { if (abs > 3e-7 && fabsf(x) <= 0.1) {
printf("Absolute difference too large !\n"); printf("Absolute difference too large !\n");
error = 1; error = 2;
} }
if (rel > 1e-6 && fabsf(x) <= 0.2) { if (rel > 1e-6 && fabsf(x) <= 0.2) {
printf("Relative difference too large !\n"); printf("Relative difference too large !\n");
error = 1; error = 3;
} }
if (rel > 4e-8 && fabsf(x) <= 0.1) { if (rel > 3e-7 && fabsf(x) <= 0.1) {
printf("Relative difference too large !\n"); printf("Relative difference too large !\n");
error = 1; error = 4;
} }
if (error) { if (error > 0) {
printf("%2d: x= %f exp(x)= %e approx_exp(x)=%e abs=%e rel=%e\n", i, x, printf("%2d/%d: x= %f exp(x)= %e approx_exp(x)=%e abs=%e rel=%e\n", i,
exp_correct, exp_approx, abs, rel); error, x, exp_correct, exp_approx, abs, rel);
return 1; return 1;
} }
} }
......
...@@ -78,7 +78,10 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, ...@@ -78,7 +78,10 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
enum velocity_types vel) { enum velocity_types vel) {
const size_t count = n * n * n; const size_t count = n * n * n;
const double volume = size * size * size; const double volume = size * size * size;
struct cell *cell = (struct cell *)malloc(sizeof(struct cell)); struct cell *cell = NULL;
if (posix_memalign((void **)&cell, cell_align, sizeof(struct cell)) != 0) {
error("couldn't allocate cell");
}
bzero(cell, sizeof(struct cell)); bzero(cell, sizeof(struct cell));
if (posix_memalign((void **)&cell->hydro.parts, part_align, if (posix_memalign((void **)&cell->hydro.parts, part_align,
...@@ -290,7 +293,7 @@ void runner_dopair1_branch_density(struct runner *r, struct cell *ci, ...@@ -290,7 +293,7 @@ void runner_dopair1_branch_density(struct runner *r, struct cell *ci,
struct cell *cj); struct cell *cj);
void runner_doself1_branch_density(struct runner *r, struct cell *c); void runner_doself1_branch_density(struct runner *r, struct cell *c);
void test_boundary_conditions(struct cell **cells, struct runner runner, void test_boundary_conditions(struct cell **cells, struct runner *runner,
const int loc_i, const int loc_j, const int loc_k, const int loc_i, const int loc_j, const int loc_k,
const int dim, char *swiftOutputFileName, const int dim, char *swiftOutputFileName,
char *bruteForceOutputFileName) { char *bruteForceOutputFileName) {
...@@ -303,10 +306,10 @@ void test_boundary_conditions(struct cell **cells, struct runner runner, ...@@ -303,10 +306,10 @@ void test_boundary_conditions(struct cell **cells, struct runner runner,
/* Run all the pairs */ /* Run all the pairs */
#ifdef WITH_VECTORIZATION #ifdef WITH_VECTORIZATION
runner.ci_cache.count = 0; runner->ci_cache.count = 0;
cache_init(&runner.ci_cache, 512); cache_init(&runner->ci_cache, 512);
runner.cj_cache.count = 0; runner->cj_cache.count = 0;
cache_init(&runner.cj_cache, 512); cache_init(&runner->cj_cache, 512);
#endif #endif
/* Now loop over all the neighbours of this cell /* Now loop over all the neighbours of this cell
...@@ -324,17 +327,17 @@ void test_boundary_conditions(struct cell **cells, struct runner runner, ...@@ -324,17 +327,17 @@ void test_boundary_conditions(struct cell **cells, struct runner runner,
/* Get the neighbouring cell */ /* Get the neighbouring cell */
struct cell *cj = cells[iii * (dim * dim) + jjj * dim + kkk]; struct cell *cj = cells[iii * (dim * dim) + jjj * dim + kkk];
if (cj != main_cell) DOPAIR1(&runner, main_cell, cj); if (cj != main_cell) DOPAIR1(runner, main_cell, cj);
} }
} }
} }
/* And now the self-interaction */ /* And now the self-interaction */
DOSELF1(&runner, main_cell); DOSELF1(runner, main_cell);
/* Let's get physical ! */ /* Let's get physical ! */
end_calculation(main_cell, runner.e->cosmology); end_calculation(main_cell, runner->e->cosmology);
/* Dump particles from the main cell. */ /* Dump particles from the main cell. */
dump_particle_fields(swiftOutputFileName, main_cell, loc_i, loc_j, loc_k); dump_particle_fields(swiftOutputFileName, main_cell, loc_i, loc_j, loc_k);
...@@ -359,16 +362,16 @@ void test_boundary_conditions(struct cell **cells, struct runner runner, ...@@ -359,16 +362,16 @@ void test_boundary_conditions(struct cell **cells, struct runner runner,
/* Get the neighbouring cell */ /* Get the neighbouring cell */
struct cell *cj = cells[iii * (dim * dim) + jjj * dim + kkk]; struct cell *cj = cells[iii * (dim * dim) + jjj * dim + kkk];
if (cj != main_cell) pairs_all_density(&runner, main_cell, cj); if (cj != main_cell) pairs_all_density(runner, main_cell, cj);
} }
} }
} }
/* And now the self-interaction */ /* And now the self-interaction */
self_all_density(&runner, main_cell); self_all_density(runner, main_cell);
/* Let's get physical ! */ /* Let's get physical ! */
end_calculation(main_cell, runner.e->cosmology); end_calculation(main_cell, runner->e->cosmology);
/* Dump */ /* Dump */
dump_particle_fields(bruteForceOutputFileName, main_cell, loc_i, loc_j, dump_particle_fields(bruteForceOutputFileName, main_cell, loc_i, loc_j,
...@@ -491,8 +494,9 @@ int main(int argc, char *argv[]) { ...@@ -491,8 +494,9 @@ int main(int argc, char *argv[]) {
engine.hydro_properties = &hp; engine.hydro_properties = &hp;
engine.nodeID = NODE_ID; engine.nodeID = NODE_ID;
struct runner runner; struct runner real_runner;
runner.e = &engine; struct runner *runner = &real_runner;
runner->e = &engine;
struct cosmology cosmo; struct cosmology cosmo;
cosmology_init_no_cosmo(&cosmo); cosmology_init_no_cosmo(&cosmo);
...@@ -508,9 +512,9 @@ int main(int argc, char *argv[]) { ...@@ -508,9 +512,9 @@ int main(int argc, char *argv[]) {
cells[i * (dim * dim) + j * dim + k] = make_cell( cells[i * (dim * dim) + j * dim + k] = make_cell(
particles, offset, size, h, rho, &partId, perturbation, vel); particles, offset, size, h, rho, &partId, perturbation, vel);
runner_do_drift_part(&runner, cells[i * (dim * dim) + j * dim + k], 0); runner_do_drift_part(runner, cells[i * (dim * dim) + j * dim + k], 0);
runner_do_hydro_sort(&runner, cells[i * (dim * dim) + j * dim + k], runner_do_hydro_sort(runner, cells[i * (dim * dim) + j * dim + k],
0x1FFF, 0, 0); 0x1FFF, 0, 0);
} }
} }
......
...@@ -200,15 +200,24 @@ int main(int argc, char *argv[]) { ...@@ -200,15 +200,24 @@ int main(int argc, char *argv[]) {
/* Check the total surface area */ /* Check the total surface area */
assert(fabs(Atot - 1.0f) < 1.e-6); assert(fabs(Atot - 1.0f) < 1.e-6);
/* Check the neighbour relations for an arbitrary cell: cell 44 /* Check the neighbour relations for an arbitrary cell: cell 44 We plotted
We plotted the grid and manually found the correct neighbours and their the grid and manually found the correct neighbours and their
order. */ order. Variation is found when optimizing, so we have two possible
assert(cells[44].nvert == 4); outcomes... */
assert(cells[44].ngbs[0] == 34); if (cells[44].nvert == 5) {
assert(cells[44].ngbs[1] == 45); assert(cells[44].nvert == 5);
assert(cells[44].ngbs[2] == 54); assert(cells[44].ngbs[0] == 43);
assert(cells[44].ngbs[3] == 43); assert(cells[44].ngbs[1] == 34);
assert(cells[44].ngbs[2] == 45);
assert(cells[44].ngbs[3] == 55);
} else {
assert(cells[44].nvert == 4);
assert(cells[44].ngbs[0] == 34);
assert(cells[44].ngbs[1] == 45);
assert(cells[44].ngbs[2] == 54);
assert(cells[44].ngbs[3] == 43);
}
message("Done."); message("Done.");
} }
......
# ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 1e-4 2e-4 1e-2 1e-5 3e-6 3e-6 7e-6 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 1e-4 2e-4 1e-2 1e-5 3e-6 3e-6 7e-6
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.5e-3 1e-5 2e-3 6e-5 3e-3 2e-3 2e-3 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 3e-3 1e-5 2e-3 6e-5 3e-3 2e-3 2e-3
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-3 1e-6 1e0 1e-6 2e-6 2e-6 2e-6 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-3 1e-6 1e0 1e-6 2e-6 2e-6 2e-6
# ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 3e-6 1e-4 5e-4 1.4e-2 1.1e-5 3e-6 3e-6 8e-6 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 3e-6 1e-4 5e-4 1.4e-2 1.1e-5 3e-6 3e-6 8e-6
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.5e-6 1.4e-2 1e-5 2e-3 2.5e-4 3e-3 3e-3 3e-3 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.5e-6 1.7e-2 1e-5 2e-3 2.5e-4 3e-3 3e-3 3e-3
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e0 1e-6 4e-6 4e-6 4e-6 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e0 1e-6 4e-6 4e-6 4e-6
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment