Commit 0edc7d11 authored by Matthieu Schaller
Browse files

Merge branch 'avx2-optimized-checks' into 'master'

Fixes for checks on optimized AVX2 architectures

See merge request !997
parents 9bde0118 f8c88733
......@@ -50,10 +50,10 @@ __attribute__((always_inline, const)) INLINE static float approx_erfcf(
* @brief Approximate version of expf(x) using a 4th order Taylor expansion
*
* The absolute error is smaller than 3 * 10^-6 for -0.2 < x < 0.2.
* The absolute error is smaller than 2 * 10^-7 for -0.1 < x < 0.1.
* The absolute error is smaller than 3 * 10^-7 for -0.1 < x < 0.1.
* The relative error is smaller than 1 * 10^-6 for -0.2 < x < 0.2.
* The relative error is smaller than 4 * 10^-8 for -0.1 < x < 0.1.
* The relative error is smaller than 3 * 10^-7 for -0.1 < x < 0.1.
*
* @param x The number to take the exponential of.
*/
......
......@@ -253,7 +253,8 @@ void end_calculation_density(struct cell *c, const struct cosmology *cosmo) {
*/
void end_calculation_force(struct cell *c, const struct cosmology *cosmo) {
for (int pid = 0; pid < c->hydro.count; pid++) {
hydro_end_force(&c->hydro.parts[pid], cosmo);
struct part *volatile part = &c->hydro.parts[pid];
hydro_end_force(part, cosmo);
}
}
......
......@@ -50,7 +50,8 @@ int main(int argc, char *argv[]) {
message("executing %i runs of each command.", num_vals);
/* Create and fill an array of floats. */
float *data = (float *)malloc(sizeof(float) * num_vals);
float *data;
posix_memalign((void **)&data, 64, num_vals*sizeof(float));
for (int k = 0; k < num_vals; k++) {
data[k] = (float)rand() / RAND_MAX;
data[k] = (1.0f - data[k]) * range_min + data[k] * range_max;
......
......@@ -45,23 +45,23 @@ int main(int argc, char *argv[]) {
printf("Absolute difference too large !\n");
error = 1;
}
if (abs > 1.2e-7 && fabsf(x) <= 0.1) {
if (abs > 3e-7 && fabsf(x) <= 0.1) {
printf("Absolute difference too large !\n");
error = 1;
error = 2;
}
if (rel > 1e-6 && fabsf(x) <= 0.2) {
printf("Relative difference too large !\n");
error = 1;
error = 3;
}
if (rel > 4e-8 && fabsf(x) <= 0.1) {
if (rel > 3e-7 && fabsf(x) <= 0.1) {
printf("Relative difference too large !\n");
error = 1;
error = 4;
}
if (error) {
printf("%2d: x= %f exp(x)= %e approx_exp(x)=%e abs=%e rel=%e\n", i, x,
exp_correct, exp_approx, abs, rel);
if (error > 0) {
printf("%2d/%d: x= %f exp(x)= %e approx_exp(x)=%e abs=%e rel=%e\n", i,
error, x, exp_correct, exp_approx, abs, rel);
return 1;
}
}
......
......@@ -78,7 +78,10 @@ struct cell *make_cell(size_t n, double *offset, double size, double h,
enum velocity_types vel) {
const size_t count = n * n * n;
const double volume = size * size * size;
struct cell *cell = (struct cell *)malloc(sizeof(struct cell));
struct cell *cell = NULL;
if (posix_memalign((void **)&cell, cell_align, sizeof(struct cell)) != 0) {
error("couldn't allocate cell");
}
bzero(cell, sizeof(struct cell));
if (posix_memalign((void **)&cell->hydro.parts, part_align,
......@@ -290,7 +293,7 @@ void runner_dopair1_branch_density(struct runner *r, struct cell *ci,
struct cell *cj);
void runner_doself1_branch_density(struct runner *r, struct cell *c);
void test_boundary_conditions(struct cell **cells, struct runner runner,
void test_boundary_conditions(struct cell **cells, struct runner *runner,
const int loc_i, const int loc_j, const int loc_k,
const int dim, char *swiftOutputFileName,
char *bruteForceOutputFileName) {
......@@ -303,10 +306,10 @@ void test_boundary_conditions(struct cell **cells, struct runner runner,
/* Run all the pairs */
#ifdef WITH_VECTORIZATION
runner.ci_cache.count = 0;
cache_init(&runner.ci_cache, 512);
runner.cj_cache.count = 0;
cache_init(&runner.cj_cache, 512);
runner->ci_cache.count = 0;
cache_init(&runner->ci_cache, 512);
runner->cj_cache.count = 0;
cache_init(&runner->cj_cache, 512);
#endif
/* Now loop over all the neighbours of this cell
......@@ -324,17 +327,17 @@ void test_boundary_conditions(struct cell **cells, struct runner runner,
/* Get the neighbouring cell */
struct cell *cj = cells[iii * (dim * dim) + jjj * dim + kkk];
if (cj != main_cell) DOPAIR1(&runner, main_cell, cj);
if (cj != main_cell) DOPAIR1(runner, main_cell, cj);
}
}
}
/* And now the self-interaction */
DOSELF1(&runner, main_cell);
DOSELF1(runner, main_cell);
/* Let's get physical ! */
end_calculation(main_cell, runner.e->cosmology);
end_calculation(main_cell, runner->e->cosmology);
/* Dump particles from the main cell. */
dump_particle_fields(swiftOutputFileName, main_cell, loc_i, loc_j, loc_k);
......@@ -359,16 +362,16 @@ void test_boundary_conditions(struct cell **cells, struct runner runner,
/* Get the neighbouring cell */
struct cell *cj = cells[iii * (dim * dim) + jjj * dim + kkk];
if (cj != main_cell) pairs_all_density(&runner, main_cell, cj);
if (cj != main_cell) pairs_all_density(runner, main_cell, cj);
}
}
}
/* And now the self-interaction */
self_all_density(&runner, main_cell);
self_all_density(runner, main_cell);
/* Let's get physical ! */
end_calculation(main_cell, runner.e->cosmology);
end_calculation(main_cell, runner->e->cosmology);
/* Dump */
dump_particle_fields(bruteForceOutputFileName, main_cell, loc_i, loc_j,
......@@ -491,8 +494,9 @@ int main(int argc, char *argv[]) {
engine.hydro_properties = &hp;
engine.nodeID = NODE_ID;
struct runner runner;
runner.e = &engine;
struct runner real_runner;
struct runner *runner = &real_runner;
runner->e = &engine;
struct cosmology cosmo;
cosmology_init_no_cosmo(&cosmo);
......@@ -508,9 +512,9 @@ int main(int argc, char *argv[]) {
cells[i * (dim * dim) + j * dim + k] = make_cell(
particles, offset, size, h, rho, &partId, perturbation, vel);
runner_do_drift_part(&runner, cells[i * (dim * dim) + j * dim + k], 0);
runner_do_drift_part(runner, cells[i * (dim * dim) + j * dim + k], 0);
runner_do_hydro_sort(&runner, cells[i * (dim * dim) + j * dim + k],
runner_do_hydro_sort(runner, cells[i * (dim * dim) + j * dim + k],
0x1FFF, 0, 0);
}
}
......
......@@ -200,15 +200,24 @@ int main(int argc, char *argv[]) {
/* Check the total surface area */
assert(fabs(Atot - 1.0f) < 1.e-6);
/* Check the neighbour relations for an arbitrary cell: cell 44
We plotted the grid and manually found the correct neighbours and their
order. */
assert(cells[44].nvert == 4);
assert(cells[44].ngbs[0] == 34);
assert(cells[44].ngbs[1] == 45);
assert(cells[44].ngbs[2] == 54);
assert(cells[44].ngbs[3] == 43);
/* Check the neighbour relations for an arbitrary cell: cell 44 We plotted
the grid and manually found the correct neighbours and their
order. Variation is found when optimizing, so we have two possible
outcomes... */
if (cells[44].nvert == 5) {
assert(cells[44].nvert == 5);
assert(cells[44].ngbs[0] == 43);
assert(cells[44].ngbs[1] == 34);
assert(cells[44].ngbs[2] == 45);
assert(cells[44].ngbs[3] == 55);
} else {
assert(cells[44].nvert == 4);
assert(cells[44].ngbs[0] == 34);
assert(cells[44].ngbs[1] == 45);
assert(cells[44].ngbs[2] == 54);
assert(cells[44].ngbs[3] == 43);
}
message("Done.");
}
......
# ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 1e-4 2e-4 1e-2 1e-5 3e-6 3e-6 7e-6
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.5e-3 1e-5 2e-3 6e-5 3e-3 2e-3 2e-3
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 3e-3 1e-5 2e-3 6e-5 3e-3 2e-3 2e-3
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-3 1e-6 1e0 1e-6 2e-6 2e-6 2e-6
# ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 3e-6 1e-4 5e-4 1.4e-2 1.1e-5 3e-6 3e-6 8e-6
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.5e-6 1.4e-2 1e-5 2e-3 2.5e-4 3e-3 3e-3 3e-3
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.5e-6 1.7e-2 1e-5 2e-3 2.5e-4 3e-3 3e-3 3e-3
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e0 1e-6 4e-6 4e-6 4e-6
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment