/*******************************************************************************
* This file is part of SWIFT.
* Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see .
*
******************************************************************************/
#include
/* Some standard headers. */
#include
#include
#include
#include
#include
/* Local headers. */
#include "runner_doiact_grav.h"
#include "swift.h"
const int num_M2L_runs = 1 << 23;
const int num_M2P_runs = 1 << 23;
const int num_PP_runs = 1; // << 8;
void make_cell(struct cell *c, int N, const double loc[3], double width,
int id_base, const struct gravity_props *grav_props) {
bzero(c, sizeof(struct cell));
/* Start by setting the basics */
c->loc[0] = loc[0];
c->loc[1] = loc[1];
c->loc[2] = loc[2];
c->width[0] = width;
c->width[1] = width;
c->width[2] = width;
/* Initialise the locks */
lock_init(&c->grav.plock);
lock_init(&c->grav.mlock);
/* Set the time bins */
c->grav.ti_end_min = 1;
c->grav.ti_beg_max = 1;
c->grav.ti_old_part = 1;
c->grav.ti_old_multipole = 1;
c->grav.ti_end_min = 8;
/* Create the particles */
c->grav.count = N;
c->grav.count_total = N;
c->grav.parts = NULL;
if (posix_memalign((void **)&c->grav.parts, part_align,
N * sizeof(struct gpart)) != 0) {
error("couldn't allocate particles, no. of particles: %d", (int)N);
}
bzero(c->grav.parts, N * sizeof(struct gpart));
for (int i = 0.; i < N; ++i) {
c->grav.parts[i].id_or_neg_offset = id_base + i;
c->grav.parts[i].x[0] = loc[0] + width * rand() / ((double)RAND_MAX);
c->grav.parts[i].x[1] = loc[1] + width * rand() / ((double)RAND_MAX);
c->grav.parts[i].x[2] = loc[2] + width * rand() / ((double)RAND_MAX);
c->grav.parts[i].mass = 1.;
c->grav.parts[i].type = swift_type_dark_matter;
c->grav.parts[i].epsilon = 1.;
c->grav.parts[i].time_bin = 1;
}
/* Create the multipoles */
c->grav.multipole =
(struct gravity_tensors *)malloc(sizeof(struct gravity_tensors));
gravity_reset(c->grav.multipole);
gravity_P2M(c->grav.multipole, c->grav.parts, N, grav_props);
gravity_multipole_compute_power(&c->grav.multipole->m_pole);
}
int main(int argc, char *argv[]) {
/* Initialize CPU frequency, this also starts time. */
unsigned long long cpufreq = 0;
clocks_set_cpufreq(cpufreq);
/* Choke on FPEs */
#ifdef HAVE_FE_ENABLE_EXCEPT
feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
#endif
/* Get some randomness going */
const int seed = time(NULL);
message("Seed = %d", seed);
srand(seed);
/* Construct gravity properties */
struct gravity_props grav_props;
bzero(&grav_props, sizeof(struct gravity_props));
grav_props.use_advanced_MAC = 1;
grav_props.use_adaptive_tolerance = 1;
grav_props.adaptive_tolerance = 1e-4;
grav_props.theta_crit = 0.5;
grav_props.G_Newton = 1.;
grav_props.mesh_size = 64;
grav_props.a_smooth = 1.25;
/* Space properites */
const double dim[3] = {100., 100., 100.};
const double r_s = grav_props.a_smooth * dim[0] / grav_props.mesh_size;
const double r_s_inv = 1. / r_s;
/* Mesh structure */
struct pm_mesh mesh;
mesh.periodic = 0;
mesh.dim[0] = dim[0];
mesh.dim[1] = dim[1];
mesh.dim[2] = dim[2];
/* Construct an engine */
struct engine e;
e.mesh = &mesh;
e.max_active_bin = 56;
e.time = 0.1f;
e.ti_current = 1;
e.gravity_properties = &grav_props;
/* Construct a runner */
struct runner r;
bzero(&r, sizeof(struct runner));
r.e = &e;
/* Init the cache for gravity interaction */
gravity_cache_init(&r.ci_gravity_cache, space_splitsize);
gravity_cache_init(&r.cj_gravity_cache, space_splitsize);
/* Construct two cells */
struct cell ci;
struct cell cj;
const double loc_i[3] = {0., 0., 0.};
const double loc_j[3] = {1., 1., 1.};
const int num_particles = 8;
make_cell(&ci, num_particles, loc_i, 1., 0, &grav_props);
make_cell(&cj, num_particles, loc_j, 1., num_particles, &grav_props);
message("Number of runs: %d", num_M2L_runs);
/* Construct arrays of multipoles to prevent too much optimization */
struct gravity_tensors *tensors_i = NULL;
if (posix_memalign((void **)&tensors_i, SWIFT_CACHE_ALIGNMENT,
num_M2L_runs * sizeof(struct gravity_tensors)) != 0)
error("Error allocating memory for multipoles array.");
struct gravity_tensors *tensors_j = NULL;
if (posix_memalign((void **)&tensors_j, SWIFT_CACHE_ALIGNMENT,
num_M2L_runs * sizeof(struct gravity_tensors)) != 0)
error("Error allocating memory for multipoles array.");
for (int n = 0; n < num_M2L_runs; ++n) {
memcpy(&tensors_i[n], ci.grav.multipole, sizeof(struct gravity_tensors));
memcpy(&tensors_j[n], cj.grav.multipole, sizeof(struct gravity_tensors));
/* Move the values a bit to prevent optimization in the actual loops */
tensors_i[n].CoM[0] += rand() / ((double)RAND_MAX);
tensors_i[n].CoM[1] += rand() / ((double)RAND_MAX);
tensors_i[n].CoM[1] += rand() / ((double)RAND_MAX);
tensors_j[n].CoM[0] += rand() / ((double)RAND_MAX);
tensors_j[n].CoM[1] += rand() / ((double)RAND_MAX);
tensors_j[n].CoM[1] += rand() / ((double)RAND_MAX);
tensors_i[n].m_pole.M_000 += rand() / ((double)RAND_MAX);
tensors_j[n].m_pole.M_000 += rand() / ((double)RAND_MAX);
#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
tensors_i[n].m_pole.M_200 += rand() / ((double)RAND_MAX);
tensors_i[n].m_pole.M_020 += rand() / ((double)RAND_MAX);
tensors_i[n].m_pole.M_002 += rand() / ((double)RAND_MAX);
tensors_j[n].m_pole.M_200 += rand() / ((double)RAND_MAX);
tensors_j[n].m_pole.M_020 += rand() / ((double)RAND_MAX);
tensors_j[n].m_pole.M_002 += rand() / ((double)RAND_MAX);
#endif
}
/* Now run a series of M2L kernels */
/********
* Symmetric non-periodic M2L
********/
ticks tic = getticks();
for (int n = 0; n < num_M2L_runs; ++n) {
gravity_M2L_symmetric(&tensors_i[n].pot, //
&tensors_j[n].pot, //
&tensors_i[n].m_pole, //
&tensors_j[n].m_pole, //
tensors_i[n].CoM, //
tensors_j[n].CoM, //
&grav_props, /* periodic=*/0, dim, r_s_inv);
}
ticks toc = getticks();
message("%30s at order %d took %4d %s.", "Symmetric non-periodic M2L",
SELF_GRAVITY_MULTIPOLE_ORDER,
(int)(1e6 * clocks_from_ticks(toc - tic) / num_M2L_runs), "ns");
/********
* Symmetric periodic M2L
********/
tic = getticks();
for (int n = 0; n < num_M2L_runs; ++n) {
gravity_M2L_symmetric(&tensors_i[n].pot, //
&tensors_j[n].pot, //
&tensors_i[n].m_pole, //
&tensors_j[n].m_pole, //
tensors_i[n].CoM, //
tensors_j[n].CoM, //
&grav_props, /* periodic=*/1, dim, r_s_inv);
}
toc = getticks();
message("%30s at order %d took %4d %s.", "Symmetric periodic M2L",
SELF_GRAVITY_MULTIPOLE_ORDER,
(int)(1e6 * clocks_from_ticks(toc - tic) / num_M2L_runs), "ns");
/********
* Non-symmetric non-periodic M2L
********/
tic = getticks();
for (int n = 0; n < num_M2L_runs; ++n) {
gravity_M2L_nonsym(&tensors_i[n].pot, //
&tensors_j[n].m_pole, //
tensors_i[n].CoM, //
tensors_j[n].CoM, //
&grav_props, /* periodic=*/0, dim, r_s_inv);
}
toc = getticks();
message("%30s at order %d took %4d %s.", "Non-symmetric non-periodic M2L",
SELF_GRAVITY_MULTIPOLE_ORDER,
(int)(1e6 * clocks_from_ticks(toc - tic) / num_M2L_runs), "ns");
/********
* Non-symmetric periodic M2L
********/
tic = getticks();
for (int n = 0; n < num_M2L_runs; ++n) {
gravity_M2L_nonsym(&tensors_i[n].pot, //
&tensors_j[n].m_pole, //
tensors_i[n].CoM, //
tensors_j[n].CoM, //
&grav_props, /* periodic=*/1, dim, r_s_inv);
}
toc = getticks();
message("%30s at order %d took %4d %s.", "Non-symmetric periodic M2L",
SELF_GRAVITY_MULTIPOLE_ORDER,
(int)(1e6 * clocks_from_ticks(toc - tic) / num_M2L_runs), "ns");
/* Now run a series of M2L kernels */
/********
* Non-periodic M2P
********/
tic = getticks();
for (int n = 0; n < num_M2P_runs; ++n) {
const int index = n % num_particles;
const float r_x = tensors_j[n].CoM[0] - ci.grav.parts[index].x[0];
const float r_y = tensors_j[n].CoM[1] - ci.grav.parts[index].x[1];
const float r_z = tensors_j[n].CoM[2] - ci.grav.parts[index].x[2];
const float r2 = r_x * r_x + r_y * r_y + r_z * r_z;
const float eps = gravity_get_softening(&ci.grav.parts[index], &grav_props);
struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f};
gravity_M2P(&tensors_j[n].m_pole, r_x, r_y, r_z, r2, eps,
/*periodic=*/0, r_s_inv, &l);
ci.grav.parts[index].a_grav[0] += l.F_100;
ci.grav.parts[index].a_grav[1] += l.F_010;
ci.grav.parts[index].a_grav[2] += l.F_001;
}
toc = getticks();
message("%30s at order %d took %4d %s.", "Non-periodic M2P",
SELF_GRAVITY_MULTIPOLE_ORDER,
(int)(1e6 * clocks_from_ticks(toc - tic) / num_M2P_runs), "ns");
/********
* Periodic M2P
********/
tic = getticks();
for (int n = 0; n < num_M2P_runs; ++n) {
const int index = n % num_particles;
const float r_x = tensors_j[n].CoM[0] - ci.grav.parts[index].x[0];
const float r_y = tensors_j[n].CoM[1] - ci.grav.parts[index].x[1];
const float r_z = tensors_j[n].CoM[2] - ci.grav.parts[index].x[2];
const float r2 = r_x * r_x + r_y * r_y + r_z * r_z;
const float eps = gravity_get_softening(&ci.grav.parts[index], &grav_props);
struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f};
gravity_M2P(&tensors_j[n].m_pole, r_x, r_y, r_z, r2, eps,
/*periodic=*/1, r_s_inv, &l);
ci.grav.parts[index].a_grav[0] += l.F_100;
ci.grav.parts[index].a_grav[1] += l.F_010;
ci.grav.parts[index].a_grav[2] += l.F_001;
}
toc = getticks();
message("%30s at order %d took %4d %s.", "Periodic M2P",
SELF_GRAVITY_MULTIPOLE_ORDER,
(int)(1e6 * clocks_from_ticks(toc - tic) / num_M2P_runs), "ns");
/* Print out to avoid optimization */
// gravity_field_tensors_print(&ci.grav.multipole->pot);
// gravity_field_tensors_print(&cj.grav.multipole->pot);
tic = getticks();
for (int n = 0; n < num_PP_runs; ++n) {
runner_dopair_grav_pp(&r, &ci, &cj, 1, 0);
}
toc = getticks();
message("%30s at order %d took %4d %s.", "dopair_grav (no mpole)",
SELF_GRAVITY_MULTIPOLE_ORDER,
(int)(1e6 * clocks_from_ticks(toc - tic) / num_PP_runs), "ns");
tic = getticks();
runner_dopair_grav_pp(&r, &ci, &cj, 1, 1);
toc = getticks();
message("%30s at order %d took %4d %s.", "dopair_grav (mpole)",
SELF_GRAVITY_MULTIPOLE_ORDER,
(int)(1e6 * clocks_from_ticks(toc - tic) / num_PP_runs), "ns");
/* Be clean... */
gravity_cache_clean(&r.ci_gravity_cache);
gravity_cache_clean(&r.cj_gravity_cache);
free(tensors_i);
free(tensors_j);
free(ci.grav.parts);
free(cj.grav.parts);
free(ci.grav.multipole);
free(cj.grav.multipole);
return 0;
}