/******************************************************************************* * This file is part of SWIFT. * Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl). * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . * ******************************************************************************/ #include /* Some standard headers. */ #include #include #include #include #include /* Local headers. */ #include "runner_doiact_grav.h" #include "swift.h" const int num_M2L_runs = 1 << 23; const int num_M2P_runs = 1 << 23; const int num_PP_runs = 1; // << 8; void make_cell(struct cell *c, int N, const double loc[3], double width, int id_base, const struct gravity_props *grav_props) { bzero(c, sizeof(struct cell)); /* Start by setting the basics */ c->loc[0] = loc[0]; c->loc[1] = loc[1]; c->loc[2] = loc[2]; c->width[0] = width; c->width[1] = width; c->width[2] = width; /* Initialise the locks */ lock_init(&c->grav.plock); lock_init(&c->grav.mlock); /* Set the time bins */ c->grav.ti_end_min = 1; c->grav.ti_beg_max = 1; c->grav.ti_old_part = 1; c->grav.ti_old_multipole = 1; c->grav.ti_end_min = 8; /* Create the particles */ c->grav.count = N; c->grav.count_total = N; c->grav.parts = NULL; if (posix_memalign((void **)&c->grav.parts, part_align, N * sizeof(struct gpart)) != 0) { error("couldn't allocate particles, no. of particles: %d", (int)N); } bzero(c->grav.parts, N * sizeof(struct gpart)); for (int i = 0.; i < N; ++i) { c->grav.parts[i].id_or_neg_offset = id_base + i; c->grav.parts[i].x[0] = loc[0] + width * rand() / ((double)RAND_MAX); c->grav.parts[i].x[1] = loc[1] + width * rand() / ((double)RAND_MAX); c->grav.parts[i].x[2] = loc[2] + width * rand() / ((double)RAND_MAX); c->grav.parts[i].mass = 1.; c->grav.parts[i].type = swift_type_dark_matter; c->grav.parts[i].epsilon = 1.; c->grav.parts[i].time_bin = 1; } /* Create the multipoles */ c->grav.multipole = (struct gravity_tensors *)malloc(sizeof(struct gravity_tensors)); gravity_reset(c->grav.multipole); gravity_P2M(c->grav.multipole, c->grav.parts, N, grav_props); gravity_multipole_compute_power(&c->grav.multipole->m_pole); } int main(int argc, char *argv[]) { /* Initialize CPU frequency, this also starts time. */ unsigned long long cpufreq = 0; clocks_set_cpufreq(cpufreq); /* Choke on FPEs */ #ifdef HAVE_FE_ENABLE_EXCEPT feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); #endif /* Get some randomness going */ const int seed = time(NULL); message("Seed = %d", seed); srand(seed); /* Construct gravity properties */ struct gravity_props grav_props; bzero(&grav_props, sizeof(struct gravity_props)); grav_props.use_advanced_MAC = 1; grav_props.use_adaptive_tolerance = 1; grav_props.adaptive_tolerance = 1e-4; grav_props.theta_crit = 0.5; grav_props.G_Newton = 1.; grav_props.mesh_size = 64; grav_props.a_smooth = 1.25; /* Space properites */ const double dim[3] = {100., 100., 100.}; const double r_s = grav_props.a_smooth * dim[0] / grav_props.mesh_size; const double r_s_inv = 1. / r_s; /* Mesh structure */ struct pm_mesh mesh; mesh.periodic = 0; mesh.dim[0] = dim[0]; mesh.dim[1] = dim[1]; mesh.dim[2] = dim[2]; /* Construct an engine */ struct engine e; e.mesh = &mesh; e.max_active_bin = 56; e.time = 0.1f; e.ti_current = 1; e.gravity_properties = &grav_props; /* Construct a runner */ struct runner r; bzero(&r, sizeof(struct runner)); r.e = &e; /* Init the cache for gravity interaction */ gravity_cache_init(&r.ci_gravity_cache, space_splitsize); gravity_cache_init(&r.cj_gravity_cache, space_splitsize); /* Construct two cells */ struct cell ci; struct cell cj; const double loc_i[3] = {0., 0., 0.}; const double loc_j[3] = {1., 1., 1.}; const int num_particles = 8; make_cell(&ci, num_particles, loc_i, 1., 0, &grav_props); make_cell(&cj, num_particles, loc_j, 1., num_particles, &grav_props); message("Number of runs: %d", num_M2L_runs); /* Construct arrays of multipoles to prevent too much optimization */ struct gravity_tensors *tensors_i = NULL; if (posix_memalign((void **)&tensors_i, SWIFT_CACHE_ALIGNMENT, num_M2L_runs * sizeof(struct gravity_tensors)) != 0) error("Error allocating memory for multipoles array."); struct gravity_tensors *tensors_j = NULL; if (posix_memalign((void **)&tensors_j, SWIFT_CACHE_ALIGNMENT, num_M2L_runs * sizeof(struct gravity_tensors)) != 0) error("Error allocating memory for multipoles array."); for (int n = 0; n < num_M2L_runs; ++n) { memcpy(&tensors_i[n], ci.grav.multipole, sizeof(struct gravity_tensors)); memcpy(&tensors_j[n], cj.grav.multipole, sizeof(struct gravity_tensors)); /* Move the values a bit to prevent optimization in the actual loops */ tensors_i[n].CoM[0] += rand() / ((double)RAND_MAX); tensors_i[n].CoM[1] += rand() / ((double)RAND_MAX); tensors_i[n].CoM[1] += rand() / ((double)RAND_MAX); tensors_j[n].CoM[0] += rand() / ((double)RAND_MAX); tensors_j[n].CoM[1] += rand() / ((double)RAND_MAX); tensors_j[n].CoM[1] += rand() / ((double)RAND_MAX); tensors_i[n].m_pole.M_000 += rand() / ((double)RAND_MAX); tensors_j[n].m_pole.M_000 += rand() / ((double)RAND_MAX); #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 tensors_i[n].m_pole.M_200 += rand() / ((double)RAND_MAX); tensors_i[n].m_pole.M_020 += rand() / ((double)RAND_MAX); tensors_i[n].m_pole.M_002 += rand() / ((double)RAND_MAX); tensors_j[n].m_pole.M_200 += rand() / ((double)RAND_MAX); tensors_j[n].m_pole.M_020 += rand() / ((double)RAND_MAX); tensors_j[n].m_pole.M_002 += rand() / ((double)RAND_MAX); #endif } /* Now run a series of M2L kernels */ /******** * Symmetric non-periodic M2L ********/ ticks tic = getticks(); for (int n = 0; n < num_M2L_runs; ++n) { gravity_M2L_symmetric(&tensors_i[n].pot, // &tensors_j[n].pot, // &tensors_i[n].m_pole, // &tensors_j[n].m_pole, // tensors_i[n].CoM, // tensors_j[n].CoM, // &grav_props, /* periodic=*/0, dim, r_s_inv); } ticks toc = getticks(); message("%30s at order %d took %4d %s.", "Symmetric non-periodic M2L", SELF_GRAVITY_MULTIPOLE_ORDER, (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2L_runs), "ns"); /******** * Symmetric periodic M2L ********/ tic = getticks(); for (int n = 0; n < num_M2L_runs; ++n) { gravity_M2L_symmetric(&tensors_i[n].pot, // &tensors_j[n].pot, // &tensors_i[n].m_pole, // &tensors_j[n].m_pole, // tensors_i[n].CoM, // tensors_j[n].CoM, // &grav_props, /* periodic=*/1, dim, r_s_inv); } toc = getticks(); message("%30s at order %d took %4d %s.", "Symmetric periodic M2L", SELF_GRAVITY_MULTIPOLE_ORDER, (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2L_runs), "ns"); /******** * Non-symmetric non-periodic M2L ********/ tic = getticks(); for (int n = 0; n < num_M2L_runs; ++n) { gravity_M2L_nonsym(&tensors_i[n].pot, // &tensors_j[n].m_pole, // tensors_i[n].CoM, // tensors_j[n].CoM, // &grav_props, /* periodic=*/0, dim, r_s_inv); } toc = getticks(); message("%30s at order %d took %4d %s.", "Non-symmetric non-periodic M2L", SELF_GRAVITY_MULTIPOLE_ORDER, (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2L_runs), "ns"); /******** * Non-symmetric periodic M2L ********/ tic = getticks(); for (int n = 0; n < num_M2L_runs; ++n) { gravity_M2L_nonsym(&tensors_i[n].pot, // &tensors_j[n].m_pole, // tensors_i[n].CoM, // tensors_j[n].CoM, // &grav_props, /* periodic=*/1, dim, r_s_inv); } toc = getticks(); message("%30s at order %d took %4d %s.", "Non-symmetric periodic M2L", SELF_GRAVITY_MULTIPOLE_ORDER, (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2L_runs), "ns"); /* Now run a series of M2L kernels */ /******** * Non-periodic M2P ********/ tic = getticks(); for (int n = 0; n < num_M2P_runs; ++n) { const int index = n % num_particles; const float r_x = tensors_j[n].CoM[0] - ci.grav.parts[index].x[0]; const float r_y = tensors_j[n].CoM[1] - ci.grav.parts[index].x[1]; const float r_z = tensors_j[n].CoM[2] - ci.grav.parts[index].x[2]; const float r2 = r_x * r_x + r_y * r_y + r_z * r_z; const float eps = gravity_get_softening(&ci.grav.parts[index], &grav_props); struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f}; gravity_M2P(&tensors_j[n].m_pole, r_x, r_y, r_z, r2, eps, /*periodic=*/0, r_s_inv, &l); ci.grav.parts[index].a_grav[0] += l.F_100; ci.grav.parts[index].a_grav[1] += l.F_010; ci.grav.parts[index].a_grav[2] += l.F_001; } toc = getticks(); message("%30s at order %d took %4d %s.", "Non-periodic M2P", SELF_GRAVITY_MULTIPOLE_ORDER, (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2P_runs), "ns"); /******** * Periodic M2P ********/ tic = getticks(); for (int n = 0; n < num_M2P_runs; ++n) { const int index = n % num_particles; const float r_x = tensors_j[n].CoM[0] - ci.grav.parts[index].x[0]; const float r_y = tensors_j[n].CoM[1] - ci.grav.parts[index].x[1]; const float r_z = tensors_j[n].CoM[2] - ci.grav.parts[index].x[2]; const float r2 = r_x * r_x + r_y * r_y + r_z * r_z; const float eps = gravity_get_softening(&ci.grav.parts[index], &grav_props); struct reduced_grav_tensor l = {0.f, 0.f, 0.f, 0.f}; gravity_M2P(&tensors_j[n].m_pole, r_x, r_y, r_z, r2, eps, /*periodic=*/1, r_s_inv, &l); ci.grav.parts[index].a_grav[0] += l.F_100; ci.grav.parts[index].a_grav[1] += l.F_010; ci.grav.parts[index].a_grav[2] += l.F_001; } toc = getticks(); message("%30s at order %d took %4d %s.", "Periodic M2P", SELF_GRAVITY_MULTIPOLE_ORDER, (int)(1e6 * clocks_from_ticks(toc - tic) / num_M2P_runs), "ns"); /* Print out to avoid optimization */ // gravity_field_tensors_print(&ci.grav.multipole->pot); // gravity_field_tensors_print(&cj.grav.multipole->pot); tic = getticks(); for (int n = 0; n < num_PP_runs; ++n) { runner_dopair_grav_pp(&r, &ci, &cj, 1, 0); } toc = getticks(); message("%30s at order %d took %4d %s.", "dopair_grav (no mpole)", SELF_GRAVITY_MULTIPOLE_ORDER, (int)(1e6 * clocks_from_ticks(toc - tic) / num_PP_runs), "ns"); tic = getticks(); runner_dopair_grav_pp(&r, &ci, &cj, 1, 1); toc = getticks(); message("%30s at order %d took %4d %s.", "dopair_grav (mpole)", SELF_GRAVITY_MULTIPOLE_ORDER, (int)(1e6 * clocks_from_ticks(toc - tic) / num_PP_runs), "ns"); /* Be clean... */ gravity_cache_clean(&r.ci_gravity_cache); gravity_cache_clean(&r.cj_gravity_cache); free(tensors_i); free(tensors_j); free(ci.grav.parts); free(cj.grav.parts); free(ci.grav.multipole); free(cj.grav.multipole); return 0; }