/*******************************************************************************
* This file is part of SWIFT.
* Copyright (C) 2015 Matthieu Schaller (schaller@strw.leidenuniv.nl).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see .
*
******************************************************************************/
/* Config parameters. */
#include
/* Some standard headers. */
#include
#include
#include
#include
#include
/* Local headers. */
#include "swift.h"
#define DOSELF1 runner_doself_branch_stars_density
#define DOSELF1_SUBSET runner_doself_subset_branch_stars_density
#ifdef TEST_DOSELF_SUBSET
#define DOSELF1_NAME "runner_doself_subset_branch_stars_density"
#else
#define DOSELF1_NAME "runner_doself1_branch_stars_density"
#endif
#define DOPAIR1 runner_dopair_branch_stars_density
#define DOPAIR1_SUBSET runner_dopair_subset_branch_stars_density
#ifdef TEST_DOPAIR_SUBSET
#define DOPAIR1_NAME "runner_dopair_subset_branch_stars_density"
#else
#define DOPAIR1_NAME "runner_dopair_branch_stars_density"
#endif
#define NODE_ID 0
/**
* @brief Constructs a cell and all of its particle in a valid state prior to
* a DOPAIR or DOSELF calcuation.
*
* @param n The cube root of the number of particles.
* @param n_stars The cube root of the number of star particles.
* @param offset The position of the cell offset from (0,0,0).
* @param size The cell size.
* @param h The smoothing length of the particles in units of the inter-particle
* separation.
* @param partId The running counter of IDs for gas.
* @param spartId The running counter of IDs for stars.
* @param pert The perturbation to apply to the particles in the cell in units
* of the inter-particle separation.
* @param h_pert The perturbation to apply to the smoothing length.
*/
struct cell *make_cell(size_t n, size_t n_stars, double *offset, double size,
double h, long long *partId, long long *spartId,
double pert, double h_pert) {
const size_t count = n * n * n;
const size_t scount = n_stars * n_stars * n_stars;
float h_max = 0.f;
float stars_h_max = 0.f;
struct cell *cell = NULL;
if (posix_memalign((void **)&cell, cell_align, sizeof(struct cell)) != 0) {
error("Couldn't allocate the cell");
}
bzero(cell, sizeof(struct cell));
if (posix_memalign((void **)&cell->hydro.parts, part_align,
count * sizeof(struct part)) != 0) {
error("couldn't allocate particles, no. of particles: %d", (int)count);
}
bzero(cell->hydro.parts, count * sizeof(struct part));
/* Construct the parts */
struct part *part = cell->hydro.parts;
for (size_t x = 0; x < n; ++x) {
for (size_t y = 0; y < n; ++y) {
for (size_t z = 0; z < n; ++z) {
part->x[0] =
offset[0] +
size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
part->x[1] =
offset[1] +
size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
part->x[2] =
offset[2] +
size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
part->v[0] = 0;
part->v[1] = 0;
part->v[2] = 0;
if (h_pert)
part->h = size * h * random_uniform(1.f, h_pert) / (float)n;
else
part->h = size * h / (float)n;
h_max = fmaxf(h_max, part->h);
part->id = ++(*partId);
part->time_bin = 1;
#ifdef SWIFT_DEBUG_CHECKS
part->ti_drift = 8;
part->ti_kick = 8;
#endif
++part;
}
}
}
/* Construct the sparts */
if (posix_memalign((void **)&cell->stars.parts, spart_align,
scount * sizeof(struct spart)) != 0) {
error("couldn't allocate particles, no. of particles: %d", (int)scount);
}
bzero(cell->stars.parts, scount * sizeof(struct spart));
struct spart *spart = cell->stars.parts;
for (size_t x = 0; x < n_stars; ++x) {
for (size_t y = 0; y < n_stars; ++y) {
for (size_t z = 0; z < n_stars; ++z) {
spart->x[0] =
offset[0] + size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) /
(float)n_stars;
spart->x[1] =
offset[1] + size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) /
(float)n_stars;
spart->x[2] =
offset[2] + size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) /
(float)n_stars;
spart->v[0] = 0;
spart->v[1] = 0;
spart->v[2] = 0;
if (h_pert)
spart->h = size * h * random_uniform(1.f, h_pert) / (float)n_stars;
else
spart->h = size * h / (float)n_stars;
stars_h_max = fmaxf(stars_h_max, spart->h);
spart->id = ++(*spartId);
spart->time_bin = 1;
#ifdef SWIFT_DEBUG_CHECKS
spart->ti_drift = 8;
spart->ti_kick = 8;
#endif
++spart;
}
}
}
/* Cell properties */
cell->split = 0;
cell->hydro.h_max = h_max;
cell->hydro.count = count;
cell->stars.h_max = stars_h_max;
cell->stars.h_max_active = stars_h_max;
cell->stars.count = scount;
cell->hydro.dx_max_part = 0.;
cell->hydro.dx_max_sort = 0.;
cell->stars.dx_max_sort = 0.;
cell->width[0] = size;
cell->width[1] = size;
cell->width[2] = size;
cell->dmin = size;
cell->loc[0] = offset[0];
cell->loc[1] = offset[1];
cell->loc[2] = offset[2];
cell->h_min_allowed = cell->dmin * 0.5 * (1. / kernel_gamma);
cell->h_max_allowed = cell->dmin * (1. / kernel_gamma);
cell->hydro.super = cell;
cell->stars.ti_old_part = 8;
cell->stars.ti_end_min = 8;
cell->hydro.ti_old_part = 8;
cell->hydro.ti_end_min = 8;
cell->grav.ti_old_part = 8;
cell->grav.ti_end_min = 8;
cell->nodeID = NODE_ID;
shuffle_particles(cell->hydro.parts, cell->hydro.count);
shuffle_sparticles(cell->stars.parts, cell->stars.count);
cell->hydro.sorted = 0;
cell->hydro.sort = NULL;
cell->stars.sorted = 0;
cell->stars.sort = NULL;
return cell;
}
void clean_up(struct cell *ci) {
free(ci->hydro.parts);
free(ci->stars.parts);
free(ci->hydro.sort);
free(ci->stars.sort);
free(ci);
}
/**
* @brief Initializes all particles field to be ready for a density calculation
*/
void zero_particle_fields(struct cell *c) {
for (int pid = 0; pid < c->stars.count; pid++) {
stars_init_spart(&c->stars.parts[pid]);
}
}
/**
* @brief Ends the loop by adding the appropriate coefficients
*/
void end_calculation(struct cell *c, const struct cosmology *cosmo) {
for (int pid = 0; pid < c->stars.count; pid++) {
stars_end_density(&c->stars.parts[pid], cosmo);
/* Recover the common "Neighbour number" definition */
c->stars.parts[pid].density.wcount *= pow_dimension(c->stars.parts[pid].h);
c->stars.parts[pid].density.wcount *= kernel_norm;
}
}
/**
* @brief Dump all the particles to a file
*/
void dump_particle_fields(char *fileName, struct cell *main_cell,
struct cell **cells) {
FILE *file = fopen(fileName, "w");
/* Write header */
fprintf(file, "# %4s %10s %10s %10s %13s %13s\n", "ID", "pos_x", "pos_y",
"pos_z", "wcount", "wcount_dh");
fprintf(file, "# Main cell --------------------------------------------\n");
/* Write main cell */
for (int pid = 0; pid < main_cell->stars.count; pid++) {
fprintf(file, "%6llu %10f %10f %10f %13e %13e\n",
main_cell->stars.parts[pid].id, main_cell->stars.parts[pid].x[0],
main_cell->stars.parts[pid].x[1], main_cell->stars.parts[pid].x[2],
main_cell->stars.parts[pid].density.wcount,
main_cell->stars.parts[pid].density.wcount_dh);
}
/* Write all other cells */
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 3; ++k) {
struct cell *cj = cells[i * 9 + j * 3 + k];
if (cj == main_cell) continue;
fprintf(file,
"# Offset: [%2d %2d %2d] -----------------------------------\n",
i - 1, j - 1, k - 1);
for (int pjd = 0; pjd < cj->stars.count; pjd++) {
fprintf(file, "%6llu %10f %10f %10f %13e %13e\n",
cj->stars.parts[pjd].id, cj->stars.parts[pjd].x[0],
cj->stars.parts[pjd].x[1], cj->stars.parts[pjd].x[2],
cj->stars.parts[pjd].density.wcount,
cj->stars.parts[pjd].density.wcount_dh);
}
}
}
}
fclose(file);
}
/* Just a forward declaration... */
void runner_dopair_branch_stars_density(struct runner *r, struct cell *ci,
struct cell *cj, int limit_h_min,
int limit_h_max);
void runner_doself_branch_stars_density(struct runner *r, struct cell *c,
int limit_h_min, int limit_h_max);
void runner_dopair_subset_branch_stars_density(struct runner *r,
struct cell *restrict ci,
struct spart *restrict sparts_i,
int *restrict ind, int scount,
struct cell *restrict cj);
void runner_doself_subset_branch_stars_density(struct runner *r,
struct cell *restrict ci,
struct spart *restrict sparts,
int *restrict ind, int scount);
/* And go... */
int main(int argc, char *argv[]) {
#ifdef HAVE_SETAFFINITY
engine_pin();
#endif
size_t runs = 0, particles = 0;
size_t sparticles = 0;
double h = 1.23485, size = 1.;
double perturbation = 0., h_pert = 0.;
char outputFileNameExtension[100] = "";
char outputFileName[200] = "";
/* Initialize CPU frequency, this also starts time. */
unsigned long long cpufreq = 0;
clocks_set_cpufreq(cpufreq);
/* Choke on FP-exceptions */
#ifdef HAVE_FE_ENABLE_EXCEPT
feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
#endif
/* Get some randomness going */
srand(0);
int c;
while ((c = getopt(argc, argv, "s:h:p:n:N:r:t:d:f:")) != -1) {
switch (c) {
case 'h':
sscanf(optarg, "%lf", &h);
break;
case 'p':
sscanf(optarg, "%lf", &h_pert);
break;
case 's':
sscanf(optarg, "%lf", &size);
break;
case 'n':
sscanf(optarg, "%zu", &particles);
break;
case 'N':
sscanf(optarg, "%zu", &sparticles);
break;
case 'r':
sscanf(optarg, "%zu", &runs);
break;
case 'd':
sscanf(optarg, "%lf", &perturbation);
break;
case 'f':
strcpy(outputFileNameExtension, optarg);
break;
case '?':
error("Unknown option.");
break;
}
}
if (h < 0 || particles == 0 || runs == 0 || sparticles == 0) {
printf(
"\nUsage: %s -n PARTICLES_PER_AXIS -N SPARTICLES_PER_AXIS -r "
"NUMBER_OF_RUNS [OPTIONS...]\n"
"\nGenerates 27 cells, filled with particles on a Cartesian grid."
"\nThese are then interacted using runner_dopair_stars_density() and "
"runner_doself_stars_density()."
"\n\nOptions:"
"\n-h DISTANCE=1.2348 - Smoothing length in units of "
"\n-p - Random fractional change in h, h=h*random(1,p)"
"\n-s size - Physical size of the cell"
"\n-d pert - Perturbation to apply to the particles [0,1["
"\n-f fileName - Part of the file name used to save the dumps\n",
argv[0]);
exit(1);
}
/* Help users... */
message("DOSELF1 function called: %s", DOSELF1_NAME);
message("DOPAIR1 function called: %s", DOPAIR1_NAME);
message("Smoothing length: h = %f", h * size);
message("Kernel: %s", kernel_name);
message("Neighbour target: N = %f", pow_dimension(h) * kernel_norm);
printf("\n");
/* Build the infrastructure */
struct space space;
space.periodic = 1;
space.dim[0] = 3.;
space.dim[1] = 3.;
space.dim[2] = 3.;
struct hydro_props hp;
hp.eta_neighbours = h;
hp.h_tolerance = 1e0;
hp.h_max = FLT_MAX;
hp.max_smoothing_iterations = 1;
hp.CFL_condition = 0.1;
struct stars_props stars_p;
stars_p.eta_neighbours = h;
stars_p.h_tolerance = 1e0;
stars_p.max_smoothing_iterations = 1;
struct engine engine;
engine.s = &space;
engine.time = 0.1f;
engine.ti_current = 8;
engine.max_active_bin = num_time_bins;
engine.hydro_properties = &hp;
engine.stars_properties = &stars_p;
engine.nodeID = NODE_ID;
struct cosmology cosmo;
cosmology_init_no_cosmo(&cosmo);
engine.cosmology = &cosmo;
struct runner runner;
runner.e = &engine;
struct lightcone_array_props lightcone_array_properties;
lightcone_array_properties.nr_lightcones = 0;
engine.lightcone_array_properties = &lightcone_array_properties;
/* Construct some cells */
struct cell *cells[27];
struct cell *main_cell;
static long long partId = 0;
long long spartId = particles * particles * particles * 27;
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 3; ++k) {
double offset[3] = {i * size, j * size, k * size};
cells[i * 9 + j * 3 + k] =
make_cell(particles, sparticles, offset, size, h, &partId, &spartId,
perturbation, h_pert);
runner_do_drift_part(&runner, cells[i * 9 + j * 3 + k], 0);
runner_do_drift_spart(&runner, cells[i * 9 + j * 3 + k], 0);
runner_do_hydro_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0, 0,
0);
runner_do_stars_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0, 0);
}
}
}
/* Store the main cell for future use */
main_cell = cells[13];
ticks timings[27];
for (int i = 0; i < 27; i++) timings[i] = 0;
ticks time = 0;
for (size_t i = 0; i < runs; ++i) {
/* Zero the fields */
for (int j = 0; j < 27; ++j) zero_particle_fields(cells[j]);
const ticks tic = getticks();
#if defined(TEST_DOSELF_SUBSET) || defined(TEST_DOPAIR_SUBSET)
int *pid = NULL;
int scount = 0;
if ((pid = (int *)malloc(sizeof(int) * main_cell->stars.count)) == NULL)
error("Can't allocate memory for pid.");
for (int k = 0; k < main_cell->stars.count; k++)
if (spart_is_active(&main_cell->stars.parts[k], &engine)) {
pid[scount] = k;
++scount;
}
#endif
/* Run all the pairs */
for (int j = 0; j < 27; ++j) {
if (cells[j] != main_cell) {
const ticks sub_tic = getticks();
#ifdef TEST_DOPAIR_SUBSET
DOPAIR1_SUBSET(&runner, main_cell, main_cell->stars.parts, pid, scount,
cells[j]);
#else
DOPAIR1(&runner, main_cell, cells[j], /*limit_h_min=*/0,
/*limit_h_max=*/0);
#endif
timings[j] += getticks() - sub_tic;
}
}
/* And now the self-interaction */
const ticks self_tic = getticks();
#ifdef TEST_DOSELF_SUBSET
DOSELF1_SUBSET(&runner, main_cell, main_cell->stars.parts, pid, scount);
#else
DOSELF1(&runner, main_cell, /*limit_h_min=*/0,
/*limit_h_max=*/0);
#endif
timings[13] += getticks() - self_tic;
const ticks toc = getticks();
time += toc - tic;
/* Let's get physical ! */
end_calculation(main_cell, &cosmo);
/* Dump if necessary */
if (i % 50 == 0) {
sprintf(outputFileName, "swift_star_dopair_27_%.150s.dat",
outputFileNameExtension);
dump_particle_fields(outputFileName, main_cell, cells);
}
}
/* Output timing */
ticks corner_time = timings[0] + timings[2] + timings[6] + timings[8] +
timings[18] + timings[20] + timings[24] + timings[26];
ticks edge_time = timings[1] + timings[3] + timings[5] + timings[7] +
timings[9] + timings[11] + timings[15] + timings[17] +
timings[19] + timings[21] + timings[23] + timings[25];
ticks face_time = timings[4] + timings[10] + timings[12] + timings[14] +
timings[16] + timings[22];
ticks self_time = timings[13];
message("Corner calculations took: %.3f %s.",
clocks_from_ticks(corner_time / runs), clocks_getunit());
message("Edge calculations took: %.3f %s.",
clocks_from_ticks(edge_time / runs), clocks_getunit());
message("Face calculations took: %.3f %s.",
clocks_from_ticks(face_time / runs), clocks_getunit());
message("Self calculations took: %.3f %s.",
clocks_from_ticks(self_time / runs), clocks_getunit());
message("SWIFT calculation took: %.3f %s.",
clocks_from_ticks(time / runs), clocks_getunit());
/* Now perform a brute-force version for accuracy tests */
/* Zero the fields */
for (int i = 0; i < 27; ++i) zero_particle_fields(cells[i]);
const ticks tic = getticks();
/* Run all the brute-force pairs */
for (int j = 0; j < 27; ++j)
if (cells[j] != main_cell)
pairs_all_stars_density(&runner, main_cell, cells[j]);
/* And now the self-interaction */
self_all_stars_density(&runner, main_cell);
const ticks toc = getticks();
/* Let's get physical ! */
end_calculation(main_cell, &cosmo);
/* Dump */
sprintf(outputFileName, "star_brute_force_27_%.150s.dat",
outputFileNameExtension);
dump_particle_fields(outputFileName, main_cell, cells);
/* Output timing */
message("Brute force calculation took : %.3f %s.",
clocks_from_ticks(toc - tic), clocks_getunit());
/* Clean things to make the sanitizer happy ... */
for (int i = 0; i < 27; ++i) clean_up(cells[i]);
return 0;
}