/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
* Matthieu Schaller (schaller@strw.leidenuniv.nl)
* 2015 Peter W. Draper (p.w.draper@durham.ac.uk)
* Angus Lepper (angus.lepper@ed.ac.uk)
* 2016 John A. Regan (john.a.regan@durham.ac.uk)
* Tom Theuns (tom.theuns@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
/* Config parameters. */
#include <config.h>

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

/* MPI headers. */
#ifdef WITH_MPI
#include <mpi.h>
#endif

#ifdef HAVE_LIBNUMA
#include <numa.h>
#include <numaif.h>
#endif
/* This object's header. */
#include "engine.h"
/* Local headers. */
#include "active.h"
#include "atomic.h"
#include "black_holes_properties.h"
#include "cell.h"
#include "chemistry.h"
#include "clocks.h"
#include "cooling.h"
#include "cooling_properties.h"
#include "cosmology.h"
#include "csds.h"
#include "csds_io.h"
#include "cycle.h"
#include "debug.h"
#include "equation_of_state.h"
#include "error.h"
#include "extra_io.h"
#include "feedback.h"
#include "fof.h"
#include "forcing.h"
#include "gravity.h"
#include "gravity_cache.h"
#include "hydro.h"
#include "lightcone/lightcone.h"
#include "lightcone/lightcone_array.h"
#include "line_of_sight.h"
#include "map.h"
#include "memuse.h"
#include "minmax.h"
#include "mpiuse.h"
#include "multipole_struct.h"
#include "neutrino.h"
#include "neutrino_properties.h"
#include "output_list.h"
#include "output_options.h"
#include "partition.h"
#include "potential.h"
#include "power_spectrum.h"
#include "pressure_floor.h"
#include "profiler.h"
#include "proxy.h"
#include "restart.h"
#include "rt_properties.h"
#include "runner.h"
#include "sink_properties.h"
#include "sort_part.h"
#include "star_formation.h"
#include "star_formation_logger.h"
#include "stars_io.h"
#include "statistics.h"
#include "timers.h"
#include "tools.h"
#include "units.h"
#include "velociraptor_interface.h"
/* Human-readable names of the engine policies.
 * NOTE(review): the order of these strings must match the bit order of the
 * engine_policy enum (presumably declared in engine.h) — confirm when adding
 * a new policy. */
const char *engine_policy_names[] = {"none",
                                     "rand",
                                     "steal",
                                     "keep",
                                     "block",
                                     "cpu tight",
                                     "mpi",
                                     "numa affinity",
                                     "hydro",
                                     "self gravity",
                                     "external gravity",
                                     "cosmological integration",
                                     "drift everything",
                                     "reconstruct multi-poles",
                                     "temperature",
                                     "cooling",
                                     "stars",
                                     "structure finding",
                                     "star formation",
                                     "feedback",
                                     "black holes",
                                     "fof search",
                                     "time-step limiter",
                                     "time-step sync",
                                     "csds",
                                     "line of sight",
                                     "sink",
                                     "rt",
                                     "power spectra",
                                     "moving mesh",
                                     "moving mesh hydro"};
/* Default snapshot sub-sampling factors: zero for every particle type,
 * i.e. no sub-sampling unless overridden by the parameter file. */
const int engine_default_snapshot_subsample[swift_type_count] = {0};

/** The rank of the engine as a global variable (for messages). */
int engine_rank;

/** The current step of the engine as a global variable (for messages). */
int engine_current_step;
/**
 * @brief Link a density/force task to a cell.
 *
 * Atomically prepends a new #link holding @c t to the list @c *l. The link
 * itself is taken from the engine's pre-allocated link table; the table is
 * never grown, so overflowing it is a fatal error.
 *
 * @param e The #engine.
 * @param l A pointer to the #link, will be modified atomically.
 * @param t The #task.
 */
void engine_addlink(struct engine *e, struct link **l, struct task *t) {

#ifdef SWIFT_DEBUG_CHECKS
  if (t == NULL) {
    error("Trying to link NULL task.");
  }
#endif

  /* Get the next free link. */
  const size_t ind = atomic_inc(&e->nr_links);
  if (ind >= e->size_links) {
    error(
        "Link table overflow. Increase the value of "
        "`Scheduler:links_per_tasks`.");
  }
  struct link *res = &e->links[ind];

  /* Set it atomically: res becomes the new list head, pointing at the old
   * head returned by the swap. */
  res->t = t;
  res->next = atomic_swap(l, res);
}
/**
 * @brief Repartition the cells amongst the nodes.
 *
 * Frees the tasks, sorting indices and foreign-particle buffers (all of
 * which are regenerated at the next rebuild) before redistributing the
 * particles, so the redistribution runs with minimal memory in use.
 *
 * @param e The #engine.
 */
void engine_repartition(struct engine *e) {

#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))

  ticks tic = getticks();

#ifdef SWIFT_DEBUG_CHECKS
  /* Be verbose about this. */
  if (e->nodeID == 0 || e->verbose) message("repartitioning space");
  fflush(stdout);

  /* Check that all cells have been drifted to the current time */
  space_check_drift_point(e->s, e->ti_current, /*check_multipoles=*/0);
#endif

  /* Clear the repartition flag. */
  e->forcerepart = 0;

  /* Nothing to do if only using a single node. Also avoids METIS
   * bug that doesn't handle this case well. */
  if (e->nr_nodes == 1) return;

  /* Generate the fixed costs include file. */
  if (e->step > 3 && e->reparttype->trigger <= 1.f) {
    task_dump_stats("partition_fixed_costs.h", e,
                    /* task_dump_threshold = */ 0.f,
                    /* header = */ 1, /* allranks = */ 1);
  }

  /* Do the repartitioning. */
  partition_repartition(e->reparttype, e->nodeID, e->nr_nodes, e->s,
                        e->sched.tasks, e->sched.nr_tasks);

  /* Partitioning requires copies of the particles, so we need to reduce the
   * memory in use to the minimum, we can free the sorting indices and the
   * tasks as these will be regenerated at the next rebuild. Also the foreign
   * particle arrays can go as these will be regenerated in proxy exchange. */

  /* Sorting indices. */
  if (e->s->cells_top != NULL) space_free_cells(e->s);

  /* Report the time spent in the different task categories */
  if (e->verbose) scheduler_report_task_times(&e->sched, e->nr_threads);

  /* Task arrays. */
  scheduler_free_tasks(&e->sched);

  /* Foreign parts. (no need to nullify the cell pointers as the cells
   * will be regenerated) */
  space_free_foreign_parts(e->s, /*clear_cell_pointers=*/0);

  /* Now comes the tricky part: Exchange particles between all nodes.
     This is done in two steps, first allreducing a matrix of
     how many particles go from where to where, then re-allocating
     the parts array, and emitting the sends and receives.
     Finally, the space, tasks, and proxies need to be rebuilt. */

  /* Redistribute the particles between the nodes. */
  engine_redistribute(e);

  /* Make the proxies. */
  engine_makeproxies(e);

  /* Tell the engine it should re-build whenever possible */
  e->forcerebuild = 1;

  /* Flag that a repartition has taken place */
  e->step_props |= engine_step_prop_repartition;

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
#else
  if (e->reparttype->type != REPART_NONE)
    error("SWIFT was not compiled with MPI and METIS or ParMETIS support.");

  /* Clear the repartition flag. */
  e->forcerepart = 0;
#endif
}
/**
 * @brief Decide whether trigger a repartition the cells amongst the nodes.
 *
 * Rank 0 gathers the per-rank CPU times and memory use, decides whether the
 * load imbalance exceeds the configured trigger, broadcasts the decision,
 * and appends balance logs to 'rank_cpu_balance.log' and
 * 'rank_memory_balance.log'.
 *
 * @param e The #engine.
 */
void engine_repartition_trigger(struct engine *e) {

#ifdef WITH_MPI

  const ticks tic = getticks();

  /* Static flag: whether the log files have already been created (truncate
   * on first open, append afterwards). On restart the files already exist,
   * so pretend they were opened. */
  static int opened = 0;
  if (e->restarting) opened = 1;

  /* Do nothing if there have not been enough steps since the last repartition
   * as we don't want to repeat this too often or immediately after a
   * repartition step, or also immediately on restart. We check all this
   * even when we are not repartitioning as the balance logs can still be
   * interesting. */
  if (e->step - e->last_repartition >= 2 && !e->restarting) {

    /* If we have fixed costs available and this is step 2 or we are forcing
     * repartitioning then we do a forced fixed costs repartition regardless. */
    int forced = 0;
    if (e->reparttype->type != REPART_NONE) {
      if (e->reparttype->trigger > 1 ||
          (e->step == 2 && e->reparttype->use_fixed_costs)) {
        if (e->reparttype->trigger > 1) {
          /* trigger > 1 means: repartition every 'trigger' steps. */
          if ((e->step % (int)e->reparttype->trigger) == 0) e->forcerepart = 1;
        } else {
          e->forcerepart = 1;
        }
        e->reparttype->use_ticks = 0;
        forced = 1;
      }
    }

    /* We only check the CPU loads when we have processed a significant number
     * of all particles as we require all tasks to have timings or are
     * interested in the various balances logs. */
    if ((e->updates > 1 &&
         e->updates >= e->total_nr_parts * e->reparttype->minfrac) ||
        (e->g_updates > 1 &&
         e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) {

      /* Are we using the task tick timings or fixed costs? */
      if (e->reparttype->use_fixed_costs > 1) {
        e->reparttype->use_ticks = 0;
      } else {
        e->reparttype->use_ticks = 1;
      }

      /* Get the resident size of the process for the memory logs. */
      long size, resident, shared, text, library, data, dirty;
      memuse_use(&size, &resident, &shared, &text, &data, &library, &dirty);

      /* Gather it together with the CPU times used by the tasks in the last
       * step (and the deadtime fraction, for output). */
      double timemem[4] = {
          e->usertime_last_step, e->systime_last_step, (double)resident,
          e->local_deadtime / (e->nr_threads * e->wallclock_time)};

      /* Per-rank quadruples [user, sys, resident, deadfrac], gathered on
       * rank 0. */
      double timemems[e->nr_nodes * 4];
      MPI_Gather(timemem, 4, MPI_DOUBLE, timemems, 4, MPI_DOUBLE, 0,
                 MPI_COMM_WORLD);
      if (e->nodeID == 0) {

        /* Get the range and mean of the two CPU times and memory. */
        double umintime = timemems[0];
        double umaxtime = timemems[0];

        double smintime = timemems[1];
        double smaxtime = timemems[1];

        double minmem = timemems[2];
        double maxmem = timemems[2];

        double tmintime = umintime + smintime;
        double tmaxtime = umaxtime + smaxtime;

        double usum = timemems[0];
        double ssum = timemems[1];
        double tsum = usum + ssum;

        double msum = timemems[2];

        for (int k = 4; k < e->nr_nodes * 4; k += 4) {
          if (timemems[k] > umaxtime) umaxtime = timemems[k];
          if (timemems[k] < umintime) umintime = timemems[k];

          if (timemems[k + 1] > smaxtime) smaxtime = timemems[k + 1];
          if (timemems[k + 1] < smintime) smintime = timemems[k + 1];

          double total = timemems[k] + timemems[k + 1];
          if (total > tmaxtime) tmaxtime = total;
          if (total < tmintime) tmintime = total;

          usum += timemems[k];
          ssum += timemems[k + 1];
          tsum += total;

          if (timemems[k + 2] > maxmem) maxmem = timemems[k + 2];
          if (timemems[k + 2] < minmem) minmem = timemems[k + 2];
          msum += timemems[k + 2];
        }
        double umean = usum / (double)e->nr_nodes;
        double smean = ssum / (double)e->nr_nodes;
        double tmean = tsum / (double)e->nr_nodes;
        double mmean = msum / (double)e->nr_nodes;

        /* Are we out of balance and need to repartition? */
        /* ---------------------------------------------- */
        double abs_trigger = fabs(e->reparttype->trigger);
        double balance = (umaxtime - umintime) / umean;
        if (e->reparttype->type != REPART_NONE) {

          /* When forced we don't care about the balance. */
          if (!forced) {
            if (balance > abs_trigger) {
              if (e->verbose)
                message("trigger fraction %.3f > %.3f will repartition",
                        balance, abs_trigger);
              e->forcerepart = 1;
            } else {
              if (e->verbose && e->reparttype->type != REPART_NONE)
                message("trigger fraction %.3f =< %.3f will not repartition",
                        balance, abs_trigger);
            }
          }

        } else {
          /* Not repartitioning, would that have been done otherwise? */
          if (e->verbose) {
            if (balance > abs_trigger) {
              message("trigger fraction %.3f > %.3f would have repartitioned",
                      balance, abs_trigger);
            } else {
              message(
                  "trigger fraction %.3f =< %.3f would not have repartitioned",
                  balance, abs_trigger);
            }
          }
        }

        /* Keep logs of all CPU times and resident memory size for debugging
         * load issues. */
        FILE *timelog = NULL;
        FILE *memlog = NULL;
        if (!opened) {
          timelog = fopen("rank_cpu_balance.log", "w");
          if (timelog == NULL)
            error("Could not create file 'rank_cpu_balance.log'.");
          fprintf(timelog, "# step rank user sys sum deadfrac\n");

          memlog = fopen("rank_memory_balance.log", "w");
          if (memlog == NULL)
            error("Could not create file 'rank_memory_balance.log'.");
          fprintf(memlog, "# step rank resident\n");

          opened = 1;
        } else {
          timelog = fopen("rank_cpu_balance.log", "a");
          if (timelog == NULL)
            error("Could not open file 'rank_cpu_balance.log' for writing.");

          memlog = fopen("rank_memory_balance.log", "a");
          if (memlog == NULL)
            error("Could not open file 'rank_memory_balance.log' for writing.");
        }

        for (int k = 0; k < e->nr_nodes * 4; k += 4) {

          fprintf(timelog, "%d %d %f %f %f %f\n", e->step, k / 4, timemems[k],
                  timemems[k + 1], timemems[k] + timemems[k + 1],
                  timemems[k + 3]);

          fprintf(memlog, "%d %d %ld\n", e->step, k / 4, (long)timemems[k + 2]);
        }

        fprintf(timelog, "# %d mean times: %f %f %f\n", e->step, umean, smean,
                tmean);
        if (abs_trigger > 1.f) abs_trigger = 0.f; /* Not relevant. */
        double systime = smean > 0. ? (smaxtime - smintime) / smean : 0.;
        double ttime = tmean > 0. ? (tmaxtime - tmintime) / tmean : 0.;
        fprintf(timelog,
                "# %d balance: %f, expected: %f (sys: %f, total: %f)\n",
                e->step, balance, abs_trigger, systime, ttime);

        fclose(timelog);

        fprintf(memlog, "# %d mean resident memory: %f, balance: %f\n", e->step,
                mmean, (maxmem - minmem) / mmean);
        fclose(memlog);
      }

      /* All nodes do this together, so send to other ranks. */
      MPI_Bcast(&e->forcerepart, 1, MPI_INT, 0, MPI_COMM_WORLD);
    }

    /* Remember we did this. */
    if (e->forcerepart) e->last_repartition = e->step;
  }

  if (e->verbose)
    message("took %.3f %s", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
#endif
}
/**
 * @brief Exchange cell structures with other nodes.
 *
 * @param e The #engine.
 */
void engine_exchange_cells(struct engine *e) {

#ifdef WITH_MPI

  const ticks start = getticks();
  const int with_self_gravity = e->policy & engine_policy_self_gravity;

  /* Ship the cell metadata to the neighbouring ranks via the proxies. */
  proxy_cells_exchange(e->proxies, e->nr_proxies, e->s, with_self_gravity);

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - start),
            clocks_getunit());

#else
  error("SWIFT was not compiled with MPI support.");
#endif
}
/**
 * @brief Exchange extra information for the grid construction with other nodes.
 *
 * @param e The #engine.
 */
void engine_exchange_grid_extra(struct engine *e) {

#ifdef WITH_MPI

#ifdef SWIFT_DEBUG_CHECKS
  /* Calling this without the grid policy is a programming error. */
  if (!(e->policy & engine_policy_grid))
    error("Not running with grid, but trying to exchange grid information!");
#endif

  const ticks start = getticks();

  /* Hand the extra grid data over to the neighbouring ranks. */
  proxy_grid_extra_exchange(e->proxies, e->nr_proxies, e->s);

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - start),
            clocks_getunit());

#else
  error("SWIFT was not compiled with MPI support.");
#endif
}
/**
 * @brief Exchanges the top-level multipoles between all the nodes
 * such that every node has a multipole for each top-level cell.
 *
 * @param e The #engine.
 */
void engine_exchange_top_multipoles(struct engine *e) {

#ifdef WITH_MPI

  ticks tic = getticks();

#ifdef SWIFT_DEBUG_CHECKS
  for (int i = 0; i < e->s->nr_cells; ++i) {
    const struct gravity_tensors *m = &e->s->multipoles_top[i];
    if (e->s->cells_top[i].nodeID == engine_rank) {
      /* A local multipole carrying mass must have its CoM inside the box. */
      if (m->m_pole.M_000 > 0.) {
        if (m->CoM[0] < 0. || m->CoM[0] > e->s->dim[0])
          error("Invalid multipole position in X");
        if (m->CoM[1] < 0. || m->CoM[1] > e->s->dim[1])
          error("Invalid multipole position in Y");
        if (m->CoM[2] < 0. || m->CoM[2] > e->s->dim[2])
          error("Invalid multipole position in Z");
      }
    } else {
      /* Foreign multipoles must still be all-zero at this point; this is
       * required for the XOR-based reduction below to be valid. */
      if (m->m_pole.M_000 != 0.) error("Non-zero mass for foreign m-pole");
      if (m->CoM[0] != 0.) error("Non-zero position in X for foreign m-pole");
      if (m->CoM[1] != 0.) error("Non-zero position in Y for foreign m-pole");
      if (m->CoM[2] != 0.) error("Non-zero position in Z for foreign m-pole");
      if (m->m_pole.num_gpart != 0)
        error("Non-zero gpart count in foreign m-pole");
    }
  }
#endif

  /* Each node (space) has constructed its own top-level multipoles.
   * We now need to make sure every other node has a copy of everything.
   *
   * We use our home-made reduction operation that simply performs a XOR
   * operation on the multipoles. Since only local multipoles are non-zero and
   * each multipole is only present once, the bit-by-bit XOR will
   * create the desired result.
   */
  int err = MPI_Allreduce(MPI_IN_PLACE, e->s->multipoles_top, e->s->nr_cells,
                          multipole_mpi_type, multipole_mpi_reduce_op,
                          MPI_COMM_WORLD);
  if (err != MPI_SUCCESS)
    mpi_error(err, "Failed to all-reduce the top-level multipoles.");

#ifdef SWIFT_DEBUG_CHECKS
  long long counter = 0;

  /* Let's check that what we received makes sense */
  for (int i = 0; i < e->s->nr_cells; ++i) {
    const struct gravity_tensors *m = &e->s->multipoles_top[i];
    counter += m->m_pole.num_gpart;
    if (m->m_pole.num_gpart < 0) {
      error("m->m_pole.num_gpart is negative: %lld", m->m_pole.num_gpart);
    }
    if (m->m_pole.M_000 > 0.) {
      if (m->CoM[0] < 0. || m->CoM[0] > e->s->dim[0])
        error("Invalid multipole position in X");
      if (m->CoM[1] < 0. || m->CoM[1] > e->s->dim[1])
        error("Invalid multipole position in Y");
      if (m->CoM[2] < 0. || m->CoM[2] > e->s->dim[2])
        error("Invalid multipole position in Z");
    }
  }
  /* After the reduction, the multipole particle counts must sum to the
   * global particle count. */
  if (counter != e->total_nr_gparts)
    error(
        "Total particles in multipoles inconsistent with engine.\n "
        " counter = %lld, nr_gparts = %lld",
        counter, e->total_nr_gparts);
#endif

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
#else
  error("SWIFT was not compiled with MPI support.");
#endif
}
/**
 * @brief Exchanges the multipoles of the cells handled by the proxies.
 *
 * For every proxy we post non-blocking receives for the packed multipoles of
 * the incoming cell hierarchies and non-blocking sends for the outgoing ones,
 * wait for completion and unpack the received data recursively into the
 * foreign cells.
 *
 * @param e The #engine.
 */
void engine_exchange_proxy_multipoles(struct engine *e) {

#ifdef WITH_MPI

  const ticks tic = getticks();

  /* Start by counting the number of cells to send and receive */
  int count_send_cells = 0;
  int count_recv_cells = 0;
  int count_send_requests = 0;
  int count_recv_requests = 0;

  /* Loop over the proxies. */
  for (int pid = 0; pid < e->nr_proxies; pid++) {

    /* Get a handle on the proxy. */
    const struct proxy *p = &e->proxies[pid];

    /* Now collect the number of requests associated (one request per
     * top-level cell, covering its whole packed hierarchy). */
    count_recv_requests += p->nr_cells_in;
    count_send_requests += p->nr_cells_out;

    /* And the actual number of things we are going to ship */
    for (int k = 0; k < p->nr_cells_in; k++)
      count_recv_cells += p->cells_in[k]->mpi.pcell_size;

    for (int k = 0; k < p->nr_cells_out; k++)
      count_send_cells += p->cells_out[k]->mpi.pcell_size;
  }

  /* Allocate the buffers for the packed data */
  struct gravity_tensors *buffer_send = NULL;
  if (swift_memalign("send_gravity_tensors", (void **)&buffer_send,
                     SWIFT_CACHE_ALIGNMENT,
                     count_send_cells * sizeof(struct gravity_tensors)) != 0)
    error("Unable to allocate memory for multipole transactions");

  struct gravity_tensors *buffer_recv = NULL;
  if (swift_memalign("recv_gravity_tensors", (void **)&buffer_recv,
                     SWIFT_CACHE_ALIGNMENT,
                     count_recv_cells * sizeof(struct gravity_tensors)) != 0)
    error("Unable to allocate memory for multipole transactions");

  /* Also allocate the MPI requests */
  const int count_requests = count_send_requests + count_recv_requests;
  MPI_Request *requests =
      (MPI_Request *)malloc(sizeof(MPI_Request) * count_requests);
  if (requests == NULL) error("Unable to allocate memory for MPI requests");

  int this_request = 0;
  int this_recv = 0;
  int this_send = 0;

  /* Loop over the proxies to issue the receives. */
  for (int pid = 0; pid < e->nr_proxies; pid++) {

    /* Get a handle on the proxy. */
    const struct proxy *p = &e->proxies[pid];

    for (int k = 0; k < p->nr_cells_in; k++) {

      const int num_elements = p->cells_in[k]->mpi.pcell_size;

      /* Receive everything */
      MPI_Irecv(&buffer_recv[this_recv], num_elements, multipole_mpi_type,
                p->cells_in[k]->nodeID, p->cells_in[k]->mpi.tag, MPI_COMM_WORLD,
                &requests[this_request]);

      /* Move to the next slot in the buffers */
      this_recv += num_elements;
      this_request++;
    }

    /* Loop over the proxies to issue the sends. */
    for (int k = 0; k < p->nr_cells_out; k++) {

      /* Number of multipoles in this cell hierarchy */
      const int num_elements = p->cells_out[k]->mpi.pcell_size;

      /* Let's pack everything recursively */
      cell_pack_multipoles(p->cells_out[k], &buffer_send[this_send]);

      /* Send everything (note the use of cells_in[0] to get the correct node
       * ID. */
      MPI_Isend(&buffer_send[this_send], num_elements, multipole_mpi_type,
                p->cells_in[0]->nodeID, p->cells_out[k]->mpi.tag,
                MPI_COMM_WORLD, &requests[this_request]);

      /* Move to the next slot in the buffers */
      this_send += num_elements;
      this_request++;
    }
  }

  /* Wait for all the requests to arrive home */
  MPI_Status *stats = (MPI_Status *)malloc(count_requests * sizeof(MPI_Status));
  if (stats == NULL) error("Unable to allocate memory for MPI statuses");
  int res;
  if ((res = MPI_Waitall(count_requests, requests, stats)) != MPI_SUCCESS) {
    for (int k = 0; k < count_requests; ++k) {
      char buff[MPI_MAX_ERROR_STRING];
      MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
      message("request from source %i, tag %i has error '%s'.",
              stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff);
    }
    error("Failed during waitall for multipole data.");
  }

  /* Let's now unpack the multipoles at the right place */
  this_recv = 0;
  for (int pid = 0; pid < e->nr_proxies; pid++) {

    /* Get a handle on the proxy. */
    const struct proxy *p = &e->proxies[pid];

    for (int k = 0; k < p->nr_cells_in; k++) {

      const int num_elements = p->cells_in[k]->mpi.pcell_size;

#ifdef SWIFT_DEBUG_CHECKS
      /* Check that the first element (top-level cell's multipole) matches what
       * we received */
      if (p->cells_in[k]->grav.multipole->m_pole.num_gpart !=
          buffer_recv[this_recv].m_pole.num_gpart)
        error("Current: M_000=%e num_gpart=%lld\n New: M_000=%e num_gpart=%lld",
              p->cells_in[k]->grav.multipole->m_pole.M_000,
              p->cells_in[k]->grav.multipole->m_pole.num_gpart,
              buffer_recv[this_recv].m_pole.M_000,
              buffer_recv[this_recv].m_pole.num_gpart);
#endif

      /* Unpack recursively */
      cell_unpack_multipoles(p->cells_in[k], &buffer_recv[this_recv]);

      /* Move to the next slot in the buffers */
      this_recv += num_elements;
    }
  }

  /* Free everything */
  free(stats);
  free(buffer_send);
  free(buffer_recv);
  free(requests);

  /* How much time did this take? */
  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
#else
  error("SWIFT was not compiled with MPI support.");
#endif
}
/**
 * @brief Allocate memory for the foreign particles.
 *
 * We look into the proxies for cells that have tasks and count
 * the number of particles in these cells. We then allocate
 * memory and link all the cells that have tasks and all cells
 * deeper in the tree.
 *
 * When running FOF, we only need #gpart arrays so we restrict
 * the allocations to this particle type only
 *
 * @param e The #engine.
 * @param fof Are we allocating buffers just for FOF?
 */
void engine_allocate_foreign_particles(struct engine *e, const int fof) {

#ifdef WITH_MPI

  const int nr_proxies = e->nr_proxies;
  const int with_hydro =
      e->policy & (engine_policy_hydro | engine_policy_grid_hydro);
  const int with_stars = e->policy & engine_policy_stars;
  const int with_black_holes = e->policy & engine_policy_black_holes;
  const int with_sinks = e->policy & engine_policy_sinks;
  struct space *s = e->s;
  ticks tic = getticks();

  /* Count the number of particles we need to import and re-allocate
     the buffer if needed. */
  size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0,
         count_bparts_in = 0, count_sinks_in = 0;
  for (int k = 0; k < nr_proxies; k++) {
    for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {

      if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) {
        count_parts_in += cell_count_parts_for_tasks(e->proxies[k].cells_in[j]);
      }

      if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) {
        count_gparts_in +=
            cell_count_gparts_for_tasks(e->proxies[k].cells_in[j]);
      }

      /* For stars, we just use the numbers in the top-level cells */
      count_sparts_in +=
          e->proxies[k].cells_in[j]->stars.count + space_extra_sparts;

      /* For black holes, we just use the numbers in the top-level cells */
      count_bparts_in += e->proxies[k].cells_in[j]->black_holes.count;

      /* For sinks, we just use the numbers in the top-level cells + some
         extra space */
      count_sinks_in +=
          e->proxies[k].cells_in[j]->sinks.count + space_extra_sinks;
    }
  }

  if (!with_hydro && count_parts_in)
    error(
        "Not running with hydro but about to receive gas particles in "
        "proxies!");

  if (e->verbose)
    message("Counting number of foreign particles took %.3f %s.",
            clocks_from_ticks(getticks() - tic), clocks_getunit());

  tic = getticks();

  /* Allocate space for the foreign particles we will receive */
  size_t old_size_parts_foreign = s->size_parts_foreign;
  if (!fof && count_parts_in > s->size_parts_foreign) {
    if (s->parts_foreign != NULL) swift_free("parts_foreign", s->parts_foreign);
    s->size_parts_foreign = engine_foreign_alloc_margin * count_parts_in;
    if (swift_memalign("parts_foreign", (void **)&s->parts_foreign, part_align,
                       sizeof(struct part) * s->size_parts_foreign) != 0)
      error("Failed to allocate foreign part data.");
  }

  /* Allocate space for the foreign particles we will receive */
  size_t old_size_gparts_foreign = s->size_gparts_foreign;
  if (count_gparts_in > s->size_gparts_foreign) {
    if (s->gparts_foreign != NULL)
      swift_free("gparts_foreign", s->gparts_foreign);
    s->size_gparts_foreign = engine_foreign_alloc_margin * count_gparts_in;
    if (swift_memalign("gparts_foreign", (void **)&s->gparts_foreign,
                       gpart_align,
                       sizeof(struct gpart) * s->size_gparts_foreign) != 0)
      error("Failed to allocate foreign gpart data.");
  }

  /* Allocate space for the foreign particles we will receive */
  size_t old_size_sparts_foreign = s->size_sparts_foreign;
  if (!fof && count_sparts_in > s->size_sparts_foreign) {
    if (s->sparts_foreign != NULL)
      swift_free("sparts_foreign", s->sparts_foreign);
    s->size_sparts_foreign = engine_foreign_alloc_margin * count_sparts_in;
    if (swift_memalign("sparts_foreign", (void **)&s->sparts_foreign,
                       spart_align,
                       sizeof(struct spart) * s->size_sparts_foreign) != 0)
      error("Failed to allocate foreign spart data.");

#ifdef SWIFT_DEBUG_CHECKS
    /* Poison the buffer with recognisable sentinel values to catch use of
     * unfilled slots. */
    bzero(s->sparts_foreign, s->size_sparts_foreign * sizeof(struct spart));
    for (size_t i = 0; i < s->size_sparts_foreign; ++i) {
      s->sparts_foreign[i].time_bin = time_bin_not_created;
      s->sparts_foreign[i].id = -43;
    }
#endif
  }

  /* Allocate space for the foreign particles we will receive */
  size_t old_size_bparts_foreign = s->size_bparts_foreign;
  if (!fof && count_bparts_in > s->size_bparts_foreign) {
    if (s->bparts_foreign != NULL)
      swift_free("bparts_foreign", s->bparts_foreign);
    s->size_bparts_foreign = engine_foreign_alloc_margin * count_bparts_in;
    if (swift_memalign("bparts_foreign", (void **)&s->bparts_foreign,
                       bpart_align,
                       sizeof(struct bpart) * s->size_bparts_foreign) != 0)
      error("Failed to allocate foreign bpart data.");
  }

  /* Allocate space for the foreign particles we will receive */
  size_t old_size_sinks_foreign = s->size_sinks_foreign;
  if (!fof && count_sinks_in > s->size_sinks_foreign) {
    if (s->sinks_foreign != NULL) swift_free("sinks_foreign", s->sinks_foreign);
    s->size_sinks_foreign = engine_foreign_alloc_margin * count_sinks_in;
    if (swift_memalign("sinks_foreign", (void **)&s->sinks_foreign, sink_align,
                       sizeof(struct sink) * s->size_sinks_foreign) != 0)
      error("Failed to allocate foreign sink data.");

#ifdef SWIFT_DEBUG_CHECKS
    bzero(s->sinks_foreign, s->size_sinks_foreign * sizeof(struct sink));
    /* Note: If you ever see a sink particle with id = -666, the following
       lines is the ones that sets the ID to this value. */
    for (size_t i = 0; i < s->size_sinks_foreign; ++i) {
      s->sinks_foreign[i].time_bin = time_bin_not_created;
      s->sinks_foreign[i].id = -666;
    }
#endif
  }

  if (e->verbose) {
    message(
        "Allocating %zd/%zd/%zd/%zd/%zd foreign part/gpart/spart/bpart/sink "
        "(%zd/%zd/%zd/%zd/%zd MB)",
        s->size_parts_foreign, s->size_gparts_foreign, s->size_sparts_foreign,
        s->size_bparts_foreign, s->size_sinks_foreign,
        s->size_parts_foreign * sizeof(struct part) / (1024 * 1024),
        s->size_gparts_foreign * sizeof(struct gpart) / (1024 * 1024),
        s->size_sparts_foreign * sizeof(struct spart) / (1024 * 1024),
        s->size_bparts_foreign * sizeof(struct bpart) / (1024 * 1024),
        s->size_sinks_foreign * sizeof(struct sink) / (1024 * 1024));

    if ((s->size_parts_foreign - old_size_parts_foreign) > 0 ||
        (s->size_gparts_foreign - old_size_gparts_foreign) > 0 ||
        (s->size_sparts_foreign - old_size_sparts_foreign) > 0 ||
        (s->size_bparts_foreign - old_size_bparts_foreign) > 0 ||
        (s->size_sinks_foreign - old_size_sinks_foreign) > 0) {
      message(
          "Re-allocations %zd/%zd/%zd/%zd/%zd part/gpart/spart/bpart/sink "
          "(%zd/%zd/%zd/%zd/%zd MB)",
          (s->size_parts_foreign - old_size_parts_foreign),
          (s->size_gparts_foreign - old_size_gparts_foreign),
          (s->size_sparts_foreign - old_size_sparts_foreign),
          (s->size_bparts_foreign - old_size_bparts_foreign),
          (s->size_sinks_foreign - old_size_sinks_foreign),
          (s->size_parts_foreign - old_size_parts_foreign) *
              sizeof(struct part) / (1024 * 1024),
          (s->size_gparts_foreign - old_size_gparts_foreign) *
              sizeof(struct gpart) / (1024 * 1024),
          (s->size_sparts_foreign - old_size_sparts_foreign) *
              sizeof(struct spart) / (1024 * 1024),
          (s->size_bparts_foreign - old_size_bparts_foreign) *
              sizeof(struct bpart) / (1024 * 1024),
          (s->size_sinks_foreign - old_size_sinks_foreign) *
              sizeof(struct sink) / (1024 * 1024));
    }
  }

  if (e->verbose)
    message("Allocating and zeroing arrays took %.3f %s.",
            clocks_from_ticks(getticks() - tic), clocks_getunit());

  tic = getticks();

  /* Unpack the cells and link to the particle data. */
  struct part *parts = s->parts_foreign;
  struct gpart *gparts = s->gparts_foreign;
  struct spart *sparts = s->sparts_foreign;
  struct bpart *bparts = s->bparts_foreign;
  struct sink *sinks = s->sinks_foreign;
  for (int k = 0; k < nr_proxies; k++) {
    for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {

      if (!fof && e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) {

        const size_t count_parts =
            cell_link_foreign_parts(e->proxies[k].cells_in[j], parts);
        parts = &parts[count_parts];
      }

      if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) {

        const size_t count_gparts =
            cell_link_foreign_gparts(e->proxies[k].cells_in[j], gparts);
        gparts = &gparts[count_gparts];
      }

      if (!fof && with_stars) {

        /* For stars, we just use the numbers in the top-level cells */
        cell_link_sparts(e->proxies[k].cells_in[j], sparts);
        sparts = &sparts[e->proxies[k].cells_in[j]->stars.count +
                         space_extra_sparts];
      }

      if (!fof && with_black_holes) {

        /* For black holes, we just use the numbers in the top-level cells */
        cell_link_bparts(e->proxies[k].cells_in[j], bparts);
        bparts = &bparts[e->proxies[k].cells_in[j]->black_holes.count];
      }

      if (!fof && with_sinks) {

        /* For sinks, we just use the numbers in the top-level cells */
        cell_link_sinks(e->proxies[k].cells_in[j], sinks);
        sinks =
            &sinks[e->proxies[k].cells_in[j]->sinks.count + space_extra_sinks];
      }
    }
  }

  /* Update the counters */
  s->nr_parts_foreign = parts - s->parts_foreign;
  s->nr_gparts_foreign = gparts - s->gparts_foreign;
  s->nr_sparts_foreign = sparts - s->sparts_foreign;
  s->nr_bparts_foreign = bparts - s->bparts_foreign;
  s->nr_sinks_foreign = sinks - s->sinks_foreign;

  if (e->verbose)
    message("Recursively linking foreign arrays took %.3f %s.",
            clocks_from_ticks(getticks() - tic), clocks_getunit());

#else
  error("SWIFT was not compiled with MPI support.");
#endif
}
/**
 * @brief Threadpool mapper counting the tasks of each type.
 *
 * @param map_data Pointer to the first #task of this chunk.
 * @param num_elements Number of tasks in the chunk.
 * @param extra_data Pointer to the shared array of counters
 *        (task_type_count + 1 slots; the last one tallies skipped tasks).
 */
void engine_do_tasks_count_mapper(void *map_data, int num_elements,
                                  void *extra_data) {

  const struct task *tasks = (struct task *)map_data;
  int *const global_counts = (int *)extra_data;

  /* Tally into a thread-private array first so we touch the shared
   * counters with as few atomics as possible. */
  int local_counts[task_type_count + 1] = {0};

  for (int i = 0; i < num_elements; i++) {
    if (tasks[i].skip) {
      /* Skipped tasks go into the extra slot past the last type. */
      local_counts[task_type_count]++;
    } else {
      local_counts[(int)tasks[i].type]++;
    }
  }

  /* Fold the local tallies into the shared counters. */
  for (int i = 0; i <= task_type_count; i++) {
    if (local_counts[i]) atomic_add(&global_counts[i], local_counts[i]);
  }
}
/**
 * @brief Prints the number of tasks in the engine
 *
 * Also prints per-cell task densities (useful for tuning the
 * 'Scheduler:tasks_per_cell' parameter) and, in debug MPI builds with
 * verbose == 2, checks that the global number of send tasks matches the
 * global number of recv tasks.
 *
 * @param e The #engine.
 */
void engine_print_task_counts(const struct engine *e) {

  const ticks tic = getticks();
  const struct scheduler *sched = &e->sched;
  const int nr_tasks = sched->nr_tasks;
  const struct task *const tasks = sched->tasks;

  /* Global tasks and cells when using MPI. */
#ifdef WITH_MPI
  if (e->nodeID == 0 && e->total_nr_tasks > 0)
    printf(
        "[%04i] %s engine_print_task_counts: System total: %lld,"
        " no. cells: %lld\n",
        e->nodeID, clocks_get_timesincestart(), e->total_nr_tasks,
        e->total_nr_cells);
  fflush(stdout);
#endif

  /* Report value that can be used to estimate the task_per_cells parameter. */
  float tasks_per_cell = (float)nr_tasks / (float)e->s->tot_cells;

#ifdef WITH_MPI
  message("Total = %d (per cell = %.2f)", nr_tasks, tasks_per_cell);

  /* And the system maximum on rank 0, only after first step, increase by our
   * margin to allow for some variation in repartitioning. */
  if (e->nodeID == 0 && e->total_nr_tasks > 0) {
    message("Total = %d (maximum per cell = %.2f)", nr_tasks,
            e->tasks_per_cell_max * engine_tasks_per_cell_margin);
  }

#else
  message("Total = %d (per cell = %.2f)", nr_tasks, tasks_per_cell);
#endif
  fflush(stdout);

  /* Count and print the number of each task type.
   * The extra slot past task_type_count collects the skipped tasks. */
  int counts[task_type_count + 1];
  for (int k = 0; k <= task_type_count; k++) counts[k] = 0;
  threadpool_map((struct threadpool *)&e->threadpool,
                 engine_do_tasks_count_mapper, (void *)tasks, nr_tasks,
                 sizeof(struct task), threadpool_auto_chunk_size, counts);

#ifdef WITH_MPI
  printf("[%04i] %s engine_print_task_counts: task counts are [ %s=%i",
         e->nodeID, clocks_get_timesincestart(), taskID_names[0], counts[0]);
#else
  printf("%s engine_print_task_counts: task counts are [ %s=%i",
         clocks_get_timesincestart(), taskID_names[0], counts[0]);
#endif
  for (int k = 1; k < task_type_count; k++)
    printf(" %s=%i", taskID_names[k], counts[k]);
  printf(" skipped=%i ]\n", counts[task_type_count]);
  fflush(stdout);
  message("nr_parts = %zu.", e->s->nr_parts);
  message("nr_gparts = %zu.", e->s->nr_gparts);
  message("nr_sink = %zu.", e->s->nr_sinks);
  message("nr_sparts = %zu.", e->s->nr_sparts);
  message("nr_bparts = %zu.", e->s->nr_bparts);

#if defined(SWIFT_DEBUG_CHECKS) && defined(WITH_MPI)
  if (e->verbose == 2) {
    /* check that the global number of sends matches the global number of
       recvs */
    int global_counts[2] = {counts[task_type_send], counts[task_type_recv]};
    MPI_Allreduce(MPI_IN_PLACE, global_counts, 2, MPI_INT, MPI_SUM,
                  MPI_COMM_WORLD);
    if (global_counts[0] != global_counts[1])
      error("Missing communications (%i sends, %i recvs)!", global_counts[0],
            global_counts[1]);
  }
#endif

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}
/**
 * @brief if necessary, estimate the number of tasks required given
 * the current tasks in use and the numbers of cells.
 *
 * If e->tasks_per_cell is set greater than 0.0 then that value is used
 * as the estimate of the average number of tasks per cell,
 * otherwise we attempt an estimate.
 *
 * @param e the #engine
 *
 * @return the estimated total number of tasks
 */
int engine_estimate_nr_tasks(const struct engine *e) {

  /* If the user supplied a value, trust it and skip the estimate. */
  float tasks_per_cell = e->tasks_per_cell;
  if (tasks_per_cell > 0.0f) {
    if (e->verbose)
      message("tasks per cell given as: %.2f, so maximum tasks: %d",
              e->tasks_per_cell, (int)(e->s->tot_cells * tasks_per_cell));
    return (int)(e->s->tot_cells * tasks_per_cell);
  }

  /* Our guess differs depending on the types of tasks we are using, but we
   * basically use a formula n1*ntopcells + n2*(totcells - ntopcells).
   * Where n1 is the expected maximum tasks per top-level/super cell, and
   * n2 the expected maximum tasks for all other cells. These should give
   * a safe upper limit. */
  int n1 = 0;
  int n2 = 0;
  if (e->policy & engine_policy_hydro) {
    /* 2 self (density, force), 1 sort, 26/2 density pairs
       26/2 force pairs, 1 drift, 3 ghosts, 2 kicks, 1 time-step,
       1 end_force, 1 collect, 2 extra space
     */
    n1 += 38;
    n2 += 2;
#ifdef WITH_MPI
    n1 += 6;
#endif

#ifdef EXTRA_HYDRO_LOOP
    n1 += 15;
#ifdef WITH_MPI
    n1 += 2;
#endif
#endif
  }
  if (e->policy & engine_policy_timestep_limiter) {
    n1 += 18;
    n2 += 1;
  }
  if (e->policy & engine_policy_self_gravity) {
    n1 += 125;
    n2 += 8;
#ifdef WITH_MPI
    n2 += 2;
#endif
  }
  if (e->policy & engine_policy_external_gravity) {
    n1 += 2;
  }
  if (e->policy & engine_policy_cosmology) {
    n1 += 2;
  }
  if (e->policy & engine_policy_cooling) {
    /* Cooling task + extra space */
    n1 += 2;
  }
  if (e->policy & engine_policy_star_formation) {
    n1 += 1;
  }
  if (e->policy & engine_policy_stars) {
    /* 2 self (density, feedback), 1 sort, 26/2 density pairs
       26/2 feedback pairs, 1 drift, 3 ghosts, 2 kicks, 1 time-step,
       1 end_force, 2 extra space
     */
    n1 += 37;
    n2 += 2;
#ifdef WITH_MPI
    n1 += 6;
#endif
  }
  if (e->policy & engine_policy_sinks) {
    /* 1 drift, 2 kicks, 1 time-step, 1 sink formation | 5
       density: 1 self + 13 pairs | 14
       swallow: 1 self + 13 pairs | 14
       do_gas_swallow: 1 self + 13 pairs | 14
       do_sink_swallow: 1 self + 13 pairs | 14
       ghosts: density_ghost, sink_ghost_1, sink_ghost_2 | 3
       implicit: sink_in, sink_out | 2 */
    n1 += 66;
    n2 += 3;

    if (e->policy & engine_policy_stars) {
      /* 1 star formation */
      n1 += 1;
    }
  }
  if (e->policy & engine_policy_fof) {
    n1 += 2;
  }
#if defined(WITH_CSDS)
  /* each cell logs its particles */
  if (e->policy & engine_policy_csds) {
    n1 += 1;
  }
#endif
  if (e->policy & engine_policy_rt) {
    /* gradient: 1 self + 13 pairs | 14
     * transport: 1 self + 13 pairs | + 14
     * implicits: in + out, transport_out | + 3
     * others: ghost1, ghost2, tchem, cell advance | + 4
     * sort, collect_times | + 2
     * 2 extra space | + 2 */
    n1 += 39;
#ifdef WITH_MPI
    n1 += 2;
#endif
  }
  if (e->policy & engine_policy_grid) {
    /* Grid construction: 1 self + 26 (asymmetric) pairs + 1 ghost + 1 sort */
    n1 += 29;
    n2 += 3;
#ifdef WITH_MPI
    n1 += 3;
#endif
  }
  if (e->policy & engine_policy_grid_hydro) {
    /* slope estimate: 1 self + 13 pairs (on average) | 14
     * others: 1 ghosts, 2 kicks, 1 drift, 1 timestep | + 7
     * Total: = 21 */
    n1 += 21;
    n2 += 2;
#ifdef EXTRA_HYDRO_LOOP
    /* slope limiter: 1 self + 13 pairs | + 14
     * flux: 1 self + 13 pairs | + 14
     * others: 2 ghost. | + 2
     * Total: = 30 */
    n1 += 30;
    n2 += 3;
#endif
#ifdef WITH_MPI
    n1 += 1;
#ifdef EXTRA_HYDRO_LOOP
    n1 += 1;
#endif
#endif
  }

#ifdef WITH_MPI
  /* We need fewer tasks per rank when using MPI, but we could have
   * imbalances, so we need to work using the locally active cells, not just
   * some equipartition amongst the nodes. Don't want to recurse the whole
   * cell tree, so just make a guess of the maximum possible total cells. */
  int ntop = 0;
  int ncells = 0;
  for (int k = 0; k < e->s->nr_cells; k++) {
    struct cell *c = &e->s->cells_top[k];

    /* Any cells with particles will have tasks (local & foreign). */
    int nparts = c->hydro.count + c->grav.count + c->stars.count +
                 c->black_holes.count + c->sinks.count;
    if (nparts > 0) {
      ntop++;
      ncells++;

      /* Count cell depth until we get below the parts per cell threshold.
       * Each split level multiplies the cell count by 8 (2^3). */
      int depth = 0;
      while (nparts > space_splitsize) {
        depth++;
        nparts /= 8;
        ncells += (1 << (depth * 3));
      }
    }
  }

  /* If no local cells, we are probably still initialising, so just keep
   * room for the top-level. */
  if (ncells == 0) {
    ntop = e->s->nr_cells;
    ncells = ntop;
  }
#else
  int ntop = e->s->nr_cells;
  int ncells = e->s->tot_cells;
#endif

  /* Apply the n1/n2 formula and round up; never report fewer than one
   * task per cell. */
  float ntasks = n1 * ntop + n2 * (ncells - ntop);
  if (ncells > 0) tasks_per_cell = ceilf(ntasks / ncells);

  if (tasks_per_cell < 1.0f) tasks_per_cell = 1.0f;
  if (e->verbose)
    message("tasks per cell estimated as: %.2f, maximum tasks: %d",
            tasks_per_cell, (int)(ncells * tasks_per_cell));

  return (int)(ncells * tasks_per_cell);
}
/**
 * @brief Rebuild the space and tasks.
 *
 * Frees the old task list (and optionally the foreign particle buffers),
 * rebuilds the #space cell hierarchy, refreshes the global particle
 * counters, exchanges cells/multipoles over MPI, regenerates the task
 * graph and unskips the currently active tasks.
 *
 * @param e The #engine.
 * @param repartitioned Did we just redistribute?
 * @param clean_smoothing_length_values Are we cleaning up the values of
 * the smoothing lengths before building the tasks ?
 */
void engine_rebuild(struct engine *e, const int repartitioned,
                    const int clean_smoothing_length_values) {

  const ticks tic = getticks();

  /* Clear the forcerebuild flag, whatever it was. */
  e->forcerebuild = 0;
  e->restarting = 0;

  /* Report the time spent in the different task categories */
  if (e->verbose && !repartitioned)
    scheduler_report_task_times(&e->sched, e->nr_threads);

  /* Give some breathing space */
  scheduler_free_tasks(&e->sched);

  /* Free the foreign particles to get more breathing space. */
#ifdef WITH_MPI
  if (e->free_foreign_when_rebuilding)
    space_free_foreign_parts(e->s, /*clear_cell_pointers=*/1);
#endif

  /* Re-build the space. */
  space_rebuild(e->s, repartitioned, e->verbose);

  /* Report the number of cells and memory */
  if (e->verbose)
    message(
        "Nr. of top-level cells: %d Nr. of local cells: %d memory use: %zd MB.",
        e->s->nr_cells, e->s->tot_cells,
        (e->s->nr_cells + e->s->tot_cells) * sizeof(struct cell) /
            (1024 * 1024));

  /* Report the number of multipoles and memory */
  if (e->verbose && (e->policy & engine_policy_self_gravity))
    message(
        "Nr. of top-level mpoles: %d Nr. of local mpoles: %d memory use: %zd "
        "MB.",
        e->s->nr_cells, e->s->tot_cells,
        (e->s->nr_cells + e->s->tot_cells) * sizeof(struct gravity_tensors) /
            (1024 * 1024));

  /* Report the number of particles and memory */
  if (e->verbose)
    message(
        "Space has memory for %zd/%zd/%zd/%zd/%zd part/gpart/spart/sink/bpart "
        "(%zd/%zd/%zd/%zd/%zd MB)",
        e->s->size_parts, e->s->size_gparts, e->s->size_sparts,
        e->s->size_sinks, e->s->size_bparts,
        e->s->size_parts * sizeof(struct part) / (1024 * 1024),
        e->s->size_gparts * sizeof(struct gpart) / (1024 * 1024),
        e->s->size_sparts * sizeof(struct spart) / (1024 * 1024),
        e->s->size_sinks * sizeof(struct sink) / (1024 * 1024),
        e->s->size_bparts * sizeof(struct bpart) / (1024 * 1024));

  if (e->verbose)
    message(
        "Space holds %zd/%zd/%zd/%zd/%zd part/gpart/spart/sink/bpart (fracs: "
        "%f/%f/%f/%f/%f)",
        e->s->nr_parts, e->s->nr_gparts, e->s->nr_sparts, e->s->nr_sinks,
        e->s->nr_bparts,
        e->s->nr_parts ? e->s->nr_parts / ((double)e->s->size_parts) : 0.,
        e->s->nr_gparts ? e->s->nr_gparts / ((double)e->s->size_gparts) : 0.,
        e->s->nr_sparts ? e->s->nr_sparts / ((double)e->s->size_sparts) : 0.,
        e->s->nr_sinks ? e->s->nr_sinks / ((double)e->s->size_sinks) : 0.,
        e->s->nr_bparts ? e->s->nr_bparts / ((double)e->s->size_bparts) : 0.);

  const ticks tic2 = getticks();

  /* Update the global counters of particles (the "extra" slots are
   * place-holders and must not be counted). */
  long long num_particles[5] = {
      (long long)(e->s->nr_parts - e->s->nr_extra_parts),
      (long long)(e->s->nr_gparts - e->s->nr_extra_gparts),
      (long long)(e->s->nr_sparts - e->s->nr_extra_sparts),
      (long long)(e->s->nr_sinks - e->s->nr_extra_sinks),
      (long long)(e->s->nr_bparts - e->s->nr_extra_bparts)};
#ifdef WITH_MPI
  MPI_Allreduce(MPI_IN_PLACE, num_particles, 5, MPI_LONG_LONG, MPI_SUM,
                MPI_COMM_WORLD);
#endif
  e->total_nr_parts = num_particles[0];
  e->total_nr_gparts = num_particles[1];
  e->total_nr_sparts = num_particles[2];
  e->total_nr_sinks = num_particles[3];
  e->total_nr_bparts = num_particles[4];

#ifdef WITH_MPI
  /* Reduce the gravity bounds across all ranks. */
  MPI_Allreduce(MPI_IN_PLACE, &e->s->min_a_grav, 1, MPI_FLOAT, MPI_MIN,
                MPI_COMM_WORLD);
  MPI_Allreduce(MPI_IN_PLACE, &e->s->max_softening, 1, MPI_FLOAT, MPI_MAX,
                MPI_COMM_WORLD);
  MPI_Allreduce(MPI_IN_PLACE, e->s->max_mpole_power,
                SELF_GRAVITY_MULTIPOLE_ORDER + 1, MPI_FLOAT, MPI_MAX,
                MPI_COMM_WORLD);
#endif

  /* Flag that there are no inhibited particles */
  e->nr_inhibited_parts = 0;
  e->nr_inhibited_gparts = 0;
  e->nr_inhibited_sparts = 0;
  e->nr_inhibited_sinks = 0;
  e->nr_inhibited_bparts = 0;

  if (e->verbose)
    message("updating particle counts took %.3f %s.",
            clocks_from_ticks(getticks() - tic2), clocks_getunit());

#ifdef SWIFT_DEBUG_CHECKS
  part_verify_links(e->s->parts, e->s->gparts, e->s->sinks, e->s->sparts,
                    e->s->bparts, e->s->nr_parts, e->s->nr_gparts,
                    e->s->nr_sinks, e->s->nr_sparts, e->s->nr_bparts,
                    e->verbose);
#endif

  /* Initial cleaning up session ? */
  if (clean_smoothing_length_values) space_sanitize(e->s);

  /* Set the initial completeness flag for the moving mesh (before exchange) */
  if (e->policy & engine_policy_grid) {
    cell_grid_set_self_completeness_mapper(e->s->cells_top, e->s->nr_cells,
                                           NULL);
  }

/* If in parallel, exchange the cell structure, top-level and neighbouring
 * multipoles. To achieve this, free the foreign particle buffers first. */
#ifdef WITH_MPI
  if (e->policy & engine_policy_self_gravity) engine_exchange_top_multipoles(e);

  space_free_foreign_parts(e->s, /*clear_cell_pointers=*/1);
  engine_exchange_cells(e);
#endif

#ifdef SWIFT_DEBUG_CHECKS
  /* Let's check that what we received makes sense */
  if (e->policy & engine_policy_self_gravity) {
    long long counter = 0;

    for (int i = 0; i < e->s->nr_cells; ++i) {
      const struct gravity_tensors *m = &e->s->multipoles_top[i];
      counter += m->m_pole.num_gpart;
    }
    if (counter != e->total_nr_gparts)
      error("Total particles in multipoles inconsistent with engine");
  }
  if (e->policy & engine_policy_grid) {
    for (int i = 0; i < e->s->nr_cells; i++) {
      const struct cell *ci = &e->s->cells_top[i];
      if (ci->hydro.count > 0 && !(ci->grid.self_completeness == grid_complete))
        error("Encountered incomplete top level cell!");
    }
  }
#endif

  /* Set the grid construction level, is needed before splitting the tasks */
  if (e->policy & engine_policy_grid) {
    /* Set the completeness and construction level */
    threadpool_map(&e->threadpool, cell_set_grid_completeness_mapper, NULL,
                   e->s->nr_cells, 1, threadpool_auto_chunk_size, e);
    threadpool_map(&e->threadpool, cell_set_grid_construction_level_mapper,
                   NULL, e->s->nr_cells, 1, threadpool_auto_chunk_size, e);
#ifdef WITH_MPI
    engine_exchange_grid_extra(e);
#endif
  }

  /* Re-build the tasks. */
  engine_maketasks(e);

  /* Reallocate freed memory */
#ifdef WITH_MPI
  if (e->free_foreign_when_rebuilding)
    engine_allocate_foreign_particles(e, /*fof=*/0);
#endif

  /* Make the list of top-level cells that have tasks */
  space_list_useful_top_level_cells(e->s);

#ifdef SWIFT_DEBUG_CHECKS
  /* Check that all cells have been drifted to the current time.
   * That can include cells that have not
   * previously been active on this rank. */
  space_check_drift_point(e->s, e->ti_current,
                          e->policy & engine_policy_self_gravity);

  if (e->policy & engine_policy_self_gravity) {
    for (int k = 0; k < e->s->nr_local_cells; k++)
      cell_check_foreign_multipole(&e->s->cells_top[e->s->local_cells_top[k]]);
  }

  space_check_sort_flags(e->s);

  /* Check whether all the unskip recursion flags are not set */
  space_check_unskip_flags(e->s);
#endif

  /* Run through the cells, and their tasks to mark as unskipped. */
  engine_unskip(e);

  /* Fixed typo in the error message ("faled" -> "failed"). */
  if (e->forcerebuild) error("engine_unskip failed after a rebuild!");

  /* Print the status of the system */
  if (e->verbose) engine_print_task_counts(e);

  /* Clear the counters of updates since the last rebuild */
  e->updates_since_rebuild = 0;
  e->g_updates_since_rebuild = 0;
  e->s_updates_since_rebuild = 0;
  e->sink_updates_since_rebuild = 0;
  e->b_updates_since_rebuild = 0;

  /* Flag that a rebuild has taken place */
  e->step_props |= engine_step_prop_rebuild;

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}
/**
 * @brief Prepare the #engine by re-building the cells and tasks.
 *
 * Decides (collectively, over MPI) whether a rebuild and/or repartition
 * is needed and performs them, running FOF black-hole seeding and gas
 * particle splitting beforehand when a rebuild is coming. Tasks are
 * periodically reweighted.
 *
 * @param e The #engine to prepare.
 *
 * @return 1 if the function drifted all particles 0 if not.
 */
int engine_prepare(struct engine *e) {

  TIMER_TIC2;
  const ticks tic = getticks();

  int drifted_all = 0;
  int repartitioned = 0;

  /* Unskip active tasks and check for rebuild */
  if (!e->forcerebuild && !e->forcerepart && !e->restarting) engine_unskip(e);

  const ticks tic3 = getticks();

#ifdef WITH_MPI
  /* All ranks must agree: rebuild if any rank wants one. */
  MPI_Allreduce(MPI_IN_PLACE, &e->forcerebuild, 1, MPI_INT, MPI_MAX,
                MPI_COMM_WORLD);
#endif

  if (e->verbose)
    message("Communicating rebuild flag took %.3f %s.",
            clocks_from_ticks(getticks() - tic3), clocks_getunit());

  /* Perform FOF search to seed black holes. Only if there is a rebuild coming
   * and no repartitioing. */
  if (e->policy & engine_policy_fof && e->forcerebuild && !e->forcerepart &&
      e->run_fof && e->fof_properties->seed_black_holes_enabled) {

    /* Let's start by drifting everybody to the current time */
    engine_drift_all(e, /*drift_mpole=*/0);
    drifted_all = 1;

    engine_fof(e, e->dump_catalogue_when_seeding, /*dump_debug=*/0,
               /*seed_black_holes=*/1, /*foreign buffers allocated=*/1);

    if (e->dump_catalogue_when_seeding) e->snapshot_output_count++;
  }

  /* Perform particle splitting. Only if there is a rebuild coming and no
     repartitioning. */
  if (!e->restarting && e->forcerebuild && !e->forcerepart && e->step > 1) {

    /* Let's start by drifting everybody to the current time */
    if (!drifted_all) engine_drift_all(e, /*drift_mpole=*/0);
    drifted_all = 1;

    engine_split_gas_particles(e);
  }

  /* Do we need repartitioning ? */
  if (e->forcerepart) {

    /* Let's start by drifting everybody to the current time */
    engine_drift_all(e, /*drift_mpole=*/0);
    drifted_all = 1;

    /* Free the PM grid */
    if ((e->policy & engine_policy_self_gravity) && e->s->periodic)
      pm_mesh_free(e->mesh);

    /* And repartition */
    engine_repartition(e);
    repartitioned = 1;

    /* Reallocate the mesh */
    if ((e->policy & engine_policy_self_gravity) && e->s->periodic)
      pm_mesh_allocate(e->mesh);
  }

  /* Do we need rebuilding ? */
  if (e->forcerebuild) {

    /* Let's start by drifting everybody to the current time */
    if (!e->restarting && !drifted_all) engine_drift_all(e, /*drift_mpole=*/0);
    drifted_all = 1;

    /* And rebuild */
    engine_rebuild(e, repartitioned, 0);
  }

#ifdef SWIFT_DEBUG_CHECKS
  if (e->forcerepart || e->forcerebuild) {
    /* Check that all cells have been drifted to the current time.
     * That can include cells that have not previously been active on this
     * rank. Skip if haven't got any cells (yet). */
    if (e->s->cells_top != NULL)
      space_check_drift_point(e->s, e->ti_current,
                              e->policy & engine_policy_self_gravity);
  }
#endif

  /* Re-rank the tasks every now and then. XXX this never executes. */
  if (e->tasks_age % engine_tasksreweight == 1) {
    scheduler_reweight(&e->sched, e->verbose);
  }
  e->tasks_age += 1;

  TIMER_TOC2(timer_prepare);

  if (e->verbose)
    message("took %.3f %s (including unskip, rebuild and reweight).",
            clocks_from_ticks(getticks() - tic), clocks_getunit());

  return drifted_all;
}
/**
 * @brief Implements a barrier for the #runner threads.
 *
 * Called by each runner thread between steps: the thread first parks at
 * the wait barrier (signalling the main thread that it has finished) and
 * then blocks on the run barrier until engine_launch() releases it.
 * The order of the two waits is essential to the handshake.
 *
 * @param e The #engine.
 */
void engine_barrier(struct engine *e) {

  /* Wait at the wait barrier. */
  swift_barrier_wait(&e->wait_barrier);

  /* Wait at the run barrier. */
  swift_barrier_wait(&e->run_barrier);
}
/**
 * @brief Print the conserved quantities statistics to a log file
 *
 * Collects the statistics over the local #space, reduces them across all
 * MPI ranks, and has rank 0 append the result to the statistics file.
 *
 * @param e The #engine.
 */
void engine_print_stats(struct engine *e) {

  const ticks tic = getticks();

#ifdef SWIFT_DEBUG_CHECKS
  /* Check that all cells have been drifted to the current time.
   * That can include cells that have not
   * previously been active on this rank. */
  space_check_drift_point(e->s, e->ti_current, /*check_mpoles=*/0);

  /* Be verbose about this */
  if (e->nodeID == 0) {
    if (e->policy & engine_policy_cosmology)
      message("Saving statistics at a=%e",
              exp(e->ti_current * e->time_base) * e->cosmology->a_begin);
    else
      message("Saving statistics at t=%e",
              e->ti_current * e->time_base + e->time_begin);
  }
#else
  if (e->verbose) {
    if (e->policy & engine_policy_cosmology)
      message("Saving statistics at a=%e",
              exp(e->ti_current * e->time_base) * e->cosmology->a_begin);
    else
      message("Saving statistics at t=%e",
              e->ti_current * e->time_base + e->time_begin);
  }
#endif

  struct statistics stats;
  stats_init(&stats);

  /* Collect the stats on this node */
  stats_collect(e->s, &stats);

/* Aggregate the data from the different nodes. */
#ifdef WITH_MPI
  struct statistics global_stats;
  stats_init(&global_stats);

  if (MPI_Reduce(&stats, &global_stats, 1, statistics_mpi_type,
                 statistics_mpi_reduce_op, 0, MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to aggregate stats.");
#else
  struct statistics global_stats = stats;
#endif

  /* Finalize operations */
  stats_finalize(&global_stats);

  /* Print info */
  if (e->nodeID == 0)
    stats_write_to_file(e->file_stats, &global_stats, e->time, e->cosmology->a,
                        e->cosmology->z, e->step);

  /* Flag that we dumped some statistics */
  e->step_props |= engine_step_prop_statistics;

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}
/**
 * @brief Sets all the force, drift and kick tasks to be skipped.
 *
 * Used when only a subset of the physics should run (e.g. the initial
 * density calculation): flags every task that updates particle state —
 * drifts, kicks, time-steps, ghosts, feedback, swallowing, RT, etc. —
 * as skipped, then clears the corresponding cell flags.
 *
 * @param e The #engine to act on.
 */
void engine_skip_force_and_kick(struct engine *e) {

  struct task *tasks = e->sched.tasks;
  const int nr_tasks = e->sched.nr_tasks;

  for (int i = 0; i < nr_tasks; ++i) {

    struct task *t = &tasks[i];

    /* Skip everything that updates the particles */
    /* The first group matches on task type, the second on sub-type;
     * any task matching either condition is skipped. */
    if (t->type == task_type_drift_part || t->type == task_type_drift_gpart ||
        t->type == task_type_drift_spart || t->type == task_type_drift_bpart ||
        t->type == task_type_drift_sink || t->type == task_type_kick1 ||
        t->type == task_type_kick2 || t->type == task_type_timestep ||
        t->type == task_type_timestep_limiter ||
        t->type == task_type_timestep_sync || t->type == task_type_collect ||
        t->type == task_type_end_hydro_force || t->type == task_type_cooling ||
        t->type == task_type_stars_in || t->type == task_type_stars_out ||
        t->type == task_type_star_formation ||
        t->type == task_type_star_formation_sink ||
        t->type == task_type_stars_resort || t->type == task_type_extra_ghost ||
        t->type == task_type_stars_ghost ||
        t->type == task_type_stars_ghost_in ||
        t->type == task_type_stars_ghost_out || t->type == task_type_sink_in ||
        t->type == task_type_sink_ghost1 || t->type == task_type_sink_ghost2 ||
        t->type == task_type_sink_formation || t->type == task_type_sink_out ||
        t->type == task_type_stars_prep_ghost1 ||
        t->type == task_type_hydro_prep_ghost1 ||
        t->type == task_type_stars_prep_ghost2 ||
        t->type == task_type_bh_swallow_ghost1 ||
        t->type == task_type_bh_swallow_ghost2 ||
        t->type == task_type_bh_swallow_ghost3 || t->type == task_type_bh_in ||
        t->type == task_type_bh_out || t->type == task_type_rt_ghost1 ||
        t->type == task_type_rt_ghost2 || t->type == task_type_rt_tchem ||
        t->type == task_type_rt_advance_cell_time ||
        t->type == task_type_neutrino_weight || t->type == task_type_csds ||
        t->subtype == task_subtype_force ||
        t->subtype == task_subtype_limiter ||
        t->subtype == task_subtype_gradient ||
        t->subtype == task_subtype_stars_prep1 ||
        t->subtype == task_subtype_stars_prep2 ||
        t->subtype == task_subtype_stars_feedback ||
        t->subtype == task_subtype_bh_feedback ||
        t->subtype == task_subtype_bh_swallow ||
        t->subtype == task_subtype_do_gas_swallow ||
        t->subtype == task_subtype_do_bh_swallow ||
        t->subtype == task_subtype_bpart_rho ||
        t->subtype == task_subtype_part_swallow ||
        t->subtype == task_subtype_bpart_merger ||
        t->subtype == task_subtype_bpart_feedback ||
        t->subtype == task_subtype_sink_swallow ||
        t->subtype == task_subtype_sink_do_sink_swallow ||
        t->subtype == task_subtype_sink_do_gas_swallow ||
        t->subtype == task_subtype_tend || t->subtype == task_subtype_rho ||
        t->subtype == task_subtype_spart_density ||
        t->subtype == task_subtype_part_prep1 ||
        t->subtype == task_subtype_spart_prep2 ||
        t->subtype == task_subtype_sf_counts ||
        t->subtype == task_subtype_grav_counts ||
        t->subtype == task_subtype_rt_gradient ||
        t->subtype == task_subtype_rt_transport)
      t->skip = 1;
  }

  /* Run through the cells and clear some flags. */
  space_map_cells_pre(e->s, 1, cell_clear_drift_flags, NULL);
  space_map_cells_pre(e->s, 1, cell_clear_limiter_flags, NULL);
}
/**
 * @brief Sets all the drift tasks to be skipped.
 *
 * Marks every task that moves particles (part/gpart/spart/bpart/sink
 * drifts) as skipped and clears the drift flags on all cells.
 *
 * @param e The #engine to act on.
 */
void engine_skip_drift(struct engine *e) {

  struct task *const task_list = e->sched.tasks;
  const int task_count = e->sched.nr_tasks;

  /* Flag every particle-moving task as skipped. */
  for (int k = 0; k < task_count; ++k) {
    struct task *const t = &task_list[k];

    switch (t->type) {
      case task_type_drift_part:
      case task_type_drift_gpart:
      case task_type_drift_spart:
      case task_type_drift_bpart:
      case task_type_drift_sink:
        t->skip = 1;
        break;
      default:
        break;
    }
  }

  /* Run through the cells and clear the drift flags. */
  space_map_cells_pre(e->s, 1, cell_clear_drift_flags, NULL);
}
/**
 * @brief Launch the runners.
 *
 * Releases the runner threads through the run barrier, enqueues the
 * initial tasks, waits for all runners to return through the wait
 * barrier, and records wallclock and dead-time statistics.
 *
 * @param e The #engine.
 * @param call What kind of tasks are we running? (For time analysis)
 */
void engine_launch(struct engine *e, const char *call) {
  const ticks tic = getticks();

#ifdef SWIFT_DEBUG_CHECKS
  /* Re-set all the cell task counters to 0 */
  space_reset_task_counters(e->s);
#endif

  /* reset the active time counters for the runners */
  for (int i = 0; i < e->nr_threads; ++i) {
    runner_reset_active_time(&e->runners[i]);
  }

  /* Prepare the scheduler. */
  /* Extra "waiting" count keeps the runners from finishing before all
   * tasks have been enqueued below. */
  atomic_inc(&e->sched.waiting);

  /* Cry havoc and let loose the dogs of war. */
  swift_barrier_wait(&e->run_barrier);

  /* Load the tasks. */
  scheduler_start(&e->sched);

  /* Remove the safeguard. */
  /* Drop the extra waiting count under the sleep mutex and wake any
   * runners that went to sleep waiting for work. */
  pthread_mutex_lock(&e->sched.sleep_mutex);
  atomic_dec(&e->sched.waiting);
  pthread_cond_broadcast(&e->sched.sleep_cond);
  pthread_mutex_unlock(&e->sched.sleep_mutex);

  /* Sit back and wait for the runners to come home. */
  swift_barrier_wait(&e->wait_barrier);

  /* Store the wallclock time */
  e->sched.total_ticks += getticks() - tic;

  /* accumulate active counts for all runners */
  ticks active_time = 0;
  for (int i = 0; i < e->nr_threads; ++i) {
    active_time += runner_get_active_time(&e->runners[i]);
  }

  e->sched.deadtime.active_ticks += active_time;
  e->sched.deadtime.waiting_ticks += getticks() - tic;

#ifdef SWIFT_DEBUG_CHECKS
  e->sched.last_successful_task_fetch = 0LL;
#endif

  if (e->verbose)
    message("(%s) took %.3f %s.", call, clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}
/**
 * @brief Calls the 'first init' function on the particles of all types.
 *
 * Puts parts, gparts, sparts, bparts and sinks into a state ready for
 * the start of a run.
 *
 * @param e The #engine.
 */
void engine_first_init_particles(struct engine *e) {

  const ticks start = getticks();
  struct space *const s = e->s;
  const int verbose = e->verbose;

  /* Initialise every particle type in turn. */
  space_first_init_parts(s, verbose);
  space_first_init_gparts(s, verbose);
  space_first_init_sparts(s, verbose);
  space_first_init_bparts(s, verbose);
  space_first_init_sinks(s, verbose);

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - start),
            clocks_getunit());
}
/**
 * @brief Compute the maximal ID of any #part in the run.
 *
 * Stores the local maximum in e->max_parts_id and, with MPI, reduces it
 * to the global maximum across all ranks.
 *
 * @param e The #engine.
 */
void engine_get_max_ids(struct engine *e) {

  e->max_parts_id = space_get_max_parts_id(e->s);

#ifdef WITH_MPI
  MPI_Allreduce(MPI_IN_PLACE, &e->max_parts_id, 1, MPI_LONG_LONG_INT, MPI_MAX,
                MPI_COMM_WORLD);
#endif
}
/**
 * @brief Gather the information about the top-level cells whose time-step has
 * changed and activate the communications required to synchronize the
 * time-steps.
 *
 * Only meaningful with MPI; aborts otherwise.
 *
 * @param e The #engine.
 */
void engine_synchronize_times(struct engine *e) {

#ifdef WITH_MPI
  const ticks tic = getticks();

  /* Collect which top-level cells have been updated */
  /* Summing the per-cell flags across ranks marks a cell as updated if
   * any rank touched it. */
  MPI_Allreduce(MPI_IN_PLACE, e->s->cells_top_updated, e->s->nr_cells, MPI_CHAR,
                MPI_SUM, MPI_COMM_WORLD);

  /* Activate tend communications involving the cells that have changed. */
  for (int i = 0; i < e->s->nr_cells; ++i) {
    if (e->s->cells_top_updated[i]) {
      struct cell *c = &e->s->cells_top[i];
      scheduler_activate_all_subtype(&e->sched, c->mpi.send, task_subtype_tend);
      scheduler_activate_all_subtype(&e->sched, c->mpi.recv, task_subtype_tend);
    }
  }

  if (e->verbose)
    message("Gathering and activating tend took %.3f %s.",
            clocks_from_ticks(getticks() - tic), clocks_getunit());

  /* Run the activated tend communication tasks. */
  TIMER_TIC;
  engine_launch(e, "tend");
  TIMER_TOC(timer_runners);
#else
  error("SWIFT was not compiled with MPI support.");
#endif
}
/**
 * @brief Run the radiative transfer sub-cycles outside the
 * regular time-steps.
 *
 * After a regular step has completed (which includes the zeroth RT
 * cycle), this runs the remaining RT sub-cycles up to the end of the
 * main step, updating cosmology/time bookkeeping, printing per-cycle
 * diagnostics and accumulating the dead-time over all cycles.
 *
 * @param e The #engine
 **/
void engine_run_rt_sub_cycles(struct engine *e) {

  /* Do we have work to do? */
  if (!(e->policy & engine_policy_rt)) return;

  /* Note that if running without sub-cycles, no RT-specific timestep data will
   * be written to screen or to the RT subcycles timestep data file. It's
   * meaningless to do so, as all data will already be contained in the normal
   * timesteps file. */
  if (e->max_nr_rt_subcycles <= 1) return;

  /* Get the subcycling step */
  const integertime_t rt_step_size = e->ti_rt_end_min - e->ti_current;
  if (rt_step_size == 0) {
    /* When we arrive at the final step, the rt_step_size can be == 0 */
    if (!engine_is_done(e)) error("Got rt_step_size = 0");
    return;
  }

  /* At this point, the non-RT ti_end_min is up-to-date. Use that and
   * the time of the previous regular step to get how many subcycles
   * we need. */
  const int nr_rt_cycles = (e->ti_end_min - e->ti_current) / rt_step_size;

  /* You can't check here that the number of cycles is exactly the number
   * you fixed it to be. E.g. stars or gravity may reduce the time step
   * sizes for some main steps such that they coincide with the RT bins,
   * yielding effectively no subcycles. (At least for low numbers.) */
  if (nr_rt_cycles < 0) {
    error(
        "Got negative nr of sub-cycles??? ti_rt_end_min = %lld ti_current = "
        "%lld rt_step_size = %lld",
        e->ti_rt_end_min, e->ti_current, rt_step_size);
  } else if (nr_rt_cycles == 0) {
    /* This can happen if in the previous main step no RT/hydro updates
     * happened, but something else (e.g. stars, gravity) only. In this
     * case, exit early. */
    return;
  }

  /* Get some time variables for printouts. Don't update the ones in the
   * engine like in the regular step, or the outputs in the regular steps
   * will be wrong. */
  double dt_subcycle;
  if (e->policy & engine_policy_cosmology) {
    dt_subcycle =
        cosmology_get_delta_time(e->cosmology, e->ti_current, e->ti_rt_end_min);
  } else {
    dt_subcycle = rt_step_size * e->time_base;
  }
  double time = e->time;

  /* Keep track and accumulate the deadtime over all sub-cycles. */
  /* We need to manually put this back in the engine struct when
   * the sub-cycling is completed. */
  double global_deadtime_acc = e->global_deadtime;

  /* Collect and print info before it's gone */
  /* The zeroth cycle (done during the regular step) is reported here with
   * placeholders for the quantities that don't apply to it. */
  engine_collect_end_of_sub_cycle(e);
  if (e->nodeID == 0) {
    printf(
        "  [rt-sc] %-4d %12e %11.6f %11.6f %13e %4d %4d %12lld %12s %12s "
        "%12s %12s  %21s %6s %17s\n",
        0, e->time, e->cosmology->a, e->cosmology->z, dt_subcycle,
        e->min_active_bin_subcycle, e->max_active_bin_subcycle, e->rt_updates,
        /*g, s, sink, bh updates=*/"-", "-", "-", "-", /*wallclock_time=*/"-",
        /*props=*/"-", /*dead_time=*/"-");
#ifdef SWIFT_DEBUG_CHECKS
    fflush(stdout);
#endif
    if (!e->restarting) {
      fprintf(
          e->file_rt_subcycles,
          "  %6d %9d %14e %12.7f %12.7f %14e %4d %4d %12lld %21.3f %17.3f\n",
          e->step, 0, time, e->cosmology->a, e->cosmology->z, dt_subcycle,
          e->min_active_bin_subcycle, e->max_active_bin_subcycle, e->rt_updates,
          /*wall-clock time=*/-1.f, /*deadtime=*/-1.f);
    }
#ifdef SWIFT_DEBUG_CHECKS
    fflush(e->file_rt_subcycles);
#endif
  }

  /* Take note of the (integer) time until which the radiative transfer
   * has been integrated so far. At the start of the sub-cycling, this
   * should be e->ti_current_subcycle + dt_rt_min, since the first (i.e.
   * zeroth) RT cycle has been completed during the regular step.
   * This is used for a consistency/debugging check. */
  integertime_t rt_integration_end = e->ti_current_subcycle + rt_step_size;

  /* Run the remaining cycles (the zeroth one already happened). */
  for (int sub_cycle = 1; sub_cycle < nr_rt_cycles; ++sub_cycle) {

    /* Keep track of the wall-clock time of each additional sub-cycle. */
    struct clocks_time time1, time2;
    clocks_gettime(&time1);

    /* reset the deadtime information in the scheduler */
    e->sched.deadtime.active_ticks = 0;
    e->sched.deadtime.waiting_ticks = 0;

    /* Set and re-set times, bins, etc. */
    e->rt_updates = 0ll;
    integertime_t ti_subcycle_old = e->ti_current_subcycle;
    e->ti_current_subcycle = e->ti_current + sub_cycle * rt_step_size;
    e->max_active_bin_subcycle = get_max_active_bin(e->ti_current_subcycle);
    e->min_active_bin_subcycle =
        get_min_active_bin(e->ti_current_subcycle, ti_subcycle_old);

    /* Update rt properties */
    rt_props_update(e->rt_props, e->internal_units, e->cosmology);

    if (e->policy & engine_policy_cosmology) {
      double time_old = time;
      cosmology_update(
          e->cosmology, e->physical_constants,
          e->ti_current_subcycle);  // Update cosmological parameters
      time = e->cosmology->time;    // Grab new cosmology time
      dt_subcycle = time - time_old;
    } else {
      time = e->ti_current_subcycle * e->time_base + e->time_begin;
    }

    /* Do the actual work now. */
    engine_unskip_rt_sub_cycle(e);

    TIMER_TIC;
    engine_launch(e, "cycles");
    TIMER_TOC(timer_runners);

    /* Compute the local accumulated deadtime. */
    const ticks deadticks = (e->nr_threads * e->sched.deadtime.waiting_ticks) -
                            e->sched.deadtime.active_ticks;
    e->local_deadtime = clocks_from_ticks(deadticks);

    /* Collect number of updates and print */
    engine_collect_end_of_sub_cycle(e);

    /* Add our sub-cycling deadtime. */
    global_deadtime_acc += e->global_deadtime;

    /* Keep track how far we have integrated over. */
    rt_integration_end += rt_step_size;

    if (e->nodeID == 0) {

      const double dead_time =
          e->global_deadtime / (e->nr_nodes * e->nr_threads);

      /* engine_step() stores the wallclock time in the engine struct.
       * Don't do that here - we want the full step to include the full
       * duration of the step, which includes all sub-cycles. (Also it
       * would be overwritten anyway.) */
      clocks_gettime(&time2);
      const float wallclock_time = (float)clocks_diff(&time1, &time2);
      printf(
          "  [rt-sc] %-4d %12e %11.6f %11.6f %13e %4d %4d %12lld %12s %12s "
          "%12s %12s  %21.3f %6s %17.3f\n",
          sub_cycle, time, e->cosmology->a, e->cosmology->z, dt_subcycle,
          e->min_active_bin_subcycle, e->max_active_bin_subcycle, e->rt_updates,
          /*g, s, sink, bh updates=*/"-", "-", "-", "-", wallclock_time,
          /*props=*/"-", dead_time);
#ifdef SWIFT_DEBUG_CHECKS
      fflush(stdout);
#endif
      fprintf(
          e->file_rt_subcycles,
          "  %6d %9d %14e %12.7f %12.7f %14e %4d %4d %12lld %21.3f %17.3f\n",
          e->step, sub_cycle, time, e->cosmology->a, e->cosmology->z,
          dt_subcycle, e->min_active_bin_subcycle, e->max_active_bin_subcycle,
          e->rt_updates, wallclock_time, dead_time);
#ifdef SWIFT_DEBUG_CHECKS
      fflush(e->file_rt_subcycles);
#endif
    }
  }

  /* Sanity check: the sub-cycles must have integrated exactly up to the
   * end of the main step. */
  if (rt_integration_end != e->ti_end_min)
    error(
        "End of sub-cycling doesn't add up: got %lld should have %lld. Started "
        "at ti_current = %lld dt_rt = %lld cycles = %d",
        rt_integration_end, e->ti_end_min, e->ti_current, rt_step_size,
        nr_rt_cycles);

  /* Once we're done, clean up after ourselves */
  e->rt_updates = 0ll;
  e->global_deadtime = global_deadtime_acc;
}
/**
 * @brief Initialises the particles and sets them in a state ready to move
 * forward in time.
 *
 * @param e The #engine
 * @param flag_entropy_ICs Did the 'Internal Energy' of the particles actually
 * contain entropy?
 * @param clean_h_values Are we cleaning up the values of h before building
 * the tasks?
 */
void engine_init_particles(struct engine *e, int flag_entropy_ICs,
                           int clean_h_values) {
  struct space *s = e->s;
  struct clocks_time time1, time2;
  clocks_gettime(&time1);
  /* Reset the deadtime information in the scheduler */
  e->sched.deadtime.active_ticks = 0;
  e->sched.deadtime.waiting_ticks = 0;
  /* Update the softening lengths */
  if (e->policy & engine_policy_self_gravity)
    gravity_props_update(e->gravity_properties, e->cosmology);
  /* Update the hydro properties */
  if (e->policy & engine_policy_hydro)
    hydro_props_update(e->hydro_properties, e->gravity_properties,
                       e->cosmology);
  /* Start by setting the particles in a good state */
  if (e->nodeID == 0) message("Setting particles to a valid state...");
  engine_first_init_particles(e);
  /* Initialise the particle splitting mechanism */
  if (e->hydro_properties->particle_splitting)
    engine_init_split_gas_particles(e);
  if (e->nodeID == 0)
    message("Computing initial gas densities and approximate gravity.");
  /* Construct all cells and tasks to start everything */
  engine_rebuild(e, 0, clean_h_values);
  /* Compute the mesh forces for the first time */
  if ((e->policy & engine_policy_self_gravity) && e->s->periodic) {
    /* Compute mesh forces */
    pm_mesh_compute_potential(e->mesh, e->s, &e->threadpool, e->verbose);
    /* Compute mesh time-step length */
    engine_recompute_displacement_constraint(e);
    e->step_props |= engine_step_prop_mesh;
  }
  /* No time integration. We just want the density and ghosts */
  engine_skip_force_and_kick(e);
  /* Print the number of active tasks ? */
  if (e->verbose) engine_print_task_counts(e);
  /* Init the particle data (by hand). */
  space_init_parts(s, e->verbose);
  space_init_gparts(s, e->verbose);
  space_init_sparts(s, e->verbose);
  space_init_bparts(s, e->verbose);
  space_init_sinks(s, e->verbose);
  /* Update the cooling function */
  if ((e->policy & engine_policy_cooling) ||
      (e->policy & engine_policy_temperature))
    cooling_update(e->physical_constants, e->cosmology, e->pressure_floor_props,
                   e->cooling_func, e->s, e->time);
  if (e->policy & engine_policy_rt)
    rt_props_update(e->rt_props, e->internal_units, e->cosmology);
#ifdef WITH_CSDS
  if (e->policy & engine_policy_csds) {
    /* Mark the first time step in the particle csds file. */
    if (e->policy & engine_policy_cosmology) {
      csds_log_timestamp(e->csds, e->ti_current, e->cosmology->a,
                         &e->csds->timestamp_offset);
    } else {
      csds_log_timestamp(e->csds, e->ti_current, e->time,
                         &e->csds->timestamp_offset);
    }
    /* Make sure that we have enough space in the particle csds file
     * to store the particles in current time step. */
    csds_ensure_size(e->csds, e);
    csds_write_description(e->csds, e);
  }
#endif
  /* Zero the list of cells that have had their time-step updated */
  bzero(e->s->cells_top_updated, e->s->nr_cells * sizeof(char));
  /* Now, launch the calculation (density loops + ghosts only, since the
   * force and kick tasks were skipped above). */
  TIMER_TIC;
  engine_launch(e, "tasks");
  TIMER_TOC(timer_runners);
#ifdef SWIFT_HYDRO_DENSITY_CHECKS
  /* Run the brute-force hydro calculation for some parts */
  if (e->policy & engine_policy_hydro)
    hydro_exact_density_compute(e->s, e, /*check_force=*/0);
  /* Check the accuracy of the hydro calculation */
  if (e->policy & engine_policy_hydro)
    hydro_exact_density_check(e->s, e, /*rel_tol=*/1e-3, /*check_force=*/0);
#endif
  /* Apply some conversions (e.g. internal energy -> entropy) */
  if (!flag_entropy_ICs) {
    if (e->nodeID == 0) message("Converting internal energy variable.");
    space_convert_quantities(e->s, e->verbose);
    /* Correct what we did (e.g. in PE-SPH, need to recompute rho_bar) */
    if (hydro_need_extra_init_loop) {
      engine_unskip(e);
      engine_skip_force_and_kick(e);
      engine_launch(e, "tasks");
    }
  }
  /* Do some post initialisations */
  space_post_init_parts(e->s, e->verbose);
  /* Apply some RT conversions (e.g. energy -> energy density) */
  if (e->policy & engine_policy_rt)
    space_convert_rt_quantities(e->s, e->verbose);
  /* Collect initial mean mass of each particle type */
  space_collect_mean_masses(e->s, e->verbose);
#ifdef SWIFT_DEBUG_CHECKS
  /* Check that we have the correct total mass in the top-level multipoles */
  long long num_gpart_mpole = 0;
  if (e->policy & engine_policy_self_gravity) {
    for (int i = 0; i < e->s->nr_cells; ++i)
      num_gpart_mpole += e->s->cells_top[i].grav.multipole->m_pole.num_gpart;
    if (num_gpart_mpole != e->total_nr_gparts)
      error(
          "Top-level multipoles don't contain the total number of gpart "
          "s->nr_gpart=%lld, "
          "m_poles=%lld",
          e->total_nr_gparts, num_gpart_mpole);
  }
#endif
  /* Now time to get ready for the first time-step */
  if (e->nodeID == 0) message("Running initial fake time-step.");
  /* Update the MAC strategy if necessary */
  if (e->policy & engine_policy_self_gravity)
    gravity_props_update_MAC_choice(e->gravity_properties);
  /* Construct all cells again for a new round (need to update h_max) */
  engine_rebuild(e, 0, 0);
  /* No drift this time (particles are already at the current time) */
  engine_skip_drift(e);
  /* Init the particle data (by hand). */
  space_init_parts(e->s, e->verbose);
  space_init_gparts(e->s, e->verbose);
  space_init_sparts(e->s, e->verbose);
  space_init_bparts(e->s, e->verbose);
  space_init_sinks(e->s, e->verbose);
  /* Print the number of active tasks ? */
  if (e->verbose) engine_print_task_counts(e);
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
  /* Run the brute-force gravity calculation for some gparts */
  if (e->policy & engine_policy_self_gravity)
    gravity_exact_force_compute(e->s, e);
#endif
  scheduler_write_dependencies(&e->sched, e->verbose, e->step);
  scheduler_write_cell_dependencies(&e->sched, e->verbose, e->step);
  if (e->nodeID == 0) scheduler_write_task_level(&e->sched, e->step);
  /* Zero the list of cells that have had their time-step updated */
  bzero(e->s->cells_top_updated, e->s->nr_cells * sizeof(char));
  /* Run the 0th time-step */
  TIMER_TIC2;
  engine_launch(e, "tasks");
  TIMER_TOC2(timer_runners);
  /* When running over MPI, synchronize top-level cells */
#ifdef WITH_MPI
  engine_synchronize_times(e);
#endif
#ifdef SWIFT_HYDRO_DENSITY_CHECKS
  /* Run the brute-force hydro calculation for some parts */
  if (e->policy & engine_policy_hydro)
    hydro_exact_density_compute(e->s, e, /*check_force=*/1);
  /* Check the accuracy of the hydro calculation */
  if (e->policy & engine_policy_hydro)
    hydro_exact_density_check(e->s, e, /*rel_tol=*/1e-3, /*check_force=*/1);
#endif
#ifdef SWIFT_STARS_DENSITY_CHECKS
  /* Run the brute-force stars calculation for some parts */
  if (e->policy & engine_policy_stars) stars_exact_density_compute(e->s, e);
  /* Check the accuracy of the stars calculation */
  if (e->policy & engine_policy_stars)
    stars_exact_density_check(e->s, e, /*rel_tol=*/1e-3);
#endif
#ifdef SWIFT_SINK_DENSITY_CHECKS
  /* Run the brute-force sink calculation for some sinks */
  if (e->policy & engine_policy_sinks) sink_exact_density_compute(e->s, e);
  /* Check the accuracy of the sink calculation */
  if (e->policy & engine_policy_sinks)
    sink_exact_density_check(e->s, e, /*rel_tol=*/1e-3);
#endif
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
  /* Check the accuracy of the gravity calculation */
  if (e->policy & engine_policy_self_gravity)
    gravity_exact_force_check(e->s, e, 1e-1);
#endif
#ifdef SWIFT_DEBUG_CHECKS
  /* Make sure all woken-up particles have been processed */
  space_check_limiter(e->s);
  space_check_swallow(e->s);
#endif
  /* Compute the local accumulated deadtime. */
  const ticks deadticks = (e->nr_threads * e->sched.deadtime.waiting_ticks) -
                          e->sched.deadtime.active_ticks;
  e->local_deadtime = clocks_from_ticks(deadticks);
  /* Recover the (integer) end of the next time-step */
  engine_collect_end_of_step(e, 1);
  /* Check if any particles have the same position. This is not
   * allowed (it would lead to divisions by zero in the pair
   * interactions) so we abort. */
  if (s->nr_parts > 0) {
    /* Sorting should put the same positions next to each other... */
    int failed = 0;
    double *prev_x = s->parts[0].x;
    long long *prev_id = &s->parts[0].id;
    for (size_t k = 1; k < s->nr_parts; k++) {
      /* Ignore fake buffer particles for on-the-fly creation */
      if (s->parts[k].time_bin == time_bin_not_created) continue;
      if (prev_x[0] == s->parts[k].x[0] && prev_x[1] == s->parts[k].x[1] &&
          prev_x[2] == s->parts[k].x[2]) {
        if (e->verbose)
          message("Two particles occupy location: %f %f %f id=%lld id=%lld",
                  prev_x[0], prev_x[1], prev_x[2], *prev_id, s->parts[k].id);
        failed++;
      }
      prev_x = s->parts[k].x;
      prev_id = &s->parts[k].id;
    }
    if (failed > 0)
      error(
          "Have %d particle pairs with the same locations.\n"
          "Cannot continue",
          failed);
  }
  /* Also check any gparts. This is not supposed to be fatal so only warn. */
  if (s->nr_gparts > 0) {
    int failed = 0;
    double *prev_x = s->gparts[0].x;
    for (size_t k = 1; k < s->nr_gparts; k++) {
      /* Ignore fake buffer particles for on-the-fly creation */
      if (s->gparts[k].time_bin == time_bin_not_created) continue;
      if (prev_x[0] == s->gparts[k].x[0] && prev_x[1] == s->gparts[k].x[1] &&
          prev_x[2] == s->gparts[k].x[2]) {
        if (e->verbose)
          message("Two gparts occupy location: %f %f %f / %f %f %f", prev_x[0],
                  prev_x[1], prev_x[2], s->gparts[k].x[0], s->gparts[k].x[1],
                  s->gparts[k].x[2]);
        failed++;
      }
      prev_x = s->gparts[k].x;
    }
    if (failed > 0)
      message(
          "WARNING: found %d gpart pairs at the same location. "
          "That is not optimal",
          failed);
  }
  /* Check the top-level cell h_max matches the particles as these can be
   * updated in the the ghost tasks (only a problem if the ICs estimates for h
   * are too small). Note this must be followed by a rebuild as sub-cells will
   * not be updated until that is done. */
  if (s->cells_top != NULL && s->nr_parts > 0) {
    for (int i = 0; i < s->nr_cells; i++) {
      struct cell *c = &s->cells_top[i];
      if (c->nodeID == engine_rank && c->hydro.count > 0) {
        float part_h_max = c->hydro.parts[0].h;
        for (int k = 1; k < c->hydro.count; k++) {
          if (c->hydro.parts[k].h > part_h_max)
            part_h_max = c->hydro.parts[k].h;
        }
        c->hydro.h_max = max(part_h_max, c->hydro.h_max);
      }
    }
  }
  /* Same check for the star particles' smoothing lengths. */
  if (s->cells_top != NULL && s->nr_sparts > 0) {
    for (int i = 0; i < s->nr_cells; i++) {
      struct cell *c = &s->cells_top[i];
      if (c->nodeID == engine_rank && c->stars.count > 0) {
        float spart_h_max = c->stars.parts[0].h;
        for (int k = 1; k < c->stars.count; k++) {
          if (c->stars.parts[k].h > spart_h_max)
            spart_h_max = c->stars.parts[k].h;
        }
        c->stars.h_max = max(spart_h_max, c->stars.h_max);
      }
    }
  }
  /* Same check for the sink particles' smoothing lengths. */
  if (s->cells_top != NULL && s->nr_sinks > 0) {
    for (int i = 0; i < s->nr_cells; i++) {
      struct cell *c = &s->cells_top[i];
      if (c->nodeID == engine_rank && c->sinks.count > 0) {
        float sink_h_max = c->sinks.parts[0].h;
        for (int k = 1; k < c->sinks.count; k++) {
          if (c->sinks.parts[k].h > sink_h_max)
            sink_h_max = c->sinks.parts[k].h;
        }
        c->sinks.h_max = max(sink_h_max, c->sinks.h_max);
      }
    }
  }
  /* Run the RT sub-cycles now. */
  engine_run_rt_sub_cycles(e);
  clocks_gettime(&time2);
#ifdef SWIFT_DEBUG_CHECKS
  space_check_timesteps(e->s);
  part_verify_links(e->s->parts, e->s->gparts, e->s->sinks, e->s->sparts,
                    e->s->bparts, e->s->nr_parts, e->s->nr_gparts,
                    e->s->nr_sinks, e->s->nr_sparts, e->s->nr_bparts,
                    e->verbose);
#endif
  /* Gather the max IDs at this stage */
  engine_get_max_ids(e);
  /* Ready to go */
  e->step = 0;
  e->forcerebuild = 1;
  e->wallclock_time = (float)clocks_diff(&time1, &time2);
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
  e->force_checks_snapshot_flag = 0;
#endif
#ifdef SWIFT_RT_DEBUG_CHECKS
  /* if we're running the debug RT scheme, do some checks after every step,
   * and reset debugging flags now. */
  if (e->policy & engine_policy_rt) {
    rt_debugging_checks_end_of_step(e, e->verbose);
  }
#endif
  if (e->verbose) message("took %.3f %s.", e->wallclock_time, clocks_getunit());
}
/**
* @brief Let the #engine loose to compute the forces.
*
* @param e The #engine.
* @return Should the run stop after this step?
*/
int engine_step(struct engine *e) {
  TIMER_TIC2;
  struct clocks_time time1, time2;
  clocks_gettime(&time1);
  /* Reset the deadtime information in the scheduler */
  e->sched.deadtime.active_ticks = 0;
  e->sched.deadtime.waiting_ticks = 0;
#if defined(SWIFT_MPIUSE_REPORTS) && defined(WITH_MPI)
  /* We may want to compare times across ranks, so make sure all steps start
   * at the same time, just different ticks. */
  MPI_Barrier(MPI_COMM_WORLD);
#endif
  e->tic_step = getticks();
  if (e->nodeID == 0) {
    const double dead_time = e->global_deadtime / (e->nr_nodes * e->nr_threads);
    const ticks tic_files = getticks();
    /* Print some information to the screen */
    printf(
        "  %6d %14e %12.7f %12.7f %14e %4d %4d %12lld %12lld %12lld "
        "%12lld %12lld %21.3f %6d %17.3f\n",
        e->step, e->time, e->cosmology->a, e->cosmology->z, e->time_step,
        e->min_active_bin, e->max_active_bin, e->updates, e->g_updates,
        e->s_updates, e->sink_updates, e->b_updates, e->wallclock_time,
        e->step_props, dead_time);
#ifdef SWIFT_DEBUG_CHECKS
    fflush(stdout);
#endif
    /* Write the star formation information to the file */
    if (e->policy & engine_policy_star_formation) {
      star_formation_logger_write_to_log_file(e->sfh_logger, e->time,
                                              e->cosmology->a, e->cosmology->z,
                                              e->sfh, e->step);
#ifdef SWIFT_DEBUG_CHECKS
      fflush(e->sfh_logger);
#else
      if (e->step % 32 == 0) fflush(e->sfh_logger);
#endif
    }
    if (!e->restarting)
      fprintf(
          e->file_timesteps,
          "  %6d %14e %12.7f %12.7f %14e %4d %4d %12lld %12lld %12lld %12lld "
          "%12lld %21.3f %6d %17.3f\n",
          e->step, e->time, e->cosmology->a, e->cosmology->z, e->time_step,
          e->min_active_bin, e->max_active_bin, e->updates, e->g_updates,
          e->s_updates, e->sink_updates, e->b_updates, e->wallclock_time,
          e->step_props, dead_time);
#ifdef SWIFT_DEBUG_CHECKS
    fflush(e->file_timesteps);
#endif
    if (e->verbose)
      message("Writing step info to files took %.3f %s",
              clocks_from_ticks(getticks() - tic_files), clocks_getunit());
  }
  /* When restarting, we may have had some i/o to do on the step
   * where we decided to stop. We have to do this now.
   * We need some cells to exist but not the whole task stuff. */
  if (e->restarting) space_rebuild(e->s, 0, e->verbose);
  if (e->restarting) engine_io(e);
#ifdef SWIFT_RT_DEBUG_CHECKS
  /* If we're restarting, clean up some flags and counters first. It would
   * usually be done at the end of the step, but the restart dump
   * interrupts it. */
  if (e->restarting && (e->policy & engine_policy_rt))
    rt_debugging_checks_end_of_step(e, e->verbose);
#endif
  /* Move forward in time */
  e->ti_old = e->ti_current;
  e->ti_current = e->ti_end_min;
  e->max_active_bin = get_max_active_bin(e->ti_end_min);
  e->min_active_bin = get_min_active_bin(e->ti_current, e->ti_old);
  e->step += 1;
  engine_current_step = e->step;
  e->step_props = engine_step_prop_none;
  /* RT sub-cycling related time updates */
  e->max_active_bin_subcycle = get_max_active_bin(e->ti_end_min);
  e->min_active_bin_subcycle =
      get_min_active_bin(e->ti_end_min, e->ti_current_subcycle);
  e->ti_current_subcycle = e->ti_end_min;
  /* When restarting, move everyone to the current time. */
  if (e->restarting) engine_drift_all(e, /*drift_mpole=*/1);
  /* Get the physical value of the time and time-step size */
  if (e->policy & engine_policy_cosmology) {
    e->time_old = e->time;
    cosmology_update(e->cosmology, e->physical_constants, e->ti_current);
    e->time = e->cosmology->time;
    e->time_step = e->time - e->time_old;
  } else {
    e->time = e->ti_current * e->time_base + e->time_begin;
    e->time_old = e->ti_old * e->time_base + e->time_begin;
    e->time_step = (e->ti_current - e->ti_old) * e->time_base;
  }
#ifdef WITH_LIGHTCONE
  /* Determine which periodic replications could contribute to the lightcone
     during this time step */
  lightcone_array_prepare_for_step(e->lightcone_array_properties, e->cosmology,
                                   e->ti_earliest_undrifted, e->ti_current);
#endif
  /*****************************************************/
  /* OK, we now know what the next end of time-step is */
  /*****************************************************/
  const ticks tic_updates = getticks();
  /* Update the cooling function */
  if ((e->policy & engine_policy_cooling) ||
      (e->policy & engine_policy_temperature))
    cooling_update(e->physical_constants, e->cosmology, e->pressure_floor_props,
                   e->cooling_func, e->s, e->time);
  /* Update the softening lengths */
  if (e->policy & engine_policy_self_gravity)
    gravity_props_update(e->gravity_properties, e->cosmology);
  /* Update the hydro properties */
  if (e->policy & engine_policy_hydro)
    hydro_props_update(e->hydro_properties, e->gravity_properties,
                       e->cosmology);
  /* Update the rt properties */
  if (e->policy & engine_policy_rt)
    rt_props_update(e->rt_props, e->internal_units, e->cosmology);
  /* Check for any snapshot triggers */
  engine_io_check_snapshot_triggers(e);
  if (e->verbose)
    message("Updating general quantities took %.3f %s",
            clocks_from_ticks(getticks() - tic_updates), clocks_getunit());
  /* Trigger a tree-rebuild if we passed the frequency threshold */
  if ((e->policy & engine_policy_self_gravity) &&
      ((double)e->g_updates_since_rebuild >
       ((double)e->total_nr_gparts) * e->gravity_properties->rebuild_frequency))
    e->forcerebuild = 1;
  /* Trigger a FOF black hole seeding? */
  if (e->policy & engine_policy_fof && !e->restarting) {
    if (e->ti_end_min > e->ti_next_fof && e->ti_next_fof > 0) {
      e->run_fof = 1;
      e->forcerebuild = 1;
    }
  }
  /* Trigger a rebuild if we reached a gravity mesh step? */
  if ((e->policy & engine_policy_self_gravity) && e->s->periodic &&
      e->mesh->ti_end_mesh_next == e->ti_current)
    e->forcerebuild = 1;
  /* Do we want a snapshot that will trigger a FOF call? */
  if (e->ti_current + (e->ti_current - e->ti_old) > e->ti_next_snapshot &&
      e->ti_next_snapshot > 0) {
    if ((e->policy & engine_policy_fof) && e->snapshot_invoke_fof) {
      e->forcerebuild = 1;
    }
  }
  /* Trigger a tree-rebuild if the fraction of active gparts is large enough */
  if ((e->policy & engine_policy_self_gravity) && !e->forcerebuild &&
      e->gravity_properties->rebuild_active_fraction <= 1.0f) {
    ticks tic = getticks();
    /* Count the number of active particles */
    size_t nr_gparts = e->s->nr_gparts;
    size_t nr_active_gparts = 0;
    for (size_t i = 0; i < nr_gparts; ++i) {
      struct gpart *gp = &e->s->gparts[i];
      if (gpart_is_active(gp, e)) nr_active_gparts++;
    }
    long long total_nr_active_gparts = nr_active_gparts;
#ifdef WITH_MPI
    MPI_Allreduce(MPI_IN_PLACE, &total_nr_active_gparts, 1, MPI_LONG_LONG_INT,
                  MPI_SUM, MPI_COMM_WORLD);
#endif
    if (e->verbose)
      message("Counting active gparts took %.3f %s.",
              clocks_from_ticks(getticks() - tic), clocks_getunit());
    /* Trigger the tree-rebuild? */
    if (((double)total_nr_active_gparts >
         ((double)e->total_nr_gparts) *
             e->gravity_properties->rebuild_active_fraction))
      e->forcerebuild = 1;
  }
#ifdef WITH_CSDS
  if (e->policy & engine_policy_csds) {
    /* Mark the current time step in the particle csds file. */
    if (e->policy & engine_policy_cosmology) {
      csds_log_timestamp(e->csds, e->ti_current, e->cosmology->a,
                         &e->csds->timestamp_offset);
    } else {
      csds_log_timestamp(e->csds, e->ti_current, e->time,
                         &e->csds->timestamp_offset);
    }
    /* Make sure that we have enough space in the particle csds file
     * to store the particles in current time step. */
    csds_ensure_size(e->csds, e);
  }
#endif
  /* Are we drifting everything (a la Gadget/GIZMO) ? */
  if (e->policy & engine_policy_drift_all && !e->forcerebuild)
    engine_drift_all(e, /*drift_mpole=*/1);
  /* Are we reconstructing the multipoles or drifting them ?*/
  if ((e->policy & engine_policy_self_gravity) && !e->forcerebuild) {
    if (e->policy & engine_policy_reconstruct_mpoles)
      engine_reconstruct_multipoles(e);
    else
      engine_drift_top_multipoles(e);
  }
#ifdef WITH_MPI
  /* Repartition the space amongst the nodes? */
  engine_repartition_trigger(e);
#endif
  /* Prepare the tasks to be launched, rebuild or repartition if needed. */
  const int drifted_all = engine_prepare(e);
  /* Dump local cells and active particle counts. */
  // dumpCells("cells", 1, 0, 0, 0, e->s, e->nodeID, e->step);
#ifdef SWIFT_DEBUG_CHECKS
  /* Print the number of active tasks */
  if (e->verbose) engine_print_task_counts(e);
  /* Check that we have the correct total mass in the top-level multipoles */
  long long num_gpart_mpole = 0;
  if (e->policy & engine_policy_self_gravity) {
    for (int i = 0; i < e->s->nr_cells; ++i)
      num_gpart_mpole += e->s->cells_top[i].grav.multipole->m_pole.num_gpart;
    if (num_gpart_mpole != e->total_nr_gparts)
      error(
          "Multipoles don't contain the total number of gpart mpoles=%lld "
          "ngparts=%lld",
          num_gpart_mpole, e->total_nr_gparts);
  }
#endif
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
  /* Do we need to check if all gparts are active? */
  if (e->force_checks_only_all_active) {
    size_t nr_gparts = e->s->nr_gparts;
    e->all_gparts_active = 1;
    /* Look for inactive gparts */
    for (size_t i = 0; i < nr_gparts; ++i) {
      struct gpart *gp = &e->s->gparts[i];
      /* If one gpart is inactive we can stop. */
      if (!gpart_is_active(gp, e)) {
        e->all_gparts_active = 0;
        break;
      }
    }
  }
  /* Check if we want to run force checks this timestep. */
  if (e->policy & engine_policy_self_gravity) {
    /* Are all gparts active (and the option is selected)? */
    if ((e->all_gparts_active && e->force_checks_only_all_active) ||
        !e->force_checks_only_all_active) {
      /* Is this a snapshot timestep (and the option is selected)? */
      if ((e->force_checks_snapshot_flag &&
           e->force_checks_only_at_snapshots) ||
          !e->force_checks_only_at_snapshots) {
        /* Do checks */
        gravity_exact_force_compute(e->s, e);
      }
    }
  }
#endif
  /* Re-compute the mesh forces? */
  if ((e->policy & engine_policy_self_gravity) && e->s->periodic &&
      e->mesh->ti_end_mesh_next == e->ti_current) {
    /* We might need to drift things */
    if (!drifted_all) engine_drift_all(e, /*drift_mpole=*/0);
    /* ... and recompute */
    pm_mesh_compute_potential(e->mesh, e->s, &e->threadpool, e->verbose);
    /* Check whether we need to update the mesh time-step length */
    engine_recompute_displacement_constraint(e);
    e->step_props |= engine_step_prop_mesh;
  }
  /* Get current CPU times.*/
#ifdef WITH_MPI
  double start_usertime = 0.0;
  double start_systime = 0.0;
  clocks_get_cputimes_used(&start_usertime, &start_systime);
#endif
  /* Write the dependencies */
  if (e->sched.frequency_dependency != 0 &&
      e->step % e->sched.frequency_dependency == 0) {
    scheduler_write_dependencies(&e->sched, e->verbose, e->step);
    scheduler_write_cell_dependencies(&e->sched, e->verbose, e->step);
  }
  /* Write the task levels */
  if (e->sched.frequency_task_levels != 0 &&
      e->step % e->sched.frequency_task_levels == 0)
    scheduler_write_task_level(&e->sched, e->step);
  /* We have to reset the ghost histograms here and not in engine_launch,
     because engine_launch is re-used for the limiter and sync (and we don't
     want to lose the data from the tasks) */
  space_reset_ghost_histograms(e->s);
  /* Zero the list of cells that have had their time-step updated */
  bzero(e->s->cells_top_updated, e->s->nr_cells * sizeof(char));
  /* Start all the tasks. */
  TIMER_TIC;
  engine_launch(e, "tasks");
  TIMER_TOC(timer_runners);
  /* When running over MPI, synchronize top-level cells */
#ifdef WITH_MPI
  engine_synchronize_times(e);
#endif
  /* Now record the CPU times used by the tasks. */
#ifdef WITH_MPI
  double end_usertime = 0.0;
  double end_systime = 0.0;
  clocks_get_cputimes_used(&end_usertime, &end_systime);
  e->usertime_last_step = end_usertime - start_usertime;
  e->systime_last_step = end_systime - start_systime;
#endif
#ifdef SWIFT_HYDRO_DENSITY_CHECKS
  /* Run the brute-force hydro calculation for some parts */
  if (e->policy & engine_policy_hydro)
    hydro_exact_density_compute(e->s, e, /*check_force=*/1);
  /* Check the accuracy of the hydro calculation */
  if (e->policy & engine_policy_hydro)
    hydro_exact_density_check(e->s, e, /*rel_tol=*/1e-3, /*check_force=*/1);
#endif
#ifdef SWIFT_STARS_DENSITY_CHECKS
  /* Run the brute-force stars calculation for some parts */
  if (e->policy & engine_policy_stars) stars_exact_density_compute(e->s, e);
  /* Check the accuracy of the stars calculation */
  if (e->policy & engine_policy_stars)
    stars_exact_density_check(e->s, e, /*rel_tol=*/1e-2);
#endif
#ifdef SWIFT_SINK_DENSITY_CHECKS
  /* Run the brute-force sink calculation for some sinks */
  if (e->policy & engine_policy_sinks) sink_exact_density_compute(e->s, e);
  /* Check the accuracy of the sink calculation */
  if (e->policy & engine_policy_sinks)
    sink_exact_density_check(e->s, e, /*rel_tol=*/1e-2);
#endif
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
  /* Check if we want to run force checks this timestep. */
  if (e->policy & engine_policy_self_gravity) {
    /* Are all gparts active (and the option is selected)? */
    if ((e->all_gparts_active && e->force_checks_only_all_active) ||
        !e->force_checks_only_all_active) {
      /* Is this a snapshot timestep (and the option is selected)? */
      if ((e->force_checks_snapshot_flag &&
           e->force_checks_only_at_snapshots) ||
          !e->force_checks_only_at_snapshots) {
        /* Do checks */
        gravity_exact_force_check(e->s, e, 1e-1);
        /* Reset flag waiting for next output time */
        e->force_checks_snapshot_flag = 0;
      }
    }
  }
#endif
#ifdef SWIFT_DEBUG_CHECKS
  /* Make sure all woken-up particles have been processed */
  space_check_limiter(e->s);
  space_check_sort_flags(e->s);
  space_check_swallow(e->s);
  /* Verify that all the unskip flags for the gravity have been cleaned */
  space_check_unskip_flags(e->s);
#endif
  /* Compute the local accumulated deadtime. */
  const ticks deadticks = (e->nr_threads * e->sched.deadtime.waiting_ticks) -
                          e->sched.deadtime.active_ticks;
  e->local_deadtime = clocks_from_ticks(deadticks);
  /* Collect information about the next time-step */
  engine_collect_end_of_step(e, 1);
  e->forcerebuild = e->collect_group1.forcerebuild;
  e->updates_since_rebuild += e->collect_group1.updated;
  e->g_updates_since_rebuild += e->collect_group1.g_updated;
  e->s_updates_since_rebuild += e->collect_group1.s_updated;
  e->sink_updates_since_rebuild += e->collect_group1.sink_updated;
  e->b_updates_since_rebuild += e->collect_group1.b_updated;
  /* Check if we updated all of the particles on this step */
  if ((e->collect_group1.updated == e->total_nr_parts) &&
      (e->collect_group1.g_updated == e->total_nr_gparts) &&
      (e->collect_group1.s_updated == e->total_nr_sparts) &&
      (e->collect_group1.sink_updated == e->total_nr_sinks) &&
      (e->collect_group1.b_updated == e->total_nr_bparts))
    e->ti_earliest_undrifted = e->ti_current;
#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that all cells have correct time-step information */
  space_check_timesteps(e->s);
  if (e->ti_end_min == e->ti_current && e->ti_end_min < max_nr_timesteps)
    error("Obtained a time-step of size 0");
#endif
  /* Run the RT sub-cycling now. */
  engine_run_rt_sub_cycles(e);
#ifdef WITH_CSDS
  if (e->policy & engine_policy_csds && e->verbose)
    message("The CSDS currently uses %f GB of storage",
            e->collect_group1.csds_file_size_gb);
#endif
  /********************************************************/
  /* OK, we are done with the regular stuff. Time for i/o */
  /********************************************************/
#ifdef WITH_LIGHTCONE
  /* Flush lightcone buffers if necessary */
  const int flush = e->flush_lightcone_maps;
  lightcone_array_flush(e->lightcone_array_properties, &(e->threadpool),
                        e->cosmology, e->internal_units, e->snapshot_units,
                        /*flush_map_updates=*/flush, /*flush_particles=*/0,
                        /*end_file=*/0, /*dump_all_shells=*/0);
#endif
  /* Create a restart file if needed. */
  const int force_stop =
      engine_dump_restarts(e, 0, e->restart_onexit && engine_is_done(e));
  /* Is there any form of i/o this step?
   *
   * Note that if the run was forced to stop, we do not dump,
   * we will do so when the run is restarted*/
  if (!force_stop) engine_io(e);
#ifdef SWIFT_RT_DEBUG_CHECKS
  /* if we're running the debug RT scheme, do some checks after every step.
   * Do this after the output so we can safely reset debugging checks now. */
  if (e->policy & engine_policy_rt)
    rt_debugging_checks_end_of_step(e, e->verbose);
#endif
  TIMER_TOC2(timer_step);
  clocks_gettime(&time2);
  e->wallclock_time = (float)clocks_diff(&time1, &time2);
  /* Time in ticks at the end of this step. */
  e->toc_step = getticks();
  return force_stop;
}
/**
* @brief Returns 1 if the simulation has reached its end point, 0 otherwise
*/
int engine_is_done(struct engine *e) {
  /* The run terminates once the integer time-line has been exhausted. */
  return e->ti_current >= max_nr_timesteps;
}
void engine_do_reconstruct_multipoles_mapper(void *map_data, int num_elements,
                                             void *extra_data) {
  struct engine *e = (struct engine *)extra_data;
  struct cell *cells = (struct cell *)map_data;

  /* Rebuild the multipole information of every local top-level cell in the
   * chunk handed to this threadpool worker. */
  for (int k = 0; k < num_elements; k++) {
    struct cell *cp = &cells[k];

    /* Skip invalid entries and cells owned by other ranks. */
    if (cp == NULL || cp->nodeID != e->nodeID) continue;

    /* Construct the multipoles in this cell hierarchy */
    cell_make_multipoles(cp, e->ti_current, e->gravity_properties);
  }
}
/**
* @brief Reconstruct all the multipoles at all the levels in the tree.
*
* @param e The #engine.
*/
void engine_reconstruct_multipoles(struct engine *e) {
  const ticks tic = getticks();

#ifdef SWIFT_DEBUG_CHECKS
  /* Report the time at which the reconstruction takes place (rank 0 only). */
  if (e->nodeID == 0) {
    const int with_cosmo = (e->policy & engine_policy_cosmology);
    if (with_cosmo)
      message("Reconstructing multipoles at a=%e",
              exp(e->ti_current * e->time_base) * e->cosmology->a_begin);
    else
      message("Reconstructing multipoles at t=%e",
              e->ti_current * e->time_base + e->time_begin);
  }
#endif

  /* Rebuild the multipoles of all local top-level cell hierarchies in
   * parallel over the threadpool. */
  threadpool_map(&e->threadpool, engine_do_reconstruct_multipoles_mapper,
                 e->s->cells_top, e->s->nr_cells, sizeof(struct cell),
                 threadpool_auto_chunk_size, e);

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}
/**
* @brief Split the underlying space into regions, construct proxies and
* distribute the particles where they belong.
*
* @param e The #engine.
* @param initial_partition structure defining the cell partition technique
*/
void engine_split(struct engine *e, struct partition *initial_partition) {
#ifdef WITH_MPI
  const ticks tic = getticks();
  struct space *s = e->s;

  /* Carve up the top-level cells between the MPI ranks. */
  partition_initial_partition(initial_partition, e->nodeID, e->nr_nodes, s);

  /* Create the communication proxies with our neighbouring ranks. */
  engine_makeproxies(e);

  /* The CSDS must not record the particle exchange below, as the run has
   * not properly started yet: remember its state and switch it off. */
  const int csds_was_on = (e->policy & engine_policy_csds);
  if (csds_was_on) e->policy &= ~engine_policy_csds;

  /* Ship every particle to the rank that now owns it. */
  engine_redistribute(e);

  /* Restore the CSDS policy flag. */
  if (csds_was_on) e->policy |= engine_policy_csds;

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
#else
  error("SWIFT was not compiled with MPI support.");
#endif
}
#ifdef DEBUG_INTERACTIONS_STARS
/**
* @brief Exchange the feedback counters between stars
* @param e The #engine.
*/
void engine_collect_stars_counter(struct engine *e) {
#ifdef WITH_MPI
  if (e->total_nr_sparts > 1e5) {
    message("WARNING: too many sparts, skipping exchange of counters");
    return;
  }

  /* Get the number of foreign sparticles held by each rank. */
  size_t *n_sparts = (size_t *)malloc(e->nr_nodes * sizeof(size_t));
  if (n_sparts == NULL)
    error("Failed to allocate the spart counts array");
  int err = MPI_Allgather(&e->s->nr_sparts_foreign, 1, MPI_UNSIGNED_LONG,
                          n_sparts, 1, MPI_UNSIGNED_LONG, MPI_COMM_WORLD);
  if (err != MPI_SUCCESS) error("Communication failed");

  /* Compute derived quantities (int counts + displacements for Allgatherv) */
  int total = 0;
  int *n_sparts_int = (int *)malloc(e->nr_nodes * sizeof(int));
  int *displs = (int *)malloc(e->nr_nodes * sizeof(int));
  if (n_sparts_int == NULL || displs == NULL)
    error("Failed to allocate the spart displacement arrays");
  for (int i = 0; i < e->nr_nodes; i++) {
    displs[i] = total;
    total += n_sparts[i];
    n_sparts_int[i] = n_sparts[i];
  }

  /* Gather all the foreign sparticles from every rank. */
  struct spart *sparts =
      (struct spart *)swift_malloc("sparts", total * sizeof(struct spart));
  if (sparts == NULL)
    error("Failed to allocate the gathered sparts array");
  err = MPI_Allgatherv(e->s->sparts_foreign, e->s->nr_sparts_foreign,
                       spart_mpi_type, sparts, n_sparts_int, displs,
                       spart_mpi_type, MPI_COMM_WORLD);
  if (err != MPI_SUCCESS) error("Communication failed");

  /* Reset the local foreign counters. */
  for (size_t i = 0; i < e->s->nr_sparts_foreign; i++) {
    e->s->sparts_foreign[i].num_ngb_feedback = 0;
  }

  /* Accumulate, for each local spart, the counters of its foreign copies.
   * Note: O(n_local * n_total) search; acceptable as this debug-only path
   * bails out above 1e5 sparticles. */
  struct spart *local_sparts = e->s->sparts;
  for (size_t i = 0; i < e->s->nr_sparts; i++) {
    const long long id_i = local_sparts[i].id;
    for (int j = 0; j < total; j++) {
      const long long id_j = sparts[j].id;
      if (id_j == id_i) {
        /* A copy of a local spart must not appear in our own foreign slice. */
        if (j >= displs[engine_rank] &&
            j < displs[engine_rank] + n_sparts_int[engine_rank]) {
          error(
              "Found a local spart in foreign cell ID=%lli: j=%i, displs=%i, "
              "n_sparts=%i",
              id_j, j, displs[engine_rank], n_sparts_int[engine_rank]);
        }
        local_sparts[i].num_ngb_feedback += sparts[j].num_ngb_feedback;
      }
    }
  }

  free(n_sparts);
  free(n_sparts_int);
  free(displs); /* BUG FIX: displs was previously leaked. */
  swift_free("sparts", sparts);
#endif
}
#endif
#ifdef HAVE_SETAFFINITY
/**
 * @brief Returns the initial affinity the main thread is using.
 */
cpu_set_t *engine_entry_affinity(void) {
  static int initialised = 0;
  static cpu_set_t entry_affinity;

  /* Capture the affinity mask once, on first call, and cache it. */
  if (!initialised) {
    pthread_getaffinity_np(pthread_self(), sizeof(entry_affinity),
                           &entry_affinity);
    initialised = 1;
  }
  return &entry_affinity;
}
#endif
/**
 * @brief Ensure the NUMA node on which we initialise (first touch) everything
 * doesn't change before engine_init allocates NUMA-local workers.
 */
void engine_pin(void) {
#ifdef HAVE_SETAFFINITY
  cpu_set_t *entry_affinity = engine_entry_affinity();

  /* Share this affinity with the threadpool, it will use this even when the
   * main thread is otherwise pinned. */
  threadpool_set_affinity_mask(entry_affinity);

  /* Locate the first CPU present in the entry affinity mask. */
  int cpu = 0;
  while (cpu < CPU_SETSIZE && !CPU_ISSET(cpu, entry_affinity)) ++cpu;

  /* Pin the calling (main) thread to that single CPU. */
  cpu_set_t single_cpu;
  CPU_ZERO(&single_cpu);
  CPU_SET(cpu, &single_cpu);
  if (sched_setaffinity(0, sizeof(single_cpu), &single_cpu) != 0) {
    error("failed to set engine's affinity.");
  }
#else
  error("SWIFT was not compiled with support for pinning.");
#endif
}
/**
 * @brief Unpins the main thread.
 */
void engine_unpin(void) {
#ifdef HAVE_SETAFFINITY
  /* Restore the affinity mask that was recorded on entry. */
  cpu_set_t *mask = engine_entry_affinity();
  pthread_setaffinity_np(pthread_self(), sizeof(*mask), mask);
#else
  error("SWIFT was not compiled with support for pinning.");
#endif
}
/**
 * @brief Define a NUMA memory placement policy of interleave across the
 * available NUMA nodes rather than having memory in the local node, which
 * means we have a lot of memory associated with the main thread NUMA node, so
 * we don't make good use of the overall memory bandwidth between nodes.
 *
 * @param rank the MPI rank, if relevant.
 * @param verbose whether to make a report about the selected NUMA nodes.
 */
void engine_numa_policies(int rank, int verbose) {
#if defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
  /* Get our affinity mask (on entry), that defines what NUMA nodes we should
   * use. */
  cpu_set_t *entry_affinity = engine_entry_affinity();

  /* Now convert the affinity mask into NUMA nodemask. */
  struct bitmask *nodemask = numa_allocate_nodemask();
  int nnuma = numa_num_configured_nodes();
  for (unsigned long i = 0; i < CPU_SETSIZE; i++) {
    /* If in the affinity mask we set NUMA node of CPU bit. */
    if (CPU_ISSET(i, entry_affinity)) {
      int numanode = numa_node_of_cpu(i);
      /* numa_node_of_cpu() returns -1 for CPUs that are not present; such a
       * value must not be passed on to numa_bitmask_setbit(). */
      if (numanode >= 0) numa_bitmask_setbit(nodemask, numanode);
    }
  }

  if (verbose) {
    /* Build a small "[x . x]" style report, bounded to the buffer size. */
    char report[1024];
    int len = snprintf(report, sizeof(report), "NUMA nodes in use: [");
    for (int i = 0; i < nnuma; i++) {
      if (len >= (int)sizeof(report) - 3) break; /* no room left */
      if (numa_bitmask_isbitset(nodemask, i)) {
        len += snprintf(&report[len], sizeof(report) - len, "%d ", i);
      } else {
        len += snprintf(&report[len], sizeof(report) - len, ". ");
      }
    }
    snprintf(&report[len], sizeof(report) - len, "]");
    pretime_message("%s", report);
  }

  /* And set. */
  set_mempolicy(MPOL_INTERLEAVE, nodemask->maskp, nodemask->size + 1);
  numa_free_nodemask(nodemask);
#endif
}
/**
 * @brief init an engine struct with the necessary properties for the
 * simulation.
 *
 * Note do not use when restarting. Engine initialisation
 * is completed by a call to engine_config().
 *
 * @param e The #engine.
 * @param s The #space in which this #runner will run.
 * @param params The parsed parameter file.
 * @param output_options Output options for snapshots.
 * @param Ngas total number of gas particles in the simulation.
 * @param Ngparts total number of gravity particles in the simulation.
 * @param Nsinks total number of sink particles in the simulation.
 * @param Nstars total number of star particles in the simulation.
 * @param Nblackholes total number of black holes in the simulation.
 * @param Nbackground_gparts Total number of background DM particles.
 * @param Nnuparts Total number of neutrino DM particles.
 * @param policy The queuing policy to use.
 * @param verbose Is this #engine talkative ?
 * @param internal_units The system of units used internally.
 * @param physical_constants The #phys_const used for this run.
 * @param cosmo The #cosmology used for this run.
 * @param hydro The #hydro_props used for this run.
 * @param entropy_floor The #entropy_floor_properties for this run.
 * @param gravity The #gravity_props used for this run.
 * @param stars The #stars_props used for this run.
 * @param black_holes The #black_holes_props used for this run.
 * @param sinks The #sink_props used for this run.
 * @param neutrinos The #neutrino_props used for this run.
 * @param feedback The #feedback_props used for this run.
 * @param mesh The #pm_mesh used for the long-range periodic forces.
 * @param pow_data The properties and pointers for power spectrum calculation.
 * @param potential The properties of the external potential.
 * @param cooling_func The properties of the cooling function.
 * @param starform The #star_formation model of this run.
 * @param chemistry The chemistry information.
 * @param io_extra_props The properties needed for the extra i/o fields.
 * @param fof_properties The #fof_props of this run.
 * @param los_properties the #los_props of this run.
 * @param lightcone_array_properties the #lightcone_array_props of this run.
 * @param ics_metadata metadata read from the simulation ICs
 */
void engine_init(
    struct engine *e, struct space *s, struct swift_params *params,
    struct output_options *output_options, long long Ngas, long long Ngparts,
    long long Nsinks, long long Nstars, long long Nblackholes,
    long long Nbackground_gparts, long long Nnuparts, int policy, int verbose,
    const struct unit_system *internal_units,
    const struct phys_const *physical_constants, struct cosmology *cosmo,
    struct hydro_props *hydro,
    const struct entropy_floor_properties *entropy_floor,
    struct gravity_props *gravity, struct stars_props *stars,
    const struct black_holes_props *black_holes, const struct sink_props *sinks,
    const struct neutrino_props *neutrinos,
    struct neutrino_response *neutrino_response,
    struct feedback_props *feedback,
    struct pressure_floor_props *pressure_floor, struct rt_props *rt,
    struct pm_mesh *mesh, struct power_spectrum_data *pow_data,
    const struct external_potential *potential,
    const struct forcing_terms *forcing_terms,
    struct cooling_function_data *cooling_func,
    const struct star_formation *starform,
    const struct chemistry_global_data *chemistry,
    struct extra_io_properties *io_extra_props,
    struct fof_props *fof_properties, struct los_props *los_properties,
    struct lightcone_array_props *lightcone_array_properties,
    struct ic_info *ics_metadata) {

  struct clocks_time tic, toc;
  if (engine_rank == 0) clocks_gettime(&tic);

  /* Clean-up everything: the engine struct starts fully zeroed. */
  bzero(e, sizeof(struct engine));

  /* Store the all values in the fields of the engine. */
  e->s = s;
  e->policy = policy;
  e->step = 0;

  /* Global (all-rank) particle counts per type. */
  e->total_nr_parts = Ngas;
  e->total_nr_gparts = Ngparts;
  e->total_nr_sparts = Nstars;
  e->total_nr_sinks = Nsinks;
  e->total_nr_bparts = Nblackholes;
  e->total_nr_DM_background_gparts = Nbackground_gparts;
  e->total_nr_neutrino_gparts = Nnuparts;

  /* No proxies yet; they are built later (see engine_makeproxies()). */
  e->proxy_ind = NULL;
  e->nr_proxies = 0;

  /* Time integration state: zeroed here, properly set further down
   * depending on whether the run is cosmological. */
  e->ti_old = 0;
  e->ti_current = 0;
  e->ti_earliest_undrifted = 0;
  e->time_step = 0.;
  e->time_base = 0.;
  e->time_base_inv = 0.;
  e->time_begin = 0.;
  e->time_end = 0.;
  e->max_active_bin = num_time_bins;
  e->min_active_bin = 1;
  e->ti_current_subcycle = 0;
  e->max_active_bin_subcycle = num_time_bins;
  e->min_active_bin_subcycle = 1;
  e->internal_units = internal_units;

  /* --- Snapshot output parameters --- */
  e->output_list_snapshots = NULL;
  /* NOTE(review): num_snapshot_triggers_* look like file-scope trigger
   * counts -- confirm against their declaration elsewhere in this file. */
  if (num_snapshot_triggers_part)
    parser_get_param_double_array(params, "Snapshots:recording_triggers_part",
                                  num_snapshot_triggers_part,
                                  e->snapshot_recording_triggers_desired_part);
  if (num_snapshot_triggers_spart)
    parser_get_param_double_array(params, "Snapshots:recording_triggers_spart",
                                  num_snapshot_triggers_spart,
                                  e->snapshot_recording_triggers_desired_spart);
  if (num_snapshot_triggers_bpart)
    parser_get_param_double_array(params, "Snapshots:recording_triggers_bpart",
                                  num_snapshot_triggers_bpart,
                                  e->snapshot_recording_triggers_desired_bpart);
  e->a_first_snapshot =
      parser_get_opt_param_double(params, "Snapshots:scale_factor_first", 0.1);
  e->time_first_snapshot =
      parser_get_opt_param_double(params, "Snapshots:time_first", 0.);
  e->delta_time_snapshot =
      parser_get_opt_param_double(params, "Snapshots:delta_time", -1.);
  e->ti_next_snapshot = 0;
  parser_get_param_string(params, "Snapshots:basename", e->snapshot_base_name);
  parser_get_opt_param_string(params, "Snapshots:subdir", e->snapshot_subdir,
                              engine_default_snapshot_subdir);
  parser_get_opt_param_int_array(params, "Snapshots:subsample",
                                 swift_type_count, e->snapshot_subsample);
  parser_get_opt_param_float_array(params, "Snapshots:subsample_fraction",
                                   swift_type_count,
                                   e->snapshot_subsample_fraction);
  /* Optional external command invoked each time a snapshot is dumped. */
  e->snapshot_run_on_dump =
      parser_get_opt_param_int(params, "Snapshots:run_on_dump", 0);
  if (e->snapshot_run_on_dump) {
    parser_get_param_string(params, "Snapshots:dump_command",
                            e->snapshot_dump_command);
  }
  e->snapshot_compression =
      parser_get_opt_param_int(params, "Snapshots:compression", 0);
  e->snapshot_distributed =
      parser_get_opt_param_int(params, "Snapshots:distributed", 0);
  e->snapshot_lustre_OST_count =
      parser_get_opt_param_int(params, "Snapshots:lustre_OST_count", 0);
  /* Optional on-the-fly analysis triggered at snapshot time. */
  e->snapshot_invoke_stf =
      parser_get_opt_param_int(params, "Snapshots:invoke_stf", 0);
  e->snapshot_invoke_fof =
      parser_get_opt_param_int(params, "Snapshots:invoke_fof", 0);
  e->snapshot_invoke_ps =
      parser_get_opt_param_int(params, "Snapshots:invoke_ps", 0);
  e->snapshot_use_delta_from_edge =
      parser_get_opt_param_int(params, "Snapshots:use_delta_from_edge", 0);
  if (e->snapshot_use_delta_from_edge) {
    e->snapshot_delta_from_edge =
        parser_get_param_double(params, "Snapshots:delta_from_edge");
  }
  e->dump_catalogue_when_seeding =
      parser_get_opt_param_int(params, "FOF:dump_catalogue_when_seeding", 0);
  /* Unit system used for snapshot output (may differ from internal units);
   * freed in engine_clean(). */
  e->snapshot_units = (struct unit_system *)malloc(sizeof(struct unit_system));
  units_init_default(e->snapshot_units, params, "Snapshots", internal_units);
  e->free_foreign_when_dumping_restart = parser_get_opt_param_int(
      params, "Scheduler:free_foreign_during_restart", 0);
  e->free_foreign_when_rebuilding = parser_get_opt_param_int(
      params, "Scheduler:free_foreign_during_rebuild", 0);
  e->snapshot_output_count = 0;
  e->stf_output_count = 0;
  e->los_output_count = 0;
  e->ps_output_count = 0;

  /* --- Time integration parameters --- */
  e->dt_min = parser_get_param_double(params, "TimeIntegration:dt_min");
  e->dt_max = parser_get_param_double(params, "TimeIntegration:dt_max");
  e->max_nr_rt_subcycles = parser_get_opt_param_int(
      params, "TimeIntegration:max_nr_rt_subcycles", /*default=*/0);
  /* RMS-displacement limit starts unconstrained; recomputed later by
   * engine_recompute_displacement_constraint(). */
  e->dt_max_RMS_displacement = FLT_MAX;
  e->max_RMS_displacement_factor = parser_get_opt_param_double(
      params, "TimeIntegration:max_dt_RMS_factor", 0.25);
  e->max_RMS_dt_use_only_gas = parser_get_opt_param_int(
      params, "TimeIntegration:dt_RMS_use_gas_only", 0);
  e->dt_kick_grav_mesh_for_io = 0.f;

  /* --- Statistics output parameters --- */
  e->a_first_statistics =
      parser_get_opt_param_double(params, "Statistics:scale_factor_first", 0.1);
  e->time_first_statistics =
      parser_get_opt_param_double(params, "Statistics:time_first", 0.);
  e->delta_time_statistics =
      parser_get_param_double(params, "Statistics:delta_time");
  e->ti_next_stats = 0;
  e->ti_next_stf = 0;
  e->ti_next_fof = 0;
  e->ti_next_ps = 0;
  e->verbose = verbose;
  e->wallclock_time = 0.f;

  /* --- Pointers to the physics modules owned elsewhere --- */
  e->physical_constants = physical_constants;
  e->cosmology = cosmo;
  e->hydro_properties = hydro;
  e->entropy_floor = entropy_floor;
  e->gravity_properties = gravity;
  e->stars_properties = stars;
  e->black_holes_properties = black_holes;
  e->sink_properties = sinks;
  e->neutrino_properties = neutrinos;
  e->neutrino_response = neutrino_response;
  e->mesh = mesh;
  e->power_data = pow_data;
  e->external_potential = potential;
  e->forcing_terms = forcing_terms;
  e->cooling_func = cooling_func;
  e->star_formation = starform;
  e->feedback_props = feedback;
  e->pressure_floor_props = pressure_floor;
  e->rt_props = rt;
  e->chemistry = chemistry;
  e->io_extra_props = io_extra_props;
  e->fof_properties = fof_properties;
  e->parameter_file = params;
  e->output_options = output_options;
  e->stf_this_timestep = 0;
  e->los_properties = los_properties;
  e->lightcone_array_properties = lightcone_array_properties;
  e->ics_metadata = ics_metadata;
#ifdef WITH_MPI
  e->usertime_last_step = 0.0;
  e->systime_last_step = 0.0;
  e->last_repartition = 0;
#endif
  e->total_nr_cells = 0;
  e->total_nr_tasks = 0;

#ifdef SWIFT_GRAVITY_FORCE_CHECKS
  e->force_checks_only_all_active =
      parser_get_opt_param_int(params, "ForceChecks:only_when_all_active", 0);
  e->force_checks_only_at_snapshots =
      parser_get_opt_param_int(params, "ForceChecks:only_at_snapshots", 0);
#endif

  /* Make the space link back to the engine. */
  s->e = e;

  /* Read the run label */
  memset(e->run_name, 0, PARSER_MAX_LINE_SIZE);
  parser_get_opt_param_string(params, "MetaData:run_name", e->run_name,
                              "Untitled SWIFT simulation");
  if (strlen(e->run_name) == 0) {
    error("The run name in the parameter file cannot be an empty string.");
  }

  /* Setup the timestep if non-cosmological */
  if (!(e->policy & engine_policy_cosmology)) {
    e->time_begin =
        parser_get_param_double(params, "TimeIntegration:time_begin");
    e->time_end = parser_get_param_double(params, "TimeIntegration:time_end");
    e->time_old = e->time_begin;
    e->time = e->time_begin;

    /* One integer tick corresponds to this much physical time. */
    e->time_base = (e->time_end - e->time_begin) / max_nr_timesteps;
    e->time_base_inv = 1.0 / e->time_base;
    e->ti_current = 0;
  } else {
    e->time_begin = e->cosmology->time_begin;
    e->time_end = e->cosmology->time_end;
    e->time_old = e->time_begin;
    e->time = e->time_begin;

    /* Copy the relevent information from the cosmology model */
    e->time_base = e->cosmology->time_base;
    e->time_base_inv = e->cosmology->time_base_inv;
    e->ti_current = 0;
  }

  /* Initialise VELOCIraptor output. */
  if (e->policy & engine_policy_structure_finding) {
    parser_get_param_string(params, "StructureFinding:basename",
                            e->stf_base_name);
    parser_get_opt_param_string(params, "StructureFinding:subdir_per_output",
                                e->stf_subdir_per_output,
                                engine_default_stf_subdir_per_output);
    parser_get_param_string(params, "StructureFinding:config_file_name",
                            e->stf_config_file_name);

    e->time_first_stf_output =
        parser_get_opt_param_double(params, "StructureFinding:time_first", 0.);
    e->a_first_stf_output = parser_get_opt_param_double(
        params, "StructureFinding:scale_factor_first", 0.1);
    e->delta_time_stf =
        parser_get_opt_param_double(params, "StructureFinding:delta_time", -1.);
  }

  /* Initialise line of sight output. */
  if (e->policy & engine_policy_line_of_sight) {
    e->time_first_los =
        parser_get_opt_param_double(params, "LineOfSight:time_first", 0.);
    e->a_first_los = parser_get_opt_param_double(
        params, "LineOfSight:scale_factor_first", 0.1);
    e->delta_time_los =
        parser_get_opt_param_double(params, "LineOfSight:delta_time", -1.);
  }

  /* Initialise power spectrum output. */
  if (e->policy & engine_policy_power_spectra) {
    e->time_first_ps_output =
        parser_get_opt_param_double(params, "PowerSpectrum:time_first", 0.);
    e->a_first_ps_output = parser_get_opt_param_double(
        params, "PowerSpectrum:scale_factor_first", 0.1);
    e->delta_time_ps =
        parser_get_opt_param_double(params, "PowerSpectrum:delta_time", -1.);
  }

  /* Initialise FoF calls frequency. */
  if (e->policy & engine_policy_fof) {
    e->time_first_fof_call =
        parser_get_opt_param_double(params, "FOF:time_first", 0.);
    e->a_first_fof_call =
        parser_get_opt_param_double(params, "FOF:scale_factor_first", 0.1);
    e->delta_time_fof =
        parser_get_opt_param_double(params, "FOF:delta_time", -1.);
  } else {
    /* Invoking FOF at snapshot time requires the FOF policy. */
    if (e->snapshot_invoke_fof)
      error("Error: Must run with --fof if Snapshots::invoke_fof=1\n");
  }

  /* Initialize the star formation history structure */
  if (e->policy & engine_policy_star_formation) {
    star_formation_logger_accumulator_init(&e->sfh);
  }

  /* Initialize the neutrino mass conversion factor */
  if (Nnuparts > 0) {
    /* Conversion uses the full co-moving box volume. */
    const double neutrino_volume = s->dim[0] * s->dim[1] * s->dim[2];
    e->neutrino_mass_conversion_factor =
        neutrino_mass_factor(cosmo, internal_units, physical_constants,
                             neutrino_volume, e->total_nr_neutrino_gparts);
  } else {
    e->neutrino_mass_conversion_factor = 0.f;
  }

  if (engine_rank == 0) {
    clocks_gettime(&toc);
    message("took %.3f %s.", clocks_diff(&tic, &toc), clocks_getunit());
    fflush(stdout);
  }

  /* Initialize the CSDS (already timed, not need to include it) */
#if defined(WITH_CSDS)
  if (e->policy & engine_policy_csds) {
    /* Freed in engine_clean(). */
    e->csds = (struct csds_writer *)malloc(sizeof(struct csds_writer));
    csds_init(e->csds, e, params);
  }
#endif
}
/**
 * @brief Prints the current policy of an engine
 *
 * When running under MPI only rank 0 reports; the output itself is
 * identical in both configurations.
 *
 * @param e The engine to print information about
 */
void engine_print_policy(struct engine *e) {
#ifdef WITH_MPI
  /* Only one rank needs to report. */
  if (e->nodeID != 0) return;
  printf("[0000] %s engine_policy: engine policies are [ ",
         clocks_get_timesincestart());
#else
  printf("%s engine_policy: engine policies are [ ",
         clocks_get_timesincestart());
#endif

  /* Policy bit k maps to engine_policy_names[k + 1]. */
  for (int k = 0; k <= engine_maxpolicy; k++)
    if (e->policy & (1 << k)) printf(" '%s' ", engine_policy_names[k + 1]);
  printf(" ]\n");
  fflush(stdout);
}
/**
 * @brief Computes the maximal time-step allowed by the max RMS displacement
 * condition.
 *
 * Also advances the time-step used for the long-range (mesh) gravity
 * forces to the next allowed value on the integer timeline.
 *
 * @param e The #engine.
 */
void engine_recompute_displacement_constraint(struct engine *e) {

  const ticks tic = getticks();

  /* Get the cosmological information */
  const int with_cosmology = e->policy & engine_policy_cosmology;
  const struct cosmology *cosmo = e->cosmology;
  const float Ocdm = cosmo->Omega_cdm;
  const float Ob = cosmo->Omega_b;
  const float H0 = cosmo->H0;
  const float a = cosmo->a;
  const float G_newton = e->physical_constants->const_newton_G;
  const float rho_crit0 = 3.f * H0 * H0 / (8.f * M_PI * G_newton);

  if (with_cosmology) {

    /* Start by reducing the minimal mass of each particle type.
     * Slots follow the swift particle-type ordering:
     * gas, DM, background DM, sink, stars, black holes, neutrinos. */
    float min_mass[swift_type_count] = {e->s->min_part_mass,
                                        e->s->min_gpart_mass,
                                        FLT_MAX,
                                        e->s->min_sink_mass,
                                        e->s->min_spart_mass,
                                        e->s->min_bpart_mass,
                                        FLT_MAX};
#ifdef WITH_MPI
    MPI_Allreduce(MPI_IN_PLACE, min_mass, swift_type_count, MPI_FLOAT, MPI_MIN,
                  MPI_COMM_WORLD);
#endif

#ifdef SWIFT_DEBUG_CHECKS
    /* Check that the minimal mass collection worked */
    float min_part_mass_check = FLT_MAX;
    for (size_t i = 0; i < e->s->nr_parts; ++i) {
      if (e->s->parts[i].time_bin >= num_time_bins) continue;
      min_part_mass_check =
          min(min_part_mass_check, hydro_get_mass(&e->s->parts[i]));
    }
    if (min_part_mass_check < min_mass[swift_type_gas])
      error("Error collecting minimal mass of gas particles.");
#endif

    /* Do the same for the velocity norm sum.
     * BUGFIX: the black-hole slot used to read sum_spart_vel_norm; it must
     * use the bpart sum (compare with min_mass[] above). */
    float vel_norm[swift_type_count] = {e->s->sum_part_vel_norm,
                                        e->s->sum_gpart_vel_norm,
                                        0.f,
                                        e->s->sum_sink_vel_norm,
                                        e->s->sum_spart_vel_norm,
                                        e->s->sum_bpart_vel_norm,
                                        0.f};
#ifdef WITH_MPI
    MPI_Allreduce(MPI_IN_PLACE, vel_norm, swift_type_count, MPI_FLOAT, MPI_SUM,
                  MPI_COMM_WORLD);
#endif

    /* Get the counts of each particle types */
    const long long total_nr_baryons =
        e->total_nr_parts + e->total_nr_sparts + e->total_nr_bparts;
    const long long total_nr_dm_gparts =
        e->total_nr_gparts - e->total_nr_DM_background_gparts -
        e->total_nr_neutrino_gparts - total_nr_baryons;
    float count_parts[swift_type_count] = {
        (float)e->total_nr_parts,
        (float)total_nr_dm_gparts,
        (float)e->total_nr_DM_background_gparts,
        (float)e->total_nr_sinks,
        (float)e->total_nr_sparts,
        (float)e->total_nr_bparts,
        (float)e->total_nr_neutrino_gparts};

    /* Count of particles for the two species */
    const float N_dm = count_parts[1];
    const float N_b =
        count_parts[0] + count_parts[3] + count_parts[4] + count_parts[5];

    /* Peculiar motion norm for the two species */
    const float vel_norm_dm = vel_norm[1];
    const float vel_norm_b =
        vel_norm[0] + vel_norm[3] + vel_norm[4] + vel_norm[5];

    /* Mesh forces smoothing scale */
    float r_s;
    if ((e->policy & engine_policy_self_gravity) && e->s->periodic)
      r_s = e->mesh->r_s;
    else
      r_s = FLT_MAX;

    float dt_dm = FLT_MAX, dt_b = FLT_MAX;

    /* DM case */
    if (N_dm > 0.f) {

      /* Minimal mass for the DM */
      const float min_mass_dm = min_mass[1];

      /* Inter-particle separation for the DM */
      const float d_dm = cbrtf(min_mass_dm / (Ocdm * rho_crit0));

      /* RMS peculiar motion for the DM */
      const float rms_vel_dm = vel_norm_dm / N_dm;

      /* Time-step based on maximum displacement */
      dt_dm = a * a * min(r_s, d_dm) / sqrtf(rms_vel_dm);
    }

    /* Baryon case */
    if (N_b > 0.f) {

      /* Minimal mass for the baryons */
      float min_mass_b;
      if (e->max_RMS_dt_use_only_gas)
        min_mass_b = min_mass[0];
      else
        min_mass_b = min4(min_mass[0], min_mass[3], min_mass[4], min_mass[5]);

      /* Inter-particle separation for the baryons */
      const float d_b = cbrtf(min_mass_b / (Ob * rho_crit0));

      /* RMS peculiar motion for the baryons */
      const float rms_vel_b = vel_norm_b / N_b;

      /* Time-step based on maximum displacement */
      dt_b = a * a * min(r_s, d_b) / sqrtf(rms_vel_b);
    }

    /* Use the minimum */
    const float dt = min(dt_dm, dt_b);

    /* Apply the dimensionless factor */
    e->dt_max_RMS_displacement = dt * e->max_RMS_displacement_factor;

    if (e->dt_max_RMS_displacement == 0.f) {
      error("Setting dt_max_RMS_displacement to 0!");
    }

    if (e->verbose)
      message("max_dt_RMS_displacement = %e", e->dt_max_RMS_displacement);
  }

  /* Now, update the mesh time-step */

  /* Store the previous time-step size */
  e->mesh->ti_end_mesh_last = e->mesh->ti_end_mesh_next;
  e->mesh->ti_beg_mesh_last = e->mesh->ti_beg_mesh_next;
  const integertime_t old_dti =
      e->mesh->ti_end_mesh_last - e->mesh->ti_beg_mesh_last;

#ifdef SWIFT_DEBUG_CHECKS
  if (e->step > 1 && e->mesh->ti_end_mesh_last != e->ti_current)
    error("Weird time integration issue");
#endif

  /* What is the allowed time-step size
   * Note: The cosmology factor is 1 in non-cosmo runs */
  double dt_mesh = e->dt_max_RMS_displacement * e->cosmology->time_step_factor;
  dt_mesh = min(dt_mesh, e->dt_max);

  /* Convert to integer time */
  integertime_t new_dti = (integertime_t)(dt_mesh * e->time_base_inv);

  /* Find the max integer time-step on the timeline below new_dti */
  integertime_t dti_timeline = max_nr_timesteps;
  while (new_dti < dti_timeline) dti_timeline /= ((integertime_t)2);
  new_dti = dti_timeline;

  /* Make sure we are allowed to increase the timestep size:
   * a larger step must still land on a valid point of the timeline. */
  const integertime_t current_dti = e->step > 0 ? old_dti : max_nr_timesteps;
  if (new_dti > current_dti) {
    if ((max_nr_timesteps - e->ti_current) % new_dti > 0) {
      new_dti = current_dti;
    }
  }

  e->mesh->ti_beg_mesh_next = e->ti_current;
  e->mesh->ti_end_mesh_next = e->ti_current + new_dti;

  const timebin_t bin = get_time_bin(new_dti);
  if (e->verbose && new_dti != old_dti)
    message("Mesh time-step changed to %e (time-bin %d)",
            get_timestep(bin, e->time_base), bin);

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}
/**
 * @brief Frees up the memory allocated for this #engine
 *
 * @param e The #engine to clean.
 * @param fof Was this a stand-alone FOF run?
 * @param restart Was this a run that was restarted from check-point files?
 */
void engine_clean(struct engine *e, const int fof, const int restart) {
  /* Start by telling the runners to stop. */
  e->step_props = engine_step_prop_done;
  swift_barrier_wait(&e->run_barrier);

  /* Wait for each runner to come home, then release its caches. */
  for (int k = 0; k < e->nr_threads; k++) {
    if (pthread_join(e->runners[k].thread, /*retval=*/NULL) != 0)
      error("Failed to join runner %i.", k);
#ifdef WITH_VECTORIZATION
    cache_clean(&e->runners[k].ci_cache);
    cache_clean(&e->runners[k].cj_cache);
#endif
    gravity_cache_clean(&e->runners[k].ci_gravity_cache);
    gravity_cache_clean(&e->runners[k].cj_gravity_cache);
  }
  swift_free("runners", e->runners);

  /* Allocated in engine_init(). */
  free(e->snapshot_units);

  /* Release the output lists and options. */
  output_list_clean(&e->output_list_snapshots);
  output_list_clean(&e->output_list_stats);
  output_list_clean(&e->output_list_stf);
  output_list_clean(&e->output_list_los);
  output_list_clean(&e->output_list_ps);
  output_options_clean(e->output_options);

  ic_info_clean(e->ics_metadata);
  swift_free("links", e->links);
#if defined(WITH_CSDS)
  if (e->policy & engine_policy_csds) {
    csds_free(e->csds);
    free(e->csds);
  }
#endif
  scheduler_clean(&e->sched);
  space_clean(e->s);
  threadpool_clean(&e->threadpool);
#if defined(WITH_MPI)
  /* Release the proxies and the derived MPI datatypes. */
  for (int i = 0; i < e->nr_proxies; ++i) {
    proxy_clean(&e->proxies[i]);
  }
  free(e->proxy_ind);
  free(e->proxies);

  /* Free types */
  part_free_mpi_types();
  multipole_free_mpi_types();
  stats_free_mpi_type();
  proxy_free_mpi_type();
  task_free_mpi_comms();
  if (!fof) mpicollect_free_MPI_type();
#endif

  /* Close files (only rank 0 opened them; not used in FOF-only runs). */
  if (!fof && e->nodeID == 0) {
    fclose(e->file_timesteps);
    fclose(e->file_stats);

    if (e->policy & engine_policy_star_formation) {
      fclose(e->sfh_logger);
    }

#ifndef RT_NONE
    fclose(e->file_rt_subcycles);
#endif
  }

  /* If the run was restarted, we should also free the memory allocated
     in engine_struct_restore() */
  if (restart) {
    free((void *)e->parameter_file);
    free((void *)e->output_options);
    free((void *)e->external_potential);
    free((void *)e->forcing_terms);
    free((void *)e->black_holes_properties);
    free((void *)e->pressure_floor_props);
    free((void *)e->rt_props);
    free((void *)e->sink_properties);
    free((void *)e->stars_properties);
    free((void *)e->gravity_properties);
    free((void *)e->neutrino_properties);
    free((void *)e->hydro_properties);
    free((void *)e->physical_constants);
    free((void *)e->internal_units);
    free((void *)e->cosmology);
    free((void *)e->mesh);
    free((void *)e->power_data);
    free((void *)e->chemistry);
    free((void *)e->entropy_floor);
    free((void *)e->cooling_func);
    free((void *)e->star_formation);
    free((void *)e->feedback_props);
    free((void *)e->io_extra_props);
#ifdef WITH_FOF
    free((void *)e->fof_properties);
#endif
    free((void *)e->los_properties);
    free((void *)e->lightcone_array_properties);
    free((void *)e->ics_metadata);
#ifdef WITH_MPI
    free((void *)e->reparttype);
#endif
    /* The output lists are only allocated when configured, hence the
     * NULL checks (free(NULL) would be safe, but this matches intent). */
    if (e->output_list_snapshots) free((void *)e->output_list_snapshots);
    if (e->output_list_stats) free((void *)e->output_list_stats);
    if (e->output_list_stf) free((void *)e->output_list_stf);
    if (e->output_list_los) free((void *)e->output_list_los);
    if (e->output_list_ps) free((void *)e->output_list_ps);
#ifdef WITH_CSDS
    if (e->policy & engine_policy_csds) free((void *)e->csds);
#endif
    free(e->s);
  }
}
/**
 * @brief Write the engine struct and its contents to the given FILE as a
 * stream of bytes.
 *
 * The order of the per-module dump calls below must exactly mirror the
 * restore order in engine_struct_restore().
 *
 * @param e the engine
 * @param stream the file stream
 */
void engine_struct_dump(struct engine *e, FILE *stream) {
  /* Dump the engine. Save the current tasks_per_cell estimate. */
  e->restart_max_tasks = engine_estimate_nr_tasks(e);
  restart_write_blocks(e, sizeof(struct engine), 1, stream, "engine",
                       "engine struct");

  /* And all the engine pointed data, these use their own dump functions. */
  space_struct_dump(e->s, stream);
  units_struct_dump(e->internal_units, stream);
  units_struct_dump(e->snapshot_units, stream);
  cosmology_struct_dump(e->cosmology, stream);

#ifdef WITH_MPI
  /* Save the partition for restoration. */
  partition_store_celllist(e->s, e->reparttype);
  partition_struct_dump(e->reparttype, stream);
#endif

  phys_const_struct_dump(e->physical_constants, stream);
  hydro_props_struct_dump(e->hydro_properties, stream);
  entropy_floor_struct_dump(e->entropy_floor, stream);
  gravity_props_struct_dump(e->gravity_properties, stream);
  stars_props_struct_dump(e->stars_properties, stream);
  pm_mesh_struct_dump(e->mesh, stream);
  power_spectrum_struct_dump(e->power_data, stream);
  potential_struct_dump(e->external_potential, stream);
  forcing_terms_struct_dump(e->forcing_terms, stream);
  cooling_struct_dump(e->cooling_func, stream);
  starformation_struct_dump(e->star_formation, stream);
  feedback_struct_dump(e->feedback_props, stream);
  pressure_floor_struct_dump(e->pressure_floor_props, stream);
  rt_struct_dump(e->rt_props, stream);
  black_holes_struct_dump(e->black_holes_properties, stream);
  sink_struct_dump(e->sink_properties, stream);
  neutrino_struct_dump(e->neutrino_properties, stream);
  neutrino_response_struct_dump(e->neutrino_response, stream);
  chemistry_struct_dump(e->chemistry, stream);
  extra_io_struct_dump(e->io_extra_props, stream);
#ifdef WITH_FOF
  fof_struct_dump(e->fof_properties, stream);
#endif
  los_struct_dump(e->los_properties, stream);
  lightcone_array_struct_dump(e->lightcone_array_properties, stream);
  ic_info_struct_dump(e->ics_metadata, stream);
  parser_struct_dump(e->parameter_file, stream);
  output_options_struct_dump(e->output_options, stream);

#ifdef WITH_CSDS
  if (e->policy & engine_policy_csds) {
    csds_struct_dump(e->csds, stream);
  }
#endif
}
/**
* @brief Re-create an engine struct and its contents from the given FILE
* stream.
*
* @param e the engine
* @param stream the file stream
*/
void engine_struct_restore(struct engine *e, FILE *stream) {
/* Read the engine. */
restart_read_blocks(e, sizeof(struct engine), 1, stream, NULL,
"engine struct");
/* Re-initializations as necessary for our struct and its members. */
e->sched.tasks = NULL;
e->sched.tasks_ind = NULL;
e->sched.tid_active = NULL;
e->sched.size = 0;
/* Now for the other pointers, these use their own restore functions. */
/* Note all this memory leaks, but is used once. */
struct space *s = (struct space *)malloc(sizeof(struct space));
space_struct_restore(s, stream);
e->s = s;
s->e = e;
/* NOTE(review): this is the tail of a restore routine (presumably
 * engine_struct_restore) that re-reads from `stream` every sub-structure
 * that the matching dump routine wrote out.  The *_struct_restore() calls
 * below consume the stream sequentially, so their order must mirror the
 * dump order exactly — verify against the corresponding dump function
 * before reordering anything here.
 *
 * NOTE(review): none of the malloc() results below are checked for NULL;
 * on OOM the subsequent restore call dereferences a null pointer.  A
 * fatal-error check (or an xmalloc-style wrapper) would fail cleanly
 * instead — TODO confirm project policy. */

/* Unit systems: internal working units, then snapshot output units. */
struct unit_system *internal_us =
(struct unit_system *)malloc(sizeof(struct unit_system));
units_struct_restore(internal_us, stream);
e->internal_units = internal_us;
struct unit_system *snap_us =
(struct unit_system *)malloc(sizeof(struct unit_system));
units_struct_restore(snap_us, stream);
e->snapshot_units = snap_us;

/* Cosmology: the restore is told via the policy flag whether this run
 * actually uses a cosmological model. */
struct cosmology *cosmo =
(struct cosmology *)malloc(sizeof(struct cosmology));
cosmology_struct_restore(e->policy & engine_policy_cosmology, cosmo, stream);
e->cosmology = cosmo;

#ifdef WITH_MPI
/* Domain repartitioning settings only exist in MPI builds; the dump side
 * must be compiled the same way for the stream to line up. */
struct repartition *reparttype =
(struct repartition *)malloc(sizeof(struct repartition));
partition_struct_restore(reparttype, stream);
e->reparttype = reparttype;
#endif

/* Physical constants and the various physics-module property structs. */
struct phys_const *physical_constants =
(struct phys_const *)malloc(sizeof(struct phys_const));
phys_const_struct_restore(physical_constants, stream);
e->physical_constants = physical_constants;
struct hydro_props *hydro_properties =
(struct hydro_props *)malloc(sizeof(struct hydro_props));
hydro_props_struct_restore(hydro_properties, stream);
e->hydro_properties = hydro_properties;
struct entropy_floor_properties *entropy_floor =
(struct entropy_floor_properties *)malloc(
sizeof(struct entropy_floor_properties));
entropy_floor_struct_restore(entropy_floor, stream);
e->entropy_floor = entropy_floor;
struct gravity_props *gravity_properties =
(struct gravity_props *)malloc(sizeof(struct gravity_props));
gravity_props_struct_restore(gravity_properties, stream);
e->gravity_properties = gravity_properties;
struct stars_props *stars_properties =
(struct stars_props *)malloc(sizeof(struct stars_props));
stars_props_struct_restore(stars_properties, stream);
e->stars_properties = stars_properties;
struct pm_mesh *mesh = (struct pm_mesh *)malloc(sizeof(struct pm_mesh));
pm_mesh_struct_restore(mesh, stream);
e->mesh = mesh;
struct power_spectrum_data *pow_data =
(struct power_spectrum_data *)malloc(sizeof(struct power_spectrum_data));
power_spectrum_struct_restore(pow_data, stream);
e->power_data = pow_data;
struct external_potential *external_potential =
(struct external_potential *)malloc(sizeof(struct external_potential));
potential_struct_restore(external_potential, stream);
e->external_potential = external_potential;
struct forcing_terms *forcing_terms =
(struct forcing_terms *)malloc(sizeof(struct forcing_terms));
forcing_terms_struct_restore(forcing_terms, stream);
e->forcing_terms = forcing_terms;

/* Cooling depends on the cosmology restored above — ordering matters. */
struct cooling_function_data *cooling_func =
(struct cooling_function_data *)malloc(
sizeof(struct cooling_function_data));
cooling_struct_restore(cooling_func, stream, e->cosmology);
e->cooling_func = cooling_func;
struct star_formation *star_formation =
(struct star_formation *)malloc(sizeof(struct star_formation));
starformation_struct_restore(star_formation, stream);
e->star_formation = star_formation;
struct feedback_props *feedback_properties =
(struct feedback_props *)malloc(sizeof(struct feedback_props));
feedback_struct_restore(feedback_properties, stream);
e->feedback_props = feedback_properties;
struct pressure_floor_props *pressure_floor_properties =
(struct pressure_floor_props *)malloc(
sizeof(struct pressure_floor_props));
pressure_floor_struct_restore(pressure_floor_properties, stream);
e->pressure_floor_props = pressure_floor_properties;

/* Radiative transfer needs the constants, internal units and cosmology
 * already restored earlier in this function — another ordering dependency. */
struct rt_props *rt_properties =
(struct rt_props *)malloc(sizeof(struct rt_props));
rt_struct_restore(rt_properties, stream, e->physical_constants,
e->internal_units, cosmo);
e->rt_props = rt_properties;
struct black_holes_props *black_holes_properties =
(struct black_holes_props *)malloc(sizeof(struct black_holes_props));
black_holes_struct_restore(black_holes_properties, stream);
e->black_holes_properties = black_holes_properties;
struct sink_props *sink_properties =
(struct sink_props *)malloc(sizeof(struct sink_props));
sink_struct_restore(sink_properties, stream);
e->sink_properties = sink_properties;
struct neutrino_props *neutrino_properties =
(struct neutrino_props *)malloc(sizeof(struct neutrino_props));
neutrino_struct_restore(neutrino_properties, stream);
e->neutrino_properties = neutrino_properties;
struct neutrino_response *neutrino_response =
(struct neutrino_response *)malloc(sizeof(struct neutrino_response));
neutrino_response_struct_restore(neutrino_response, stream);
e->neutrino_response = neutrino_response;
struct chemistry_global_data *chemistry =
(struct chemistry_global_data *)malloc(
sizeof(struct chemistry_global_data));
chemistry_struct_restore(chemistry, stream);
e->chemistry = chemistry;
struct extra_io_properties *extra_io_props =
(struct extra_io_properties *)malloc(sizeof(struct extra_io_properties));
extra_io_struct_restore(extra_io_props, stream);
e->io_extra_props = extra_io_props;

#ifdef WITH_FOF
/* Friends-of-friends group finder state (FOF builds only). */
struct fof_props *fof_props =
(struct fof_props *)malloc(sizeof(struct fof_props));
fof_struct_restore(fof_props, stream);
e->fof_properties = fof_props;
#endif

/* Output machinery: line-of-sight, lightcones, IC metadata, the parsed
 * parameter file and the per-output-field selection options. */
struct los_props *los_properties =
(struct los_props *)malloc(sizeof(struct los_props));
los_struct_restore(los_properties, stream);
e->los_properties = los_properties;
struct lightcone_array_props *lightcone_array_properties =
(struct lightcone_array_props *)malloc(
sizeof(struct lightcone_array_props));
lightcone_array_struct_restore(lightcone_array_properties, stream);
e->lightcone_array_properties = lightcone_array_properties;
struct ic_info *ics_metadata =
(struct ic_info *)malloc(sizeof(struct ic_info));
ic_info_struct_restore(ics_metadata, stream);
e->ics_metadata = ics_metadata;
struct swift_params *parameter_file =
(struct swift_params *)malloc(sizeof(struct swift_params));
parser_struct_restore(parameter_file, stream);
e->parameter_file = parameter_file;
struct output_options *output_options =
(struct output_options *)malloc(sizeof(struct output_options));
output_options_struct_restore(output_options, stream);
e->output_options = output_options;

#ifdef WITH_CSDS
/* The CSDS (continuous logger) writer is only present in the stream when
 * the run was started with the csds policy — the dump side is guarded by
 * the same condition, so the conditional read stays in sync. */
if (e->policy & engine_policy_csds) {
struct csds_writer *log =
(struct csds_writer *)malloc(sizeof(struct csds_writer));
csds_struct_restore(log, stream);
e->csds = log;
}
#endif

#ifdef EOS_PLANETARY
/* Re-initialise the equation of state from the freshly restored constants,
 * units and parameters.  NOTE(review): `eos` is not declared in this
 * function — presumably a file/global object; confirm. */
eos_init(&eos, e->physical_constants, e->snapshot_units, e->parameter_file);
#endif

/* Want to force a rebuild before using this engine. Wait to repartition.*/
e->forcerebuild = 1;
e->forcerepart = 0;
}