diff --git a/examples/main.c b/examples/main.c index 6cd5f9fa6eb9b48fef4c040f6b3a90b909fd0d3b..d0061b019eb5c01f2f042735891a361fbebaf04d 100644 --- a/examples/main.c +++ b/examples/main.c @@ -32,6 +32,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/stat.h> #include <unistd.h> /* MPI headers. */ @@ -92,6 +93,7 @@ void print_help_message() { printf(" %2s %14s %s\n", "-P", "{sec:par:val}", "Set parameter value and overwrites values read from the parameters " "file. Can be used more than once."); + printf(" %2s %14s %s\n", "-r", "", "Continue using restart files."); printf(" %2s %14s %s\n", "-s", "", "Run with hydrodynamics."); printf(" %2s %14s %s\n", "-S", "", "Run with stars."); printf(" %2s %14s %s\n", "-t", "{int}", @@ -118,6 +120,7 @@ void print_help_message() { int main(int argc, char *argv[]) { struct clocks_time tic, toc; + struct engine e; int nr_nodes = 1, myrank = 0; @@ -161,6 +164,7 @@ int main(int argc, char *argv[]) { int dump_tasks = 0; int dump_threadpool = 0; int nsteps = -2; + int restart = 0; int with_cosmology = 0; int with_external_gravity = 0; int with_sourceterms = 0; @@ -177,11 +181,12 @@ int main(int argc, char *argv[]) { int nparams = 0; char *cmdparams[PARSER_MAX_NO_OF_PARAMS]; char paramFileName[200] = ""; + char restart_file[200] = ""; unsigned long long cpufreq = 0; /* Parse the parameters */ int c; - while ((c = getopt(argc, argv, "acCdDef:FgGhMn:P:sSt:Tv:y:Y:")) != -1) + while ((c = getopt(argc, argv, "acCdDef:FgGhMn:P:rsSt:Tv:y:Y:")) != -1) switch (c) { case 'a': #if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) @@ -242,6 +247,9 @@ int main(int argc, char *argv[]) { cmdparams[nparams] = optarg; nparams++; break; + case 'r': + restart = 1; + break; case 's': with_hydro = 1; break; @@ -458,228 +466,330 @@ int main(int argc, char *argv[]) { } #endif - /* Initialize unit system and constants */ - struct unit_system us; - struct phys_const prog_const; - units_init(&us, params, "InternalUnitSystem"); - phys_const_init(&us, &prog_const); - if (myrank == 0 && verbose > 0) { - message("Internal unit system: U_M = %e g.", us.UnitMass_in_cgs); - message("Internal unit system: U_L = %e cm.", us.UnitLength_in_cgs); - message("Internal unit system: U_t = %e s.", us.UnitTime_in_cgs); - message("Internal unit system: U_I = %e A.", us.UnitCurrent_in_cgs); - message("Internal unit system: U_T = %e K.", us.UnitTemperature_in_cgs); - phys_const_print(&prog_const); + /* Common variables for restart and IC sections. */ + int clean_h_values = 0; + int flag_entropy_ICs = 0; + + /* Work out where we will read and write restart files. */ + char restart_dir[PARSER_MAX_LINE_SIZE]; + parser_get_opt_param_string(params, "Restarts:subdir", restart_dir, + "restart"); + + /* The directory must exist. */ + if (myrank == 0) { + if (access(restart_dir, W_OK | X_OK) != 0) { + if (restart) { + error("Cannot restart as no restart subdirectory: %s (%s)", restart_dir, + strerror(errno)); + } else { + if (mkdir(restart_dir, 0777) != 0) + error("Failed to create restart directory: %s (%s)", restart_dir, + strerror(errno)); + } + } } - /* Initialise the hydro properties */ - struct hydro_props hydro_properties; - if (with_hydro) hydro_props_init(&hydro_properties, params); - if (with_hydro) eos_init(&eos, params); - - /* Initialise the gravity properties */ - struct gravity_props gravity_properties; - if (with_self_gravity) gravity_props_init(&gravity_properties, params); - - /* Read particles and space information from (GADGET) ICs */ - char ICfileName[200] = ""; - parser_get_param_string(params, "InitialConditions:file_name", ICfileName); - const int replicate = - parser_get_opt_param_int(params, "InitialConditions:replicate", 1); - const int clean_h_values = - parser_get_opt_param_int(params, "InitialConditions:cleanup_h", 0); - if (myrank == 0) message("Reading ICs from file '%s'", ICfileName); - fflush(stdout); + /* Basename for any restart files. */ + char restart_name[PARSER_MAX_LINE_SIZE]; + parser_get_opt_param_string(params, "Restarts:basename", restart_name, + "swift"); - /* Get ready to read particles of all kinds */ - struct part *parts = NULL; - struct gpart *gparts = NULL; - struct spart *sparts = NULL; - size_t Ngas = 0, Ngpart = 0, Nspart = 0; - double dim[3] = {0., 0., 0.}; - int periodic = 0; - int flag_entropy_ICs = 0; - if (myrank == 0) clocks_gettime(&tic); + /* How often to check for the stop file and dump restarts and exit the + * application. */ + int restart_stop_steps = + parser_get_opt_param_int(params, "Restarts:stop_steps", 100); + + /* If restarting, look for the restart files. */ + if (restart) { + + /* Attempting a restart. */ + char **restart_files = NULL; + int restart_nfiles = 0; + + if (myrank == 0) { + message("Restarting SWIFT"); + + /* Locate the restart files. */ + restart_files = + restart_locate(restart_dir, restart_name, &restart_nfiles); + if (restart_nfiles == 0) + error("Failed to locate any restart files in %s", restart_dir); + + /* We need one file per rank. */ + if (restart_nfiles != nr_nodes) + error("Incorrect number of restart files, expected %d found %d", + nr_nodes, restart_nfiles); + + if (verbose > 0) + for (int i = 0; i < restart_nfiles; i++) + message("found restart file: %s", restart_files[i]); + } + +#ifdef WITH_MPI + /* Distribute the restart files, need one for each rank. */ + if (myrank == 0) { + + for (int i = 1; i < nr_nodes; i++) { + strcpy(restart_file, restart_files[i]); + MPI_Send(restart_file, 200, MPI_BYTE, i, 0, MPI_COMM_WORLD); + } + + /* Keep local file. */ + strcpy(restart_file, restart_files[0]); + + /* Finished with the list. */ + restart_locate_free(restart_nfiles, restart_files); + + } else { + MPI_Recv(restart_file, 200, MPI_BYTE, 0, 0, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + if (verbose > 1) message("local restart file = %s", restart_file); +#else + + /* Just one restart file. */ + strcpy(restart_file, restart_files[0]); +#endif + + /* Now read it. */ + restart_read(&e, restart_file); + + /* And initialize the engine with the space and policies. */ + if (myrank == 0) clocks_gettime(&tic); + engine_config(1, &e, params, nr_nodes, myrank, nr_threads, with_aff, + talking, restart_file); + if (myrank == 0) { + clocks_gettime(&toc); + message("engine_config took %.3f %s.", clocks_diff(&tic, &toc), + clocks_getunit()); + fflush(stdout); + } + + /* Check if we are already done when given steps on the command-line. */ + if (e.step >= nsteps && nsteps > 0) + error("Not restarting, already completed %d steps", e.step); + + } else { + + /* Not restarting so look for the ICs. */ + /* Initialize unit system and constants */ + struct unit_system us; + struct phys_const prog_const; + units_init(&us, params, "InternalUnitSystem"); + phys_const_init(&us, &prog_const); + if (myrank == 0 && verbose > 0) { + message("Internal unit system: U_M = %e g.", us.UnitMass_in_cgs); + message("Internal unit system: U_L = %e cm.", us.UnitLength_in_cgs); + message("Internal unit system: U_t = %e s.", us.UnitTime_in_cgs); + message("Internal unit system: U_I = %e A.", us.UnitCurrent_in_cgs); + message("Internal unit system: U_T = %e K.", us.UnitTemperature_in_cgs); + phys_const_print(&prog_const); + } + + /* Initialise the hydro properties */ + struct hydro_props hydro_properties; + if (with_hydro) hydro_props_init(&hydro_properties, params); + if (with_hydro) eos_init(&eos, params); + + /* Initialise the gravity properties */ + struct gravity_props gravity_properties; + if (with_self_gravity) gravity_props_init(&gravity_properties, params); + + /* Read particles and space information from (GADGET) ICs */ + char ICfileName[200] = ""; + parser_get_param_string(params, "InitialConditions:file_name", ICfileName); + const int replicate = + parser_get_opt_param_int(params, "InitialConditions:replicate", 1); + clean_h_values = + parser_get_opt_param_int(params, "InitialConditions:cleanup_h", 0); + if (myrank == 0) message("Reading ICs from file '%s'", ICfileName); + fflush(stdout); + + /* Get ready to read particles of all kinds */ + struct part *parts = NULL; + struct gpart *gparts = NULL; + struct spart *sparts = NULL; + size_t Ngas = 0, Ngpart = 0, Nspart = 0; + double dim[3] = {0., 0., 0.}; + int periodic = 0; + if (myrank == 0) clocks_gettime(&tic); #if defined(WITH_MPI) #if defined(HAVE_PARALLEL_HDF5) - read_ic_parallel(ICfileName, &us, dim, &parts, &gparts, &sparts, &Ngas, + read_ic_parallel(ICfileName, &us, dim, &parts, &gparts, &sparts, &Ngas, + &Ngpart, &Nspart, &periodic, &flag_entropy_ICs, with_hydro, + (with_external_gravity || with_self_gravity), with_stars, + myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, + nr_threads, dry_run); +#else + read_ic_serial(ICfileName, &us, dim, &parts, &gparts, &sparts, &Ngas, &Ngpart, &Nspart, &periodic, &flag_entropy_ICs, with_hydro, (with_external_gravity || with_self_gravity), with_stars, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, nr_threads, dry_run); -#else - read_ic_serial(ICfileName, &us, dim, &parts, &gparts, &sparts, &Ngas, &Ngpart, - &Nspart, &periodic, &flag_entropy_ICs, with_hydro, - (with_external_gravity || with_self_gravity), with_stars, - myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, nr_threads, - dry_run); #endif #else - read_ic_single(ICfileName, &us, dim, &parts, &gparts, &sparts, &Ngas, &Ngpart, - &Nspart, &periodic, &flag_entropy_ICs, with_hydro, - (with_external_gravity || with_self_gravity), with_stars, - nr_threads, dry_run); + read_ic_single(ICfileName, &us, dim, &parts, &gparts, &sparts, &Ngas, + &Ngpart, &Nspart, &periodic, &flag_entropy_ICs, with_hydro, + (with_external_gravity || with_self_gravity), with_stars, + nr_threads, dry_run); #endif - if (myrank == 0) { - clocks_gettime(&toc); - message("Reading initial conditions took %.3f %s.", clocks_diff(&tic, &toc), - clocks_getunit()); - fflush(stdout); - } + if (myrank == 0) { + clocks_gettime(&toc); + message("Reading initial conditions took %.3f %s.", + clocks_diff(&tic, &toc), clocks_getunit()); + fflush(stdout); + } #ifdef SWIFT_DEBUG_CHECKS - /* Check once and for all that we don't have unwanted links */ - if (!with_stars) { - for (size_t k = 0; k < Ngpart; ++k) - if (gparts[k].type == swift_type_star) error("Linking problem"); - } - if (!with_hydro) { - for (size_t k = 0; k < Ngpart; ++k) - if (gparts[k].type == swift_type_gas) error("Linking problem"); - } + /* Check once and for all that we don't have unwanted links */ + if (!with_stars) { + for (size_t k = 0; k < Ngpart; ++k) + if (gparts[k].type == swift_type_star) error("Linking problem"); + } + if (!with_hydro) { + for (size_t k = 0; k < Ngpart; ++k) + if (gparts[k].type == swift_type_gas) error("Linking problem"); + } #endif - /* Get the total number of particles across all nodes. */ - long long N_total[3] = {0, 0, 0}; + /* Get the total number of particles across all nodes. */ + long long N_total[3] = {0, 0, 0}; #if defined(WITH_MPI) - long long N_long[3] = {Ngas, Ngpart, Nspart}; - MPI_Allreduce(&N_long, &N_total, 3, MPI_LONG_LONG_INT, MPI_SUM, - MPI_COMM_WORLD); + long long N_long[3] = {Ngas, Ngpart, Nspart}; + MPI_Allreduce(&N_long, &N_total, 3, MPI_LONG_LONG_INT, MPI_SUM, + MPI_COMM_WORLD); #else - N_total[0] = Ngas; - N_total[1] = Ngpart; - N_total[2] = Nspart; + N_total[0] = Ngas; + N_total[1] = Ngpart; + N_total[2] = Nspart; #endif - if (myrank == 0) - message( - "Read %lld gas particles, %lld star particles and %lld gparts from the " - "ICs.", - N_total[0], N_total[2], N_total[1]); - - /* Initialize the space with these data. */ - if (myrank == 0) clocks_gettime(&tic); - struct space s; - space_init(&s, params, dim, parts, gparts, sparts, Ngas, Ngpart, Nspart, - periodic, replicate, with_self_gravity, talking, dry_run); - if (myrank == 0) { - clocks_gettime(&toc); - message("space_init took %.3f %s.", clocks_diff(&tic, &toc), - clocks_getunit()); - fflush(stdout); - } - /* Also update the total counts (in case of changes due to replication) */ + if (myrank == 0) + message( + "Read %lld gas particles, %lld star particles and %lld gparts from " + "the " + "ICs.", + N_total[0], N_total[2], N_total[1]); + + /* Initialize the space with these data. */ + if (myrank == 0) clocks_gettime(&tic); + struct space s; + space_init(&s, params, dim, parts, gparts, sparts, Ngas, Ngpart, Nspart, + periodic, replicate, with_self_gravity, talking, dry_run); + + if (myrank == 0) { + clocks_gettime(&toc); + message("space_init took %.3f %s.", clocks_diff(&tic, &toc), + clocks_getunit()); + fflush(stdout); + } + +/* Also update the total counts (in case of changes due to replication) */ #if defined(WITH_MPI) - N_long[0] = s.nr_parts; - N_long[1] = s.nr_gparts; - N_long[2] = s.nr_sparts; - MPI_Allreduce(&N_long, &N_total, 3, MPI_LONG_LONG_INT, MPI_SUM, - MPI_COMM_WORLD); + N_long[0] = s.nr_parts; + N_long[1] = s.nr_gparts; + N_long[2] = s.nr_sparts; + MPI_Allreduce(&N_long, &N_total, 3, MPI_LONG_LONG_INT, MPI_SUM, + MPI_COMM_WORLD); #else - N_total[0] = s.nr_parts; - N_total[1] = s.nr_gparts; - N_total[2] = s.nr_sparts; + N_total[0] = s.nr_parts; + N_total[1] = s.nr_gparts; + N_total[2] = s.nr_sparts; #endif - /* Say a few nice things about the space we just created. */ - if (myrank == 0) { - message("space dimensions are [ %.3f %.3f %.3f ].", s.dim[0], s.dim[1], - s.dim[2]); - message("space %s periodic.", s.periodic ? "is" : "isn't"); - message("highest-level cell dimensions are [ %i %i %i ].", s.cdim[0], - s.cdim[1], s.cdim[2]); - message("%zi parts in %i cells.", s.nr_parts, s.tot_cells); - message("%zi gparts in %i cells.", s.nr_gparts, s.tot_cells); - message("%zi sparts in %i cells.", s.nr_sparts, s.tot_cells); - message("maximum depth is %d.", s.maxdepth); - fflush(stdout); - } - - /* Verify that each particle is in it's proper cell. */ - if (talking && !dry_run) { - int icount = 0; - space_map_cells_pre(&s, 0, &map_cellcheck, &icount); - message("map_cellcheck picked up %i parts.", icount); - } - - /* Verify the maximal depth of cells. */ - if (talking && !dry_run) { - int data[2] = {s.maxdepth, 0}; - space_map_cells_pre(&s, 0, &map_maxdepth, data); - message("nr of cells at depth %i is %i.", data[0], data[1]); - } + /* Say a few nice things about the space we just created. */ + if (myrank == 0) { + message("space dimensions are [ %.3f %.3f %.3f ].", s.dim[0], s.dim[1], + s.dim[2]); + message("space %s periodic.", s.periodic ? "is" : "isn't"); + message("highest-level cell dimensions are [ %i %i %i ].", s.cdim[0], + s.cdim[1], s.cdim[2]); + message("%zi parts in %i cells.", s.nr_parts, s.tot_cells); + message("%zi gparts in %i cells.", s.nr_gparts, s.tot_cells); + message("%zi sparts in %i cells.", s.nr_sparts, s.tot_cells); + message("maximum depth is %d.", s.maxdepth); + fflush(stdout); + } - /* Initialise the table of Ewald corrections for the gravity checks */ -#ifdef SWIFT_GRAVITY_FORCE_CHECKS - if (periodic) gravity_exact_force_ewald_init(dim[0]); -#endif + /* Verify that each particle is in it's proper cell. */ + if (talking && !dry_run) { + int icount = 0; + space_map_cells_pre(&s, 0, &map_cellcheck, &icount); + message("map_cellcheck picked up %i parts.", icount); + } - /* Initialise the external potential properties */ - struct external_potential potential; - if (with_external_gravity) - potential_init(params, &prog_const, &us, &s, &potential); - if (myrank == 0) potential_print(&potential); - - /* Initialise the cooling function properties */ - struct cooling_function_data cooling_func; - if (with_cooling) cooling_init(params, &us, &prog_const, &cooling_func); - if (myrank == 0) cooling_print(&cooling_func); - - /* Initialise the chemistry */ - struct chemistry_data chemistry; - chemistry_init(params, &us, &prog_const, &chemistry); - if (myrank == 0) chemistry_print(&chemistry); - - /* Initialise the feedback properties */ - struct sourceterms sourceterms; - if (with_sourceterms) sourceterms_init(params, &us, &sourceterms); - if (with_sourceterms && myrank == 0) sourceterms_print(&sourceterms); - - /* Construct the engine policy */ - int engine_policies = ENGINE_POLICY | engine_policy_steal; - if (with_drift_all) engine_policies |= engine_policy_drift_all; - if (with_mpole_reconstruction) - engine_policies |= engine_policy_reconstruct_mpoles; - if (with_hydro) engine_policies |= engine_policy_hydro; - if (with_self_gravity) engine_policies |= engine_policy_self_gravity; - if (with_external_gravity) engine_policies |= engine_policy_external_gravity; - if (with_cosmology) engine_policies |= engine_policy_cosmology; - if (with_cooling) engine_policies |= engine_policy_cooling; - if (with_sourceterms) engine_policies |= engine_policy_sourceterms; - if (with_stars) engine_policies |= engine_policy_stars; - - /* Initialize the engine with the space and policies. */ - if (myrank == 0) clocks_gettime(&tic); - struct engine e; - engine_init(&e, &s, params, nr_nodes, myrank, nr_threads, N_total[0], - N_total[1], with_aff, engine_policies, talking, &reparttype, &us, - &prog_const, &hydro_properties, &gravity_properties, &potential, - &cooling_func, &chemistry, &sourceterms); - if (myrank == 0) { - clocks_gettime(&toc); - message("engine_init took %.3f %s.", clocks_diff(&tic, &toc), - clocks_getunit()); - fflush(stdout); - } + /* Verify the maximal depth of cells. */ + if (talking && !dry_run) { + int data[2] = {s.maxdepth, 0}; + space_map_cells_pre(&s, 0, &map_maxdepth, data); + message("nr of cells at depth %i is %i.", data[0], data[1]); + } -/* Init the runner history. */ -#ifdef HIST - for (k = 0; k < runner_hist_N; k++) runner_hist_bins[k] = 0; -#endif + /* Initialise the external potential properties */ + struct external_potential potential; + if (with_external_gravity) + potential_init(params, &prog_const, &us, &s, &potential); + if (myrank == 0) potential_print(&potential); + + /* Initialise the cooling function properties */ + struct cooling_function_data cooling_func; + if (with_cooling) cooling_init(params, &us, &prog_const, &cooling_func); + if (myrank == 0) cooling_print(&cooling_func); + + /* Initialise the chemistry */ + struct chemistry_data chemistry; + chemistry_init(params, &us, &prog_const, &chemistry); + if (myrank == 0) chemistry_print(&chemistry); + + /* Initialise the feedback properties */ + struct sourceterms sourceterms; + if (with_sourceterms) sourceterms_init(params, &us, &sourceterms); + if (with_sourceterms && myrank == 0) sourceterms_print(&sourceterms); + + /* Construct the engine policy */ + int engine_policies = ENGINE_POLICY | engine_policy_steal; + if (with_drift_all) engine_policies |= engine_policy_drift_all; + if (with_mpole_reconstruction) + engine_policies |= engine_policy_reconstruct_mpoles; + if (with_hydro) engine_policies |= engine_policy_hydro; + if (with_self_gravity) engine_policies |= engine_policy_self_gravity; + if (with_external_gravity) + engine_policies |= engine_policy_external_gravity; + if (with_cosmology) engine_policies |= engine_policy_cosmology; + if (with_cooling) engine_policies |= engine_policy_cooling; + if (with_sourceterms) engine_policies |= engine_policy_sourceterms; + if (with_stars) engine_policies |= engine_policy_stars; + + /* Initialize the engine with the space and policies. */ + if (myrank == 0) clocks_gettime(&tic); + engine_init(&e, &s, params, N_total[0], N_total[1], engine_policies, + talking, &reparttype, &us, &prog_const, &hydro_properties, + &gravity_properties, &potential, &cooling_func, &chemistry, + &sourceterms); + engine_config(0, &e, params, nr_nodes, myrank, nr_threads, with_aff, + talking, restart_file); + if (myrank == 0) { + clocks_gettime(&toc); + message("engine_init took %.3f %s.", clocks_diff(&tic, &toc), + clocks_getunit()); + fflush(stdout); + } - /* Get some info to the user. */ - if (myrank == 0) { - long long N_DM = N_total[1] - N_total[2] - N_total[0]; - message( - "Running on %lld gas particles, %lld star particles and %lld DM " - "particles (%lld gravity particles)", - N_total[0], N_total[2], N_total[1] > 0 ? N_DM : 0, N_total[1]); - message( - "from t=%.3e until t=%.3e with %d threads and %d queues (dt_min=%.3e, " - "dt_max=%.3e)...", - e.timeBegin, e.timeEnd, e.nr_threads, e.sched.nr_queues, e.dt_min, - e.dt_max); - fflush(stdout); + /* Get some info to the user. */ + if (myrank == 0) { + long long N_DM = N_total[1] - N_total[2] - N_total[0]; + message( + "Running on %lld gas particles, %lld star particles and %lld DM " + "particles (%lld gravity particles)", + N_total[0], N_total[2], N_total[1] > 0 ? N_DM : 0, N_total[1]); + message( + "from t=%.3e until t=%.3e with %d threads and %d queues " + "(dt_min=%.3e, " + "dt_max=%.3e)...", + e.timeBegin, e.timeEnd, e.nr_threads, e.sched.nr_queues, e.dt_min, + e.dt_max); + fflush(stdout); + } } /* Time to say good-bye if this was not a serious run. */ @@ -695,18 +805,31 @@ int main(int argc, char *argv[]) { return 0; } +/* Initialise the table of Ewald corrections for the gravity checks */ +#ifdef SWIFT_GRAVITY_FORCE_CHECKS + if (periodic) gravity_exact_force_ewald_init(e.s->dim[0]); +#endif + +/* Init the runner history. */ +#ifdef HIST + for (k = 0; k < runner_hist_N; k++) runner_hist_bins[k] = 0; +#endif + + if (!restart) { + #ifdef WITH_MPI - /* Split the space. */ - engine_split(&e, &initial_partition); - engine_redistribute(&e); + /* Split the space. */ + engine_split(&e, &initial_partition); + engine_redistribute(&e); #endif - /* Initialise the particles */ - engine_init_particles(&e, flag_entropy_ICs, clean_h_values); + /* Initialise the particles */ + engine_init_particles(&e, flag_entropy_ICs, clean_h_values); - /* Write the state of the system before starting time integration. */ - engine_dump_snapshot(&e); - engine_print_stats(&e); + /* Write the state of the system before starting time integration. */ + engine_dump_snapshot(&e); + engine_print_stats(&e); + } /* Legend */ if (myrank == 0) @@ -717,8 +840,16 @@ int main(int argc, char *argv[]) { /* File for the timers */ if (with_verbose_timers) timers_open_file(myrank); + /* Create a name for restart file of this rank. */ + if (restart_genname(restart_dir, restart_name, e.nodeID, restart_file, 200) != + 0) + error("Failed to generate restart filename"); + /* Main simulation loop */ - for (int j = 0; !engine_is_done(&e) && e.step - 1 != nsteps; j++) { + /* ==================== */ + int force_stop = 0; + for (int j = 0; !engine_is_done(&e) && e.step - 1 != nsteps && !force_stop; + j++) { /* Reset timers */ timers_reset_all(); @@ -729,6 +860,21 @@ int main(int argc, char *argv[]) { /* Print the timers. */ if (with_verbose_timers) timers_print(e.step); + /* Every so often allow the user to stop the application and dump the + * restart + * files. */ + if (j % restart_stop_steps == 0) { + force_stop = restart_stop_now(restart_dir, 0); + if (myrank == 0 && force_stop) + message("Forcing application exit, dumping restart files..."); + } + + /* Also if using nsteps to exit, will not have saved any restarts on exit, + * make + * sure we do that (useful in testing only). */ + if (force_stop || (e.restart_onexit && e.step - 1 == nsteps)) + engine_dump_restarts(&e, 0, 1); + #ifdef SWIFT_DEBUG_TASKS /* Dump the task data using the given frequency. */ if (dump_tasks && (dump_tasks == 1 || j % dump_tasks == 1)) { @@ -853,6 +999,10 @@ int main(int argc, char *argv[]) { error("call to MPI_Finalize failed with error %i.", res); #endif + /* Remove the stop file if used. Do this anyway, we could have missed the + * stop file if normal exit happened first. */ + if (myrank == 0) force_stop = restart_stop_now(restart_dir, 1); + /* Clean everything */ if (with_verbose_timers) timers_close_file(); engine_clean(&e); diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index 19c5c95be1f1a9fcaa4e790fb24fd0f60d1ef4c2..36da7a1e9971e03124927d0dbb7fe0ca52772eb1 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -75,6 +75,15 @@ InitialConditions: shift_z: 0. replicate: 2 # (Optional) Replicate all particles along each axis a given number of times. Default 1. +# Parameters controlling restarts +Restarts: + enable: 1 # (Optional) whether to enable dumping restarts at fixed intervals. + onexit: 0 # (Optional) whether to dump restarts on exit (*needs enable*) + subdir: restart # (Optional) name of subdirectory for restart files. + basename: swift # (Optional) prefix used in naming restart files. + delta_hours: 6.0 # (Optional) decimal hours between dumps of restart files. + stop_steps: 100 # (Optional) how many steps to process before checking if the <subdir>/stop file exists. When present the application will attempt to exit early, dumping restart files first. + # Parameters governing domain decomposition DomainDecomposition: initial_type: simple_metis # (Optional) The initial decomposition strategy: "grid", diff --git a/src/Makefile.am b/src/Makefile.am index 690c5a0589e7cbd71ec40c2dd2bcdd53d848bd4b..82b7aa73e10c1fa0b369886c48322985aadbc618 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -47,7 +47,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ sourceterms_struct.h statistics.h memswap.h cache.h runner_doiact_vec.h profiler.h \ dump.h logger.h active.h timeline.h xmf.h gravity_properties.h gravity_derivatives.h \ gravity_softened_derivatives.h vector_power.h collectgroup.h hydro_space.h sort_part.h \ - chemistry.h chemistry_io.h chemistry_struct.h + chemistry.h chemistry_io.h chemistry_struct.h restart.h # Common source files AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ @@ -59,8 +59,7 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ statistics.c runner_doiact_vec.c profiler.c dump.c logger.c \ part_type.c xmf.c gravity_properties.c gravity.c \ collectgroup.c hydro_space.c equation_of_state.c \ - chemistry.c - + chemistry.c restart.c # Include files for distribution, not installation. nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ diff --git a/src/clocks.c b/src/clocks.c index d5f0e571fe0a4694c4088bb05014fafa99d60488..4b8b269143bb3dba606c77a00eaf12b3b614eb8f 100644 --- a/src/clocks.c +++ b/src/clocks.c @@ -45,7 +45,7 @@ static unsigned long long clocks_cpufreq = 0; static ticks clocks_start = 0; /* The units of any returned times. */ -static char *clocks_units[] = {"ms", "ticks"}; +static char *clocks_units[] = {"ms", "~ms"}; static int clocks_units_index = 0; static double clocks_units_scale = 1000.0; @@ -176,11 +176,12 @@ static void clocks_estimate_cpufreq() { } #endif - /* If all fails just report ticks in any times. */ + /* If all fails just report ticks for a notional 2.6GHz machine. */ if (clocks_cpufreq == 0) { - clocks_cpufreq = 1; + unsigned long long maxfreq = 2600000; + clocks_cpufreq = maxfreq * 1000; clocks_units_index = 1; - clocks_units_scale = 1.0; + clocks_units_scale = 1000.0; } } @@ -216,6 +217,22 @@ double clocks_from_ticks(ticks tics) { return ((double)tics / (double)clocks_get_cpufreq() * clocks_units_scale); } +/** + * @brief Convert a number of milli seconds into ticks, if possible. + * + * Only an approximation as based on how well we have estimated the + * rtc frequency. Should be good for machines that support constant_rtc + * and clock_gettime(), and reasonable for most Linux machines, otherwise + * a guess will just be returned. See clocks_getunit() for the actual units. + * + * @param ms a number of "milliseconds" to convert to ticks + * + * @result the number of ticks, if possible. + */ +ticks clocks_to_ticks(double ms) { + return (ticks)(ms * (double)clocks_get_cpufreq() / clocks_units_scale); +} + /** * @brief return the time units. * diff --git a/src/clocks.h b/src/clocks.h index 4c69e130d9bfc5e61fb03fc9820ffc76d2440dc4..bdb3a6651e52f5b165e644015b91f96aa5812d57 100644 --- a/src/clocks.h +++ b/src/clocks.h @@ -39,6 +39,7 @@ const char *clocks_getunit(); void clocks_set_cpufreq(unsigned long long freq); unsigned long long clocks_get_cpufreq(); double clocks_from_ticks(ticks tics); +ticks clocks_to_ticks(double interval); double clocks_diff_ticks(ticks tic, ticks toc); const char *clocks_get_timesincestart(); diff --git a/src/cooling.c b/src/cooling.c index 9452857117367cfc1d4d3eab262c73eba555d4f5..89a04b30f8ce3f106d90a6db1364a3caadba6deb 100644 --- a/src/cooling.c +++ b/src/cooling.c @@ -22,6 +22,7 @@ /* This object's header. */ #include "cooling.h" +#include "restart.h" /** * @brief Initialises the cooling properties. @@ -52,3 +53,28 @@ void cooling_print(const struct cooling_function_data* cooling) { cooling_print_backend(cooling); } + +/** + * @brief Write a hydro_props struct to the given FILE as a stream of bytes. + * + * @param cooling the struct + * @param stream the file stream + */ +void cooling_struct_dump(const struct cooling_function_data* cooling, + FILE* stream) { + restart_write_blocks((void*)cooling, sizeof(struct cooling_function_data), 1, + stream, "cooling", "cooling function"); +} + +/** + * @brief Restore a hydro_props struct from the given FILE as a stream of + * bytes. + * + * @param cooling the struct + * @param stream the file stream + */ +void cooling_struct_restore(const struct cooling_function_data* cooling, + FILE* stream) { + restart_read_blocks((void*)cooling, sizeof(struct cooling_function_data), 1, + stream, NULL, "cooling function"); +} diff --git a/src/cooling.h b/src/cooling.h index ff15d95d39f857b04ed08db43b488d5f4d8ed31f..9d1001d360a1816837381e9aa52b17ba47f50fce 100644 --- a/src/cooling.h +++ b/src/cooling.h @@ -50,4 +50,10 @@ void cooling_init(const struct swift_params* parameter_file, void cooling_print(const struct cooling_function_data* cooling); +/* Dump/restore. */ +void cooling_struct_dump(const struct cooling_function_data* cooling, + FILE* stream); +void cooling_struct_restore(const struct cooling_function_data* cooling, + FILE* stream); + #endif /* SWIFT_COOLING_H */ diff --git a/src/engine.c b/src/engine.c index 11fac9f057e4dc95c325aa207f4fc786db9acf58..7967bdf01aa726655326998eb020a0ed0492ba31 100644 --- a/src/engine.c +++ b/src/engine.c @@ -52,6 +52,7 @@ #include "atomic.h" #include "cell.h" #include "clocks.h" +#include "cooling.h" #include "cycle.h" #include "debug.h" #include "error.h" @@ -64,10 +65,12 @@ #include "partition.h" #include "profiler.h" #include "proxy.h" +#include "restart.h" #include "runner.h" #include "serial_io.h" #include "single_io.h" #include "sort_part.h" +#include "sourceterms.h" #include "statistics.h" #include "timers.h" #include "tools.h" @@ -3726,10 +3729,11 @@ void engine_prepare(struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS if (e->forcerepart || e->forcerebuild) { /* Check that all cells have been drifted to the current time. - * That can include cells that have not - * previously been active on this rank. */ - space_check_drift_point(e->s, e->ti_old, - e->policy & engine_policy_self_gravity); + * That can include cells that have not previously been active on this + * rank. Skip if haven't got any cells (yet). */ + if (e->s->cells_top != NULL) + space_check_drift_point(e->s, e->ti_old, + e->policy & engine_policy_self_gravity); } #endif @@ -3742,7 +3746,7 @@ void engine_prepare(struct engine *e) { /* Unskip active tasks and check for rebuild */ engine_unskip(e); - /* Re-rank the tasks every now and then. */ + /* Re-rank the tasks every now and then. XXX this never executes. */ if (e->tasks_age % engine_tasksreweight == 1) { scheduler_reweight(&e->sched, e->verbose); } @@ -4440,7 +4444,7 @@ void engine_step(struct engine *e) { gravity_exact_force_check(e->s, e, 1e-1); #endif - /* Let's trigger a rebuild every-so-often for good measure */ + /* Let's trigger a non-SPH rebuild every-so-often for good measure */ if (!(e->policy & engine_policy_hydro) && // MATTHIEU improve this (e->policy & engine_policy_self_gravity) && e->step % 20 == 0) e->forcerebuild = 1; @@ -4453,9 +4457,8 @@ void engine_step(struct engine *e) { e->forcerebuild = e->collect_group1.forcerebuild; /* Save some statistics ? */ - if (e->time - e->timeLastStatistics >= e->deltaTimeStatistics) { + if (e->time - e->timeLastStatistics >= e->deltaTimeStatistics) e->save_stats = 1; - } /* Do we want a snapshot? */ if (e->ti_end_min >= e->ti_nextSnapshot && e->ti_nextSnapshot > 0) @@ -4463,8 +4466,9 @@ void engine_step(struct engine *e) { /* Drift everybody (i.e. what has not yet been drifted) */ /* to the current time */ - if (e->dump_snapshot || e->forcerebuild || e->forcerepart || e->save_stats) - engine_drift_all(e); + int drifted_all = + (e->dump_snapshot || e->forcerebuild || e->forcerepart || e->save_stats); + if (drifted_all) engine_drift_all(e); /* Write a snapshot ? */ if (e->dump_snapshot) { @@ -4504,6 +4508,48 @@ void engine_step(struct engine *e) { /* Time in ticks at the end of this step. */ e->toc_step = getticks(); #endif + + /* Final job is to create a restart file if needed. */ + engine_dump_restarts(e, drifted_all, e->restart_onexit && engine_is_done(e)); +} + +/** + * @brief dump restart files if it is time to do so and dumps are enabled. + * + * @param e the engine. + * @param drifted_all true if a drift_all has just been performed. + * @param force force a dump, if dumping is enabled. + */ +void engine_dump_restarts(struct engine *e, int drifted_all, int force) { + + if (e->restart_dump) { + ticks tic = getticks(); + + /* Dump when the time has arrived, or we are told to. */ + int dump = ((tic > e->restart_next) || force); + +#ifdef WITH_MPI + /* Synchronize this action from rank 0 (ticks may differ between + * machines). */ + MPI_Bcast(&dump, 1, MPI_INT, 0, MPI_COMM_WORLD); +#endif + if (dump) { + + /* Drift all particles first (may have just been done). */ + if (!drifted_all) engine_drift_all(e); + restart_write(e, e->restart_file); + + if (e->verbose) + message("Dumping restart files took %.3f %s", + clocks_from_ticks(getticks() - tic), clocks_getunit()); + + /* Time after which next dump will occur. */ + e->restart_next += e->restart_dt; + + /* Flag that we dumped the restarts */ + e->step_props |= engine_step_prop_restarts; + } + } } /** @@ -5098,18 +5144,17 @@ void engine_unpin() { } /** - * @brief init an engine with the given number of threads, queues, and - * the given policy. + * @brief init an engine struct with the necessary properties for the + * simulation. + * + * Note do not use when restarting. Engine initialisation + * is completed by a call to engine_config(). * * @param e The #engine. * @param s The #space in which this #runner will run. * @param params The parsed parameter file. - * @param nr_nodes The number of MPI ranks. - * @param nodeID The MPI rank of this node. - * @param nr_threads The number of threads per MPI rank. * @param Ngas total number of gas particles in the simulation. * @param Ndm total number of gravity particles in the simulation. - * @param with_aff use processor affinity, if supported. * @param policy The queuing policy to use. * @param verbose Is this #engine talkative ? * @param reparttype What type of repartition algorithm are we using ? @@ -5122,41 +5167,28 @@ void engine_unpin() { * @param chemistry The chemistry information. * @param sourceterms The properties of the source terms function. */ -void engine_init(struct engine *e, struct space *s, - const struct swift_params *params, int nr_nodes, int nodeID, - int nr_threads, long long Ngas, long long Ndm, int with_aff, - int policy, int verbose, struct repartition *reparttype, - const struct unit_system *internal_units, - const struct phys_const *physical_constants, - const struct hydro_props *hydro, - const struct gravity_props *gravity, - const struct external_potential *potential, - const struct cooling_function_data *cooling_func, - const struct chemistry_data *chemistry, - struct sourceterms *sourceterms) { +void engine_init( + struct engine *e, struct space *s, const struct swift_params *params, + long long Ngas, long long Ndm, int policy, int verbose, + struct repartition *reparttype, const struct unit_system *internal_units, + const struct phys_const *physical_constants, + const struct hydro_props *hydro, const struct gravity_props *gravity, + const struct external_potential *potential, + const struct cooling_function_data *cooling_func, + const struct chemistry_data *chemistry, struct sourceterms *sourceterms) { /* Clean-up everything */ bzero(e, sizeof(struct engine)); - /* Store the values. */ + /* Store the all values in the fields of the engine. */ e->s = s; - e->nr_threads = nr_threads; e->policy = policy; e->step = 0; - e->nr_nodes = nr_nodes; - e->nodeID = nodeID; e->total_nr_parts = Ngas; e->total_nr_gparts = Ndm; e->proxy_ind = NULL; e->nr_proxies = 0; - e->forcerebuild = 1; - e->forcerepart = 0; e->reparttype = reparttype; - e->dump_snapshot = 0; - e->save_stats = 0; - e->step_props = engine_step_prop_none; - e->links = NULL; - e->nr_links = 0; e->timeBegin = parser_get_param_double(params, "TimeIntegration:time_begin"); e->timeEnd = parser_get_param_double(params, "TimeIntegration:time_end"); e->timeOld = e->timeBegin; @@ -5178,10 +5210,9 @@ void engine_init(struct engine *e, struct space *s, parser_get_opt_param_int(params, "Snapshots:compression", 0); e->snapshotUnits = malloc(sizeof(struct unit_system)); units_init_default(e->snapshotUnits, params, "Snapshots", internal_units); + e->snapshotOutputCount = 0; e->dt_min = parser_get_param_double(params, "TimeIntegration:dt_min"); e->dt_max = parser_get_param_double(params, "TimeIntegration:dt_max"); - e->file_stats = NULL; - e->file_timesteps = NULL; e->deltaTimeStatistics = parser_get_param_double(params, "Statistics:delta_time"); e->timeLastStatistics = 0; @@ -5200,18 +5231,74 @@ void engine_init(struct engine *e, struct space *s, e->cputime_last_step = 0; e->last_repartition = 0; #endif - engine_rank = nodeID; /* Make the space link back to the engine. */ s->e = e; + /* Setup the timestep */ + e->timeBase = (e->timeEnd - e->timeBegin) / max_nr_timesteps; + e->timeBase_inv = 1.0 / e->timeBase; + e->ti_current = 0; +} + +/** + * @brief configure an engine with the given number of threads, queues + * and core affinity. Also initialises the scheduler and opens various + * output files, computes the next timestep and initialises the + * threadpool. + * + * Assumes the engine is correctly initialised i.e. is restored from a restart + * file or has been setup by engine_init(). When restarting any output log + * files are positioned so that further output is appended. Note that + * parameters are not read from the engine, just the parameter file, this + * allows values derived in this function to be changed between runs. + * When not restarting params should be the same as given to engine_init(). + * + * @param restart true when restarting the application. + * @param e The #engine. + * @param params The parsed parameter file. + * @param nr_nodes The number of MPI ranks. + * @param nodeID The MPI rank of this node. + * @param nr_threads The number of threads per MPI rank. + * @param with_aff use processor affinity, if supported. + * @param verbose Is this #engine talkative ? + * @param restart_file The name of our restart file. + */ +void engine_config(int restart, struct engine *e, + const struct swift_params *params, int nr_nodes, int nodeID, + int nr_threads, int with_aff, int verbose, + const char *restart_file) { + + /* Store the values and initialise global fields. */ + e->nodeID = nodeID; + e->nr_threads = nr_threads; + e->nr_nodes = nr_nodes; + e->proxy_ind = NULL; + e->nr_proxies = 0; + e->forcerebuild = 1; + e->forcerepart = 0; + e->dump_snapshot = 0; + e->save_stats = 0; + e->step_props = engine_step_prop_none; + e->links = NULL; + e->nr_links = 0; + e->file_stats = NULL; + e->file_timesteps = NULL; + e->verbose = verbose; + e->wallclock_time = 0.f; + e->restart_dump = 0; + e->restart_file = restart_file; + e->restart_next = 0; + e->restart_dt = 0; + engine_rank = nodeID; + /* Get the number of queues */ int nr_queues = parser_get_opt_param_int(params, "Scheduler:nr_queues", nr_threads); if (nr_queues <= 0) nr_queues = e->nr_threads; if (nr_queues != nr_threads) message("Number of task queues set to %d", nr_queues); - s->nr_queues = nr_queues; + e->s->nr_queues = nr_queues; /* Deal with affinity. For now, just figure out the number of cores. */ #if defined(HAVE_SETAFFINITY) @@ -5249,7 +5336,7 @@ void engine_init(struct engine *e, struct space *s, } #if defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE) - if ((policy & engine_policy_cputight) != engine_policy_cputight) { + if ((e->policy & engine_policy_cputight) != engine_policy_cputight) { if (numa_available() >= 0) { if (nodeID == 0) message("prefer NUMA-distant CPUs"); @@ -5345,19 +5432,31 @@ void engine_init(struct engine *e, struct space *s, /* Open some files */ if (e->nodeID == 0) { + + /* When restarting append to these files. */ + char *mode; + if (restart) + mode = "a"; + else + mode = "w"; + char energyfileName[200] = ""; parser_get_opt_param_string(params, "Statistics:energy_file_name", energyfileName, engine_default_energy_file_name); sprintf(energyfileName + strlen(energyfileName), ".txt"); - e->file_stats = fopen(energyfileName, "w"); - fprintf(e->file_stats, - "#%14s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s " - "%14s %14s %14s %14s %14s %14s\n", - "Time", "Mass", "E_tot", "E_kin", "E_int", "E_pot", "E_pot_self", - "E_pot_ext", "E_radcool", "Entropy", "p_x", "p_y", "p_z", "ang_x", - "ang_y", "ang_z", "com_x", "com_y", "com_z"); - fflush(e->file_stats); + e->file_stats = fopen(energyfileName, mode); + + if (!restart) { + fprintf( + e->file_stats, + "#%14s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s %14s " + "%14s %14s %14s %14s %14s %14s\n", + "Time", "Mass", "E_tot", "E_kin", "E_int", "E_pot", "E_pot_self", + "E_pot_ext", "E_radcool", "Entropy", "p_x", "p_y", "p_z", "ang_x", + "ang_y", "ang_z", "com_x", "com_y", "com_z"); + fflush(e->file_stats); + } char timestepsfileName[200] = ""; parser_get_opt_param_string(params, "Statistics:timestep_file_name", @@ -5366,30 +5465,35 @@ void engine_init(struct engine *e, struct space *s, sprintf(timestepsfileName + strlen(timestepsfileName), "_%d.txt", nr_nodes * nr_threads); - e->file_timesteps = fopen(timestepsfileName, "w"); - fprintf(e->file_timesteps, - "# Host: %s\n# Branch: %s\n# Revision: %s\n# Compiler: %s, " - "Version: %s \n# " - "Number of threads: %d\n# Number of MPI ranks: %d\n# Hydrodynamic " - "scheme: %s\n# Hydrodynamic kernel: %s\n# No. of neighbours: %.2f " - "+/- %.4f\n# Eta: %f\n", - hostname(), git_branch(), git_revision(), compiler_name(), - compiler_version(), e->nr_threads, e->nr_nodes, SPH_IMPLEMENTATION, - kernel_name, e->hydro_properties->target_neighbours, - e->hydro_properties->delta_neighbours, - e->hydro_properties->eta_neighbours); - - fprintf(e->file_timesteps, - "# Step Properties: Rebuild=%d, Redistribute=%d, Repartition=%d, " - "Statistics=%d, Snapshot=%d\n", - engine_step_prop_rebuild, engine_step_prop_redistribute, - engine_step_prop_repartition, engine_step_prop_statistics, - engine_step_prop_snapshot); - - fprintf(e->file_timesteps, "# %6s %14s %14s %12s %12s %12s %16s [%s] %6s\n", - "Step", "Time", "Time-step", "Updates", "g-Updates", "s-Updates", - "Wall-clock time", clocks_getunit(), "Props"); - fflush(e->file_timesteps); + e->file_timesteps = fopen(timestepsfileName, mode); + + if (!restart) { + fprintf( + e->file_timesteps, + "# Host: %s\n# Branch: %s\n# Revision: %s\n# Compiler: %s, " + "Version: %s \n# " + "Number of threads: %d\n# Number of MPI ranks: %d\n# Hydrodynamic " + "scheme: %s\n# Hydrodynamic kernel: %s\n# No. of neighbours: %.2f " + "+/- %.4f\n# Eta: %f\n", + hostname(), git_branch(), git_revision(), compiler_name(), + compiler_version(), e->nr_threads, e->nr_nodes, SPH_IMPLEMENTATION, + kernel_name, e->hydro_properties->target_neighbours, + e->hydro_properties->delta_neighbours, + e->hydro_properties->eta_neighbours); + + fprintf(e->file_timesteps, + "# Step Properties: Rebuild=%d, Redistribute=%d, Repartition=%d, " + "Statistics=%d, Snapshot=%d, Restarts=%d\n", + engine_step_prop_rebuild, engine_step_prop_redistribute, + engine_step_prop_repartition, engine_step_prop_statistics, + engine_step_prop_snapshot, engine_step_prop_restarts); + + fprintf(e->file_timesteps, + "# %6s %14s %14s %12s %12s %12s %16s [%s] %6s\n", "Step", "Time", + "Time-step", "Updates", "g-Updates", "s-Updates", + "Wall-clock time", clocks_getunit(), "Props"); + fflush(e->file_timesteps); + } } /* Print policy */ @@ -5418,9 +5522,11 @@ void engine_init(struct engine *e, struct space *s, e->dt_min, e->dt_max); /* Deal with timestep */ - e->timeBase = (e->timeEnd - e->timeBegin) / max_nr_timesteps; - e->timeBase_inv = 1.0 / e->timeBase; - e->ti_current = 0; + if (!restart) { + e->timeBase = (e->timeEnd - e->timeBegin) / max_nr_timesteps; + e->timeBase_inv = 1.0 / e->timeBase; + e->ti_current = 0; + } /* Info about time-steps */ if (e->nodeID == 0) { @@ -5460,6 +5566,34 @@ void engine_init(struct engine *e, struct space *s, /* Find the time of the first output */ engine_compute_next_snapshot_time(e); + /* Whether restarts are enabled. Yes by default. Can be changed on restart. */ + e->restart_dump = parser_get_opt_param_int(params, "Restarts:enable", 1); + + /* Whether restarts should be dumped on exit. Not by default. Can be changed + * on restart. */ + e->restart_onexit = parser_get_opt_param_int(params, "Restarts:onexit", 0); + + /* Hours between restart dumps. Can be changed on restart. */ + float dhours = + parser_get_opt_param_float(params, "Restarts:delta_hours", 6.0); + if (e->nodeID == 0) { + if (e->restart_dump) + message("Restarts will be dumped every %f hours", dhours); + else + message("WARNING: restarts will not be dumped"); + + if (e->verbose && e->restart_onexit) + message("Restarts will be dumped after the final step"); + } + + /* Internally we use ticks, so convert into a delta ticks. Assumes we can + * convert from ticks into milliseconds. */ + e->restart_dt = clocks_to_ticks(dhours * 60.0 * 60.0 * 1000.0); + + /* The first dump will happen no sooner than restart_dt ticks in the + * future. */ + e->restart_next = getticks() + e->restart_dt; + /* Construct types for MPI communications */ #ifdef WITH_MPI part_create_mpi_types(); @@ -5479,16 +5613,24 @@ void engine_init(struct engine *e, struct space *s, /* Expected average for tasks per cell. If set to zero we use a heuristic * guess based on the numbers of cells and how many tasks per cell we expect. + * On restart this number cannot be estimated (no cells yet), so we recover + * from the end of the dumped run. Can be changed on restart. */ e->tasks_per_cell = parser_get_opt_param_int(params, "Scheduler:tasks_per_cell", 0); + int maxtasks = 0; + if (restart) + maxtasks = e->restart_max_tasks; + else + maxtasks = engine_estimate_nr_tasks(e); /* Init the scheduler. */ - scheduler_init(&e->sched, e->s, engine_estimate_nr_tasks(e), nr_queues, - (policy & scheduler_flag_steal), e->nodeID, &e->threadpool); + scheduler_init(&e->sched, e->s, maxtasks, nr_queues, + (e->policy & scheduler_flag_steal), e->nodeID, &e->threadpool); /* Maximum size of MPI task messages, in KB, that should not be buffered, - * that is sent using MPI_Issend, not MPI_Isend. 4Mb by default. + * that is sent using MPI_Issend, not MPI_Isend. 4Mb by default. Can be + * changed on restart. */ e->sched.mpi_message_limit = parser_get_opt_param_int(params, "Scheduler:mpi_message_limit", 4) * 1024; @@ -5645,3 +5787,113 @@ void engine_clean(struct engine *e) { space_clean(e->s); threadpool_clean(&e->threadpool); } + +/** + * @brief Write the engine struct and its contents to the given FILE as a + * stream of bytes. + * + * @param e the engine + * @param stream the file stream + */ +void engine_struct_dump(struct engine *e, FILE *stream) { + + /* Dump the engine. Save the current tasks_per_cell estimate. */ + e->restart_max_tasks = engine_estimate_nr_tasks(e); + restart_write_blocks(e, sizeof(struct engine), 1, stream, "engine", + "engine struct"); + + /* And all the engine pointed data, these use their own dump functions. */ + space_struct_dump(e->s, stream); + units_struct_dump(e->internal_units, stream); + units_struct_dump(e->snapshotUnits, stream); + +#ifdef WITH_MPI + /* Save the partition for restoration. */ + partition_store_celllist(e->s, e->reparttype); + partition_struct_dump(e->reparttype, stream); +#endif + + phys_const_struct_dump(e->physical_constants, stream); + hydro_props_struct_dump(e->hydro_properties, stream); + gravity_props_struct_dump(e->gravity_properties, stream); + potential_struct_dump(e->external_potential, stream); + cooling_struct_dump(e->cooling_func, stream); + sourceterms_struct_dump(e->sourceterms, stream); + parser_struct_dump(e->parameter_file, stream); +} + +/** + * @brief Re-create an engine struct and its contents from the given FILE + * stream. + * + * @param e the engine + * @param stream the file stream + */ +void engine_struct_restore(struct engine *e, FILE *stream) { + + /* Read the engine. */ + restart_read_blocks(e, sizeof(struct engine), 1, stream, NULL, + "engine struct"); + + /* Re-initializations as necessary for our struct and its members. */ + e->sched.tasks = NULL; + e->sched.tasks_ind = NULL; + e->sched.tid_active = NULL; + e->sched.size = 0; + + /* Now for the other pointers, these use their own restore functions. */ + /* Note all this memory leaks, but is used once. */ + struct space *s = malloc(sizeof(struct space)); + space_struct_restore(s, stream); + e->s = s; + s->e = e; + + struct unit_system *us = malloc(sizeof(struct unit_system)); + units_struct_restore(us, stream); + e->internal_units = us; + + us = malloc(sizeof(struct unit_system)); + units_struct_restore(us, stream); + e->snapshotUnits = us; + +#ifdef WITH_MPI + struct repartition *reparttype = malloc(sizeof(struct repartition)); + partition_struct_restore(reparttype, stream); + e->reparttype = reparttype; +#endif + + struct phys_const *physical_constants = malloc(sizeof(struct phys_const)); + phys_const_struct_restore(physical_constants, stream); + e->physical_constants = physical_constants; + + struct hydro_props *hydro_properties = malloc(sizeof(struct hydro_props)); + hydro_props_struct_restore(hydro_properties, stream); + e->hydro_properties = hydro_properties; + + struct gravity_props *gravity_properties = + malloc(sizeof(struct gravity_props)); + gravity_props_struct_restore(gravity_properties, stream); + e->gravity_properties = gravity_properties; + + struct external_potential *external_potential = + malloc(sizeof(struct external_potential)); + potential_struct_restore(external_potential, stream); + e->external_potential = external_potential; + + struct cooling_function_data *cooling_func = + malloc(sizeof(struct cooling_function_data)); + cooling_struct_restore(cooling_func, stream); + e->cooling_func = cooling_func; + + struct sourceterms *sourceterms = malloc(sizeof(struct sourceterms)); + sourceterms_struct_restore(sourceterms, stream); + e->sourceterms = sourceterms; + + struct swift_params *parameter_file = malloc(sizeof(struct swift_params)); + parser_struct_restore(parameter_file, stream); + e->parameter_file = parameter_file; + + /* Want to force a rebuild before using this engine. Wait to repartition.*/ + e->forcerebuild = 1; + e->forcerepart = 0; +} diff --git a/src/engine.h b/src/engine.h index 02103022c6d7e38ae5e8d48054b45d60edf1a45b..25d82a5705aa78757df4bf6b2c880b71f111375b 100644 --- a/src/engine.h +++ b/src/engine.h @@ -83,7 +83,8 @@ enum engine_step_properties { engine_step_prop_redistribute = (1 << 1), engine_step_prop_repartition = (1 << 2), engine_step_prop_statistics = (1 << 3), - engine_step_prop_snapshot = (1 << 4) + engine_step_prop_snapshot = (1 << 4), + engine_step_prop_restarts = (1 << 5) }; /* Some constants */ @@ -194,6 +195,7 @@ struct engine { char snapshotBaseName[PARSER_MAX_LINE_SIZE]; int snapshotCompression; struct unit_system *snapshotUnits; + int snapshotOutputCount; /* Statistics information */ FILE *file_stats; @@ -290,6 +292,24 @@ struct engine { /* Temporary struct to hold a group of deferable properties (in MPI mode * these are reduced together, but may not be required just yet). */ struct collectgroup1 collect_group1; + + /* Whether to dump restart files. */ + int restart_dump; + + /* Whether to dump restart files after the last step. */ + int restart_onexit; + + /* Name of the restart file. */ + const char *restart_file; + + /* Ticks between restart dumps. */ + ticks restart_dt; + + /* Time after which next dump will occur. */ + ticks restart_next; + + /* Maximum number of tasks needed for restarting. */ + int restart_max_tasks; }; /* Function prototypes. */ @@ -301,18 +321,19 @@ void engine_drift_top_multipoles(struct engine *e); void engine_reconstruct_multipoles(struct engine *e); void engine_print_stats(struct engine *e); void engine_dump_snapshot(struct engine *e); -void engine_init(struct engine *e, struct space *s, - const struct swift_params *params, int nr_nodes, int nodeID, - int nr_threads, long long Ngas, long long Ndm, int with_aff, - int policy, int verbose, struct repartition *reparttype, - const struct unit_system *internal_units, - const struct phys_const *physical_constants, - const struct hydro_props *hydro, - const struct gravity_props *gravity, - const struct external_potential *potential, - const struct cooling_function_data *cooling_func, - const struct chemistry_data *chemistry, - struct sourceterms *sourceterms); +void engine_init( + struct engine *e, struct space *s, const struct swift_params *params, + long long Ngas, long long Ndm, int policy, int verbose, + struct repartition *reparttype, const struct unit_system *internal_units, + const struct phys_const *physical_constants, + const struct hydro_props *hydro, const struct gravity_props *gravity, + const struct external_potential *potential, + const struct cooling_function_data *cooling_func, + const struct chemistry_data *chemistry, struct sourceterms *sourceterms); +void engine_config(int restart, struct engine *e, + const struct swift_params *params, int nr_nodes, int nodeID, + int nr_threads, int with_aff, int verbose, + const char *restart_file); void engine_launch(struct engine *e); void engine_prepare(struct engine *e); void engine_init_particles(struct engine *e, int flag_entropy_ICs, @@ -337,4 +358,9 @@ void engine_unpin(); void engine_clean(struct engine *e); int engine_estimate_nr_tasks(struct engine *e); +/* Struct dump/restore support. */ +void engine_struct_dump(struct engine *e, FILE *stream); +void engine_struct_restore(struct engine *e, FILE *stream); +void engine_dump_restarts(struct engine *e, int drifted_all, int final_step); + #endif /* SWIFT_ENGINE_H */ diff --git a/src/gravity_properties.c b/src/gravity_properties.c index 27a5de0a4102cae4ca787c10c60cf3bbc3a983ee..1df421a8b21424dc99e52bc9dbc59560e54d14a5 100644 --- a/src/gravity_properties.c +++ b/src/gravity_properties.c @@ -96,3 +96,26 @@ void gravity_props_print_snapshot(hid_t h_grpgrav, io_write_attribute_f(h_grpgrav, "Mesh r_cut_min", p->r_cut_min); } #endif + +/** + * @brief Write a gravity_props struct to the given FILE as a stream of bytes. + * + * @param p the struct + * @param stream the file stream + */ +void gravity_props_struct_dump(const struct gravity_props *p, FILE *stream) { + restart_write_blocks((void *)p, sizeof(struct gravity_props), 1, stream, + "gravity", "gravity props"); +} + +/** + * @brief Restore a gravity_props struct from the given FILE as a stream of + * bytes. + * + * @param p the struct + * @param stream the file stream + */ +void gravity_props_struct_restore(const struct gravity_props *p, FILE *stream) { + restart_read_blocks((void *)p, sizeof(struct gravity_props), 1, stream, NULL, + "gravity props"); +} diff --git a/src/gravity_properties.h b/src/gravity_properties.h index f7b9950052b302a003e5d128191c9dbe68fe875f..826ffd0de05d376b930523c8a5d937e457c6d795 100644 --- a/src/gravity_properties.h +++ b/src/gravity_properties.h @@ -28,6 +28,7 @@ /* Local includes. */ #include "parser.h" +#include "restart.h" /** * @brief Contains all the constants and parameters of the self-gravity scheme @@ -79,4 +80,8 @@ void gravity_props_print_snapshot(hid_t h_grpsph, const struct gravity_props *p); #endif +/* Dump/restore. */ +void gravity_props_struct_dump(const struct gravity_props *p, FILE *stream); +void gravity_props_struct_restore(const struct gravity_props *p, FILE *stream); + #endif /* SWIFT_GRAVITY_PROPERTIES */ diff --git a/src/hydro_properties.c b/src/hydro_properties.c index 995610acb2d46cb62a4fa5fc7ef76b0d9474f504..31e18913b7414110db68c6202c512de5fb90a3c1 100644 --- a/src/hydro_properties.c +++ b/src/hydro_properties.c @@ -127,3 +127,26 @@ void hydro_props_print_snapshot(hid_t h_grpsph, const struct hydro_props *p) { p->max_smoothing_iterations); } #endif + +/** + * @brief Write a hydro_props struct to the given FILE as a stream of bytes. + * + * @param p the struct + * @param stream the file stream + */ +void hydro_props_struct_dump(const struct hydro_props *p, FILE *stream) { + restart_write_blocks((void *)p, sizeof(struct hydro_props), 1, stream, + "hydroprops", "hydro props"); +} + +/** + * @brief Restore a hydro_props struct from the given FILE as a stream of + * bytes. + * + * @param p the struct + * @param stream the file stream + */ +void hydro_props_struct_restore(const struct hydro_props *p, FILE *stream) { + restart_read_blocks((void *)p, sizeof(struct hydro_props), 1, stream, NULL, + "hydro props"); +} diff --git a/src/hydro_properties.h b/src/hydro_properties.h index a887ccb6df13b649cd1ef1009059c6f08908669c..6d325c35d6ae6a9bcddbfff9ccc4601e026fc4e6 100644 --- a/src/hydro_properties.h +++ b/src/hydro_properties.h @@ -33,6 +33,7 @@ /* Local includes. */ #include "parser.h" +#include "restart.h" /** * @brief Contains all the constants and parameters of the hydro scheme @@ -71,4 +72,8 @@ void hydro_props_init(struct hydro_props *p, const struct swift_params *params); void hydro_props_print_snapshot(hid_t h_grpsph, const struct hydro_props *p); #endif +/* Dump/restore. */ +void hydro_props_struct_dump(const struct hydro_props *p, FILE *stream); +void hydro_props_struct_restore(const struct hydro_props *p, FILE *stream); + #endif /* SWIFT_HYDRO_PROPERTIES */ diff --git a/src/parallel_io.c b/src/parallel_io.c index 3e8cadf18a9468d52c0b32c6cb054d668ad5c37d..d17b7e99e5ec0bb97d87eb1dc50e4a5d40a37f01 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -833,7 +833,6 @@ void write_output_parallel(struct engine* e, const char* baseName, struct gpart* gparts = e->s->gparts; struct gpart* dmparts = NULL; struct spart* sparts = e->s->sparts; - static int outputCount = 0; FILE* xmfFile = 0; /* Number of unassociated gparts */ @@ -842,10 +841,10 @@ void write_output_parallel(struct engine* e, const char* baseName, /* File name */ char fileName[FILENAME_BUFFER_SIZE]; snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%04i.hdf5", baseName, - outputCount); + e->snapshotOutputCount); /* First time, we need to create the XMF file */ - if (outputCount == 0 && mpi_rank == 0) xmf_create_file(baseName); + if (e->snapshotOutputCount == 0 && mpi_rank == 0) xmf_create_file(baseName); /* Prepare the XMF file for the new entry */ if (mpi_rank == 0) xmfFile = xmf_prepare_file(baseName); @@ -1158,7 +1157,8 @@ void write_output_parallel(struct engine* e, const char* baseName, #endif /* Write LXMF file descriptor */ - if (mpi_rank == 0) xmf_write_outputfooter(xmfFile, outputCount, e->time); + if (mpi_rank == 0) + xmf_write_outputfooter(xmfFile, e->snapshotOutputCount, e->time); #ifdef IO_SPEED_MEASUREMENT MPI_Barrier(MPI_COMM_WORLD); @@ -1193,7 +1193,7 @@ void write_output_parallel(struct engine* e, const char* baseName, clocks_getunit()); #endif - ++outputCount; + e->snapshotOutputCount++; } #endif /* HAVE_HDF5 */ diff --git a/src/parser.c b/src/parser.c index 0b608b29263342240af68fd99d2fdd3241e2a1e6..cbd63e913010066cbd39418b81609f4669404213 100644 --- a/src/parser.c +++ b/src/parser.c @@ -33,6 +33,7 @@ /* Local headers. */ #include "common_io.h" #include "error.h" +#include "restart.h" #define PARSER_COMMENT_STRING "#" #define PARSER_COMMENT_CHAR '#' @@ -784,3 +785,26 @@ void parser_write_params_to_hdf5(const struct swift_params *params, hid_t grp) { io_write_attribute_s(grp, params->data[i].name, params->data[i].value); } #endif + +/** + * @brief Write a swift_params struct to the given FILE as a stream of bytes. + * + * @param params the struct + * @param stream the file stream + */ +void parser_struct_dump(const struct swift_params *params, FILE *stream) { + restart_write_blocks((void *)params, sizeof(struct swift_params), 1, stream, + "parameters", "parameters"); +} + +/** + * @brief Restore a swift_params struct from the given FILE as a stream of + * bytes. + * + * @param params the struct + * @param stream the file stream + */ +void parser_struct_restore(const struct swift_params *params, FILE *stream) { + restart_read_blocks((void *)params, sizeof(struct swift_params), 1, stream, + NULL, "parameters"); +} diff --git a/src/parser.h b/src/parser.h index bab6d8b25f5334546ac2aaf39a3f25ef7fb6ff57..4e61b16ab53b3688e60f85a42e786c44b095120a 100644 --- a/src/parser.h +++ b/src/parser.h @@ -83,4 +83,8 @@ void parser_get_opt_param_string(const struct swift_params *params, void parser_write_params_to_hdf5(const struct swift_params *params, hid_t grp); #endif +/* Dump/restore. */ +void parser_struct_dump(const struct swift_params *params, FILE *stream); +void parser_struct_restore(const struct swift_params *params, FILE *stream); + #endif /* SWIFT_PARSER_H */ diff --git a/src/partition.c b/src/partition.c index 297ac71ac75ea4a5c5446033418b1bb4352b116b..b46172d859e1e19221ee86ecf2c28650715281ef 100644 --- a/src/partition.c +++ b/src/partition.c @@ -50,6 +50,7 @@ #include "debug.h" #include "error.h" #include "partition.h" +#include "restart.h" #include "space.h" #include "tools.h" @@ -1161,6 +1162,10 @@ void partition_init(struct partition *partition, "Invalid DomainDecomposition:minfrac, must be greater than 0 and less " "than equal to 1"); + /* Clear the celllist for use. */ + repartition->ncelllist = 0; + repartition->celllist = NULL; + #else error("SWIFT was not compiled with MPI support"); #endif @@ -1189,7 +1194,8 @@ static int check_complete(struct space *s, int verbose, int nregions) { if (s->cells_top[i].nodeID <= nregions) present[s->cells_top[i].nodeID]++; else - message("Bad nodeID: %d", s->cells_top[i].nodeID); + message("Bad nodeID: s->cells_top[%d].nodeID = %d", i, + s->cells_top[i].nodeID); } for (int i = 0; i < nregions; i++) { if (!present[i]) { @@ -1250,3 +1256,87 @@ int partition_space_to_space(double *oldh, double *oldcdim, int *oldnodeIDs, /* Check we have all nodeIDs present in the resample. */ return check_complete(s, 1, nr_nodes + 1); } + +/** + * @brief save the nodeIDs of the current top-level cells by adding them to a + * repartition struct. Used when restarting application. + * + * @param s the space with the top-level cells. + * @param reparttype struct to update with the a list of nodeIDs. + * + */ +void partition_store_celllist(struct space *s, struct repartition *reparttype) { + if (reparttype->celllist != NULL) free(reparttype->celllist); + reparttype->celllist = malloc(sizeof(int) * s->nr_cells); + reparttype->ncelllist = s->nr_cells; + if (reparttype->celllist == NULL) error("Failed to allocate celllist"); + + for (int i = 0; i < s->nr_cells; i++) { + reparttype->celllist[i] = s->cells_top[i].nodeID; + } +} + +/** + * @brief restore the saved list of nodeIDs by applying them to the + * top-level cells of a space. Used when restarting application. + * + * @param s the space with the top-level cells. + * @param reparttype struct with the list of nodeIDs saved, + * + */ +void partition_restore_celllist(struct space *s, + struct repartition *reparttype) { + if (reparttype->ncelllist > 0) { + if (reparttype->ncelllist == s->nr_cells) { + for (int i = 0; i < s->nr_cells; i++) { + s->cells_top[i].nodeID = reparttype->celllist[i]; + } + if (!check_complete(s, 1, s->e->nr_nodes)) { + error("Not all ranks are present in the restored partition"); + } + } else { + error( + "Cannot apply the saved partition celllist as the number of" + "top-level cells (%d) is different to the saved number (%d)", + s->nr_cells, reparttype->ncelllist); + } + } +} + +/** + * @brief Write a repartition struct to the given FILE as a stream of bytes. + * + * @param reparttype the struct + * @param stream the file stream + */ +void partition_struct_dump(struct repartition *reparttype, FILE *stream) { + restart_write_blocks(reparttype, sizeof(struct repartition), 1, stream, + "repartition", "repartition params"); + + /* Also save the celllist, if we have one. */ + if (reparttype->ncelllist > 0) + restart_write_blocks(reparttype->celllist, + sizeof(int) * reparttype->ncelllist, 1, stream, + "celllist", "repartition celllist"); +} + +/** + * @brief Restore a repartition struct from the given FILE as a stream of + * bytes. + * + * @param reparttype the struct + * @param stream the file stream + */ +void partition_struct_restore(struct repartition *reparttype, FILE *stream) { + restart_read_blocks(reparttype, sizeof(struct repartition), 1, stream, NULL, + "repartition params"); + + /* Also restore the celllist, if we have one. */ + if (reparttype->ncelllist > 0) { + reparttype->celllist = malloc(sizeof(int) * reparttype->ncelllist); + if (reparttype->celllist == NULL) error("Failed to allocate celllist"); + restart_read_blocks(reparttype->celllist, + sizeof(int) * reparttype->ncelllist, 1, stream, NULL, + "repartition celllist"); + } +} diff --git a/src/partition.h b/src/partition.h index c3eade190c9514efb4c44011e3990745e20846fd..3ad479c6b1b343106ac736e2d9c77aa9bc93cf60 100644 --- a/src/partition.h +++ b/src/partition.h @@ -57,6 +57,10 @@ struct repartition { enum repartition_type type; float trigger; float minfrac; + + /* The partition as a cell list, if used. */ + int ncelllist; + int *celllist; }; /* Simple descriptions of types for reports. */ @@ -74,4 +78,11 @@ void partition_init(struct partition *partition, struct repartition *repartition, const struct swift_params *params, int nr_nodes); +/* Dump/restore. */ +void partition_store_celllist(struct space *s, struct repartition *reparttype); +void partition_restore_celllist(struct space *s, + struct repartition *reparttype); +void partition_struct_dump(struct repartition *reparttype, FILE *stream); +void partition_struct_restore(struct repartition *reparttype, FILE *stream); + #endif /* SWIFT_PARTITION_H */ diff --git a/src/physical_constants.c b/src/physical_constants.c index c851578c96e146261e9512f6899c7b82a8d91097..b70b80574e7f73fe3568e18f0809350548564c4b 100644 --- a/src/physical_constants.c +++ b/src/physical_constants.c @@ -27,6 +27,7 @@ /* Local headers. */ #include "error.h" #include "physical_constants_cgs.h" +#include "restart.h" /** * @brief Converts physical constants to the internal unit system @@ -34,8 +35,8 @@ * @param us The current internal system of units. * @param internal_const The physical constants to initialize. */ -void phys_const_init(struct unit_system* us, - struct phys_const* internal_const) { +void phys_const_init(struct unit_system *us, + struct phys_const *internal_const) { /* Units are declared as {U_M, U_L, U_t, U_I, U_T} */ @@ -105,7 +106,7 @@ void phys_const_init(struct unit_system* us, units_general_cgs_conversion_factor(us, dimension_length); } -void phys_const_print(struct phys_const* internal_const) { +void phys_const_print(struct phys_const *internal_const) { message("%25s = %e", "Gravitational constant", internal_const->const_newton_G); @@ -121,3 +122,28 @@ void phys_const_print(struct phys_const* internal_const) { message("%25s = %e", "Parsec", internal_const->const_parsec); message("%25s = %e", "Solar mass", internal_const->const_solar_mass); } + +/** + * @brief Write a phys_const struct to the given FILE as a stream of bytes. + * + * @param internal_const the struct + * @param stream the file stream + */ +void phys_const_struct_dump(const struct phys_const *internal_const, + FILE *stream) { + restart_write_blocks((void *)internal_const, sizeof(struct phys_const), 1, + stream, "physconst", "phys_const params"); +} + +/** + * @brief Restore a phys_const struct from the given FILE as a stream of + * bytes. + * + * @param internal_const the struct + * @param stream the file stream + */ +void phys_const_struct_restore(const struct phys_const *internal_const, + FILE *stream) { + restart_read_blocks((void *)internal_const, sizeof(struct phys_const), 1, + stream, NULL, "phys_const params"); +} diff --git a/src/physical_constants.h b/src/physical_constants.h index 3731c0ef565c6159592ad2de96a222efc6cf43f2..39c6c52cba311228d6152fd59e8a6001d11a3ac1 100644 --- a/src/physical_constants.h +++ b/src/physical_constants.h @@ -82,4 +82,10 @@ void phys_const_init(struct unit_system* us, struct phys_const* internal_const); void phys_const_print(struct phys_const* internal_const); +/* Dump/restore. */ +void phys_const_struct_dump(const struct phys_const* internal_const, + FILE* stream); +void phys_const_struct_restore(const struct phys_const* internal_const, + FILE* stream); + #endif /* SWIFT_PHYSICAL_CONSTANTS_H */ diff --git a/src/potential.c b/src/potential.c index 94c2a6cc9412d1fc76b70ddebb2edba7878e6209..1fda6fc8752ff626a5262d7824ea68fd3bc16d46 100644 --- a/src/potential.c +++ b/src/potential.c @@ -23,6 +23,7 @@ /* This object's header. */ #include "potential.h" +#include "restart.h" /** * @brief Initialises the external potential properties in the internal system @@ -51,3 +52,29 @@ void potential_print(const struct external_potential* potential) { potential_print_backend(potential); } + +/** + * @brief Write an external_potential struct to the given FILE as a stream of + * bytes. + * + * @param potential the struct + * @param stream the file stream + */ +void potential_struct_dump(const struct external_potential* potential, + FILE* stream) { + restart_write_blocks((void*)potential, sizeof(struct external_potential), 1, + stream, "externalpotential", "external potential"); +} + +/** + * @brief Restore a external_potential struct from the given FILE as a stream of + * bytes. + * + * @param potential the struct + * @param stream the file stream + */ +void potential_struct_restore(const struct external_potential* potential, + FILE* stream) { + restart_read_blocks((void*)potential, sizeof(struct external_potential), 1, + stream, NULL, "external potential"); +} diff --git a/src/potential.h b/src/potential.h index e6ab9a5bd6bd91801eae0b3f1e3d8f65778f5065..bcb3fd284021fba339ba494c90b81c91bd2ce72f 100644 --- a/src/potential.h +++ b/src/potential.h @@ -50,4 +50,10 @@ void potential_init(const struct swift_params* parameter_file, void potential_print(const struct external_potential* potential); +/* Dump/restore. */ +void potential_struct_dump(const struct external_potential* potential, + FILE* stream); +void potential_struct_restore(const struct external_potential* potential, + FILE* stream); + #endif /* SWIFT_POTENTIAL_H */ diff --git a/src/restart.c b/src/restart.c new file mode 100644 index 0000000000000000000000000000000000000000..a507555fe62277f2bec117299f9bc12d87985153 --- /dev/null +++ b/src/restart.c @@ -0,0 +1,285 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2018 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/** + * @file restart.c + * @brief support for SWIFT restarts + */ + +/* Config parameters. */ +#include "../config.h" + +/* Standard headers. */ +#include <errno.h> +#include <glob.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "engine.h" +#include "error.h" +#include "restart.h" +#include "version.h" + +/* The signature for restart files. */ +#define SWIFT_RESTART_SIGNATURE "SWIFT-restart-file" +#define SWIFT_RESTART_END_SIGNATURE "SWIFT-restart-file:end" + +#define FNAMELEN 200 +#define LABLEN 20 + +/* Structure for a dumped header. */ +struct header { + size_t len; /* Total length of data in bytes. */ + char label[LABLEN + 1]; /* A label for data */ +}; + +/** + * @brief generate a name for a restart file. + * + * @param dir the directory of restart files. + * @param basename the basename of the restart files. + * @param nodeID a unique integer, usually the nodeID of the engine. + * @param name pointer to a string to hold the result. + * @param size length of name. + * + * @result 0 if the string was large enough. + */ +int restart_genname(const char *dir, const char *basename, int nodeID, + char *name, int size) { + int n = snprintf(name, size, "%s/%s_%06d.rst", dir, basename, nodeID); + return (n >= size); +} + +/** + * @brief locate all the restart files in the given directory with the given + * basename. + * @param dir the directory of restart files. + * @param basename the basename of the restart files. + * @param nfiles the number of restart files located. + * + * @result pointer to an array of strings with all the filenames found, + * these should be collated using the current locale, i.e. sorted + * alphabetically (so make sure the filenames are zero padded to get + * numeric ordering). Release by calling restart_locate_free(). + */ +char **restart_locate(const char *dir, const char *basename, int *nfiles) { + *nfiles = 0; + + /* Construct the glob pattern for locating files. */ + char pattern[FNAMELEN]; + if (snprintf(pattern, FNAMELEN, "%s/%s_[0-9]*.rst", dir, basename) < + FNAMELEN) { + + glob_t globbuf; + char **files = NULL; + if (glob(pattern, 0, NULL, &globbuf) == 0) { + *nfiles = globbuf.gl_pathc; + files = malloc(sizeof(char *) * *nfiles); + for (int i = 0; i < *nfiles; i++) { + files[i] = strdup(globbuf.gl_pathv[i]); + } + } + + globfree(&globbuf); + return files; + } + error("Failed to construct pattern to locate restart files"); + + return NULL; +} + +/** + * @brief Release the memory allocated to hold the restart file names. + * + * @param nfiles the number of restart files located. + * @param files the list of filenames found in call to restart_locate(). + */ +void restart_locate_free(int nfiles, char **files) { + for (int i = 0; i < nfiles; i++) { + free(files[i]); + } + free(files); +} + +/** + * @brief Write a restart file for the state of the given engine struct. + * + * @param e the engine with our state information. + * @param filename name of the file to write the restart data to. + */ +void restart_write(struct engine *e, const char *filename) { + + FILE *stream = fopen(filename, "w"); + if (stream == NULL) + error("Failed to open restart file: %s (%s)", filename, strerror(errno)); + + /* Dump our signature and version. */ + restart_write_blocks(SWIFT_RESTART_SIGNATURE, strlen(SWIFT_RESTART_SIGNATURE), + 1, stream, "signature", "SWIFT signature"); + restart_write_blocks((void *)package_version(), strlen(package_version()), 1, + stream, "version", "SWIFT version"); + + engine_struct_dump(e, stream); + + /* Just an END statement to spot truncated files. */ + restart_write_blocks(SWIFT_RESTART_END_SIGNATURE, + strlen(SWIFT_RESTART_END_SIGNATURE), + 1, stream, "endsignature", "SWIFT end signature"); + + fclose(stream); +} + +/** + * @brief Read a restart file to construct a saved engine struct state. + * + * @param e the engine to recover from the saved state. + * @param filename name of the file containing the staved state. + */ +void restart_read(struct engine *e, const char *filename) { + + FILE *stream = fopen(filename, "r"); + if (stream == NULL) + error("Failed to open restart file: %s (%s)", filename, strerror(errno)); + + /* Get our version and signature back. These should match. */ + char signature[strlen(SWIFT_RESTART_SIGNATURE) + 1]; + int len = strlen(SWIFT_RESTART_SIGNATURE); + restart_read_blocks(signature, len, 1, stream, NULL, "SWIFT signature"); + signature[len] = '\0'; + if (strncmp(signature, SWIFT_RESTART_SIGNATURE, len) != 0) + error( + "Do not recognise this as a SWIFT restart file, found '%s' " + "expected '%s'", + signature, SWIFT_RESTART_SIGNATURE); + + char version[FNAMELEN]; + len = strlen(package_version()); + restart_read_blocks(version, len, 1, stream, NULL, "SWIFT version"); + version[len] = '\0'; + + /* It might work! */ + if (strncmp(version, package_version(), len) != 0) + message( + "WARNING: restoring from a different version of SWIFT.\n You have:" + " '%s' and the restarts files are from: '%s'. This may fail" + " badly.", + package_version(), version); + + engine_struct_restore(e, stream); + fclose(stream); +} + +/** + * @brief Read blocks of memory from a file stream into a memory location. + * Exits the application if the read fails and does nothing if the + * size is zero. + * + * @param ptr pointer to the memory + * @param size size of a block + * @param nblocks number of blocks to read + * @param stream the file stream + * @param label the label recovered for the block, needs to be at least 20 + * characters, set to NULL if not required + * @param errstr a context string to qualify any errors. + */ +void restart_read_blocks(void *ptr, size_t size, size_t nblocks, FILE *stream, + char *label, const char *errstr) { + if (size > 0) { + struct header head; + size_t nread = fread(&head, sizeof(struct header), 1, stream); + if (nread != 1) + error("Failed to read the %s header from restart file (%s)", errstr, + strerror(errno)); + + /* Check that the stored length is the same as the expected one. */ + if (head.len != nblocks * size) + error("Mismatched data length in restart file for %s (%zu != %zu)", + errstr, head.len, nblocks * size); + + /* Return label, if required. */ + if (label != NULL) strncpy(label, head.label, LABLEN); + + nread = fread(ptr, size, nblocks, stream); + if (nread != nblocks) + error("Failed to restore %s from restart file (%s)", errstr, + ferror(stream) ? strerror(errno) : "unexpected end of file"); + } +} + +/** + * @brief Write blocks of memory to a file stream from a memory location. + * Exits the application if the write fails and does nothing + * if the size is zero. + * + * @param ptr pointer to the memory + * @param size the blocks + * @param nblocks number of blocks to write + * @param stream the file stream + * @param label a label for the content, can only be 20 characters. + * @param errstr a context string to qualify any errors. + */ +void restart_write_blocks(void *ptr, size_t size, size_t nblocks, FILE *stream, + const char *label, const char *errstr) { + if (size > 0) { + + /* Add a preamble header. */ + struct header head; + head.len = nblocks * size; + strncpy(head.label, label, LABLEN); + head.label[LABLEN] = '\0'; + + /* Now dump it and the data. */ + size_t nwrite = fwrite(&head, sizeof(struct header), 1, stream); + if (nwrite != 1) + error("Failed to save %s header to restart file (%s)", errstr, + strerror(errno)); + + nwrite = fwrite(ptr, size, nblocks, stream); + if (nwrite != nblocks) + error("Failed to save %s to restart file (%s)", errstr, strerror(errno)); + } +} + +/** + * @brief check if the stop file exists in the given directory and optionally + * remove it if found. + * + * @param dir the directory of restart files. + * @param cleanup remove the file if found. Should only do this from one rank + * once all ranks have tested this file. + * + * @result 1 if the file was found. + */ +int restart_stop_now(const char *dir, int cleanup) { + static struct stat buf; + char filename[FNAMELEN]; + strcpy(filename, dir); + strcat(filename, "/stop"); + if (stat(filename, &buf) == 0) { + if (cleanup && unlink(filename) != 0) { + /* May not be fatal, so press on. */ + message("Failed to delete restart stop file (%s)", strerror(errno)); + } + return 1; + } + return 0; +} diff --git a/src/restart.h b/src/restart.h new file mode 100644 index 0000000000000000000000000000000000000000..80e6c6755c09be4d3bda84860774b7da9e1a07cd --- /dev/null +++ b/src/restart.h @@ -0,0 +1,42 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2018 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_RESTART_H +#define SWIFT_RESTART_H + +#include <stdio.h> + +struct engine; + +void restart_write(struct engine *e, const char *filename); +void restart_read(struct engine *e, const char *filename); + +char **restart_locate(const char *dir, const char *basename, int *nfiles); +void restart_locate_free(int nfiles, char **files); +int restart_genname(const char *dir, const char *basename, int nodeID, + char *name, int size); + +void restart_read_blocks(void *ptr, size_t size, size_t nblocks, FILE *stream, + char *label, const char *errstr); + +void restart_write_blocks(void *ptr, size_t size, size_t nblocks, FILE *stream, + const char *label, const char *errstr); + +int restart_stop_now(const char *dir, int cleanup); + +#endif /* SWIFT_RESTART_H */ diff --git a/src/serial_io.c b/src/serial_io.c index 38dd3b3e4d278535fa3f042fb3302bdefb5b5793..047c3d3f88ba56eca8720fa06bba9bf5c3db07b7 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -712,7 +712,6 @@ void write_output_serial(struct engine* e, const char* baseName, struct gpart* gparts = e->s->gparts; struct gpart* dmparts = NULL; struct spart* sparts = e->s->sparts; - static int outputCount = 0; FILE* xmfFile = 0; /* Number of unassociated gparts */ @@ -721,7 +720,7 @@ void write_output_serial(struct engine* e, const char* baseName, /* File name */ char fileName[FILENAME_BUFFER_SIZE]; snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%04i.hdf5", baseName, - outputCount); + e->snapshotOutputCount); /* Compute offset in the file and total number of particles */ size_t N[swift_type_count] = {Ngas, Ndm, 0, 0, Nstars, 0}; @@ -741,7 +740,7 @@ void write_output_serial(struct engine* e, const char* baseName, if (mpi_rank == 0) { /* First time, we need to create the XMF file */ - if (outputCount == 0) xmf_create_file(baseName); + if (e->snapshotOutputCount == 0) xmf_create_file(baseName); /* Prepare the XMF file for the new entry */ xmfFile = xmf_prepare_file(baseName); @@ -1002,10 +1001,11 @@ void write_output_serial(struct engine* e, const char* baseName, } /* Write footer of LXMF file descriptor */ - if (mpi_rank == 0) xmf_write_outputfooter(xmfFile, outputCount, e->time); + if (mpi_rank == 0) + xmf_write_outputfooter(xmfFile, e->snapshotOutputCount, e->time); /* message("Done writing particles..."); */ - ++outputCount; + e->snapshotOutputCount++; } #endif /* HAVE_HDF5 && HAVE_MPI */ diff --git a/src/single_io.c b/src/single_io.c index e8e281b68b183dd6c9aa9aa1a4fdfe33f56b08bf..79dc6d43d645862edb3e0f8a9a6480f368265a50 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -577,7 +577,6 @@ void write_output_single(struct engine* e, const char* baseName, struct gpart* gparts = e->s->gparts; struct gpart* dmparts = NULL; struct spart* sparts = e->s->sparts; - static int outputCount = 0; /* Number of unassociated gparts */ const size_t Ndm = Ntot > 0 ? Ntot - (Ngas + Nstars) : 0; @@ -587,10 +586,10 @@ void write_output_single(struct engine* e, const char* baseName, /* File name */ char fileName[FILENAME_BUFFER_SIZE]; snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%04i.hdf5", baseName, - outputCount); + e->snapshotOutputCount); /* First time, we need to create the XMF file */ - if (outputCount == 0) xmf_create_file(baseName); + if (e->snapshotOutputCount == 0) xmf_create_file(baseName); /* Prepare the XMF file for the new entry */ FILE* xmfFile = 0; @@ -807,14 +806,14 @@ void write_output_single(struct engine* e, const char* baseName, } /* Write LXMF file descriptor */ - xmf_write_outputfooter(xmfFile, outputCount, e->time); + xmf_write_outputfooter(xmfFile, e->snapshotOutputCount, e->time); /* message("Done writing particles..."); */ /* Close file */ H5Fclose(h_file); - ++outputCount; + e->snapshotOutputCount++; } #endif /* HAVE_HDF5 */ diff --git a/src/sourceterms.c b/src/sourceterms.c index f12071cf912eae3aa8d0e25f0f3b4c5e139de667..994658740a50a764edd3988ef7d6b78e00546f8f 100644 --- a/src/sourceterms.c +++ b/src/sourceterms.c @@ -36,8 +36,8 @@ * @param us The current internal system of units * @param source the structure that has all the source term properties */ -void sourceterms_init(const struct swift_params* parameter_file, - struct unit_system* us, struct sourceterms* source) { +void sourceterms_init(const struct swift_params *parameter_file, + struct unit_system *us, struct sourceterms *source) { #ifdef SOURCETERMS_SN_FEEDBACK supernova_init(parameter_file, us, source); #endif /* SOURCETERMS_SN_FEEDBACK */ @@ -47,7 +47,7 @@ void sourceterms_init(const struct swift_params* parameter_file, * @brief Prints the properties of the source terms to stdout * @param source the structure that has all the source term properties */ -void sourceterms_print(struct sourceterms* source) { +void sourceterms_print(struct sourceterms *source) { #ifdef SOURCETERMS_NONE error(" no sourceterms defined yet you ran with -F"); #ifdef SOURCETERMS_SN_FEEDBACK @@ -58,3 +58,28 @@ void sourceterms_print(struct sourceterms* source) { supernova_print(source); #endif /* SOURCETERMS_SN_FEEDBACK */ }; + +/** + * @brief Write a sourceterms struct to the given FILE as a stream of bytes. + * + * @param sourceterms the struct + * @param stream the file stream + */ +void sourceterms_struct_dump(const struct sourceterms *sourceterms, + FILE *stream) { + restart_write_blocks((void *)sourceterms, sizeof(struct sourceterms), 1, + stream, "sourceterms", "sourceterms"); +} + +/** + * @brief Restore a sourceterms struct from the given FILE as a stream of + * bytes. + * + * @param sourceterms the struct + * @param stream the file stream + */ +void sourceterms_struct_restore(const struct sourceterms *sourceterms, + FILE *stream) { + restart_read_blocks((void *)sourceterms, sizeof(struct sourceterms), 1, + stream, NULL, "sourceterms"); +} diff --git a/src/sourceterms.h b/src/sourceterms.h index 1445bcb777ff634d1e3a2312cb0a49ac155e1020..a5d0c3c727d70d50fb3388d5e5e3cc3d6362276f 100644 --- a/src/sourceterms.h +++ b/src/sourceterms.h @@ -45,6 +45,10 @@ void sourceterms_init(const struct swift_params* parameter_file, struct unit_system* us, struct sourceterms* source); void sourceterms_print(struct sourceterms* source); +/* Dump/restore. */ +void sourceterms_struct_dump(const struct sourceterms* source, FILE* stream); +void sourceterms_struct_restore(const struct sourceterms* source, FILE* stream); + /** * @brief Routines related to source terms * @param cell_min: corner of cell to test diff --git a/src/space.c b/src/space.c index b2f9240b72b6dcc73c48548c902de008e4b80444..c0381b619f1aa8b1847fda84e3b15255cdfc9f85 100644 --- a/src/space.c +++ b/src/space.c @@ -53,6 +53,7 @@ #include "memswap.h" #include "minmax.h" #include "multipole.h" +#include "restart.h" #include "runner.h" #include "sort_part.h" #include "stars.h" @@ -384,6 +385,9 @@ void space_regrid(struct space *s, int verbose) { } } + /* Are we about to allocate new top level cells without a regrid? + * Can happen when restarting the application. */ + int no_regrid = (s->cells_top == NULL && oldnodeIDs == NULL); #endif /* Do we need to re-build the upper-level cells? */ @@ -513,6 +517,21 @@ void space_regrid(struct space *s, int verbose) { /* Finished with these. */ free(oldnodeIDs); + + } else if (no_regrid && s->e != NULL) { + /* If we have created the top-levels cells and not done an initial + * partition (can happen when restarting), then the top-level cells + * are not assigned to a node, we must do that and then associate the + * particles with the cells. Note requires that + * partition_store_celllist() was called once before, or just before + * dumping the restart files.*/ + partition_restore_celllist(s, s->e->reparttype); + + /* Now re-distribute the particles, should just add to cells? */ + engine_redistribute(s->e); + + /* Make the proxies. */ + engine_makeproxies(s->e); } #endif /* WITH_MPI */ @@ -3219,3 +3238,112 @@ void space_clean(struct space *s) { free(s->gparts); free(s->sparts); } + +/** + * @brief Write the space struct and its contents to the given FILE as a + * stream of bytes. + * + * @param s the space + * @param stream the file stream + */ +void space_struct_dump(struct space *s, FILE *stream) { + + restart_write_blocks(s, sizeof(struct space), 1, stream, "space", + "space struct"); + + /* More things to write. */ + if (s->nr_parts > 0) { + restart_write_blocks(s->parts, s->nr_parts, sizeof(struct part), stream, + "parts", "parts"); + restart_write_blocks(s->xparts, s->nr_parts, sizeof(struct xpart), stream, + "xparts", "xparts"); + } + if (s->nr_gparts > 0) + restart_write_blocks(s->gparts, s->nr_gparts, sizeof(struct gpart), stream, + "gparts", "gparts"); + + if (s->nr_sparts > 0) + restart_write_blocks(s->sparts, s->nr_sparts, sizeof(struct spart), stream, + "sparts", "sparts"); +} + +/** + * @brief Re-create a space struct and its contents from the given FILE + * stream. + * + * @param s the space + * @param stream the file stream + */ +void space_struct_restore(struct space *s, FILE *stream) { + + restart_read_blocks(s, sizeof(struct space), 1, stream, NULL, "space struct"); + + /* Things that should be reconstructed in a rebuild. */ + s->cells_top = NULL; + s->cells_sub = NULL; + s->multipoles_top = NULL; + s->multipoles_sub = NULL; + s->local_cells_top = NULL; + s->grav_top_level = NULL; +#ifdef WITH_MPI + s->parts_foreign = NULL; + s->size_parts_foreign = 0; + s->gparts_foreign = NULL; + s->size_gparts_foreign = 0; + s->sparts_foreign = NULL; + s->size_sparts_foreign = 0; +#endif + + /* More things to read. */ + s->parts = NULL; + s->xparts = NULL; + if (s->nr_parts > 0) { + + /* Need the memory for these. */ + if (posix_memalign((void *)&s->parts, part_align, + s->size_parts * sizeof(struct part)) != 0) + error("Failed to allocate restore part array."); + if (posix_memalign((void *)&s->xparts, xpart_align, + s->size_parts * sizeof(struct xpart)) != 0) + error("Failed to allocate restore xpart array."); + + restart_read_blocks(s->parts, s->nr_parts, sizeof(struct part), stream, + NULL, "parts"); + restart_read_blocks(s->xparts, s->nr_parts, sizeof(struct xpart), stream, + NULL, "xparts"); + } + s->gparts = NULL; + if (s->nr_gparts > 0) { + if (posix_memalign((void *)&s->gparts, gpart_align, + s->size_gparts * sizeof(struct gpart)) != 0) + error("Failed to allocate restore gpart array."); + + restart_read_blocks(s->gparts, s->nr_gparts, sizeof(struct gpart), stream, + NULL, "gparts"); + } + + s->sparts = NULL; + if (s->nr_sparts > 0) { + if (posix_memalign((void *)&s->sparts, spart_align, + s->size_sparts * sizeof(struct spart)) != 0) + error("Failed to allocate restore spart array."); + + restart_read_blocks(s->sparts, s->nr_sparts, sizeof(struct spart), stream, + NULL, "sparts"); + } + + /* Need to reconnect the gravity parts to their hydro and star particles. */ + /* Re-link the parts. */ + if (s->nr_parts > 0 && s->nr_gparts > 0) + part_relink_parts_to_gparts(s->gparts, s->nr_gparts, s->parts); + + /* Re-link the sparts. */ + if (s->nr_sparts > 0 && s->nr_gparts > 0) + part_relink_sparts_to_gparts(s->gparts, s->nr_gparts, s->sparts); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that everything is correct */ + part_verify_links(s->parts, s->gparts, s->sparts, s->nr_parts, s->nr_gparts, + s->nr_sparts, 1); +#endif +} diff --git a/src/space.h b/src/space.h index 39e6e153a0dabb106384910a50550134fa74a07a..11cbaabdc9fc3ae17024c042ae868d464954d501 100644 --- a/src/space.h +++ b/src/space.h @@ -243,4 +243,7 @@ void space_reset_task_counters(struct space *s); void space_clean(struct space *s); void space_free_cells(struct space *s); +void space_struct_dump(struct space *s, FILE *stream); +void space_struct_restore(struct space *s, FILE *stream); + #endif /* SWIFT_SPACE_H */ diff --git a/src/swift.h b/src/swift.h index eda566b19697ceeb1d66b266b44f4769be227585..bd3c7fd5741b8c93f2931e1ec0f148d659249752 100644 --- a/src/swift.h +++ b/src/swift.h @@ -54,6 +54,7 @@ #include "potential.h" #include "profiler.h" #include "queue.h" +#include "restart.h" #include "runner.h" #include "scheduler.h" #include "serial_io.h" diff --git a/src/units.c b/src/units.c index c9038924fa540f7df86f44db885cc2ec28672a4e..5c50bb063b9411f47357407678458e6094f69e2b 100644 --- a/src/units.c +++ b/src/units.c @@ -39,6 +39,7 @@ /* Includes. */ #include "adiabatic_index.h" #include "error.h" +#include "restart.h" /** * @brief Initialises the unit_system structure with CGS system @@ -602,3 +603,25 @@ void units_print(const struct unit_system* us) { message("\tUnit Current: %g", us->UnitCurrent_in_cgs); message("\tUnit Temperature: %g", us->UnitTemperature_in_cgs); } + +/** + * @brief Write a units struct to the given FILE as a stream of bytes. + * + * @param us the units + * @param stream the file stream + */ +void units_struct_dump(const struct unit_system* us, FILE* stream) { + restart_write_blocks((void*)us, sizeof(struct unit_system), 1, stream, + "units", "units"); +} + +/** + * @brief Restore a units struct from the given FILE as a stream of bytes. + * + * @param us the units + * @param stream the file stream + */ +void units_struct_restore(const struct unit_system* us, FILE* stream) { + restart_read_blocks((void*)us, sizeof(struct unit_system), 1, stream, NULL, + "units"); +} diff --git a/src/units.h b/src/units.h index 657b29c070f3816293c18edfc46ad6b960ec9b33..5ac70a909a77146ba5f7a441d7747acfc80c3dfa 100644 --- a/src/units.h +++ b/src/units.h @@ -139,4 +139,8 @@ double units_conversion_factor(const struct unit_system* from, void units_print(const struct unit_system* us); +/* Dump/restore. */ +void units_struct_dump(const struct unit_system* us, FILE* stream); +void units_struct_restore(const struct unit_system* us, FILE* stream); + #endif /* SWIFT_UNITS_H */