diff --git a/examples/main.c b/examples/main.c
index 24a9412ecc76bb5262b9c402cf86661233d454dd..a0cd85c34e72c00253b331d4122959feac0b17d0 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -92,6 +92,7 @@ void print_help_message() {
   printf(" %2s %14s %s\n", "-P", "{sec:par:val}",
          "Set parameter value and overwrites values read from the parameters "
          "file. Can be used more than once.");
+  printf(" %2s %14s %s\n", "-r", "", "Continue using restart files.");
   printf(" %2s %14s %s\n", "-s", "", "Run with hydrodynamics.");
   printf(" %2s %14s %s\n", "-S", "", "Run with stars.");
   printf(" %2s %14s %s\n", "-t", "{int}",
@@ -161,6 +162,7 @@ int main(int argc, char *argv[]) {
   int dump_tasks = 0;
   int dump_threadpool = 0;
   int nsteps = -2;
+  int restart = 0;
   int with_cosmology = 0;
   int with_external_gravity = 0;
   int with_sourceterms = 0;
@@ -177,11 +179,12 @@ int main(int argc, char *argv[]) {
   int nparams = 0;
   char *cmdparams[PARSER_MAX_NO_OF_PARAMS];
   char paramFileName[200] = "";
+  char restartfile[200] = "";
   unsigned long long cpufreq = 0;
 
   /* Parse the parameters */
   int c;
-  while ((c = getopt(argc, argv, "acCdDef:FgGhMn:P:sSt:Tv:y:Y:")) != -1)
+  while ((c = getopt(argc, argv, "acCdDef:FgGhMn:P:rsSt:Tv:y:Y:")) != -1)
     switch (c) {
       case 'a':
 #if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA)
@@ -242,6 +245,9 @@ int main(int argc, char *argv[]) {
         cmdparams[nparams] = optarg;
         nparams++;
         break;
+      case 'r':
+        restart = 1;
+        break;
       case 's':
         with_hydro = 1;
         break;
@@ -458,6 +464,72 @@ int main(int argc, char *argv[]) {
   }
 #endif
 
+  /* Time to check if this is a restart and if so that we have all the
+   * necessary files and the restart conditions are acceptable. */
+  char restartname[PARSER_MAX_LINE_SIZE];
+  parser_get_param_string(params, "Restarts:basename", restartname);
+  char restartdir[PARSER_MAX_LINE_SIZE];
+  parser_get_param_string(params, "Restarts:subdir", restartdir);
+
+  if (restart) {
+    /* The directory must exist. */
+    if (access(restartdir, W_OK | X_OK) != 0) {
+      error("Cannot restart as no restart subdirectory: %s (%s)", restartdir,
+            strerror(errno));
+    }
+
+    /* Restart files. */
+    char **restartfiles = NULL;
+    int nrestartfiles = 0;
+
+    if (myrank == 0) {
+      /* Locate the restart files. These are defined in the parameter file
+       * (one reason for deferring until now). */
+
+      /* And enumerate all the restart files. */
+      restartfiles = restart_locate(restartdir, restartname, &nrestartfiles);
+      if (nrestartfiles == 0)
+        error("Failed to locate any restart files in %s", restartdir);
+
+      /* We need one file per rank. */
+      if (nrestartfiles != nr_nodes)
+        error("Incorrect number of restart files, expected %d found %d",
+              nr_nodes, nrestartfiles);
+
+      if (verbose > 0)
+        for (int i = 0; i < nrestartfiles; i++)
+          message("found restart file: %s", restartfiles[i]);
+    }
+
+    /* Distribute the restart files, need one for each rank. */
+#ifdef WITH_MPI
+    if (myrank == 0) {
+      /* Bounded copies, a located name can be longer than restartfile. */
+      for (int i = 1; i < nr_nodes; i++) {
+        snprintf(restartfile, sizeof(restartfile), "%s", restartfiles[i]);
+        MPI_Send(restartfile, 200, MPI_BYTE, i, 0, MPI_COMM_WORLD);
+      }
+
+      /* Keep local file. */
+      snprintf(restartfile, sizeof(restartfile), "%s", restartfiles[0]);
+
+      /* Finished with the list. */
+      restart_locate_free(nrestartfiles, restartfiles);
+
+    } else {
+      MPI_Recv(restartfile, 200, MPI_BYTE, 0, 0, MPI_COMM_WORLD,
+               MPI_STATUS_IGNORE);
+    }
+    if (verbose > 1)
+      message("local restart file = %s", restartfile);
+#else
+    /* Just one restart file, keep it and release the list. */
+    snprintf(restartfile, sizeof(restartfile), "%s", restartfiles[0]);
+    restart_locate_free(nrestartfiles, restartfiles);
+#endif
+
+  }
+
   /* Initialize unit system and constants */
   struct unit_system us;
   struct phys_const prog_const;
@@ -723,6 +795,15 @@ int main(int argc, char *argv[]) {
 
     /* Print the timers. */
     if (with_verbose_timers) timers_print(e.step);
+
+    /* Create restart files if required. */
+    if (restart_genname(restartdir, restartname,
+                        e.nodeID, restartfile, 200) == 0) {
+      restart_write(&e, restartfile);
+    } else {
+      error("Failed to generate restart filename");
+    }
+
 #ifdef SWIFT_DEBUG_TASKS
     /* Dump the task data using the given frequency. */
     if (dump_tasks && (dump_tasks == 1 || j % dump_tasks == 1)) {
diff --git a/src/Makefile.am b/src/Makefile.am
index df1ed0a670892ecd2a41b229a8707ffb993a7cc3..67f34458a1d783561dee4beaff3af4bdd65aa5f6 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -46,7 +46,8 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
     hydro_properties.h riemann.h threadpool.h cooling.h cooling_struct.h sourceterms.h \
     sourceterms_struct.h statistics.h memswap.h cache.h runner_doiact_vec.h profiler.h \
     dump.h logger.h active.h timeline.h xmf.h gravity_properties.h gravity_derivatives.h \
-    gravity_softened_derivatives.h vector_power.h collectgroup.h hydro_space.h sort_part.h
+    gravity_softened_derivatives.h vector_power.h collectgroup.h hydro_space.h sort_part.h \
+    restart.h
 
 # Common source files
 AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
@@ -57,8 +58,8 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
     runner_doiact_fft.c threadpool.c cooling.c sourceterms.c \
     statistics.c runner_doiact_vec.c profiler.c dump.c logger.c \
     part_type.c xmf.c gravity_properties.c gravity.c \
-    collectgroup.c hydro_space.c equation_of_state.c
-
+    collectgroup.c hydro_space.c equation_of_state.c \
+    restart.c
 
 # Include files for distribution, not installation.
nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
diff --git a/src/engine.c b/src/engine.c
index cef4e3e6f3aa00c40ba92e3980b9fc472b6898bc..e4655e94c95dad593d5f6b66e37af4a67e3a3bf1 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -64,6 +64,7 @@
 #include "partition.h"
 #include "profiler.h"
 #include "proxy.h"
+#include "restart.h"
 #include "runner.h"
 #include "serial_io.h"
 #include "single_io.h"
@@ -5624,3 +5625,78 @@ void engine_clean(struct engine *e) {
   space_clean(e->s);
   threadpool_clean(&e->threadpool);
 }
+
+/**
+ * @brief Write the engine struct itself to the given FILE as a stream of
+ * bytes. The structs it points to are listed in the body but not written yet.
+ *
+ * @param e the engine
+ * @param stream the file stream
+ */
+void engine_struct_dump(struct engine *e, FILE *stream) {
+
+  /* The engine, as one block of sizeof(struct engine) bytes. */
+  restart_write_block(e, sizeof(struct engine), stream, "engine struct");
+
+  /* TODO: the other pointers, these are to use their own save functions. */
+
+  /* space */
+  /* internal units */
+  /* snapshot units */
+  /* repartition */
+  /* physical constants */
+  /* hydro props */
+  /* gravity props */
+  /* external potential props */
+  /* cooling props */
+  /* sourceterm props */
+  /* parameters */
+
+
+}
+
+/**
+ * @brief Re-create an engine struct from the bytes in the given FILE stream.
+ * Only the struct itself is read, the items listed in the body are pending.
+ *
+ * @param e the engine
+ * @param stream the file stream
+ */
+void engine_struct_restore(struct engine *e, FILE *stream) {
+
+  /* The engine, sizeof(struct engine) bytes read back into *e. */
+  restart_read_block(e, sizeof(struct engine), stream, "engine struct");
+
+  /* TODO: re-initializations as necessary. */
+
+  /* runners */
+  /* scheduler */
+  /* threadpool */
+
+  /* stats file */
+  /* timesteps file */
+
+  /* barriers */
+
+  /* proxies */
+
+  /* links */
+
+
+  /* TODO: the other pointers, these are to use their own save functions. */
+  /* space */
+  /* internal units */
+  /* snapshot units */
+  /* repartition */
+  /* physical constants */
+  /* hydro props */
+  /* gravity props */
+  /* external potential props */
+  /* cooling props */
+  /* sourceterm props */
+  /* parameters */
+
+  /* Want to force a rebuild before using this engine. Wait to repartition. */
+  e->forcerebuild = 1;
+  e->forcerepart = 0;
+}
diff --git a/src/engine.h b/src/engine.h
index a571f2b24d57b2720c3c77ebd7600a3830e4d2a3..97a0c0521ab898c21e4dba1ea20019989609bb92 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -332,4 +332,9 @@ void engine_unpin();
 void engine_clean(struct engine *e);
 int engine_estimate_nr_tasks(struct engine *e);
 
+/* Struct dump/restore support. */
+void engine_struct_dump(struct engine *e, FILE *stream);
+void engine_struct_restore(struct engine *e, FILE *stream);
+
+
 #endif /* SWIFT_ENGINE_H */
diff --git a/src/restart.c b/src/restart.c
new file mode 100644
index 0000000000000000000000000000000000000000..6358908df84831c7b8fc1f1d948753851fd833ab
--- /dev/null
+++ b/src/restart.c
@@ -0,0 +1,158 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2018 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/**
+ * @file restart.c
+ * @brief support for SWIFT restarts
+ */
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Standard headers. */
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <glob.h>
+
+#include "engine.h"
+#include "error.h"
+#include "restart.h"
+
+/**
+ * @brief generate a name for a restart file.
+ *
+ * @param dir the directory of restart files.
+ * @param basename the basename of the restart files.
+ * @param nodeID a unique integer, usually the nodeID of the engine.
+ * @param name pointer to a string to hold the result.
+ * @param size length of name.
+ *
+ * @result 0 if the string was large enough, non-zero if not or on error.
+ */
+int restart_genname(const char *dir, const char *basename,
+                    int nodeID, char *name, int size) {
+  int n = snprintf(name, size, "%s/%s_%04d.hdf5", dir, basename, nodeID);
+  return (n < 0 || n >= size);
+}
+
+/**
+ * @brief locate all the restart files in the given directory with the given
+ * basename.
+ * @param dir the directory of restart files.
+ * @param basename the basename of the restart files.
+ * @param nfiles the number of restart files located.
+ *
+ * @result pointer to an array of strings with all the filenames found,
+ * release by calling restart_locate_free().
+ */
+char **restart_locate(const char *dir, const char *basename,
+                      int *nfiles) {
+  *nfiles = 0;
+
+  /* Construct the glob pattern, it only fits if snprintf reports < 200. */
+  char pattern[200];
+  if (snprintf(pattern, 200, "%s/%s_[0-9]*.hdf5", dir, basename) < 200) {
+    glob_t globbuf;
+    char **files = NULL;
+    if (glob(pattern, 0, NULL, &globbuf) == 0) {
+      *nfiles = globbuf.gl_pathc;
+      files = malloc(sizeof(char *) * *nfiles);
+      if (files == NULL) error("Failed to allocate restart file list");
+      for (int i = 0; i < *nfiles; i++) {
+        files[i] = strdup(globbuf.gl_pathv[i]);
+      }
+    }
+    globfree(&globbuf);
+    return files;
+  }
+  error("Failed to construct pattern to locate restart files");
+
+  return NULL;
+}
+
+/**
+ * @brief Release the memory allocated to hold the restart file names.
+ *
+ * @param nfiles the number of restart files located.
+ * @param files the list of filenames found in call to restart_locate().
+ */
+void restart_locate_free(int nfiles, char **files) {
+  for (int i = 0; i < nfiles; i++) {
+    free(files[i]);
+  }
+  free(files);
+}
+
+/**
+ * @brief Write a restart file for the given engine struct.
+ */
+void restart_write(struct engine *e, const char *filename) {
+  FILE *stream = fopen(filename, "w");
+  if (stream == NULL)
+    error("Failed to open restart file: %s (%s)", filename, strerror(errno));
+
+  engine_struct_dump(e, stream);
+  if (fclose(stream) != 0)
+    error("Failed to close restart file: %s (%s)", filename, strerror(errno));
+}
+
+/**
+ * @brief Read a restart file to construct a saved engine.
+ */
+void restart_read(struct engine *e, const char *filename) {
+  FILE *stream = fopen(filename, "r");
+  if (stream == NULL)
+    error("Failed to open restart file: %s (%s)", filename, strerror(errno));
+
+  engine_struct_restore(e, stream);
+  fclose(stream);
+}
+
+/**
+ * @brief Read a block of memory from a file stream into a memory location.
+ * Exits the application if the read fails.
+ *
+ * @param ptr pointer to the memory
+ * @param size the number of bytes to read
+ * @param stream the file stream
+ * @param errstr a context string to qualify any errors.
+ */
+void restart_read_block(void *ptr, size_t size, FILE *stream, const char *errstr) {
+  size_t nread = fread(ptr, size, 1, stream);
+  if (nread != 1)
+    error("Failed to restore %s from restart file (%s)", errstr,
+          ferror(stream) ? strerror(errno) : "unexpected end of file");
+}
+
+/**
+ * @brief Write a block of memory to a file stream from a memory location.
+ * Exits the application if the write fails.
+ *
+ * @param ptr pointer to the memory
+ * @param size the number of bytes to write
+ * @param stream the file stream
+ * @param errstr a context string to qualify any errors.
+ */
+void restart_write_block(void *ptr, size_t size, FILE *stream, const char *errstr) {
+  size_t nwrite = fwrite(ptr, size, 1, stream);
+  if (nwrite != 1)
+    error("Failed to save %s to restart file (%s)", errstr, strerror(errno));
+}
diff --git a/src/restart.h b/src/restart.h
new file mode 100644
index 0000000000000000000000000000000000000000..226e65f6b760090818cbf10614827860e40cc512
--- /dev/null
+++ b/src/restart.h
@@ -0,0 +1,38 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2018 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_RESTART_H
+#define SWIFT_RESTART_H
+
+/* Standard headers, FILE and size_t are used in the prototypes below. */
+#include <stdio.h>
+
+/* Forward declaration so that this header can be included on its own. */
+struct engine;
+
+void restart_write(struct engine *e, const char *filename);
+void restart_read(struct engine *e, const char *filename);
+
+char **restart_locate(const char *dir, const char *basename, int *nfiles);
+void restart_locate_free(int nfiles, char **files);
+int restart_genname(const char *dir, const char *basename,
+                    int nodeID, char *name, int size);
+void restart_read_block(void *ptr, size_t size, FILE *stream, const char *errstr);
+void restart_write_block(void *ptr, size_t size, FILE *stream, const char *errstr);
+
+#endif /* SWIFT_RESTART_H */
diff --git a/src/swift.h b/src/swift.h
index 33a0425154d45e030443bc7f2c405377ef6a39e2..d8acaa45754a898fbaa3116015417717bf6f8ca1 100644
--- a/src/swift.h
+++ b/src/swift.h
@@ -53,6 +53,7 @@
 #include "potential.h"
 #include "profiler.h"
 #include "queue.h"
+#include "restart.h"
 #include "runner.h"
 #include "scheduler.h"
 #include "serial_io.h"