diff --git a/configure.ac b/configure.ac index 44d21dc30d408260a11a7b7b100df455b98295d5..6abf5a35bb102c6bcfd1716b2506a4ed0ec82905 100644 --- a/configure.ac +++ b/configure.ac @@ -236,6 +236,7 @@ AX_PTHREAD([LIBS="$PTHREAD_LIBS $LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS" # Check for metis. Note AX_LIB_METIS exists, but cannot be configured # to be default off (i.e. given no option it tries to locate METIS), so we # don't use that. +have_metis="no" AC_ARG_WITH([metis], [AS_HELP_STRING([--with-metis=PATH], [root directory where metis is installed @<:@yes/no@:>@] @@ -251,6 +252,7 @@ if test "x$with_metis" != "xno"; then METIS_LIBS="-lmetis" METIS_INCS="" fi + have_metis="yes" AC_CHECK_LIB([metis],[METIS_PartGraphKway], AC_DEFINE([HAVE_METIS],1,[The metis library appears to be present.]), AC_MSG_ERROR(something is wrong with the metis library!),$METIS_LIBS) @@ -380,7 +382,7 @@ AC_MSG_RESULT([ MPI enabled : $enable_mpi HDF5 enabled : $with_hdf5 - parallel : $have_parallel_hdf5 - Metis enabled : $with_metis + Metis enabled : $have_metis libNUMA enabled : $have_numa ]) diff --git a/examples/Makefile.am b/examples/Makefile.am index 44d1b22333767f0d4b4282c7c054a70aa7a114be..b2c09e54800c556ba03bdb1e2436af3144263dcb 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -20,7 +20,7 @@ MYFLAGS = -DTIMER # Add the source directory and debug to CFLAGS -AM_CFLAGS = -I../src -DCPU_TPS=2.67e9 $(HDF5_CPPFLAGS) +AM_CFLAGS = -I../src $(HDF5_CPPFLAGS) AM_LDFLAGS = diff --git a/examples/main.c b/examples/main.c index a28faa628b856b3f1917331aecc6b95835c628b3..77cb1fbd494c1746d9c7c8be496e51eb5546ce9a 100644 --- a/examples/main.c +++ b/examples/main.c @@ -46,11 +46,6 @@ /* Local headers. */ #include "swift.h" -/* Ticks per second on this machine. */ -#ifndef CPU_TPS -#define CPU_TPS 2.40e9 -#endif - /* Engine policy flags. */ #ifndef ENGINE_POLICY #define ENGINE_POLICY engine_policy_none @@ -75,13 +70,14 @@ int main(int argc, char *argv[]) { struct space s; struct engine e; struct UnitSystem us; + struct clocks_time tic, toc; char ICfileName[200] = ""; char dumpfile[30]; float dt_max = 0.0f, dt_min = 0.0f; - ticks tic; int nr_nodes = 1, myrank = 0; FILE *file_thread; int with_outputs = 1; + unsigned long long cpufreq = 0; #ifdef WITH_MPI struct partition initial_partition; @@ -154,7 +150,7 @@ int main(int argc, char *argv[]) { bzero(&s, sizeof(struct space)); /* Parse the options */ - while ((c = getopt(argc, argv, "a:c:d:e:f:m:oP:q:R:s:t:w:y:z:")) != -1) + while ((c = getopt(argc, argv, "a:c:d:e:f:h:m:oP:q:R:s:t:w:y:z:")) != -1) switch (c) { case 'a': if (sscanf(optarg, "%lf", &scaling) != 1) @@ -171,7 +167,7 @@ int main(int argc, char *argv[]) { case 'd': if (sscanf(optarg, "%f", &dt_min) != 1) error("Error parsing minimal timestep."); - if (myrank == 0) message("dt_min set to %e.", dt_max); + if (myrank == 0) message("dt_min set to %e.", dt_min); fflush(stdout); break; case 'e': @@ -183,6 +179,12 @@ int main(int argc, char *argv[]) { case 'f': if (!strcpy(ICfileName, optarg)) error("Error parsing IC file name."); break; + case 'h': + if (sscanf(optarg, "%llu", &cpufreq) != 1) + error("Error parsing CPU frequency."); + if (myrank == 0) message("CPU frequency set to %llu.", cpufreq); + fflush(stdout); + break; case 'm': if (sscanf(optarg, "%lf", &h_max) != 1) error("Error parsing h_max."); if (myrank == 0) message("maximum h set to %e.", h_max); @@ -192,8 +194,8 @@ int main(int argc, char *argv[]) { with_outputs = 0; break; case 'P': - /* Partition type is one of "g", "m", "w", or "v"; "g" can be - * followed by three numbers defining the grid. */ +/* Partition type is one of "g", "m", "w", or "v"; "g" can be + * followed by three numbers defining the grid. */ #ifdef WITH_MPI switch (optarg[0]) { case 'g': @@ -224,7 +226,7 @@ int main(int argc, char *argv[]) { error("Error parsing number of queues."); break; case 'R': - /* Repartition type "n", "b", "v", "e" or "x". + /* Repartition type "n", "b", "v", "e" or "x". * Note only none is available without METIS. */ #ifdef WITH_MPI switch (optarg[0]) { @@ -323,6 +325,12 @@ int main(int argc, char *argv[]) { aFactor(&us, UNIT_CONV_ENTROPY), hFactor(&us, UNIT_CONV_ENTROPY)); } + /* Initialize CPU frequency. */ + clocks_set_cpufreq(cpufreq); + cpufreq = clocks_get_cpufreq(); + if (myrank == 0) + message("CPU frequency used for tick conversion: %llu Hz", cpufreq); + /* Check we have sensible time step bounds */ if (dt_min > dt_max) error("Minimal time step size must be large than maximal time step size "); @@ -332,7 +340,9 @@ int main(int argc, char *argv[]) { error("An IC file name must be provided via the option -f"); /* Read particles and space information from (GADGET) IC */ - tic = getticks(); + + if (myrank == 0) + clocks_gettime(&tic); #if defined(WITH_MPI) #if defined(HAVE_PARALLEL_HDF5) read_ic_parallel(ICfileName, dim, &parts, &N, &periodic, myrank, nr_nodes, @@ -345,10 +355,12 @@ int main(int argc, char *argv[]) { read_ic_single(ICfileName, dim, &parts, &N, &periodic); #endif - if (myrank == 0) - message("reading particle properties took %.3f ms.", - ((double)(getticks() - tic)) / CPU_TPS * 1000); - fflush(stdout); + if (myrank == 0) { + clocks_gettime(&toc); + message("reading particle properties took %.3f %s.", + clocks_diff(&tic, &toc), clocks_getunit()); + fflush(stdout); + } #if defined(WITH_MPI) long long N_long = N; @@ -374,12 +386,15 @@ int main(int argc, char *argv[]) { if (nr_queues < 0) nr_queues = nr_threads; /* Initialize the space with this data. */ - tic = getticks(); - space_init(&s, dim, parts, N, periodic, h_max, myrank == 0); if (myrank == 0) - message("space_init took %.3f ms.", - ((double)(getticks() - tic)) / CPU_TPS * 1000); - fflush(stdout); + clocks_gettime(&tic); + space_init(&s, dim, parts, N, periodic, h_max, myrank == 0); + if (myrank == 0) { + clocks_gettime(&toc); + message("space_init took %.3f %s.", clocks_diff(&tic, &toc), + clocks_getunit()); + fflush(stdout); + } /* Say a few nice things about the space we just created. */ if (myrank == 0) { @@ -408,15 +423,18 @@ int main(int argc, char *argv[]) { } /* Initialize the engine with this space. */ - tic = getticks(); + if (myrank == 0) + clocks_gettime(&tic); if (myrank == 0) message("nr_nodes is %i.", nr_nodes); engine_init(&e, &s, dt_max, nr_threads, nr_queues, nr_nodes, myrank, ENGINE_POLICY | engine_policy_steal | engine_policy_hydro, 0, time_end, dt_min, dt_max); - if (myrank == 0) - message("engine_init took %.3f ms.", - ((double)(getticks() - tic)) / CPU_TPS * 1000); - fflush(stdout); + if (myrank == 0) { + clocks_gettime(&toc); + message("engine_init took %.3f %s.", clocks_diff(&tic, &toc), + clocks_getunit()); + fflush(stdout); + } #ifdef WITH_MPI /* Split the space. */ @@ -427,7 +445,8 @@ int main(int argc, char *argv[]) { if (with_outputs) { /* Write the state of the system as it is before starting time integration. */ - tic = getticks(); + if (myrank == 0) + clocks_gettime(&tic); #if defined(WITH_MPI) #if defined(HAVE_PARALLEL_HDF5) write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, @@ -439,10 +458,12 @@ int main(int argc, char *argv[]) { #else write_output_single(&e, &us); #endif - if (myrank == 0) - message("writing particle properties took %.3f ms.", - ((double)(getticks() - tic)) / CPU_TPS * 1000); - fflush(stdout); + if (myrank == 0) { + clocks_gettime(&toc); + message("writing particle properties took %.3f %s.", + clocks_diff(&tic, &toc), clocks_getunit()); + fflush(stdout); + } } /* Init the runner history. */ @@ -465,10 +486,10 @@ int main(int argc, char *argv[]) { if (myrank == 0) printf( "# Step Time time-step Number of updates CPU Wall-clock time " - "[ms]\n"); + "[%s]\n", clocks_getunit()); /* Let loose a runner on the space. */ - for (j = 0; e.time < time_end; j++) { + for (j = 0; !engine_is_done(&e); j++) { /* Repartition the space amongst the nodes? */ #ifdef WITH_MPI @@ -571,14 +592,14 @@ int main(int argc, char *argv[]) { * e.count_step, */ /* e.dt_min, e.dt_max); */ /* for (k = 0; k < timer_count; k++) */ - /* printf(" %.3f", ((double)timers[k]) / CPU_TPS * 1000); */ + /* printf(" %.3f", clocks_from_ticks(timers[k]); */ /* printf("\n"); */ /* fflush(stdout); */ /* } */ /* if (myrank == 0) { */ /* printf("%i %e", j, e.time); */ - /* printf(" %.3f", ((double)timers[timer_count - 1]) / CPU_TPS * 1000); */ + /* printf(" %.3f", clocks_from_ticks(timers[timer_count - 1]); */ /* printf("\n"); */ /* fflush(stdout); */ /* } */ diff --git a/src/Makefile.am b/src/Makefile.am index f4b293925bfcfdf989889db768d60e3de778c985..8a4e7cdc4b849e8f99bdf9a3dbda9b30fa986332 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -17,7 +17,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # Add the debug flag to the whole thing -AM_CFLAGS = -DTIMER -DCOUNTER -DCPU_TPS=2.30e9 $(HDF5_CPPFLAGS) +AM_CFLAGS = -DTIMER -DCOUNTER $(HDF5_CPPFLAGS) # Assign a "safe" version number AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) $(HDF5_LDFLAGS) -version-info 0:0:0 # -fsanitize=address @@ -35,13 +35,13 @@ endif # List required headers include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ engine.h swift.h serial_io.h timers.h debug.h scheduler.h proxy.h parallel_io.h \ - common_io.h single_io.h multipole.h map.h tools.h partition.h + common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h # Common source files AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \ units.c common_io.c single_io.c multipole.c version.c map.c \ - kernel.c tools.c part.c partition.c + kernel.c tools.c part.c partition.c clocks.c # Include files for distribution, not installation. nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel.h vector.h \ diff --git a/src/clocks.c b/src/clocks.c new file mode 100644 index 0000000000000000000000000000000000000000..2d2cac90ddb5e7dca1fb5feddb1c8086c57458d8 --- /dev/null +++ b/src/clocks.c @@ -0,0 +1,224 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/** + * @file clocks.c + * @brief support for measuring intervals in milli seconds, when that + * is possible, otherwise ticks. + * + * Use cycle.h or timers.h for relative times. + */ + +/* Config parameters. */ +#include "../config.h" + +/* Standard headers. */ +#include <stdio.h> +#include <unistd.h> + +/* Local headers. */ +#include "clocks.h" + +/* 0.25 of a second in nanoseconds. */ +#define SLEEPTIME 250000000 + +/* The CPU frequency used to convert ticks to seconds. */ +static unsigned long long clocks_cpufreq = 0; + +/* The units of any returned times. */ +static char *clocks_units[] = {"ms", "ticks"}; +static int clocks_units_index = 0; +static double clocks_units_scale = 1000.0; + +/* Local prototypes. */ +static void clocks_estimate_cpufreq(); + +/** + * @brief Get the current time. + * + * @param time the current time. + */ +void clocks_gettime(struct clocks_time *time) { + +#ifdef HAVE_CLOCK_GETTIME + clock_gettime(CLOCK_REALTIME, &time->time); +#else + time->time = getticks(); +#endif +} + +/** + * @brief Get difference in between two times. + * + * @param start the start time. + * @param end the end time. + * + * @return the difference. + */ +double clocks_diff(struct clocks_time *start, struct clocks_time *end) { +#ifdef HAVE_CLOCK_GETTIME + struct timespec temp; + if ((end->time.tv_nsec - start->time.tv_nsec) < 0) { + temp.tv_sec = end->time.tv_sec - start->time.tv_sec - 1; + temp.tv_nsec = 1000000000 + end->time.tv_nsec - start->time.tv_nsec; + } else { + temp.tv_sec = end->time.tv_sec - start->time.tv_sec; + temp.tv_nsec = end->time.tv_nsec - start->time.tv_nsec; + } + return (double)temp.tv_sec * 1000.0 + (double)temp.tv_nsec * 1.0E-6; +#else + return elapsed(end->time, start->time) / clocks_get_cpufreq() * clocks_units_scale; +#endif +} + +/** + * @brief Set the CPU frequency. + * + * This function should be called at least once to set the CPU frequency. + * To use the builtin estimation techniques give a value of 0. + * + * @param freq the CPU frequency in Hz or 0 to estimate one. + */ +void clocks_set_cpufreq(unsigned long long freq) { + if (freq > 0) { + clocks_cpufreq = freq; + } else { + clocks_estimate_cpufreq(); + } +} + +/** + * @brief Get the CPU frequency in Hz. + * + * @result the CPU frequency. + */ +unsigned long long clocks_get_cpufreq() { + + if (clocks_cpufreq > 0) return clocks_cpufreq; + + /* It not already set estimate it. */ + clocks_estimate_cpufreq(); + return clocks_cpufreq; +} + +/** + * @brief Estimate the CPU frequency in Hz. + * + * If already set return the CPU frequency, then estimate the CPU frequency. + * + * The technique is either use a clock timed nanosleep (this was the best + * method on i7), to read the value from the cpuinfo_max_freq + * file (probably a overestimate) or finally just use a value of 1 with + * time units of ticks. + */ +static void clocks_estimate_cpufreq() { + +#ifdef HAVE_CLOCK_GETTIME + /* Try to time a nanosleep() in ticks. */ + struct clocks_time time1; + struct clocks_time time2; + + struct timespec sleep; + sleep.tv_sec = 0; + sleep.tv_nsec = SLEEPTIME; + + clocks_gettime(&time1); + ticks tic = getticks(); + + /* Could do some calculation, but constant_tsc should protect us. */ + nanosleep(&sleep, NULL); + + clocks_gettime(&time2); + ticks toc = getticks(); + double realsleep = clocks_diff(&time1, &time2); + + clocks_cpufreq = + (signed long long)(double)(toc - tic) * 1.0 / realsleep * 1000.0; + clocks_units_index = 0; + clocks_units_scale = 1000.0; +#endif + +/* Look for the system value, if available. Tends to be too large. */ +#ifdef __linux__ + if (clocks_cpufreq == 0) { + FILE *file = + fopen("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r"); + if (file != NULL) { + unsigned long long maxfreq; + if (fscanf(file, "%llu", &maxfreq) == 1) { + clocks_cpufreq = maxfreq * 1000; + clocks_units_index = 0; + clocks_units_scale = 1000.0; + } + fclose(file); + } + } +#endif + + /* If all fails just report ticks in any times. */ + if (clocks_cpufreq == 0) { + clocks_cpufreq = 1; + clocks_units_index = 1; + clocks_units_scale = 1.0; + } +} + +/** + * @brief Return the difference between two ticks. + * + * Only an approximation as based on how well we have estimated the + * rtc frequency. Should be good for machines that support constant_rtc + * and clock_gettime(). + * + * @param tic a number of ticks returned by the cycle.h getticks() function. + * @param toc a number of ticks returned by the cycle.h getticks() function. + * + * @result the difference. + */ +double clocks_diff_ticks(ticks tic, ticks toc) { + return clocks_from_ticks(tic - toc); +} + +/** + * @brief Convert a number of ticks into milli seconds, if possible. + * + * Only an approximation as based on how well we have estimated the + * rtc frequency. Should be good for machines that support constant_rtc + * and clock_gettime(), and reasonable for most Linux machines, otherwise + * ticks will just be returned. See clocks_getunit() for the actual units. + * + * @param tics a number of ticks returned by the cycle.h getticks() function. + * + * @result the milli seconds, if possible. + */ +double clocks_from_ticks(ticks tics) { + return ((double)tics / (double)clocks_get_cpufreq() * clocks_units_scale); +} + +/** + * @brief return the time units. + * + * Normally "ms" for milliseconds, but can be "ticks" when no conversion + * factor is available. + * + * @result the current time units. + */ +const char *clocks_getunit() { + return clocks_units[clocks_units_index]; +} diff --git a/src/clocks.h b/src/clocks.h new file mode 100644 index 0000000000000000000000000000000000000000..59b9f52b0303ac74dd237c7fd19cf689cac99cc3 --- /dev/null +++ b/src/clocks.h @@ -0,0 +1,43 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_CLOCKS_H +#define SWIFT_CLOCKS_H + +#include <time.h> +#include "cycle.h" + +/* Struct to record a time for the clocks functions. */ +struct clocks_time { +#ifdef HAVE_CLOCK_GETTIME + struct timespec time; +#else + ticks time; +#endif +}; + +void clocks_gettime(struct clocks_time *time); +double clocks_diff(struct clocks_time *start, struct clocks_time *end); +const char *clocks_getunit(); + +void clocks_set_cpufreq(unsigned long long freq); +unsigned long long clocks_get_cpufreq(); +double clocks_from_ticks(ticks tics); +double clocks_diff_ticks(ticks tic, ticks toc); + +#endif /* SWIFT_CLOCKS_H */ diff --git a/src/engine.c b/src/engine.c index 272d5c0c9ff07b98befed3c68810d4c909e78c57..3c8c1f60a1529735d8dfe9992a6538197d90ba9e 100644 --- a/src/engine.c +++ b/src/engine.c @@ -46,6 +46,7 @@ /* Local headers. */ #include "atomic.h" #include "cell.h" +#include "clocks.h" #include "cycle.h" #include "debug.h" #include "error.h" @@ -285,7 +286,6 @@ void engine_redistribute(struct engine *e) { #endif } - /** * @brief Repartition the cells amongst the nodes. * @@ -1116,7 +1116,7 @@ int engine_marktasks(struct engine *e) { } } - // message( "took %.3f ms." , (double)(getticks() - tic)/CPU_TPS*1000 ); + // message( "took %.3f %s." , clocks_from_ticks(getticks() - tic), clocks_getunit()); /* All is well... */ return 0; @@ -1167,29 +1167,29 @@ void engine_rebuild(struct engine *e) { /* Re-build the space. */ // tic = getticks(); space_rebuild(e->s, 0.0, e->nodeID == 0); -// message( "space_rebuild took %.3f ms." , (double)(getticks() - -// tic)/CPU_TPS*1000 ); + // message( "space_rebuild took %.3f %s." , + //clocks_from_ticks(getticks() - tic), clocks_getunit()); /* If in parallel, exchange the cell structure. */ #ifdef WITH_MPI // tic = getticks(); engine_exchange_cells(e); -// message( "engine_exchange_cells took %.3f ms." , (double)(getticks() - -// tic)/CPU_TPS*1000 ); + // message( "engine_exchange_cells took %.3f %s." , + //clocks_from_ticks(getticks() - tic), clocks_getunit()); #endif /* Re-build the tasks. */ // tic = getticks(); engine_maketasks(e); - // message( "engine_maketasks took %.3f ms." , (double)(getticks() - - // tic)/CPU_TPS*1000 ); + // message( "engine_maketasks took %.3f %s." , + //clocks_from_ticks(getticks() - tic), clocks_getunit()); /* Run through the tasks and mark as skip or not. */ // tic = getticks(); if (engine_marktasks(e)) error("engine_marktasks failed after space_rebuild."); - // message( "engine_marktasks took %.3f ms." , (double)(getticks() - - // tic)/CPU_TPS*1000 ); + // message( "engine_marktasks took %.3f %s." , + //clocks_from_ticks(getticks() - tic), clocks_getunit()); /* Print the status of the system */ engine_print(e); @@ -1210,8 +1210,8 @@ void engine_prepare(struct engine *e) { /* Run through the tasks and mark as skip or not. */ // tic = getticks(); rebuild = (e->forcerebuild || engine_marktasks(e)); -// message( "space_marktasks took %.3f ms." , (double)(getticks() - -// tic)/CPU_TPS*1000 ); + // message( "space_marktasks took %.3f %s." , + //clocks_from_ticks(getticks() - tic), clocks_getunit()); /* Collect the values of rebuild from all nodes. */ #ifdef WITH_MPI @@ -1221,8 +1221,8 @@ void engine_prepare(struct engine *e) { MPI_SUCCESS) error("Failed to aggregate the rebuild flag across nodes."); rebuild = buff; -// message( "rebuild allreduce took %.3f ms." , (double)(getticks() - -// tic)/CPU_TPS*1000 ); + // message( "rebuild allreduce took %.3f %s." , + //clocks_from_ticks(getticks() - tic), clocks_getunit()); #endif e->tic_step = getticks(); @@ -1230,16 +1230,16 @@ void engine_prepare(struct engine *e) { if (rebuild) { // tic = getticks(); engine_rebuild(e); - // message( "engine_rebuild took %.3f ms." , (double)(getticks() - - // tic)/CPU_TPS*1000 ); + // message( "engine_rebuild took %.3f %s." , + //clocks_from_ticks(getticks() - tic), clocks_getunit()); } /* Re-rank the tasks every now and then. */ if (e->tasks_age % engine_tasksreweight == 1) { // tic = getticks(); scheduler_reweight(&e->sched); - // message( "scheduler_reweight took %.3f ms." , (double)(getticks() - - // tic)/CPU_TPS*1000 ); + // message( "scheduler_reweight took %.3f %s." , + //clocks_from_ticks(getticks() -tic), clocks_getunit()); } e->tasks_age += 1; @@ -1393,7 +1393,7 @@ void engine_init_particles(struct engine *e) { struct space *s = e->s; - if(e->nodeID == 0) message("Initialising particles"); + if (e->nodeID == 0) message("Initialising particles"); /* Make sure all particles are ready to go */ /* i.e. clean-up any stupid state in the ICs */ @@ -1482,6 +1482,9 @@ void engine_step(struct engine *e) { TIMER_TIC2; + struct clocks_time time1, time2; + clocks_gettime(&time1); + /* Collect the cell data. */ for (k = 0; k < s->nr_cells; k++) if (s->cells[k].nodeID == e->nodeID) { @@ -1620,11 +1623,20 @@ void engine_step(struct engine *e) { TIMER_TOC2(timer_step); - e->wallclock_time = ((double)timers[timer_count - 1]) / CPU_TPS * 1000; + clocks_gettime(&time2); + + e->wallclock_time = (float) clocks_diff(&time1, &time2); // printParticle(e->s->parts, e->s->xparts,1000, e->s->nr_parts); // printParticle(e->s->parts, e->s->xparts,515050, e->s->nr_parts); } +/** + * @brief Returns 1 if the simulation has reached its end point, 0 otherwise + */ +int engine_is_done(struct engine *e) { + return !(e->ti_current < max_nr_timesteps); +} + /** * @brief Create and fill the proxies. * @@ -1831,7 +1843,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, int home = numa_node_of_cpu(sched_getcpu()), half = nr_cores / 2; bool done = false, swap_hyperthreads = hyperthreads_present(); if (swap_hyperthreads && nodeID == 0) - message("prefer physical cores to hyperthreads"); + message("prefer physical cores to hyperthreads"); while (!done) { done = true; @@ -1928,15 +1940,25 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, engine_print_policy(e); /* Print information about the hydro scheme */ - if (e->nodeID == 0) - message("Hydrodynamic scheme: %s", SPH_IMPLEMENTATION); + if (e->nodeID == 0) message("Hydrodynamic scheme: %s", SPH_IMPLEMENTATION); + + /* Check we have sensible time bounds */ + if (timeBegin >= timeEnd) + error( + "Final simulation time (t_end = %e) must be larger than the start time " + "(t_beg = %e)", + timeEnd, timeBegin); + + /* Check we have sensible time step bounds */ + if (e->dt_min > e->dt_max) + error( + "Minimal time step size must be smaller than maximal time step size "); /* Deal with timestep */ e->timeBase = (timeEnd - timeBegin) / max_nr_timesteps; e->ti_current = 0; - if (e->nodeID == 0) - message("Absolute minimal timestep size: %e", e->timeBase); + /* Fixed time-step case */ if ((e->policy & engine_policy_fixdt) == engine_policy_fixdt) { e->dt_min = e->dt_max; @@ -1947,11 +1969,32 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, e->dt_min = e->dt_max = dti_timeline * e->timeBase; if (e->nodeID == 0) message("Timestep set to %e", e->dt_max); + } else { + + if (e->nodeID == 0) { + message("Absolute minimal timestep size: %e", e->timeBase); + + float dt_min = timeEnd - timeBegin; + while (dt_min > e->dt_min) dt_min /= 2.f; + + message("Minimal timestep size (on time-line): %e", dt_min); + + float dt_max = timeEnd - timeBegin; + while (dt_max > e->dt_max) dt_max /= 2.f; + + message("Maximal timestep size (on time-line): %e", dt_max); + } } if (e->dt_min < e->timeBase && e->nodeID == 0) - error("Minimal timestep smaller than the absolue possible minimum dt=%e", - e->timeBase); + error( + "Minimal time-step size smaller than the absolute possible minimum " + "dt=%e", + e->timeBase); + + if (e->dt_max > (e->timeEnd - e->timeBegin) && e->nodeID == 0) + error("Maximal time-step size larger than the simulation run time t=%e", + e->timeEnd - e->timeBegin); /* Construct types for MPI communications */ #ifdef WITH_MPI diff --git a/src/engine.h b/src/engine.h index 13c7ec40612713d3771543150763da6924144c1a..cd189f6ac64b0809eec7f9db8cba898a6377cf27 100644 --- a/src/engine.h +++ b/src/engine.h @@ -63,7 +63,6 @@ extern const char *engine_policy_names[]; #define engine_maxproxies 64 #define engine_tasksreweight 10 - /* The rank of the engine as a global variable (for messages). */ extern int engine_rank; @@ -186,5 +185,6 @@ void engine_makeproxies(struct engine *e); void engine_redistribute(struct engine *e); struct link *engine_addlink(struct engine *e, struct link *l, struct task *t); void engine_print_policy(struct engine *e); +int engine_is_done(struct engine *e); #endif /* SWIFT_ENGINE_H */ diff --git a/src/error.h b/src/error.h index 14c6518efa99b4c5b2a5862619426f30d7f69b7b..2e9a6804303a9b60123403f7a5bbacfc40f9366c 100644 --- a/src/error.h +++ b/src/error.h @@ -56,25 +56,25 @@ extern int engine_rank; * followed by the MPI error string and aborts. * */ -#define mpi_error(res,s, ...) \ +#define mpi_error(res, s, ...) \ { \ fprintf(stderr, "[%03i] %s:%s():%i: " s "\n", engine_rank, __FILE__, \ __FUNCTION__, __LINE__, ##__VA_ARGS__); \ int len = 1024; \ char buf[len]; \ - MPI_Error_string( res, buf, &len ); \ - fprintf(stderr, "%s\n\n", buf ); \ + MPI_Error_string(res, buf, &len); \ + fprintf(stderr, "%s\n\n", buf); \ MPI_Abort(MPI_COMM_WORLD, -1); \ } -#define mpi_error_string(res,s, ...) \ +#define mpi_error_string(res, s, ...) \ { \ fprintf(stderr, "[%03i] %s:%s():%i: " s "\n", engine_rank, __FILE__, \ __FUNCTION__, __LINE__, ##__VA_ARGS__); \ int len = 1024; \ char buf[len]; \ - MPI_Error_string( res, buf, &len ); \ - fprintf(stderr, "%s\n\n", buf ); \ + MPI_Error_string(res, buf, &len); \ + fprintf(stderr, "%s\n\n", buf); \ } #endif diff --git a/src/partition.c b/src/partition.c index f1fe2b2a8436994de13501dc4ce6714ec1d2e92d..0f8eb3ebe334d71228510307dd9ccc4e56e234b3 100644 --- a/src/partition.c +++ b/src/partition.c @@ -58,11 +58,8 @@ /* Simple descriptions of initial partition types for reports. */ const char *initial_partition_name[] = { - "gridded cells", - "vectorized point associated cells", - "METIS particle weighted cells", - "METIS unweighted cells" -}; + "gridded cells", "vectorized point associated cells", + "METIS particle weighted cells", "METIS unweighted cells"}; /* Simple descriptions of repartition types for reports. */ const char *repartition_name[] = { @@ -70,8 +67,7 @@ const char *repartition_name[] = { "METIS edge and vertex time weighted cells", "METIS particle count vertex weighted cells", "METIS time edge weighted cells", - "METIS particle count vertex and time edge cells" -}; + "METIS particle count vertex and time edge cells"}; /* Local functions, if needed. */ static int check_complete(struct space *s, int verbose, int nregions); @@ -229,7 +225,6 @@ static void graph_init_metis(struct space *s, idx_t *adjncy, idx_t *xadj) { if (xadj != NULL) { xadj[0] = 0; for (int k = 0; k < s->nr_cells; k++) xadj[k + 1] = xadj[k] + 26; - } } #endif @@ -283,8 +278,7 @@ static void accumulate_counts(struct space *s, int *counts) { */ static void split_metis(struct space *s, int nregions, int *celllist) { - for (int i = 0; i < s->nr_cells; i++) - s->cells[i].nodeID = celllist[i]; + for (int i = 0; i < s->nr_cells; i++) s->cells[i].nodeID = celllist[i]; } #endif @@ -404,7 +398,6 @@ static void pick_metis(struct space *s, int nregions, int *vertexw, int *edgew, free(xadj); free(adjncy); free(regionid); - } #endif @@ -423,10 +416,9 @@ static void pick_metis(struct space *s, int nregions, int *vertexw, int *edgew, * @param tasks the completed tasks from the last engine step for our node. * @param nr_tasks the number of tasks. */ -static void repart_edge_metis(int partweights, int bothweights, - int nodeID, int nr_nodes, struct space *s, - struct task *tasks, int nr_tasks) { - +static void repart_edge_metis(int partweights, int bothweights, int nodeID, + int nr_nodes, struct space *s, struct task *tasks, + int nr_tasks) { /* Create weight arrays using task ticks for vertices and edges (edges * assume the same graph structure as used in the part_ calls). */ @@ -453,7 +445,7 @@ static void repart_edge_metis(int partweights, int bothweights, bzero(weights_v, sizeof(int) * nr_cells); } if ((weights_e = (int *)malloc(sizeof(int) * 26 * nr_cells)) == NULL) - error("Failed to allocate edge weights arrays."); + error("Failed to allocate edge weights arrays."); bzero(weights_e, sizeof(int) * 26 * nr_cells); /* Generate task weights for vertices. */ @@ -503,18 +495,15 @@ static void repart_edge_metis(int partweights, int bothweights, if (t->type == task_type_ghost || t->type == task_type_drift || t->type == task_type_kick) { /* Particle updates add only to vertex weight. */ - if (taskvweights) - weights_v[cid] += w; + if (taskvweights) weights_v[cid] += w; } /* Self interaction? */ else if ((t->type == task_type_self && ci->nodeID == nodeID) || - (t->type == task_type_sub && cj == NULL && - ci->nodeID == nodeID)) { + (t->type == task_type_sub && cj == NULL && ci->nodeID == nodeID)) { /* Self interactions add only to vertex weight. */ - if (taskvweights) - weights_v[cid] += w; + if (taskvweights) weights_v[cid] += w; } @@ -524,8 +513,7 @@ static void repart_edge_metis(int partweights, int bothweights, /* In-cell pair? */ if (ci == cj) { /* Add weight to vertex for ci. */ - if (taskvweights) - weights_v[cid] += w; + if (taskvweights) weights_v[cid] += w; } @@ -585,8 +573,8 @@ static void repart_edge_metis(int partweights, int bothweights, } if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e, - 26 * nr_cells, MPI_INT, MPI_SUM, 0, - MPI_COMM_WORLD)) != MPI_SUCCESS) + 26 * nr_cells, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD)) != + MPI_SUCCESS) mpi_error(res, "Failed to allreduce edge weights."); /* Allocate cell list for the partition. */ @@ -655,8 +643,9 @@ static void repart_edge_metis(int partweights, int bothweights, /* If partition failed continue with the current one, but make this * clear. */ if (failed) { - message("WARNING: METIS repartition has failed, continuing with " - "the current partition, load balance will not be optimal"); + message( + "WARNING: METIS repartition has failed, continuing with " + "the current partition, load balance will not be optimal"); for (int k = 0; k < nr_cells; k++) celllist[k] = cells[k].nodeID; } } @@ -697,16 +686,15 @@ static void repart_vertex_metis(struct space *s, int nodeID, int nr_nodes) { /* Get all the counts from all the nodes. */ int res; - if ((res = MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_INT, - MPI_SUM, MPI_COMM_WORLD)) != MPI_SUCCESS) + if ((res = MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_INT, MPI_SUM, + MPI_COMM_WORLD)) != MPI_SUCCESS) mpi_error(res, "Failed to allreduce particle cell weights."); /* Main node does the partition calculation. */ int *celllist = (int *)malloc(sizeof(int) * s->nr_cells); if (celllist == NULL) error("Failed to allocate celllist"); - if (nodeID == 0) - pick_metis(s, nr_nodes, weights, NULL, celllist); + if (nodeID == 0) pick_metis(s, nr_nodes, weights, NULL, celllist); /* Distribute the celllist partition and apply. */ if ((res = MPI_Bcast(celllist, s->nr_cells, MPI_INT, 0, MPI_COMM_WORLD)) != @@ -721,14 +709,14 @@ static void repart_vertex_metis(struct space *s, int nodeID, int nr_nodes) { } #endif - /** * @brief Repartition the space using the given repartition type. * * Note that at the end of this process all the cells will be re-distributed * across the nodes, but the particles themselves will not be. * - * @param reparttype the type of repartition to attempt, see the repart_type enum. + * @param reparttype the type of repartition to attempt, see the repart_type + *enum. * @param nodeID our nodeID. * @param nr_nodes the number of nodes. * @param s the space of cells holding our local particles. @@ -797,8 +785,8 @@ void partition_initial_partition(struct partition *initial_partition, struct cell *c; /* If we've got the wrong number of nodes, fail. */ - if (nr_nodes != initial_partition->grid[0] * initial_partition->grid[1] - * initial_partition->grid[2]) + if (nr_nodes != initial_partition->grid[0] * initial_partition->grid[1] * + initial_partition->grid[2]) error("Grid size does not match number of nodes."); /* Run through the cells and set their nodeID. */ @@ -806,9 +794,9 @@ void partition_initial_partition(struct partition *initial_partition, for (k = 0; k < s->nr_cells; k++) { c = &s->cells[k]; for (j = 0; j < 3; j++) - ind[j] = c->loc[j] / s->dim[j] * initial_partition->grid[j]; + ind[j] = c->loc[j] / s->dim[j] * initial_partition->grid[j]; c->nodeID = ind[0] + initial_partition->grid[0] * - (ind[1] + initial_partition->grid[1] * ind[2]); + (ind[1] + initial_partition->grid[1] * ind[2]); // message("cell at [%e,%e,%e]: ind = [%i,%i,%i], nodeID = %i", c->loc[0], // c->loc[1], c->loc[2], ind[0], ind[1], ind[2], c->nodeID); } @@ -833,7 +821,7 @@ void partition_initial_partition(struct partition *initial_partition, * not. */ int *weights = NULL; if (initial_partition->type == INITPART_METIS_WEIGHT) { - if ((weights = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL) + if ((weights = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL) error("Failed to allocate weights buffer."); bzero(weights, sizeof(int) * s->nr_cells); @@ -864,14 +852,12 @@ void partition_initial_partition(struct partition *initial_partition, if (MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_INT, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) error("Failed to allreduce particle cell weights."); - } /* Main node does the partition calculation. */ int *celllist = (int *)malloc(sizeof(int) * s->nr_cells); if (celllist == NULL) error("Failed to allocate celllist"); - if (nodeID == 0) - pick_metis(s, nr_nodes, weights, NULL, celllist); + if (nodeID == 0) pick_metis(s, nr_nodes, weights, NULL, celllist); /* Distribute the celllist partition and apply. */ int res = MPI_Bcast(celllist, s->nr_cells, MPI_INT, 0, MPI_COMM_WORLD); @@ -908,8 +894,7 @@ void partition_initial_partition(struct partition *initial_partition, } /* Share the samplecells around all the nodes. */ - int res = - MPI_Bcast(samplecells, nr_nodes * 3, MPI_INT, 0, MPI_COMM_WORLD); + int res = MPI_Bcast(samplecells, nr_nodes * 3, MPI_INT, 0, MPI_COMM_WORLD); if (res != MPI_SUCCESS) mpi_error(res, "Failed to bcast the partition sample cells."); diff --git a/src/partition.h b/src/partition.h index 492af04ac9ef017ceddabb410ee53496e64bb018..3ab5f5a817bf5b77d45c7fb3313158b83d98e251 100644 --- a/src/partition.h +++ b/src/partition.h @@ -51,8 +51,8 @@ enum repartition_type { extern const char *repartition_name[]; void partition_repartition(enum repartition_type reparttype, int nodeID, - int nr_nodes, struct space *s, - struct task *tasks, int nr_tasks); + int nr_nodes, struct space *s, struct task *tasks, + int nr_tasks); void partition_initial_partition(struct partition *initial_partition, int nodeID, int nr_nodes, struct space *s); diff --git a/src/runner.c b/src/runner.c index 1cd52e1dd89f08172c3a3b679f15a096a0007b1b..b2f5eae0ffa39588ff7028c07b81a326e8061dde 100644 --- a/src/runner.c +++ b/src/runner.c @@ -311,20 +311,7 @@ void runner_dosort(struct runner *r, struct cell *c, int flags, int clock) { } } */ -#ifdef TIMER_VERBOSE - message( - "runner %02i: %i parts at depth %i (flags = %i%i%i%i%i%i%i%i%i%i%i%i%i) " - "took %.3f ms.", - r->id, count, c->depth, (flags & 0x1000) >> 12, (flags & 0x800) >> 11, - (flags & 0x400) >> 10, (flags & 0x200) >> 9, (flags & 0x100) >> 8, - (flags & 0x80) >> 7, (flags & 0x40) >> 6, (flags & 0x20) >> 5, - (flags & 0x10) >> 4, (flags & 0x8) >> 3, (flags & 0x4) >> 2, - (flags & 0x2) >> 1, (flags & 0x1) >> 0, - ((double)TIMER_TOC(timer_dosort)) / CPU_TPS * 1000); - fflush(stdout); -#else if (clock) TIMER_TOC(timer_dosort); -#endif } void runner_dogsort(struct runner *r, struct cell *c, int flags, int clock) { @@ -469,20 +456,7 @@ void runner_dogsort(struct runner *r, struct cell *c, int flags, int clock) { } } */ -#ifdef TIMER_VERBOSE - message( - "runner %02i: %i parts at depth %i (flags = %i%i%i%i%i%i%i%i%i%i%i%i%i) " - "took %.3f ms.", - r->id, count, c->depth, (flags & 0x1000) >> 12, (flags & 0x800) >> 11, - (flags & 0x400) >> 10, (flags & 0x200) >> 9, (flags & 0x100) >> 8, - (flags & 0x80) >> 7, (flags & 0x40) >> 6, (flags & 0x20) >> 5, - (flags & 0x10) >> 4, (flags & 0x8) >> 3, (flags & 0x4) >> 2, - (flags & 0x2) >> 1, (flags & 0x1) >> 0, - ((double)TIMER_TOC(timer_dosort)) / CPU_TPS * 1000); - fflush(stdout); -#else if (clock) TIMER_TOC(timer_dosort); -#endif } /** @@ -522,15 +496,7 @@ void runner_doinit(struct runner *r, struct cell *c, int timer) { } } - if (timer) { -#ifdef TIMER_VERBOSE - message("runner %02i: %i parts at depth %i took %.3f ms.", r->id, c->count, - c->depth, ((double)TIMER_TOC(timer_init)) / CPU_TPS * 1000); - fflush(stdout); -#else - TIMER_TOC(timer_init); -#endif - } + if (timer) TIMER_TOC(timer_init); } /** @@ -679,13 +645,7 @@ void runner_doghost(struct runner *r, struct cell *c) { if (count) message("Smoothing length failed to converge on %i particles.", count); -#ifdef TIMER_VERBOSE - message("runner %02i: %i parts at depth %i took %.3f ms.", r->id, c->count, - c->depth, ((double)TIMER_TOC(timer_doghost)) / CPU_TPS * 1000); - fflush(stdout); -#else TIMER_TOC(timer_doghost); -#endif } /** @@ -779,15 +739,7 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { c->h_max = h_max; c->dx_max = dx_max; - if (timer) { -#ifdef TIMER_VERBOSE - message("runner %02i: %i parts at depth %i took %.3f ms.", r->id, c->count, - c->depth, ((double)TIMER_TOC(timer_drift)) / CPU_TPS * 1000); - fflush(stdout); -#else - TIMER_TOC(timer_drift); -#endif - } + if (timer) TIMER_TOC(timer_drift); } /** @@ -991,15 +943,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { c->ti_end_min = ti_end_min; c->ti_end_max = ti_end_max; - if (timer) { -#ifdef TIMER_VERBOSE - message("runner %02i: %i parts at depth %i took %.3f ms.", r->id, c->count, - c->depth, ((double)TIMER_TOC(timer_kick)) / CPU_TPS * 1000); - fflush(stdout); -#else - TIMER_TOC(timer_kick); -#endif - } + if (timer) TIMER_TOC(timer_kick); } /** diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 90bd2bb8a7a618f8539d98b91ffdf347f17eca17..cf5d56e94169b44e6cd2974a3422a0bc5e4610ac 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -202,15 +202,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); #endif -#ifdef TIMER_VERBOSE - printf( - "runner_dopair_naive[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) " - "took %.3f ms.\n", - r->id, count_i, count_j, ci->depth, ci->h_max, cj->h_max, - ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000); -#else TIMER_TOC(TIMER_DOPAIR); -#endif } void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { @@ -305,12 +297,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); #endif -#ifdef TIMER_VERBOSE - printf("runner_doself[%02i]: %i parts at depth %i took %.3f ms.\n", r->id, - count, c->depth, ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000); -#else TIMER_TOC(TIMER_DOSELF); -#endif } /** @@ -510,15 +497,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); #endif -#ifdef TIMER_VERBOSE - printf( - "runner_dopair_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) " - "took %.3f ms.\n", - r->id, count, count_j, ci->depth, ci->h_max, cj->h_max, - ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000); -#else TIMER_TOC(timer_dopair_subset); -#endif } /** @@ -630,15 +609,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); #endif -#ifdef TIMER_VERBOSE - printf( - "runner_dopair_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) " - "took %.3f ms.\n", - r->id, count, count_j, ci->depth, ci->h_max, cj->h_max, - ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000); -#else TIMER_TOC(timer_dopair_subset); -#endif } /** @@ -741,13 +712,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); #endif -#ifdef TIMER_VERBOSE - printf("runner_doself_subset[%02i]: %i/%i parts at depth %i took %.3f ms.\n", - r->id, count, ci->count, ci->depth, - ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000); -#else TIMER_TOC(timer_dopair_subset); -#endif } /** @@ -870,8 +835,8 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { } /* loop over the parts in ci. */ - /* printf( "runner_dopair: first half took %.3f ms...\n" , - ((double)(getticks() - tic)) / CPU_TPS * 1000 ); + /* printf( "runner_dopair: first half took %.3f %s...\n" , + clocks_from_ticks(getticks() - tic), clocks_getunit()); tic = getticks(); */ /* Loop over the parts in cj. */ @@ -941,16 +906,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); #endif -#ifdef TIMER_VERBOSE - printf( - "runner_dopair[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f, h=%.3f) " - "took %.3f ms.\n", - r->id, count_i, count_j, ci->depth, ci->h_max, cj->h_max, - fmax(ci->h[0], fmax(ci->h[1], ci->h[2])), - ((double)(TIMER_TOC(TIMER_DOPAIR))) / CPU_TPS * 1000); -#else TIMER_TOC(TIMER_DOPAIR); -#endif } void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { @@ -1182,8 +1138,8 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { } /* loop over the parts in ci. */ - /* printf( "runner_dopair: first half took %.3f ms...\n" , - ((double)(getticks() - tic)) / CPU_TPS * 1000 ); + /* printf( "runner_dopair: first half took %.3f %s...\n" , + clocks_from_ticks(getticks() - tic), clocks_getunit()); tic = getticks(); */ /* Loop over the parts in cj. */ @@ -1336,16 +1292,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]); #endif -#ifdef TIMER_VERBOSE - printf( - "runner_dopair[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f, h=%.3f) " - "took %.3f ms.\n", - r->id, count_i, count_j, ci->depth, ci->h_max, cj->h_max, - fmax(ci->h[0], fmax(ci->h[1], ci->h[2])), - ((double)(TIMER_TOC(TIMER_DOPAIR))) / CPU_TPS * 1000); -#else TIMER_TOC(TIMER_DOPAIR); -#endif } /** @@ -1570,12 +1517,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]); #endif -#ifdef TIMER_VERBOSE - printf("runner_doself1[%02i]: %i parts at depth %i took %.3f ms.\n", r->id, - count, c->depth, ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000); -#else TIMER_TOC(TIMER_DOSELF); -#endif } void DOSELF2(struct runner *r, struct cell *restrict c) { @@ -1767,12 +1709,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]); #endif -#ifdef TIMER_VERBOSE - printf("runner_doself2[%02i]: %i parts at depth %i took %.3f ms.\n", r->id, - count, c->depth, ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000); -#else TIMER_TOC(TIMER_DOSELF); -#endif } /** @@ -2054,13 +1991,7 @@ void DOSUB1(struct runner *r, struct cell *ci, struct cell *cj, int sid, } /* otherwise, pair interaction. */ - if (gettimer) -#ifdef TIMER_VERBOSE - printf("runner_dosub1[%02i]: flags=%i at depth %i took %.3f ms.\n", r->id, - sid, ci->depth, ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000); -#else - TIMER_TOC(TIMER_DOSUB); -#endif + if (gettimer) TIMER_TOC(TIMER_DOSUB); } void DOSUB2(struct runner *r, struct cell *ci, struct cell *cj, int sid, @@ -2329,13 +2260,7 @@ void DOSUB2(struct runner *r, struct cell *ci, struct cell *cj, int sid, } /* otherwise, pair interaction. */ - if (gettimer) -#ifdef TIMER_VERBOSE - printf("runner_dosub2[%02i]: flags=%i at depth %i took %.3f ms.\n", r->id, - sid, ci->depth, ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000); -#else - TIMER_TOC(TIMER_DOSUB); -#endif + if (gettimer) TIMER_TOC(TIMER_DOSUB); } void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, @@ -2933,11 +2858,5 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, } /* otherwise, pair interaction. */ - if (gettimer) -#ifdef TIMER_VERBOSE - printf("runner_dosub[%02i]: flags=%i at depth %i took %.3f ms.\n", r->id, - sid, ci->depth, ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000); -#else - TIMER_TOC(TIMER_DOSUB); -#endif + if (gettimer) TIMER_TOC(TIMER_DOSUB); } diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 4f4e8557892b554a7d3d4c534f4abe5032de32f5..f374339da75e31b39a5295fcd8bbc23c34d8d67d 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -21,6 +21,7 @@ /* Includes. */ #include "cell.h" +#include "clocks.h" #include "part.h" /** @@ -192,16 +193,7 @@ void runner_dopair_grav_new(struct runner *r, struct cell *ci, } /* loop over the parts in cj and interact with the multipole. */ -#ifdef TIMER_VERBOSE - printf( - "runner_dopair[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f, h=%.3f) " - "took %.3f ms.\n", - r->id, count_i, count_j, ci->depth, ci->h_max, cj->h_max, - fmax(ci->h[0], fmax(ci->h[1], ci->h[2])), - ((double)(TIMER_TOC(TIMER_DOPAIR))) / CPU_TPS * 1000); -#else TIMER_TOC(TIMER_DOPAIR); -#endif } /** @@ -429,15 +421,7 @@ void runner_dopair_grav(struct runner *r, struct cell *restrict ci, runner_iact_grav(r2q[k], &dxq[3 * k], piq[k], pjq[k]); #endif -#ifdef TIMER_VERBOSE - printf( - "runner_dopair_naive_grav[%02i]: %i/%i parts at depth %i " - "(r_max=%.3f/%.3f) took %.3f ms.\n", - r->id, count_i, count_j, ci->depth, ci->h_max, cj->h_max, - ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000); -#else TIMER_TOC(timer_dopair_grav); -#endif } /** @@ -525,15 +509,7 @@ void runner_doself_grav(struct runner *r, struct cell *restrict c) { runner_iact_grav(r2q[k], &dxq[3 * k], piq[k], pjq[k]); #endif -#ifdef TIMER_VERBOSE - printf( - "runner_doself_grav[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) " - "took %.3f ms.\n", - r->id, count_i, count_j, ci->depth, ci->h_max, cj->h_max, - ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000); -#else TIMER_TOC(timer_doself_grav); -#endif } /** @@ -616,13 +592,7 @@ void runner_dosub_grav(struct runner *r, struct cell *ci, struct cell *cj, runner_dograv_mm(r, ci, cj); } - if (gettimer) -#ifdef TIMER_VERBOSE - printf("runner_dosub_grav[%02i]: at depth %i took %.3f ms.\n", r->id, - ci->depth, ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000); -#else - TIMER_TOC(timer_dosub_grav); -#endif + if (gettimer) TIMER_TOC(timer_dosub_grav); } #endif /* SWIFT_RUNNER_DOIACT_GRAV_H */ diff --git a/src/scheduler.c b/src/scheduler.c index 047a402abbdc6877958ea8e2730472cf0a995ebe..a9a4827e7fa368061ae0a5b1bdfec51940e5d716 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -936,8 +936,8 @@ void scheduler_reweight(struct scheduler *s) { if (t->type == task_type_send) t->weight = INT_MAX / 8; if (t->type == task_type_recv) t->weight *= 1.41; } - // message( "weighting tasks took %.3f ms." , (double)( getticks() - tic ) / - // CPU_TPS * 1000 ); + // message( "weighting tasks took %.3f %s." , + //clocks_from_ticks( getticks() - tic ), clocks_getunit()); /* int min = tasks[0].weight, max = tasks[0].weight; for ( k = 1 ; k < nr_tasks ; k++ ) @@ -973,8 +973,8 @@ void scheduler_start(struct scheduler *s, unsigned int mask, s->tasks[k].wait = 1; s->tasks[k].rid = -1; } - // message( "waiting tasks took %.3f ms." , (double)( getticks() - tic ) / - // CPU_TPS * 1000 ); + // message( "waiting tasks took %.3f %s." , + //clocks_from_ticks(getticks() - tic), clocks_getunit() ); /* Enqueue a set of extraenous tasks to set the task waits. */ struct task *rewait_tasks = &s->tasks[s->nr_tasks]; @@ -1016,8 +1016,8 @@ void scheduler_start(struct scheduler *s, unsigned int mask, pthread_cond_wait(&s->sleep_cond, &s->sleep_mutex); } pthread_mutex_unlock(&s->sleep_mutex); - /* message("waiting tasks took %.3f ms.", - (double)(getticks() - tic) / CPU_TPS * 1000); */ + /* message("waiting tasks took %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit());*/ s->mask = mask; s->submask = submask | (1 << task_subtype_none); @@ -1033,8 +1033,8 @@ void scheduler_start(struct scheduler *s, unsigned int mask, } } - // message( "enqueueing tasks took %.3f ms." , (double)( getticks() - tic ) / - // CPU_TPS * 1000 ); + // message( "enqueueing tasks took %.3f %s." , + //clocks_from_ticks( getticks() - tic ), clocks_getunit()); } /** diff --git a/src/space.c b/src/space.c index f5674b969fb0ec427320b647b8cae80f3b019b44..0785740335f6e782a875b7ab658a9a0564288aab 100644 --- a/src/space.c +++ b/src/space.c @@ -294,8 +294,8 @@ void space_regrid(struct space *s, double cell_max, int verbose) { } #endif } /* re-build upper-level cells? */ - // message( "rebuilding upper-level cells took %.3f ms." , (double)(getticks() - // - tic) / CPU_TPS * 1000 ); + // message( "rebuilding upper-level cells took %.3f %s." , + // clocks_from_ticks(double)(getticks() - tic), clocks_getunit()); /* Otherwise, just clean up the cells. */ else { @@ -377,8 +377,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); cells[ind[k]].count++; } -// message( "getting particle indices took %.3f ms." , (double)(getticks() - -// tic) / CPU_TPS * 1000 ); + // message( "getting particle indices took %.3f %s." , + //clocks_from_ticks(getticks() - tic), clocks_getunit()): #ifdef WITH_MPI /* Move non-local parts to the end of the list. */ @@ -430,8 +430,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* Sort the parts according to their cells. */ // tic = getticks(); space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1); - // message( "parts_sort took %.3f ms." , (double)(getticks() - tic) / CPU_TPS - // * 1000 ); + // message( "parts_sort took %.3f %s." , + //clocks_from_ticks(getticks() - tic), clocks_getunit()); /* Re-link the gparts. */ for (k = 0; k < nr_parts; k++) @@ -465,16 +465,16 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]); cells[ind[k]].gcount++; } - // message( "getting particle indices took %.3f ms." , (double)(getticks() - - // tic) / CPU_TPS * 1000 ); + // message( "getting particle indices took %.3f %s." , + //clocks_from_ticks(getticks() - tic), clocks_getunit()); /* TODO: Here we should exchange the gparts as well! */ /* Sort the parts according to their cells. */ // tic = getticks(); gparts_sort(s->gparts, ind, nr_gparts, 0, s->nr_cells - 1); - // message( "gparts_sort took %.3f ms." , (double)(getticks() - tic) / CPU_TPS - // * 1000 ); + // message( "gparts_sort took %.3f %s." , + //clocks_from_ticks(getticks() - tic), clocks_getunit()); /* Re-link the parts. */ for (k = 0; k < nr_gparts; k++) @@ -497,8 +497,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { xfinger = &xfinger[c->count]; gfinger = &gfinger[c->gcount]; } - // message( "hooking up cells took %.3f ms." , (double)(getticks() - tic) / - // CPU_TPS * 1000 ); + // message( "hooking up cells took %.3f %s." , + //clocks_from_ticks(getticks() - tic), clocks_getunit()); /* At this point, we have the upper-level cells, old or new. Now make sure that the parts in each cell are ok. */ @@ -509,8 +509,8 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { 0, &cells[k], NULL, 0); engine_launch(s->e, s->e->nr_threads, 1 << task_type_split_cell, 0); - // message( "space_split took %.3f ms." , (double)(getticks() - tic) / CPU_TPS - // * 1000 ); + // message( "space_split took %.3f %s." , + //clocks_from_ticks(getticks() - tic), clocks_getunit()); } /** diff --git a/src/swift.h b/src/swift.h index 03960e83bf75488b3456327157288160e995951b..9ab090dccd195ff4927d3e614e446b36d273f824 100644 --- a/src/swift.h +++ b/src/swift.h @@ -25,6 +25,7 @@ /* Local headers. */ #include "atomic.h" #include "cell.h" +#include "clocks.h" #include "const.h" #include "const.h" #include "cycle.h" diff --git a/src/timers.h b/src/timers.h index 95db02c5b8ea055e4b8264a7055c678460c29fdf..de2d9edb9ed54717472e5ae1222dfb33235c3e95 100644 --- a/src/timers.h +++ b/src/timers.h @@ -57,11 +57,6 @@ extern ticks timers[timer_count]; #define timers_mask_all ((1 << timer_count) - 1) /* Define the timer macros. */ -#ifdef TIMER_VERBOSE -#ifndef TIMER -#define TIMER -#endif -#endif #ifdef TIMER #define TIMER_TIC_ND tic = getticks(); #define TIMER_TIC2_ND ticks tic2 = getticks(); diff --git a/tests/Makefile.am b/tests/Makefile.am index 7dcabd8f3e35963e5fd6331994791dccc2a94c1b..f0bfbefd3c7f4591134d1707c4ac9bf63278e855 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -15,7 +15,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # Add the source directory and debug to CFLAGS -AM_CFLAGS = -I../src -DCPU_TPS=2.67e9 $(HDF5_CPPFLAGS) -DTIMER +AM_CFLAGS = -I../src $(HDF5_CPPFLAGS) -DTIMER AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) diff --git a/tests/testReading.c b/tests/testReading.c index c8123a26acd12692ccd6cd3d893f94e0f95fb856..17b33d88f6fd702e0bad6280f4adbe7ebc50ef2a 100644 --- a/tests/testReading.c +++ b/tests/testReading.c @@ -31,7 +31,7 @@ int main() { const double boxSize = 1.; const int L = 4; const double rho = 2.; - + /* Read data */ read_ic_single("input.hdf5", dim, &parts, &N, &periodic); diff --git a/tests/testSPHStep.c b/tests/testSPHStep.c index 32e7d62815533fb0ad50e2e85ebd10f452ec8d30..984b8ea867250d0bda1bc14d2600279a27321b2c 100644 --- a/tests/testSPHStep.c +++ b/tests/testSPHStep.c @@ -22,43 +22,45 @@ #include <stdlib.h> #include <string.h> -/** +/** * @brief Constructs a cell with N SPH particles */ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) { - size_t count = N*N*N; - struct cell *cell = malloc(sizeof( struct cell) ); + size_t count = N * N * N; + struct cell *cell = malloc(sizeof(struct cell)); struct part *part; struct xpart *xpart; float h; size_t x, y, z, size; - size = count*sizeof(struct part); - if (posix_memalign((void**)&cell->parts, 32, size) != 0) { + size = count * sizeof(struct part); + if (posix_memalign((void **)&cell->parts, 32, size) != 0) { error("couldn't allocate particles"); } - size = count*sizeof(struct xpart); - if (posix_memalign((void**)&cell->xparts, 32, size) != 0) { + size = count * sizeof(struct xpart); + if (posix_memalign((void **)&cell->xparts, 32, size) != 0) { error("couldn't allocate extended particles"); } - h = 1.127 * cellSize / N; - + part = cell->parts; xpart = cell->xparts; - memset(part, 0, count*sizeof(struct part)); - memset(xpart, 0, count*sizeof(struct xpart)); + memset(part, 0, count * sizeof(struct part)); + memset(xpart, 0, count * sizeof(struct xpart)); for (x = 0; x < N; ++x) { for (y = 0; y < N; ++y) { for (z = 0; z < N; ++z) { - part->x[0] = offset[0] * cellSize + x * cellSize / N + cellSize / (2*N); - part->x[1] = offset[1] * cellSize + y * cellSize / N + cellSize / (2*N); - part->x[2] = offset[2] * cellSize + z * cellSize / N + cellSize / (2*N); - part->h = h; - part->id = x*N*N + y*N + z + id_offset; - ++part; + part->x[0] = + offset[0] * cellSize + x * cellSize / N + cellSize / (2 * N); + part->x[1] = + offset[1] * cellSize + y * cellSize / N + cellSize / (2 * N); + part->x[2] = + offset[2] * cellSize + z * cellSize / N + cellSize / (2 * N); + part->h = h; + part->id = x * N * N + y * N + z + id_offset; + ++part; } } } @@ -69,7 +71,7 @@ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) { cell->h[0] = cellSize; cell->h[1] = cellSize; cell->h[2] = cellSize; - + return cell; } @@ -78,37 +80,38 @@ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) { /* Run a full time step integration for one cell */ int main() { - int i,j,k, offset[3]; + int i, j, k, offset[3]; struct part *p; int N = 10; float dim = 1.; float rho = 2.; float P = 1.; - + /* Create cells */ struct cell *cells[27]; - for (i=0; i<3; i++) - for (j=0; j<3; j++) - for (k=0; k<3; k++) { - offset[0] = i; - offset[1] = j; - offset[2] = k; - cells[i*9 + j*3 + k] = make_cell(N, dim, offset, (i*9+j*3+k) * N*N*N); + for (i = 0; i < 3; i++) + for (j = 0; j < 3; j++) + for (k = 0; k < 3; k++) { + offset[0] = i; + offset[1] = j; + offset[2] = k; + cells[i * 9 + j * 3 + k] = + make_cell(N, dim, offset, (i * 9 + j * 3 + k) * N * N * N); } /* Set particle properties */ - for (j=0 ; j < 27; ++j) - for (i=0 ; i< cells[j]->count; ++i) { - cells[j]->parts[i].mass = dim * dim * dim * rho / (N * N * N); - cells[j]->parts[i].u = P / ((const_hydro_gamma - 1.)*rho); + for (j = 0; j < 27; ++j) + for (i = 0; i < cells[j]->count; ++i) { + cells[j]->parts[i].mass = dim * dim * dim * rho / (N * N * N); + cells[j]->parts[i].u = P / ((const_hydro_gamma - 1.) * rho); } message("m=%f", dim * dim * dim * rho / (N * N * N)); /* Pick the central cell */ struct cell *ci = cells[13]; - + /* Create the infrastructure */ struct engine e; struct runner r; @@ -123,40 +126,35 @@ int main() { e.dt_max = 1e10; /* The tracked particle */ - p = &(ci->parts[N*N*N / 2 + N*N / 2 + N / 2]); + p = &(ci->parts[N * N * N / 2 + N * N / 2 + N / 2]); message("Studying particle p->id=%lld", p->id); - /* Initialise the particles */ - for (j=0 ; j<27; ++j) - { - runner_doinit(&r, cells[j]); - } - + for (j = 0; j < 27; ++j) { + runner_doinit(&r, cells[j]); + } + /* Compute density */ runner_doself1_density(&r, ci); runner_doghost(&r, ci); - + message("h=%f rho=%f N_ngb=%f", p->h, p->rho, p->density.wcount); message("c=%f", p->force.c); runner_doself2_force(&r, ci); runner_dokick(&r, ci, 1); - + message("t_end=%f", p->t_end); - + free(ci->parts); free(ci->xparts); - + return 0; } #else -int main() { - - return 0; -} +int main() { return 0; } #endif diff --git a/tests/testSingle.c b/tests/testSingle.c index 1bab13959b5d04f23170b761b953de0fa43561b9..c85b77ff1c5b2285c33fa7787bbd53deab463039 100644 --- a/tests/testSingle.c +++ b/tests/testSingle.c @@ -40,11 +40,6 @@ /* Local headers. */ #include "swift.h" -/* Ticks per second on this machine. */ -#ifndef CPU_TPS -#define CPU_TPS 2.67e9 -#endif - /* Engine policy flags. */ #ifndef ENGINE_POLICY #define ENGINE_POLICY engine_policy_none diff --git a/tests/testTimeIntegration.c b/tests/testTimeIntegration.c index 4fcd253e877ed167b9534117832675f8f6ac5ef4..f3802888bccc40a424d659cde2605d12c9268e47 100644 --- a/tests/testTimeIntegration.c +++ b/tests/testTimeIntegration.c @@ -41,14 +41,13 @@ int main() { float v_max = 30287.; /* [m/s] */ // float v_min = 29291.; /* [m/s] */ - /* Derived quantities */ float e = (r_max - r_min) / (r_max + r_min); /* Eccentricity */ float b = sqrtf(r_max * r_min); /* Semi-minor axis */ float p = b * sqrtf(1 - e * e); /* Semi-lactus rectum */ float a = p / (1 - e * e); /* Semi-major axis */ float T = sqrtf(4 * M_PI * M_PI * a * a * a / - (G * (M_sun + M_earth))); /* Period [s] */ + (G * (M_sun + M_earth))); /* Period [s] */ /* Print some info */ message("Semi-major axis: a=%e [m]", a); @@ -98,8 +97,8 @@ int main() { eng.time = 0.; eng.timeBegin = 0.; eng.timeEnd = N_orbits * T; - eng.dt_min = dt; /* This forces the time-step to be dt */ - eng.dt_max = dt; /* irrespective of the state of the particle */ + eng.dt_min = dt; /* This forces the time-step to be dt */ + eng.dt_max = dt; /* irrespective of the state of the particle */ /* Simulate ! */ for (i = 0; i < N; i++) { diff --git a/tests/testVectorize.c b/tests/testVectorize.c index 1c77e9d54effe34fd2c24e97678d905890b291ba..a18b6e8af5ac3f7b94bd7be3bdf8fd21e49681ff 100644 --- a/tests/testVectorize.c +++ b/tests/testVectorize.c @@ -34,8 +34,8 @@ struct cell *make_cell(size_t n, double *offset, double h, part->h = h; part->id = ++(*partId); part->mass = 1.0f; - part->t_begin = 0.f; - part->t_end = 0.1f; + part->ti_begin = 0; + part->ti_end = 1; ++part; } } @@ -177,13 +177,13 @@ int main(int argc, char *argv[]) { toc = getticks(); time += toc - tic; - + /* Dump if necessary */ if (i % 50 == 0) dump_particle_fields("swift_dopair.dat", ci, cj); } /* Output timing */ - message("SWIFT calculation took %lli ticks." , time / runs); + message("SWIFT calculation took %lli ticks.", time / runs); /* Now perform a brute-force version for accuracy tests */ @@ -202,7 +202,7 @@ int main(int argc, char *argv[]) { dump_particle_fields("brute_force.dat", ci, cj); /* Output timing */ - message("Brute force calculation took %lli ticks." , toc - tic); + message("Brute force calculation took %lli ticks.", toc - tic); /* Clean things to make the sanitizer happy ... */ clean_up(ci);