Commit 83524c88 authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge branch 'master' into root-level-parts

parents 42c8270a 769c0ddd
......@@ -236,6 +236,7 @@ AX_PTHREAD([LIBS="$PTHREAD_LIBS $LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
# Check for metis. Note AX_LIB_METIS exists, but cannot be configured
# to be default off (i.e. given no option it tries to locate METIS), so we
# don't use that.
have_metis="no"
AC_ARG_WITH([metis],
[AS_HELP_STRING([--with-metis=PATH],
[root directory where metis is installed @<:@yes/no@:>@]
......@@ -251,6 +252,7 @@ if test "x$with_metis" != "xno"; then
METIS_LIBS="-lmetis"
METIS_INCS=""
fi
have_metis="yes"
AC_CHECK_LIB([metis],[METIS_PartGraphKway],
AC_DEFINE([HAVE_METIS],1,[The metis library appears to be present.]),
AC_MSG_ERROR(something is wrong with the metis library!),$METIS_LIBS)
......@@ -380,7 +382,7 @@ AC_MSG_RESULT([
MPI enabled : $enable_mpi
HDF5 enabled : $with_hdf5
- parallel : $have_parallel_hdf5
Metis enabled : $with_metis
Metis enabled : $have_metis
libNUMA enabled : $have_numa
])
......
......@@ -20,7 +20,7 @@
MYFLAGS = -DTIMER
# Add the source directory and debug to CFLAGS
AM_CFLAGS = -I../src -DCPU_TPS=2.67e9 $(HDF5_CPPFLAGS)
AM_CFLAGS = -I../src $(HDF5_CPPFLAGS)
AM_LDFLAGS =
......
......@@ -46,11 +46,6 @@
/* Local headers. */
#include "swift.h"
/* Ticks per second on this machine. */
#ifndef CPU_TPS
#define CPU_TPS 2.40e9
#endif
/* Engine policy flags. */
#ifndef ENGINE_POLICY
#define ENGINE_POLICY engine_policy_none
......@@ -75,13 +70,14 @@ int main(int argc, char *argv[]) {
struct space s;
struct engine e;
struct UnitSystem us;
struct clocks_time tic, toc;
char ICfileName[200] = "";
char dumpfile[30];
float dt_max = 0.0f, dt_min = 0.0f;
ticks tic;
int nr_nodes = 1, myrank = 0;
FILE *file_thread;
int with_outputs = 1;
unsigned long long cpufreq = 0;
#ifdef WITH_MPI
struct partition initial_partition;
......@@ -154,7 +150,7 @@ int main(int argc, char *argv[]) {
bzero(&s, sizeof(struct space));
/* Parse the options */
while ((c = getopt(argc, argv, "a:c:d:e:f:m:oP:q:R:s:t:w:y:z:")) != -1)
while ((c = getopt(argc, argv, "a:c:d:e:f:h:m:oP:q:R:s:t:w:y:z:")) != -1)
switch (c) {
case 'a':
if (sscanf(optarg, "%lf", &scaling) != 1)
......@@ -171,7 +167,7 @@ int main(int argc, char *argv[]) {
case 'd':
if (sscanf(optarg, "%f", &dt_min) != 1)
error("Error parsing minimal timestep.");
if (myrank == 0) message("dt_min set to %e.", dt_max);
if (myrank == 0) message("dt_min set to %e.", dt_min);
fflush(stdout);
break;
case 'e':
......@@ -183,6 +179,12 @@ int main(int argc, char *argv[]) {
case 'f':
if (!strcpy(ICfileName, optarg)) error("Error parsing IC file name.");
break;
case 'h':
if (sscanf(optarg, "%llu", &cpufreq) != 1)
error("Error parsing CPU frequency.");
if (myrank == 0) message("CPU frequency set to %llu.", cpufreq);
fflush(stdout);
break;
case 'm':
if (sscanf(optarg, "%lf", &h_max) != 1) error("Error parsing h_max.");
if (myrank == 0) message("maximum h set to %e.", h_max);
......@@ -192,8 +194,8 @@ int main(int argc, char *argv[]) {
with_outputs = 0;
break;
case 'P':
/* Partition type is one of "g", "m", "w", or "v"; "g" can be
* followed by three numbers defining the grid. */
/* Partition type is one of "g", "m", "w", or "v"; "g" can be
* followed by three numbers defining the grid. */
#ifdef WITH_MPI
switch (optarg[0]) {
case 'g':
......@@ -224,7 +226,7 @@ int main(int argc, char *argv[]) {
error("Error parsing number of queues.");
break;
case 'R':
/* Repartition type "n", "b", "v", "e" or "x".
/* Repartition type "n", "b", "v", "e" or "x".
* Note only none is available without METIS. */
#ifdef WITH_MPI
switch (optarg[0]) {
......@@ -323,6 +325,12 @@ int main(int argc, char *argv[]) {
aFactor(&us, UNIT_CONV_ENTROPY), hFactor(&us, UNIT_CONV_ENTROPY));
}
/* Initialize CPU frequency. */
clocks_set_cpufreq(cpufreq);
cpufreq = clocks_get_cpufreq();
if (myrank == 0)
message("CPU frequency used for tick conversion: %llu Hz", cpufreq);
/* Check we have sensible time step bounds */
if (dt_min > dt_max)
error("Minimal time step size must be large than maximal time step size ");
......@@ -332,7 +340,9 @@ int main(int argc, char *argv[]) {
error("An IC file name must be provided via the option -f");
/* Read particles and space information from (GADGET) IC */
tic = getticks();
if (myrank == 0)
clocks_gettime(&tic);
#if defined(WITH_MPI)
#if defined(HAVE_PARALLEL_HDF5)
read_ic_parallel(ICfileName, dim, &parts, &N, &periodic, myrank, nr_nodes,
......@@ -345,10 +355,12 @@ int main(int argc, char *argv[]) {
read_ic_single(ICfileName, dim, &parts, &N, &periodic);
#endif
if (myrank == 0)
message("reading particle properties took %.3f ms.",
((double)(getticks() - tic)) / CPU_TPS * 1000);
fflush(stdout);
if (myrank == 0) {
clocks_gettime(&toc);
message("reading particle properties took %.3f %s.",
clocks_diff(&tic, &toc), clocks_getunit());
fflush(stdout);
}
#if defined(WITH_MPI)
long long N_long = N;
......@@ -374,12 +386,15 @@ int main(int argc, char *argv[]) {
if (nr_queues < 0) nr_queues = nr_threads;
/* Initialize the space with this data. */
tic = getticks();
space_init(&s, dim, parts, N, periodic, h_max, myrank == 0);
if (myrank == 0)
message("space_init took %.3f ms.",
((double)(getticks() - tic)) / CPU_TPS * 1000);
fflush(stdout);
clocks_gettime(&tic);
space_init(&s, dim, parts, N, periodic, h_max, myrank == 0);
if (myrank == 0) {
clocks_gettime(&toc);
message("space_init took %.3f %s.", clocks_diff(&tic, &toc),
clocks_getunit());
fflush(stdout);
}
/* Say a few nice things about the space we just created. */
if (myrank == 0) {
......@@ -408,15 +423,18 @@ int main(int argc, char *argv[]) {
}
/* Initialize the engine with this space. */
tic = getticks();
if (myrank == 0)
clocks_gettime(&tic);
if (myrank == 0) message("nr_nodes is %i.", nr_nodes);
engine_init(&e, &s, dt_max, nr_threads, nr_queues, nr_nodes, myrank,
ENGINE_POLICY | engine_policy_steal | engine_policy_hydro, 0,
time_end, dt_min, dt_max);
if (myrank == 0)
message("engine_init took %.3f ms.",
((double)(getticks() - tic)) / CPU_TPS * 1000);
fflush(stdout);
if (myrank == 0) {
clocks_gettime(&toc);
message("engine_init took %.3f %s.", clocks_diff(&tic, &toc),
clocks_getunit());
fflush(stdout);
}
#ifdef WITH_MPI
/* Split the space. */
......@@ -427,7 +445,8 @@ int main(int argc, char *argv[]) {
if (with_outputs) {
/* Write the state of the system as it is before starting time integration.
*/
tic = getticks();
if (myrank == 0)
clocks_gettime(&tic);
#if defined(WITH_MPI)
#if defined(HAVE_PARALLEL_HDF5)
write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD,
......@@ -439,10 +458,12 @@ int main(int argc, char *argv[]) {
#else
write_output_single(&e, &us);
#endif
if (myrank == 0)
message("writing particle properties took %.3f ms.",
((double)(getticks() - tic)) / CPU_TPS * 1000);
fflush(stdout);
if (myrank == 0) {
clocks_gettime(&toc);
message("writing particle properties took %.3f %s.",
clocks_diff(&tic, &toc), clocks_getunit());
fflush(stdout);
}
}
/* Init the runner history. */
......@@ -465,10 +486,10 @@ int main(int argc, char *argv[]) {
if (myrank == 0)
printf(
"# Step Time time-step Number of updates CPU Wall-clock time "
"[ms]\n");
"[%s]\n", clocks_getunit());
/* Let loose a runner on the space. */
for (j = 0; e.time < time_end; j++) {
for (j = 0; !engine_is_done(&e); j++) {
/* Repartition the space amongst the nodes? */
#ifdef WITH_MPI
......@@ -571,14 +592,14 @@ int main(int argc, char *argv[]) {
* e.count_step, */
/* e.dt_min, e.dt_max); */
/* for (k = 0; k < timer_count; k++) */
/* printf(" %.3f", ((double)timers[k]) / CPU_TPS * 1000); */
/* printf(" %.3f", clocks_from_ticks(timers[k]); */
/* printf("\n"); */
/* fflush(stdout); */
/* } */
/* if (myrank == 0) { */
/* printf("%i %e", j, e.time); */
/* printf(" %.3f", ((double)timers[timer_count - 1]) / CPU_TPS * 1000); */
/* printf(" %.3f", clocks_from_ticks(timers[timer_count - 1]); */
/* printf("\n"); */
/* fflush(stdout); */
/* } */
......
......@@ -17,7 +17,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Add the debug flag to the whole thing
AM_CFLAGS = -DTIMER -DCOUNTER -DCPU_TPS=2.30e9 $(HDF5_CPPFLAGS)
AM_CFLAGS = -DTIMER -DCOUNTER $(HDF5_CPPFLAGS)
# Assign a "safe" version number
AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) $(HDF5_LDFLAGS) -version-info 0:0:0 # -fsanitize=address
......@@ -35,13 +35,13 @@ endif
# List required headers
include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
engine.h swift.h serial_io.h timers.h debug.h scheduler.h proxy.h parallel_io.h \
common_io.h single_io.h multipole.h map.h tools.h partition.h
common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h
# Common source files
AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \
units.c common_io.c single_io.c multipole.c version.c map.c \
kernel.c tools.c part.c partition.c
kernel.c tools.c part.c partition.c clocks.c
# Include files for distribution, not installation.
nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel.h vector.h \
......
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2016 Peter W. Draper (p.w.draper@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
/**
* @file clocks.c
* @brief Support for measuring intervals in milliseconds when that
* is possible, otherwise in ticks.
*
* Use cycle.h or timers.h for relative times.
*/
/* Config parameters. */
#include "../config.h"
/* Standard headers. */
#include <stdio.h>
#include <unistd.h>
/* Local headers. */
#include "clocks.h"
/* 0.25 of a second in nanoseconds. */
#define SLEEPTIME 250000000
/* The CPU frequency used to convert ticks to seconds. */
static unsigned long long clocks_cpufreq = 0;
/* The units of any returned times: clocks_units[clocks_units_index] names the
 * unit, and clocks_units_scale is the factor applied to (ticks / CPU
 * frequency). A scale of 1000.0 yields milliseconds; a scale of 1.0 is used
 * together with the placeholder frequency of 1 to report raw ticks. The names
 * are string literals and must never be written to, hence the const
 * qualifiers. */
static const char *const clocks_units[] = {"ms", "ticks"};
static int clocks_units_index = 0;
static double clocks_units_scale = 1000.0;
/* Local prototypes. */
static void clocks_estimate_cpufreq();
/**
 * @brief Record the current time.
 *
 * When HAVE_CLOCK_GETTIME is defined the sample comes from clock_gettime()
 * using CLOCK_REALTIME, otherwise it is the tick counter returned by
 * getticks().
 *
 * @param time the struct that receives the current time.
 */
void clocks_gettime(struct clocks_time *time) {
#ifndef HAVE_CLOCK_GETTIME
  /* No system clock available: fall back to the raw tick counter. */
  time->time = getticks();
#else
  clock_gettime(CLOCK_REALTIME, &time->time);
#endif
}
/**
 * @brief Get the difference between two times.
 *
 * With HAVE_CLOCK_GETTIME the result is always in milliseconds. Otherwise
 * the tick interval is divided by the CPU frequency and multiplied by the
 * current units scale, so the units are those named by clocks_getunit().
 *
 * @param start the start time.
 * @param end the end time.
 *
 * @return the difference, end minus start.
 */
double clocks_diff(struct clocks_time *start, struct clocks_time *end) {
#ifdef HAVE_CLOCK_GETTIME
  struct timespec temp;
  temp.tv_sec = end->time.tv_sec - start->time.tv_sec;
  temp.tv_nsec = end->time.tv_nsec - start->time.tv_nsec;

  /* Borrow one second when the nanosecond part went negative. */
  if (temp.tv_nsec < 0) {
    temp.tv_sec -= 1;
    temp.tv_nsec += 1000000000;
  }
  return (double)temp.tv_sec * 1000.0 + (double)temp.tv_nsec * 1.0E-6;
#else
  /* Fetch the frequency first: the call may lazily estimate it and, as a
   * side effect, change clocks_units_scale. Operand evaluation order within
   * a single expression is unspecified, so the scale must only be read
   * after the call has completed. */
  const double cpufreq = (double)clocks_get_cpufreq();
  return elapsed(end->time, start->time) / cpufreq * clocks_units_scale;
#endif
}
/**
 * @brief Set the CPU frequency.
 *
 * This function should be called at least once to set the CPU frequency.
 * Passing 0 requests the builtin estimation instead of a fixed value.
 *
 * NOTE(review): an explicit non-zero frequency does not touch the unit
 * name/scale state; if an earlier estimate fell back to "ticks" that state
 * looks like it would persist -- confirm whether that is intended.
 *
 * @param freq the CPU frequency in Hz or 0 to estimate one.
 */
void clocks_set_cpufreq(unsigned long long freq) {
  /* Zero means "work it out yourself". */
  if (freq == 0) {
    clocks_estimate_cpufreq();
    return;
  }
  clocks_cpufreq = freq;
}
/**
 * @brief Get the CPU frequency in Hz.
 *
 * The frequency is estimated on demand when no value has been set yet.
 *
 * @result the CPU frequency.
 */
unsigned long long clocks_get_cpufreq() {
  /* If not already set, estimate it. */
  if (clocks_cpufreq == 0) {
    clocks_estimate_cpufreq();
  }
  return clocks_cpufreq;
}
/**
 * @brief Estimate the CPU frequency in Hz and store it in clocks_cpufreq.
 *
 * The techniques are tried in this order:
 *  - when HAVE_CLOCK_GETTIME is defined, count the ticks that pass during a
 *    clock-timed nanosleep() (this was the best method on i7). This step
 *    overwrites any previously stored frequency;
 *  - on Linux, if the frequency is still 0, read the value from the
 *    cpuinfo_max_freq file (probably an overestimate). The value is
 *    multiplied by 1000, i.e. the file is taken to be in kHz;
 *  - finally, if the frequency is still 0, use a value of 1 and switch the
 *    reported time units to ticks.
 */
static void clocks_estimate_cpufreq() {
#ifdef HAVE_CLOCK_GETTIME
  /* Try to time a nanosleep() in ticks. */
  struct clocks_time time1;
  struct clocks_time time2;
  struct timespec delay;
  delay.tv_sec = 0;
  delay.tv_nsec = SLEEPTIME;

  clocks_gettime(&time1);
  ticks tic = getticks();

  /* Could do some calculation, but constant_tsc should protect us. */
  nanosleep(&delay, NULL);

  clocks_gettime(&time2);
  ticks toc = getticks();

  /* Measured sleep in milliseconds. */
  double realsleep = clocks_diff(&time1, &time2);

  /* Only trust a strictly positive interval: the realtime clock may be
   * stepped while we sleep, and a zero or negative interval would give a
   * division by zero or a negative value converted to an unsigned type.
   * In that case leave the frequency alone so the fallbacks below apply. */
  if (realsleep > 0.0) {
    /* ticks per millisecond times 1000 gives ticks per second (Hz). */
    clocks_cpufreq =
        (unsigned long long)((double)(toc - tic) / realsleep * 1000.0);
  }
  clocks_units_index = 0;
  clocks_units_scale = 1000.0;
#endif

/* Look for the system value, if available. Tends to be too large. */
#ifdef __linux__
  if (clocks_cpufreq == 0) {
    FILE *file =
        fopen("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r");
    if (file != NULL) {
      unsigned long long maxfreq;
      if (fscanf(file, "%llu", &maxfreq) == 1) {
        clocks_cpufreq = maxfreq * 1000;
        clocks_units_index = 0;
        clocks_units_scale = 1000.0;
      }
      fclose(file);
    }
  }
#endif

  /* If all fails just report ticks in any times. */
  if (clocks_cpufreq == 0) {
    clocks_cpufreq = 1;
    clocks_units_index = 1;
    clocks_units_scale = 1.0;
  }
}
/**
 * @brief Return the difference between two ticks.
 *
 * Only an approximation as based on how well we have estimated the
 * rtc frequency. Should be good for machines that support constant_rtc
 * and clock_gettime().
 *
 * The value converted is the first argument minus the second (tic - toc).
 * NOTE(review): callers presumably pass the later sample first; with
 * tic = start and toc = end the interval would have the wrong sign --
 * confirm against the call sites.
 *
 * @param tic a number of ticks returned by the cycle.h getticks() function.
 * @param toc a number of ticks returned by the cycle.h getticks() function.
 *
 * @result the difference, in the units named by clocks_getunit().
 */
double clocks_diff_ticks(ticks tic, ticks toc) {
  const ticks delta = tic - toc;
  return clocks_from_ticks(delta);
}
/**
 * @brief Convert a number of ticks into milliseconds, if possible.
 *
 * Only an approximation as based on how well we have estimated the
 * rtc frequency. Should be good for machines that support constant_rtc
 * and clock_gettime(), and reasonable for most Linux machines, otherwise
 * ticks will just be returned. See clocks_getunit() for the actual units.
 *
 * @param tics a number of ticks returned by the cycle.h getticks() function.
 *
 * @result the milliseconds, if possible.
 */
double clocks_from_ticks(ticks tics) {
  /* Fetch the frequency first: the call may lazily estimate it and, as a
   * side effect, change clocks_units_scale. Operand evaluation order within
   * a single expression is unspecified, so the scale must only be read
   * after the call has completed. */
  const double cpufreq = (double)clocks_get_cpufreq();
  return ((double)tics / cpufreq * clocks_units_scale);
}
/**
 * @brief Return the name of the time units in use.
 *
 * Normally "ms" for milliseconds, but can be "ticks" when no conversion
 * factor is available.
 *
 * @result the current time units.
 */
const char *clocks_getunit() {
  const char *unit = clocks_units[clocks_units_index];
  return unit;
}
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2016 Peter W. Draper (p.w.draper@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#ifndef SWIFT_CLOCKS_H
#define SWIFT_CLOCKS_H
#include <time.h>
#include "cycle.h"
/* Struct to record a time for the clocks functions. The payload is a raw
 * tick count unless HAVE_CLOCK_GETTIME is defined, in which case it is a
 * struct timespec. */
struct clocks_time {
#ifndef HAVE_CLOCK_GETTIME
  ticks time;
#else
  struct timespec time;
#endif
};
/* Record the current time in *time. */
void clocks_gettime(struct clocks_time *time);
/* Interval between two recorded times (end minus start). */
double clocks_diff(struct clocks_time *start, struct clocks_time *end);
/* Name of the units used for reported times: "ms" or "ticks". */
const char *clocks_getunit();
/* Set the CPU frequency in Hz; pass 0 to have it estimated. */
void clocks_set_cpufreq(unsigned long long freq);
/* Get the CPU frequency in Hz, estimating it first if it is not yet set. */
unsigned long long clocks_get_cpufreq();
/* Convert a tick count into the units named by clocks_getunit(). */
double clocks_from_ticks(ticks tics);
/* Convert the tick difference (tic - toc) into those same units. */
double clocks_diff_ticks(ticks tic, ticks toc);
#endif /* SWIFT_CLOCKS_H */
......@@ -46,6 +46,7 @@
/* Local headers. */
#include "atomic.h"
#include "cell.h"
#include "clocks.h"
#include "cycle.h"
#include "debug.h"
#include "error.h"
......@@ -285,7 +286,6 @@ void engine_redistribute(struct engine *e) {
#endif
}
/**
* @brief Repartition the cells amongst the nodes.
*
......@@ -1116,7 +1116,7 @@ int engine_marktasks(struct engine *e) {
}
}
// message( "took %.3f ms." , (double)(getticks() - tic)/CPU_TPS*1000 );
// message( "took %.3f %s." , clocks_from_ticks(getticks() - tic), clocks_getunit());
/* All is well... */
return 0;
......@@ -1167,29 +1167,29 @@ void engine_rebuild(struct engine *e) {
/* Re-build the space. */
// tic = getticks();
space_rebuild(e->s, 0.0, e->nodeID == 0);
// message( "space_rebuild took %.3f ms." , (double)(getticks() -
// tic)/CPU_TPS*1000 );
// message( "space_rebuild took %.3f %s." ,
//clocks_from_ticks(getticks() - tic), clocks_getunit());
/* If in parallel, exchange the cell structure. */
#ifdef WITH_MPI
// tic = getticks();
engine_exchange_cells(e);
// message( "engine_exchange_cells took %.3f ms." , (double)(getticks() -
// tic)/CPU_TPS*1000 );
// message( "engine_exchange_cells took %.3f %s." ,
//clocks_from_ticks(getticks() - tic), clocks_getunit());
#endif
/* Re-build the tasks. */
// tic = getticks();
engine_maketasks(e);
// message( "engine_maketasks took %.3f ms." , (double)(getticks() -
// tic)/CPU_TPS*1000 );
// message( "engine_maketasks took %.3f %s." ,
//clocks_from_ticks(getticks() - tic), clocks_getunit());
/* Run through the tasks and mark as skip or not. */
// tic = getticks();
if (engine_marktasks(e))
error("engine_marktasks failed after space_rebuild.");
// message( "engine_marktasks took %.3f ms." , (double)(getticks() -
// tic)/CPU_TPS*1000 );
// message( "engine_marktasks took %.3f %s." ,
//clocks_from_ticks(getticks() - tic), clocks_getunit());
/* Print the status of the system */
engine_print(e);
......@@ -1210,8 +1210,8 @@ void engine_prepare(struct engine *e) {
/* Run through the tasks and mark as skip or not. */
// tic = getticks();
rebuild = (e->forcerebuild || engine_marktasks(e));
// message( "space_marktasks took %.3f ms." , (double)(getticks() -
// tic)/CPU_TPS*1000 );
// message( "space_marktasks took %.3f %s." ,
//clocks_from_ticks(getticks() - tic), clocks_getunit());
/* Collect the values of rebuild from all nodes. */
#ifdef WITH_MPI
......@@ -1221,8 +1221,8 @@ void engine_prepare(struct engine *e) {
MPI_SUCCESS)
error("Failed to aggregate the rebuild flag across nodes.");
rebuild = buff;
// message( "rebuild allreduce took %.3f ms." , (double)(getticks() -
// tic)/CPU_TPS*1000 );
// message( "rebuild allreduce took %.3f %s." ,
//clocks_from_ticks(getticks() - tic), clocks_getunit());
#endif
e->tic_step = getticks();
......@@ -1230,16 +1230,16 @@ void engine_prepare(struct engine *e) {
if (rebuild) {
// tic = getticks();
engine_rebuild(e);
// message( "engine_rebuild took %.3f ms." , (double)(getticks() -
// tic)/CPU_TPS*1000 );
// message( "engine_rebuild took %.3f %s." ,
//clocks_from_ticks(getticks() - tic), clocks_getunit());
}
/* Re-rank the tasks every now and then. */
if (e->tasks_age % engine_tasksreweight == 1) {
// tic = getticks();
scheduler_reweight(&e->sched);
// message( "scheduler_reweight took %.3f ms." , (double)(getticks() -