Commit f8fb297f authored by Matthieu Schaller
Browse files

Better output of wallclock time when writing a snapshot. Slight improvement to...

Better output of wallclock time when writing a snapshot. Slight improvement to the task plotting scripts
parent e32fe263
......@@ -252,6 +252,8 @@ int main(int argc, char *argv[]) {
message("sizeof(struct part) is %4zi bytes.", sizeof(struct part));
message("sizeof(struct xpart) is %4zi bytes.", sizeof(struct xpart));
message("sizeof(struct gpart) is %4zi bytes.", sizeof(struct gpart));
message("sizeof(struct task) is %4zi bytes.", sizeof(struct task));
message("sizeof(struct cell) is %4zi bytes.", sizeof(struct cell));
}
/* How vocal are we ? */
......@@ -514,12 +516,12 @@ int main(int argc, char *argv[]) {
/* Open file and position at end. */
file_thread = fopen(dumpfile, "a");
fprintf(file_thread, " %03i 0 0 0 0 %lli 0 0 0 0\n", myrank,
e.tic_step);
fprintf(file_thread, " %03i 0 0 0 0 %lli %lli 0 0 0 0 %lli\n", myrank,
e.tic_step, e.toc_step, cpufreq);
int count = 0;
for (int l = 0; l < e.sched.nr_tasks; l++)
if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit) {
fprintf(file_thread, " %03i %i %i %i %i %lli %lli %i %i %i\n",
fprintf(file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %i\n",
myrank, e.sched.tasks[l].rid, e.sched.tasks[l].type,
e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL),
e.sched.tasks[l].tic, e.sched.tasks[l].toc,
......@@ -527,6 +529,10 @@ int main(int argc, char *argv[]) {
: 0,
(e.sched.tasks[l].cj != NULL) ? e.sched.tasks[l].cj->count
: 0,
(e.sched.tasks[l].ci != NULL) ? e.sched.tasks[l].ci->gcount
: 0,
(e.sched.tasks[l].cj != NULL) ? e.sched.tasks[l].cj->gcount
: 0,
e.sched.tasks[l].flags);
fflush(stdout);
count++;
......@@ -545,15 +551,20 @@ int main(int argc, char *argv[]) {
snprintf(dumpfile, 30, "thread_info-step%d.dat", j);
FILE *file_thread;
file_thread = fopen(dumpfile, "w");
/* Add some information to help with the plots */
fprintf(file_thread, " %i %i %i %i %lli %lli %i %i %i %lli\n", -2, -1, -1, 1,
e.tic_step, e.toc_step, 0, 0, 0, cpufreq);
for (int l = 0; l < e.sched.nr_tasks; l++)
if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit)
fprintf(
file_thread, " %i %i %i %i %lli %lli %i %i\n",
e.sched.tasks[l].rid, e.sched.tasks[l].type,
file_thread, " %i %i %i %i %lli %lli %i %i %i %i\n",
e.sched.tasks[l].last_rid, e.sched.tasks[l].type,
e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL),
e.sched.tasks[l].tic, e.sched.tasks[l].toc,
(e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->count,
(e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->count);
(e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->count,
(e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->gcount,
(e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->gcount);
fclose(file_thread);
#endif
}
......
......@@ -35,9 +35,6 @@ import pylab as pl
import numpy as np
import sys
# CPU ticks per second.
CPU_CLOCK = 2.7e9
# Basic plot configuration.
PLOT_PARAMS = {"axes.labelsize": 10,
"axes.titlesize": 10,
......@@ -108,7 +105,7 @@ infile = sys.argv[1]
outpng = sys.argv[2]
delta_t = 0
if len( sys.argv ) == 4:
delta_t = int(sys.argv[3]) * CPU_CLOCK / 1000
delta_t = int(sys.argv[3])
# Read input.
data = pl.loadtxt( infile )
......@@ -116,20 +113,31 @@ data = pl.loadtxt( infile )
nthread = int(max(data[:,0])) + 1
print "Number of threads:", nthread
# Recover the start and end time
full_step = data[0,:]
tic_step = int(full_step[4])
toc_step = int(full_step[5])
CPU_CLOCK = float(full_step[-1])
data = data[1:,:]
print "CPU frequency:", CPU_CLOCK / 1.e9
# Avoid start and end times of zero.
data = data[data[:,4] != 0]
data = data[data[:,5] != 0]
# Calculate the time range, it not given.
# Calculate the time range, if not given.
delta_t = delta_t * CPU_CLOCK / 1000
if delta_t == 0:
dt = max(data[:,5]) - min(data[:,4])
if dt > delta_t:
delta_t = dt
# Once more doing the real gather and plots this time.
start_t = min(data[:,4])
start_t = tic_step
data[:,4] -= start_t
data[:,5] -= start_t
end_t = (toc_step - start_t) / CPU_CLOCK * 1000
tasks = {}
tasks[-1] = []
......@@ -147,7 +155,7 @@ for line in range(num_lines):
tasks[thread][-1]["tic"] = tic
tasks[thread][-1]["toc"] = toc
tasks[thread][-1]["t"] = (toc + tic)/ 2
combtasks = {}
combtasks[-1] = []
for i in range(nthread):
......@@ -173,11 +181,11 @@ for thread in range(nthread):
lasttype = task["type"]
else:
combtasks[thread][-1]["toc"] = task["toc"]
typesseen = []
fig = pl.figure()
ax = fig.add_subplot(1,1,1)
ax.set_xlim(0, delta_t * 1.03 * 1000 / CPU_CLOCK)
ax.set_xlim(-delta_t * 0.03 * 1000 / CPU_CLOCK, delta_t * 1.03 * 1000 / CPU_CLOCK)
ax.set_ylim(0, nthread)
tictoc = np.zeros(2)
for i in range(nthread):
......@@ -222,6 +230,10 @@ ax.fill_between([0, 0], nthread+0.5, nthread + nrow + 0.5, facecolor="white")
ax.set_ylim(0, nthread + nrow + 1)
ax.legend(loc=1, shadow=True, mode="expand", ncol=5)
# Start and end of time-step
ax.plot([0, 0], [0, nthread + nrow + 1], 'k--', linewidth=1)
ax.plot([end_t, end_t], [0, nthread + nrow + 1], 'k--', linewidth=1)
ax.set_xlabel("Wall clock time [ms]")
ax.set_ylabel("Thread ID" )
ax.set_yticks(pl.array(range(nthread)), True)
......
......@@ -41,9 +41,6 @@ import pylab as pl
import numpy as np
import sys
# CPU ticks per second.
CPU_CLOCK = 2.7e9
# Basic plot configuration.
PLOT_PARAMS = {"axes.labelsize": 10,
"axes.titlesize": 10,
......@@ -115,11 +112,20 @@ infile = sys.argv[1]
outbase = sys.argv[2]
delta_t = 0
if len( sys.argv ) == 4:
delta_t = int(sys.argv[3]) * CPU_CLOCK / 1000
delta_t = int(sys.argv[3])
# Read input.
data = pl.loadtxt( infile )
# Recover the start and end time
full_step = data[0,:]
tic_step = int(full_step[4])
toc_step = int(full_step[5])
CPU_CLOCK = float(full_step[-1])
data = data[1:,:]
print "CPU frequency:", CPU_CLOCK / 1.e9
nranks = int(max(data[:,0])) + 1
print "Number of ranks:", nranks
nthread = int(max(data[:,1])) + 1
......@@ -132,6 +138,7 @@ sdata = sdata[sdata[:,6] != 0]
# Each rank can have different clock (compute node), but we want to use the
# same delta times range for comparisons, so we suck it up and take the hit of
# precalculating this, unless the user knows better.
delta_t = delta_t * CPU_CLOCK / 1000
if delta_t == 0:
for rank in range(nranks):
data = sdata[sdata[:,0] == rank]
......@@ -146,6 +153,7 @@ for rank in range(nranks):
start_t = min(data[:,5])
data[:,5] -= start_t
data[:,6] -= start_t
end_t = (toc_step - start_t) / CPU_CLOCK * 1000
tasks = {}
tasks[-1] = []
......@@ -193,7 +201,7 @@ for rank in range(nranks):
typesseen = []
fig = pl.figure()
ax = fig.add_subplot(1,1,1)
ax.set_xlim(0, delta_t * 1.03 * 1000 / CPU_CLOCK)
ax.set_xlim(-delta_t * 0.03 * 1000 / CPU_CLOCK, delta_t * 1.03 * 1000 / CPU_CLOCK)
ax.set_ylim(0, nthread)
tictoc = np.zeros(2)
for i in range(nthread):
......@@ -238,6 +246,10 @@ for rank in range(nranks):
ax.set_ylim(0, nthread + nrow + 1)
ax.legend(loc=1, shadow=True, mode="expand", ncol=5)
# Start and end of time-step
ax.plot([0, 0], [0, nthread + nrow + 1], 'k--', linewidth=1)
ax.plot([end_t, end_t], [0, nthread + nrow + 1], 'k--', linewidth=1)
ax.set_xlabel("Wall clock time [ms]")
ax.set_ylabel("Thread ID for MPI Rank " + str(rank) )
ax.set_yticks(pl.array(range(nthread)), True)
......
......@@ -1729,7 +1729,6 @@ void engine_prepare(struct engine *e) {
error("Failed to aggregate the rebuild flag across nodes.");
rebuild = buff;
#endif
e->tic_step = getticks();
/* Did this not go through? */
if (rebuild) {
......@@ -1984,6 +1983,7 @@ void engine_step(struct engine *e) {
double e_pot = 0.0, e_int = 0.0, e_kin = 0.0;
float mom[3] = {0.0, 0.0, 0.0};
float ang[3] = {0.0, 0.0, 0.0};
double snapshot_drift_time = 0.;
struct space *s = e->s;
TIMER_TIC2;
......@@ -1991,6 +1991,8 @@ void engine_step(struct engine *e) {
struct clocks_time time1, time2;
clocks_gettime(&time1);
e->tic_step = getticks();
/* Collect the cell data. */
for (int k = 0; k < s->nr_cells; k++)
if (s->cells[k].nodeID == e->nodeID) {
......@@ -2057,6 +2059,7 @@ void engine_step(struct engine *e) {
e->time = e->ti_current * e->timeBase + e->timeBegin;
e->timeOld = e->ti_old * e->timeBase + e->timeBegin;
e->timeStep = (e->ti_current - e->ti_old) * e->timeBase;
snapshot_drift_time = e->timeStep;
/* Drift everybody to the snapshot position */
engine_launch(e, e->nr_threads, 1 << task_type_drift, 0);
......@@ -2074,7 +2077,7 @@ void engine_step(struct engine *e) {
e->step += 1;
e->time = e->ti_current * e->timeBase + e->timeBegin;
e->timeOld = e->ti_old * e->timeBase + e->timeBegin;
e->timeStep = (e->ti_current - e->ti_old) * e->timeBase;
e->timeStep = (e->ti_current - e->ti_old) * e->timeBase + snapshot_drift_time;
/* Drift everybody */
engine_launch(e, e->nr_threads, 1 << task_type_drift, 0);
......@@ -2147,6 +2150,7 @@ void engine_step(struct engine *e) {
clocks_gettime(&time2);
e->wallclock_time = (float)clocks_diff(&time1, &time2);
e->toc_step = getticks();
}
/**
......
......@@ -161,8 +161,8 @@ struct engine {
struct proxy *proxies;
int nr_proxies, *proxy_ind;
/* Tic at the start of a step. */
ticks tic_step;
/* Tic/toc at the start/end of a step. */
ticks tic_step, toc_step;
/* Wallclock time of the last time-step */
float wallclock_time;
......
......@@ -1327,6 +1327,7 @@ void *runner_main(void *data) {
struct cell *ci = t->ci;
struct cell *cj = t->cj;
t->rid = r->cpuid;
t->last_rid = r->cpuid;
/* Different types of tasks... */
switch (t->type) {
......
......@@ -687,6 +687,8 @@ struct task *scheduler_addtask(struct scheduler *s, int type, int subtype,
t->tic = 0;
t->toc = 0;
t->nr_unlock_tasks = 0;
t->rid = -1;
t->last_rid = -1;
/* Init the lock. */
lock_init(&t->lock);
......
......@@ -85,7 +85,7 @@ struct task {
MPI_Request req;
#endif
int rid;
int rid, last_rid;
ticks tic, toc;
int nr_unlock_tasks;
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment