diff --git a/examples/main.c b/examples/main.c index e5417c628680bacdc8e896b36639aaa57aed1d28..19d2d7a7871200ba6e4902d5918a59511bf32b47 100644 --- a/examples/main.c +++ b/examples/main.c @@ -236,8 +236,8 @@ int main(int argc, char *argv[]) { "Executing a dry run. No i/o or time integration will be performed."); /* Report CPU frequency. */ + cpufreq = clocks_get_cpufreq(); if (myrank == 0) { - cpufreq = clocks_get_cpufreq(); message("CPU frequency used for tick conversion: %llu Hz", cpufreq); } @@ -252,6 +252,8 @@ int main(int argc, char *argv[]) { message("sizeof(struct part) is %4zi bytes.", sizeof(struct part)); message("sizeof(struct xpart) is %4zi bytes.", sizeof(struct xpart)); message("sizeof(struct gpart) is %4zi bytes.", sizeof(struct gpart)); + message("sizeof(struct task) is %4zi bytes.", sizeof(struct task)); + message("sizeof(struct cell) is %4zi bytes.", sizeof(struct cell)); } /* How vocal are we ? */ @@ -514,19 +516,23 @@ int main(int argc, char *argv[]) { /* Open file and position at end. */ file_thread = fopen(dumpfile, "a"); - fprintf(file_thread, " %03i 0 0 0 0 %lli 0 0 0 0\n", myrank, - e.tic_step); + fprintf(file_thread, " %03i 0 0 0 0 %lli %lli 0 0 0 0 %lli\n", myrank, + e.tic_step, e.toc_step, cpufreq); int count = 0; for (int l = 0; l < e.sched.nr_tasks; l++) if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit) { - fprintf(file_thread, " %03i %i %i %i %i %lli %lli %i %i %i\n", - myrank, e.sched.tasks[l].rid, e.sched.tasks[l].type, + fprintf(file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %i\n", + myrank, e.sched.tasks[l].last_rid, e.sched.tasks[l].type, e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL), e.sched.tasks[l].tic, e.sched.tasks[l].toc, (e.sched.tasks[l].ci != NULL) ? e.sched.tasks[l].ci->count : 0, (e.sched.tasks[l].cj != NULL) ? e.sched.tasks[l].cj->count : 0, + (e.sched.tasks[l].ci != NULL) ? e.sched.tasks[l].ci->gcount + : 0, + (e.sched.tasks[l].cj != NULL) ? e.sched.tasks[l].cj->gcount + : 0, e.sched.tasks[l].flags); fflush(stdout); count++; @@ -545,15 +551,20 @@ int main(int argc, char *argv[]) { snprintf(dumpfile, 30, "thread_info-step%d.dat", j); FILE *file_thread; file_thread = fopen(dumpfile, "w"); + /* Add some information to help with the plots */ + fprintf(file_thread, " %i %i %i %i %lli %lli %i %i %i %lli\n", -2, -1, -1, 1, + e.tic_step, e.toc_step, 0, 0, 0, cpufreq); for (int l = 0; l < e.sched.nr_tasks; l++) if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit) fprintf( - file_thread, " %i %i %i %i %lli %lli %i %i\n", - e.sched.tasks[l].rid, e.sched.tasks[l].type, + file_thread, " %i %i %i %i %lli %lli %i %i %i %i\n", + e.sched.tasks[l].last_rid, e.sched.tasks[l].type, e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL), e.sched.tasks[l].tic, e.sched.tasks[l].toc, (e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->count, - (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->count); + (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->count, + (e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->gcount, + (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->gcount); fclose(file_thread); #endif } diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py index c96d063e0bf1adf614a447f0dd524302a070e9dd..ffabdf06f9324fba35770e7fbba4cfadc8add770 100755 --- a/examples/plot_tasks.py +++ b/examples/plot_tasks.py @@ -35,9 +35,6 @@ import pylab as pl import numpy as np import sys -# CPU ticks per second. -CPU_CLOCK = 2.7e9 - # Basic plot configuration. PLOT_PARAMS = {"axes.labelsize": 10, "axes.titlesize": 10, @@ -108,7 +105,7 @@ infile = sys.argv[1] outpng = sys.argv[2] delta_t = 0 if len( sys.argv ) == 4: - delta_t = int(sys.argv[3]) * CPU_CLOCK / 1000 + delta_t = int(sys.argv[3]) # Read input. data = pl.loadtxt( infile ) @@ -116,20 +113,31 @@ data = pl.loadtxt( infile ) nthread = int(max(data[:,0])) + 1 print "Number of threads:", nthread +# Recover the start and end time +full_step = data[0,:] +tic_step = int(full_step[4]) +toc_step = int(full_step[5]) +CPU_CLOCK = float(full_step[-1]) +data = data[1:,:] + +print "CPU frequency:", CPU_CLOCK / 1.e9 + # Avoid start and end times of zero. data = data[data[:,4] != 0] data = data[data[:,5] != 0] -# Calculate the time range, it not given. +# Calculate the time range, if not given. +delta_t = delta_t * CPU_CLOCK / 1000 if delta_t == 0: dt = max(data[:,5]) - min(data[:,4]) if dt > delta_t: delta_t = dt # Once more doing the real gather and plots this time. -start_t = min(data[:,4]) +start_t = tic_step data[:,4] -= start_t data[:,5] -= start_t +end_t = (toc_step - start_t) / CPU_CLOCK * 1000 tasks = {} tasks[-1] = [] @@ -147,7 +155,7 @@ for line in range(num_lines): tasks[thread][-1]["tic"] = tic tasks[thread][-1]["toc"] = toc tasks[thread][-1]["t"] = (toc + tic)/ 2 - + combtasks = {} combtasks[-1] = [] for i in range(nthread): @@ -173,11 +181,11 @@ for thread in range(nthread): lasttype = task["type"] else: combtasks[thread][-1]["toc"] = task["toc"] - + typesseen = [] fig = pl.figure() ax = fig.add_subplot(1,1,1) -ax.set_xlim(0, delta_t * 1.03 * 1000 / CPU_CLOCK) +ax.set_xlim(-delta_t * 0.03 * 1000 / CPU_CLOCK, delta_t * 1.03 * 1000 / CPU_CLOCK) ax.set_ylim(0, nthread) tictoc = np.zeros(2) for i in range(nthread): @@ -222,6 +230,10 @@ ax.fill_between([0, 0], nthread+0.5, nthread + nrow + 0.5, facecolor="white") ax.set_ylim(0, nthread + nrow + 1) ax.legend(loc=1, shadow=True, mode="expand", ncol=5) +# Start and end of time-step +ax.plot([0, 0], [0, nthread + nrow + 1], 'k--', linewidth=1) +ax.plot([end_t, end_t], [0, nthread + nrow + 1], 'k--', linewidth=1) + ax.set_xlabel("Wall clock time [ms]") ax.set_ylabel("Thread ID" ) ax.set_yticks(pl.array(range(nthread)), True) diff --git a/examples/plot_tasks_MPI.py b/examples/plot_tasks_MPI.py index ae84b0177bfa01d4bb4c2c9c3e44c088b8ae7776..cf591028b6dc3f724847b52c9efac4355d27f87e 100755 --- a/examples/plot_tasks_MPI.py +++ b/examples/plot_tasks_MPI.py @@ -41,9 +41,6 @@ import pylab as pl import numpy as np import sys -# CPU ticks per second. -CPU_CLOCK = 2.7e9 - # Basic plot configuration. PLOT_PARAMS = {"axes.labelsize": 10, "axes.titlesize": 10, @@ -115,11 +112,20 @@ infile = sys.argv[1] outbase = sys.argv[2] delta_t = 0 if len( sys.argv ) == 4: - delta_t = int(sys.argv[3]) * CPU_CLOCK / 1000 - + delta_t = int(sys.argv[3]) + # Read input. data = pl.loadtxt( infile ) +# Recover the start and end time +full_step = data[0,:] +tic_step = int(full_step[5]) +toc_step = int(full_step[6]) +CPU_CLOCK = float(full_step[-1]) + +print "CPU frequency:", CPU_CLOCK / 1.e9 + + nranks = int(max(data[:,0])) + 1 print "Number of ranks:", nranks nthread = int(max(data[:,1])) + 1 @@ -132,6 +138,7 @@ sdata = sdata[sdata[:,6] != 0] # Each rank can have different clock (compute node), but we want to use the # same delta times range for comparisons, so we suck it up and take the hit of # precalculating this, unless the user knows better. +delta_t = delta_t * CPU_CLOCK / 1000 if delta_t == 0: for rank in range(nranks): data = sdata[sdata[:,0] == rank] @@ -143,16 +150,22 @@ if delta_t == 0: for rank in range(nranks): data = sdata[sdata[:,0] == rank] - start_t = min(data[:,5]) + full_step = data[0,:] + tic_step = int(full_step[5]) + toc_step = int(full_step[6]) + data = data[1:,:] + + start_t = tic_step data[:,5] -= start_t data[:,6] -= start_t + end_t = (toc_step - start_t) / CPU_CLOCK * 1000 tasks = {} tasks[-1] = [] for i in range(nthread): tasks[i] = [] - num_lines = pl.size(data) / 10 + num_lines = pl.shape(data)[0] for line in range(num_lines): thread = int(data[line,1]) tasks[thread].append({}) @@ -193,7 +206,7 @@ for rank in range(nranks): typesseen = [] fig = pl.figure() ax = fig.add_subplot(1,1,1) - ax.set_xlim(0, delta_t * 1.03 * 1000 / CPU_CLOCK) + ax.set_xlim(-delta_t * 0.03 * 1000 / CPU_CLOCK, delta_t * 1.03 * 1000 / CPU_CLOCK) ax.set_ylim(0, nthread) tictoc = np.zeros(2) for i in range(nthread): @@ -238,6 +251,10 @@ for rank in range(nranks): ax.set_ylim(0, nthread + nrow + 1) ax.legend(loc=1, shadow=True, mode="expand", ncol=5) + # Start and end of time-step + ax.plot([0, 0], [0, nthread + nrow + 1], 'k--', linewidth=1) + ax.plot([end_t, end_t], [0, nthread + nrow + 1], 'k--', linewidth=1) + ax.set_xlabel("Wall clock time [ms]") ax.set_ylabel("Thread ID for MPI Rank " + str(rank) ) ax.set_yticks(pl.array(range(nthread)), True) diff --git a/src/engine.c b/src/engine.c index 76243b1e481191c095d223a6db973aaa82c227a0..819e6907c5e62864fb69517502143101d90d442c 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1729,7 +1729,6 @@ void engine_prepare(struct engine *e) { error("Failed to aggregate the rebuild flag across nodes."); rebuild = buff; #endif - e->tic_step = getticks(); /* Did this not go through? */ if (rebuild) { @@ -1984,6 +1983,7 @@ void engine_step(struct engine *e) { double e_pot = 0.0, e_int = 0.0, e_kin = 0.0; float mom[3] = {0.0, 0.0, 0.0}; float ang[3] = {0.0, 0.0, 0.0}; + double snapshot_drift_time = 0.; struct space *s = e->s; TIMER_TIC2; @@ -1991,6 +1991,8 @@ void engine_step(struct engine *e) { struct clocks_time time1, time2; clocks_gettime(&time1); + e->tic_step = getticks(); + /* Collect the cell data. */ for (int k = 0; k < s->nr_cells; k++) if (s->cells[k].nodeID == e->nodeID) { @@ -2057,6 +2059,7 @@ void engine_step(struct engine *e) { e->time = e->ti_current * e->timeBase + e->timeBegin; e->timeOld = e->ti_old * e->timeBase + e->timeBegin; e->timeStep = (e->ti_current - e->ti_old) * e->timeBase; + snapshot_drift_time = e->timeStep; /* Drift everybody to the snapshot position */ engine_launch(e, e->nr_threads, 1 << task_type_drift, 0); @@ -2074,7 +2077,7 @@ void engine_step(struct engine *e) { e->step += 1; e->time = e->ti_current * e->timeBase + e->timeBegin; e->timeOld = e->ti_old * e->timeBase + e->timeBegin; - e->timeStep = (e->ti_current - e->ti_old) * e->timeBase; + e->timeStep = (e->ti_current - e->ti_old) * e->timeBase + snapshot_drift_time; /* Drift everybody */ engine_launch(e, e->nr_threads, 1 << task_type_drift, 0); @@ -2147,6 +2150,7 @@ void engine_step(struct engine *e) { clocks_gettime(&time2); e->wallclock_time = (float)clocks_diff(&time1, &time2); + e->toc_step = getticks(); } /** diff --git a/src/engine.h b/src/engine.h index 9ef7d57599d30aad5e8000e64148812493299d23..cc6ed9bb038667d4bd548f33dafad07176be0750 100644 --- a/src/engine.h +++ b/src/engine.h @@ -161,8 +161,8 @@ struct engine { struct proxy *proxies; int nr_proxies, *proxy_ind; - /* Tic at the start of a step. */ - ticks tic_step; + /* Tic/toc at the start/end of a step. */ + ticks tic_step, toc_step; /* Wallclock time of the last time-step */ float wallclock_time; diff --git a/src/runner.c b/src/runner.c index 3349fa35138c717d0994d6cbcce16657641a5a27..5c79c5fe08af41306d4f59a41a213fd4ceed105a 100644 --- a/src/runner.c +++ b/src/runner.c @@ -1327,6 +1327,7 @@ void *runner_main(void *data) { struct cell *ci = t->ci; struct cell *cj = t->cj; t->rid = r->cpuid; + t->last_rid = r->cpuid; /* Different types of tasks... */ switch (t->type) { diff --git a/src/scheduler.c b/src/scheduler.c index d1d343240b37f5afd5f41fecacf106b0e85f726f..278b1d0cedb7dde293bda1765120111e1be27903 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -687,6 +687,8 @@ struct task *scheduler_addtask(struct scheduler *s, int type, int subtype, t->tic = 0; t->toc = 0; t->nr_unlock_tasks = 0; + t->rid = -1; + t->last_rid = -1; /* Init the lock. */ lock_init(&t->lock); diff --git a/src/task.h b/src/task.h index 25cc886f4b38456a0431fb6c7d0b7b1864053dd9..3e6bdc7370b005b32bafcbb20dd2ddbf807996ae 100644 --- a/src/task.h +++ b/src/task.h @@ -85,7 +85,7 @@ struct task { MPI_Request req; #endif - int rid; + int rid, last_rid; ticks tic, toc; int nr_unlock_tasks;