diff --git a/mpiuse.c b/mpiuse.c index ed6007a3a7883df053b062f1ce495e7eb73fa52f..6bd7b3116e0910b152e49112be86057bad80a0f6 100644 --- a/mpiuse.c +++ b/mpiuse.c @@ -204,15 +204,18 @@ void mpiuse_dump_logs(int nranks, const char *dumpfile) { fd = fopen(dumpfile, "a"); /* And append our logs. Note log->tic is not necessarily from this - machine, so the conversion to ms may be suspect. */ + * machine, so the conversion to ms may be suspect. We also rebase a + * version to match the expected injection times for this new run. */ size_t nlogs = mpiuse_log_count; + ticks basetics = 0; for (size_t k = 0; k < nlogs; k++) { struct mpiuse_log_entry *log = &mpiuse_log[k]; if (log->rank == myrank && log->endtic > 0) { + if (basetics == 0) basetics = log->tic; fprintf(fd, "%lld %.4f %.4f %.4f %.6f %d %d %d %d %d %d %zd %d %.4f %.6f " "%.6f\n", - log->tic, clocks_from_ticks(log->tic), + log->tic, clocks_from_ticks(log->tic - basetics), clocks_from_ticks(log->injtic - clocks_start_ticks), clocks_from_ticks(log->endtic - clocks_start_ticks), clocks_from_ticks(log->endtic - log->injtic), log->step, diff --git a/swiftmpistepsim.c b/swiftmpistepsim.c index ab8088413cf954ece2c479e0361c3f1569b11501..6181b28f6101adfe1c73c05d68ad9ad7c8e866db 100644 --- a/swiftmpistepsim.c +++ b/swiftmpistepsim.c @@ -78,7 +78,6 @@ static void injection_runner(int qid) { ticks basetic = reqs[0]->tic; ticks looptics = 0; double deadtime = 0.0; - struct timespec sleep; sleep.tv_sec = 0; @@ -98,8 +97,6 @@ static void injection_runner(int qid) { * CPU frequency. Note nanosleep is not very accurate and seems to have * a base line at 50us using tests on Durham COSMA. */ if (dt > looptics) { - struct timespec sleep; - sleep.tv_sec = 0; /* Remember to be fair and remove the looptics, then convert to * nanoseconds. */ @@ -155,12 +152,12 @@ static void injection_runner(int qid) { ind_req[qid]++; /* Set looptics on the first pass. Assumes MPI_Isend and MPI_Irecv are - * equally timed. Note we include a nanosleep, they are slow and a fudge factor. */ + * equally timed. Note we include a nanosleep, they are slow. */ if (looptics == 0 && usetics) { sleep.tv_nsec = 1; nanosleep(&sleep, NULL); - looptics = (getticks() - starttics) * 2; - //if (verbose) + looptics = (getticks() - starttics); + if (verbose) message("injection loop took %.3f %s.", clocks_from_ticks(looptics), clocks_getunit()); } @@ -380,6 +377,12 @@ static void pick_logs(void) { /* Sort into increasing time. */ qsort(reqs, nreqs, sizeof(struct mpiuse_log_entry *), cmp_logs); + /* Check. */ + for (int k = 0; k < nreqs - 1; k++) { + if (reqs[k]->tic > reqs[k+1]->tic) + message("reqs_queue: %lld > %lld", reqs[k]->tic, reqs[k+1]->tic); + } + /* And partition into queues for injection. Use interleave pick so that * close in time injects are on different queues. */ reqs_queue[0] = (struct mpiuse_log_entry **)malloc( @@ -476,7 +479,8 @@ int main(int argc, char *argv[]) { /* Each rank requires its own queue, so extract them. */ pick_logs(); - /* Time to start time. */ + /* Time to start time. Try to make it synchronous across the ranks. */ + MPI_Barrier(MPI_COMM_WORLD); clocks_set_cpufreq(0); if (myrank == 0) { message("Start of MPI tests");