diff --git a/swiftmpiproxies.c b/swiftmpiproxies.c index e05c9efe92c8148dcb5b3efe2a41af84c5f76093..9d48671ccdd088c0d24944c770394c79155e8acc 100644 --- a/swiftmpiproxies.c +++ b/swiftmpiproxies.c @@ -88,11 +88,40 @@ static int datacheck_test(int offset, size_t size, void *data) { unsigned char *p = (unsigned char *)data; for (size_t i = 0; i < size; i++) { - if (p[i] != fill) return 0; + if (p[i] != fill) { + if (verbose) { + message("%d: %d != %d", offset, p[i], fill); + fflush(stdout); + } + return 0; + } } return 1; } +/** + * @brief check a data area reporting some statistics about the content. + * + * Assumes datacheck_test() has already failed. + * + * @param size size of data in bytes. + * @param data the data to check. + */ +static void datacheck_fulltest(size_t size, void *data) { + + unsigned char *p = (unsigned char *)data; + double sum = 0.0; + unsigned char pmin = 255; + unsigned char pmax = 0; + for (size_t i = 0; i < size; i++) { + sum += p[i]; + if (p[i] > pmax) pmax = p[i]; + if (p[i] < pmin) pmin = p[i]; + } + message("sum: %.2f, mean: %.2f, min: %d, max: %d", sum, sum / (double)size, + pmin, pmax); +} + /** * @brief Pick out the relevant logging data for our rank, i.e. all * activations of sends and recvs. We ignore the original completion logs, @@ -172,7 +201,7 @@ int main(int argc, char *argv[]) { if (res != MPI_SUCCESS) error("Call to MPI_Comm_rank failed with error %i.", res); - /* Handle the command-line, we expect a mpiuse data file to read and + /* Handle the command-line, we expect a mpiuse data file to read and * various options. */ int opt; while ((opt = getopt(argc, argv, "vn:")) != -1) { @@ -201,7 +230,7 @@ int main(int argc, char *argv[]) { /* Now we read the SWIFT MPI logger output that defines the communcations we * will undertake. Note this has all ranks for a single step, SWIFT outputs * one MPI log per rank per step, so you need to combine all ranks from a - * step. XXX also extract proxy related communications XXX */ + * step. 
*/ mpiuse_log_restore(infile); int nranks = mpiuse_nr_ranks(); @@ -226,27 +255,27 @@ int main(int argc, char *argv[]) { if (myrank == 0) message("*** Proxy simulation exchange loop: %d ***", nloop); - /* XXX note in SWIFT we use the threadpool to launch these. That may - * matter. */ + /* Note in SWIFT we use the threadpool to launch these. */ for (int k = 0; k < nr_send_pcells; k++) { struct mpiuse_log_entry *log = send_pcells[k]; /* Need to regenerate the tags for each other communication type. */ int basetag = myrank * proxy_tag_shift; - /* Start Isend counts of pcells. Really just the size of the buffer we're - * about to send, SWIFT sends the count. */ + /* Start Isend counts of pcells. Really just the size of the buffer + * we're about to send, SWIFT sends the count. */ int size = log->size; - res = - MPI_Isend(&size, 1, MPI_INT, log->otherrank, basetag + proxy_tag_count, - MPI_COMM_WORLD, &req_send_counts[k]); + + res = MPI_Isend(&size, 1, MPI_INT, log->otherrank, + basetag + proxy_tag_count, + MPI_COMM_WORLD, &req_send_counts[k]); if (res != MPI_SUCCESS) error("Counts MPI_Isend failed."); - /* Start Isend of pcells. */ + /* Start Isend of pcells, filling the data with a pattern for checking + * on arrival. */ log->data = calloc(log->size, 1); - - /* Fill data with a pattern for checking on arrival. */ datacheck_fill(0, log->size, log->data); + res = MPI_Isend(log->data, log->size, MPI_BYTE, log->otherrank, basetag + proxy_tag_cells, MPI_COMM_WORLD, &req_pcells_out[k]); @@ -254,6 +283,7 @@ int main(int argc, char *argv[]) { /* Start Irecv counts of pcells from other rank. 
*/ basetag = log->otherrank * proxy_tag_shift; + res = MPI_Irecv(&pcells_size[k], 1, MPI_INT, log->otherrank, basetag + proxy_tag_count, MPI_COMM_WORLD, &req_recv_counts[k]); @@ -268,6 +298,7 @@ int main(int argc, char *argv[]) { for (int k = 0; k < nr_send_pcells; k++) { int pid = MPI_UNDEFINED; MPI_Status status; + res = MPI_Waitany(nr_send_pcells, req_recv_counts, &pid, &status); if (res != MPI_SUCCESS || pid == MPI_UNDEFINED) error("MPI_Waitany failed."); @@ -280,36 +311,42 @@ int main(int argc, char *argv[]) { /* Fill data with a pattern for checking when overwritten. */ datacheck_fill(1, pcells_size[pid], pcells_in[pid]); - res = MPI_Irecv(pcells_in[pid], pcells_size[pid], MPI_BYTE, log->otherrank, - basetag + proxy_tag_cells, MPI_COMM_WORLD, - &req_pcells_in[pid]); + res = MPI_Irecv(pcells_in[pid], pcells_size[pid], MPI_BYTE, + log->otherrank, basetag + proxy_tag_cells, + MPI_COMM_WORLD, &req_pcells_in[pid]); if (res != MPI_SUCCESS) error("Pcell MPI_Irecv failed."); } - message("All proxy cell counts have arrived"); + message("All proxy cell counts have arrived, pcells irecvs are launched"); /* Waitall for all Isend counts to complete. */ + res = MPI_Waitall(nr_send_pcells, req_send_counts, MPI_STATUSES_IGNORE); if (res != MPI_SUCCESS) error("Waitall for counts Isend failed."); + message("All sends of counts have completed"); /* Now wait for the pcell irecvs to complete, so we receive the pcells, * which would be unpacked in SWIFT. */ for (int k = 0; k < nr_send_pcells; k++) { int pid = MPI_UNDEFINED; MPI_Status status; + res = MPI_Waitany(nr_send_pcells, req_pcells_in, &pid, &status); if (res != MPI_SUCCESS || pid == MPI_UNDEFINED) error("MPI_Waitany failed."); /* Check the data received is correct. 
*/ if (!datacheck_test(0, pcells_size[pid], pcells_in[pid])) { - if (!datacheck_test(1, pcells_size[pid], pcells_in[pid])) { - error("Received data is not modified"); + message("Received data is not correct"); + datacheck_fulltest(pcells_size[pid], pcells_in[pid]); + if (datacheck_test(1, pcells_size[pid], pcells_in[pid])) { + error("Received data is not modified on receive"); } else { error("Received data is corrupt"); } } else { - message("Received data is correct"); + if (verbose) + message("Received data is correct"); } free(pcells_in[pid]); pcells_in[pid] = NULL; @@ -319,14 +356,17 @@ int main(int argc, char *argv[]) { /* Waitall for Isend of pcells to complete. */ res = MPI_Waitall(nr_send_pcells, req_pcells_out, MPI_STATUSES_IGNORE); if (res != MPI_SUCCESS) error("Waitall for pcells Isend failed."); + message("All sends of pcells have completed"); - /* Check data is unmodified. */ + /* Check data is unmodified while being offloaded. */ for (int k = 0; k < nr_send_pcells; k++) { struct mpiuse_log_entry *log = send_pcells[k]; if (!datacheck_test(0, log->size, log->data)) { + datacheck_fulltest(log->size, log->data); error("Sent data has been corrupted"); } else { - message("Sent data is correct"); + if (verbose) + message("Sent data is correct"); } free(log->data); log->data = NULL; @@ -339,7 +379,7 @@ int main(int argc, char *argv[]) { if (res != MPI_SUCCESS) error("call to MPI_Finalize failed with error %i.", res); - if (myrank == 0) message("Bye"); + if (myrank == 0) message("All done, no errors detected"); return 0; }