Skip to content
Snippets Groups Projects
Commit 99a14298 authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Extract the local ranktags for searching, much faster but isn't the issue with...

Extract the local ranktags for searching. This is much faster, but it isn't the cause of the underlying speed issue; that seems to be related to the setup costs of the QPs.
parent 9c1f9b9c
No related branches found
No related tags found
1 merge request: !8 Draft: RDMA version with wrapped infinity calls
......@@ -252,6 +252,30 @@ static void *send_thread(void *arg) {
/* Queue of our sends. */
struct mpiuse_log_entry **send_queue = sends_queue[rank];
/* Extract the offset lists that we use. */
int nr = 0;
int size = (max_logs / 16 + 1);
size_t *ranktags = (size_t *)malloc(size * sizeof(size_t));
size_t *offsets = (size_t *)malloc(size * sizeof(size_t));
/* A tag that will match any subtype or tag with our destination and source rank. */
size_t matchranktag = toranktag(0, myrank, rank, 0);
for (size_t j = 0; j < max_logs; j++) {
size_t ranktag = ranktag_lists[INDEX3(MAX_NR_RANKS, nr_ranks, myrank, rank, j)];
if ((ranktag & matchranktag) == matchranktag) {
/* Keep this one. */
ranktags[nr] = ranktag;
offsets[nr] = ranktag_offsets[INDEX3(MAX_NR_RANKS, nr_ranks, myrank, rank, j)];
nr++;
if (nr >= size) {
size += (max_logs / 16 + 1);
ranktags = (size_t *)realloc(ranktags, size * sizeof(size_t));
offsets = (size_t *)realloc(offsets, size * sizeof(size_t));
}
}
}
for (int k = 0; k < nr_sends[rank]; k++) {
struct mpiuse_log_entry *log = send_queue[k];
......@@ -275,18 +299,14 @@ static void *send_thread(void *arg) {
/* Copy this to the registered memory. */
memcpy(sendBuffer->getData(), dataptr, tobytes(datasize));
/* Need to find the offset for this data in the remotes window. We match
* subtype, tag and rank. Need to search the ranktag_lists for our ranktag
* value. XXX bisection search if sorted? XXX */
/* Need to find the offset for this data in the remotes window. */
size_t ranktag = toranktag(log->subtype, log->rank, log->otherrank, log->tag);
log->offset = 0;
int found = 0;
for (size_t j = 0; j < max_logs; j++) {
if (ranktag_lists[INDEX3(MAX_NR_RANKS, nr_ranks, log->rank,
log->otherrank, j)] == ranktag) {
log->offset = ranktag_offsets[INDEX3(MAX_NR_RANKS, nr_ranks, log->rank,
log->otherrank, j)];
for (int j = 0; j < nr; j++) {
if (ranktags[j] == ranktag) {
log->offset = offsets[j];
found = 1;
break;
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment