diff --git a/src/queue.c b/src/queue.c index 24e7ba85be6f853a30ff5c9bd7820e3ae11236cd..32fc06fdecdc8a4f6214e71983f9079dd3367412 100644 --- a/src/queue.c +++ b/src/queue.c @@ -36,6 +36,71 @@ /* Local headers. */ #include "atomic.h" #include "error.h" +#include "memswap.h" + +/** + * @brief Push the task at the given index up the heap until it is either at the + * top or smaller than its parent. + * + * @param q The task #queue. + * @param ind The index of the task to be sifted-down in the queue. + * + * @return The new index of the entry. + */ +int queue_bubble_up(struct queue *q, int ind) { + /* Set some pointers we will use often. */ + struct queue_entry *entries = q->entries; + const float w = entries[ind].weight; + + /* While we are not yet at the top of the heap... */ + while (ind > 0) { + /* Check if the parent is larger and bail if not.. */ + const int parent = (ind - 1) / 2; + if (w < entries[parent].weight) break; + + /* Parent is not larger, so swap. */ + memswap(&entries[ind], &entries[parent], sizeof(struct queue_entry)); + ind = parent; + } + + return ind; +} + +/** + * @brief Push the task at the given index down the heap until both its children + * have a smaller weight. + * + * @param q The task #queue. + * @param ind The index of the task to be sifted-down in the queue. + * + * @return The new index of the entry. + */ +int queue_sift_down(struct queue *q, int ind) { + /* Set some pointers we will use often. */ + struct queue_entry *entries = q->entries; + const int qcount = q->count; + const float w = entries[ind].weight; + + /* While we still have at least one child... */ + while (1) { + /* Check if we still have children. */ + int child = 2 * ind + 1; + if (child >= qcount) break; + + /* Which of both children is the largest? */ + if (child + 1 < qcount && entries[child + 1].weight > entries[child].weight) + child += 1; + + /* Do we want to swap with the largest child? */ + if (entries[child].weight > w) { + memswap(&entries[ind], &entries[child], sizeof(struct queue_entry)); + ind = child; + } else + break; + } + + return ind; +} /** * @brief Enqueue all tasks in the incoming DEQ. @@ -44,8 +109,7 @@ */ void queue_get_incoming(struct queue *q) { - int *tid = q->tid; - struct task *tasks = q->tasks; + struct queue_entry *entries = q->entries; /* Loop over the incoming DEQ. */ while (1) { @@ -60,33 +124,31 @@ void queue_get_incoming(struct queue *q) { /* Does the queue need to be grown? */ if (q->count == q->size) { - int *temp; + struct queue_entry *temp; q->size *= queue_sizegrow; - if ((temp = (int *)malloc(sizeof(int) * q->size)) == NULL) + if ((temp = (struct queue_entry *)malloc(sizeof(struct queue_entry) * + q->size)) == NULL) error("Failed to allocate new indices."); - memcpy(temp, tid, sizeof(int) * q->count); - free(tid); - q->tid = tid = temp; + memcpy(temp, entries, sizeof(struct queue_entry) * q->count); + free(entries); + q->entries = entries = temp; } /* Drop the task at the end of the queue. */ - tid[q->count] = offset; + entries[q->count].tid = offset; + entries[q->count].weight = q->tasks[offset].weight; q->count += 1; atomic_dec(&q->count_incoming); - /* Shuffle up. */ - for (int k = q->count - 1; k > 0; k = (k - 1) / 2) - if (tasks[tid[k]].weight > tasks[tid[(k - 1) / 2]].weight) { - int temp = tid[k]; - tid[k] = tid[(k - 1) / 2]; - tid[(k - 1) / 2] = temp; - } else - break; + /* Re-heap by bubbling up the new (last) element. */ + queue_bubble_up(q, q->count - 1); +#ifdef SWIFT_DEBUG_CHECK /* Check the queue's consistency. */ - /* for (int k = 1; k < q->count; k++) - if ( tasks[ tid[(k-1)/2] ].weight < tasks[ tid[k] ].weight ) - error( "Queue heap is disordered." ); */ + for (int k = 1; k < q->count; k++) + if (entries[(k - 1) / 2].weight < entries[k].weight) + error("Queue heap is disordered."); +#endif } } @@ -131,8 +193,9 @@ void queue_init(struct queue *q, struct task *tasks) { /* Allocate the task list if needed. */ q->size = queue_sizeinit; - if ((q->tid = (int *)malloc(sizeof(int) * q->size)) == NULL) - error("Failed to allocate queue tids."); + if ((q->entries = (struct queue_entry *)malloc(sizeof(struct queue_entry) * + q->size)) == NULL) + error("Failed to allocate queue entries."); /* Set the tasks pointer. */ q->tasks = tasks; @@ -185,109 +248,53 @@ struct task *queue_gettask(struct queue *q, const struct task *prev, } /* Set some pointers we will use often. */ - int *qtid = q->tid; + struct queue_entry *entries = q->entries; struct task *qtasks = q->tasks; const int old_qcount = q->count; - /* Data for the sliding window in which to try the task with the - best overlap with the previous task. */ - struct { - int ind, tid; - float score; - } window[queue_search_window]; - int window_count = 0; - int tid = -1; - int ind = -1; - /* Loop over the queue entries. */ - for (int k = 0; k < old_qcount; k++) { - if (k < queue_search_window) { - window[window_count].ind = k; - window[window_count].tid = qtid[k]; - window[window_count].score = task_overlap(prev, &qtasks[qtid[k]]); - window_count += 1; - } else { - /* Find the task with the largest overlap. */ - int ind_max = 0; - for (int i = 1; i < window_count; i++) - if (window[i].score > window[ind_max].score) ind_max = i; - - /* Try to lock that task. */ - if (task_lock(&qtasks[window[ind_max].tid])) { - tid = window[ind_max].tid; - ind = window[ind_max].ind; - // message("best task has overlap %f.", window[ind_max].score); - break; - - /* Otherwise, replace it with a new one from the queue. */ - } else { - window[ind_max].ind = k; - window[ind_max].tid = qtid[k]; - window[ind_max].score = task_overlap(prev, &qtasks[qtid[k]]); - } - } - } + int ind; + for (ind = 0; ind < old_qcount; ind++) { - /* If we didn't get a task, loop through whatever is left in the window. */ - if (tid < 0) { - while (window_count > 0) { - int ind_max = 0; - for (int i = 1; i < window_count; i++) - if (window[i].score > window[ind_max].score) ind_max = i; - if (task_lock(&qtasks[window[ind_max].tid])) { - tid = window[ind_max].tid; - ind = window[ind_max].ind; - // message("best task has overlap %f.", window[ind_max].score); - break; - } else { - window_count -= 1; - window[ind_max] = window[window_count]; - } + /* Try to lock the next task. */ + if (task_lock(&qtasks[entries[ind].tid])) break; + + /* Should we de-prioritize this task? */ + if ((1ULL << qtasks[entries[ind].tid].type) & + queue_lock_fail_reweight_mask) { + /* Scale the task's weight. */ + entries[ind].weight *= queue_lock_fail_reweight_factor; + + /* Send it down the binary heap. */ + if (queue_sift_down(q, ind) != ind) ind -= 1; } } /* Did we get a task? */ - if (ind >= 0) { + if (ind < old_qcount) { /* Another one bites the dust. */ const int qcount = q->count -= 1; /* Get a pointer on the task that we want to return. */ - res = &qtasks[tid]; + res = &qtasks[entries[ind].tid]; /* Swap this task with the last task and re-heap. */ - int k = ind; - if (k < qcount) { - qtid[k] = qtid[qcount]; - const float w = qtasks[qtid[k]].weight; - while (k > 0 && w > qtasks[qtid[(k - 1) / 2]].weight) { - int temp = q->tid[k]; - q->tid[k] = q->tid[(k - 1) / 2]; - q->tid[(k - 1) / 2] = temp; - k = (k - 1) / 2; - } - int i; - while ((i = 2 * k + 1) < qcount) { - if (i + 1 < qcount && - qtasks[qtid[i + 1]].weight > qtasks[qtid[i]].weight) - i += 1; - if (qtasks[qtid[i]].weight > w) { - int temp = qtid[i]; - qtid[i] = qtid[k]; - qtid[k] = temp; - k = i; - } else - break; - } + if (ind < qcount) { + entries[ind] = entries[qcount]; + ind = queue_bubble_up(q, ind); + ind = queue_sift_down(q, ind); } } else res = NULL; +#ifdef SWIFT_DEBUG_CHECKS /* Check the queue's consistency. */ - /* for ( k = 1 ; k < q->count ; k++ ) - if ( qtasks[ qtid[(k-1)/2] ].weight < qtasks[ qtid[k] ].weight ) - error( "Queue heap is disordered." ); */ + for (int k = 1; k < q->count; k++) + if (entries[(k - 1) / 2].weight < entries[k].weight) + error("Queue heap is disordered."); +#endif /* Release the task lock. */ if (lock_unlock(qlock) != 0) error("Unlocking the qlock failed.\n"); @@ -298,7 +305,7 @@ struct task *queue_gettask(struct queue *q, const struct task *prev, void queue_clean(struct queue *q) { - free(q->tid); + free(q->entries); free(q->tid_incoming); } @@ -322,7 +329,7 @@ void queue_dump(int nodeID, int index, FILE *file, struct queue *q) { /* Loop over the queue entries. */ for (int k = 0; k < q->count; k++) { - struct task *t = &q->tasks[q->tid[k]]; + struct task *t = &q->tasks[q->entries[k].tid]; fprintf(file, "%d %d %d %s %s %.2f\n", nodeID, index, k, taskID_names[t->type], subtaskID_names[t->subtype], t->weight); diff --git a/src/queue.h b/src/queue.h index e3c7b6f7b2059df14c14813b34946df2a1935c74..0576403bef8ed66dd408d40748435530155a7901 100644 --- a/src/queue.h +++ b/src/queue.h @@ -32,6 +32,12 @@ #define queue_incoming_size 10240 #define queue_struct_align 64 +/* Constants dealing with task de-priorization. */ +#define queue_lock_fail_reweight_factor 0.5 +/* #define queue_lock_fail_reweight_mask \ + ((1ULL << task_type_send) | (1ULL << task_type_recv)) */ +#define queue_lock_fail_reweight_mask ((1ULL << task_type_count) - 1) + /* Counters. */ enum { queue_counter_swap = 0, @@ -39,6 +45,17 @@ enum { }; extern int queue_counter[queue_counter_count]; +/** Struct containing a task offset and a weight, used to build the binary heap + * of tasks in the queue. */ +struct queue_entry { + /* The offset of the task in the task list. */ + int tid; + + /* The weight of the task. This is stored in queue as well since it may well + * be adjusted dynamically. */ + float weight; +}; + /** The queue struct. */ struct queue { @@ -51,8 +68,8 @@ struct queue { /* The actual tasks to which the indices refer. */ struct task *tasks; - /* The task indices. */ - int *tid; + /* The task indices and weights. */ + struct queue_entry *entries; /* DEQ for incoming tasks. */ int *tid_incoming;