diff --git a/src/cuda_queue.cu b/src/cuda_queue.cu index 8c23ad39a49aeca0794749a164ea4a726a92a4a5..f30314efd6790ecef9147c1afcf151a05004d6fd 100644 --- a/src/cuda_queue.cu +++ b/src/cuda_queue.cu @@ -982,16 +982,24 @@ for(i = 0; i < s->count; i++) for(j = 0; j < t->nr_uses; j++) { - if(num_uses[t->uses[j]] == size_uses[t->uses[j]]) + int currentID = t->uses[j]; + struct res *current = &s->res[t->uses[j]]; + while( current->parent >= 0 && current->utask == s->res[current->parent].utask ) + { + currentID = current->parent; + current = &s->res[current->parent]; + } + if(num_uses[currentID] == size_uses[currentID]) { /* Stretch. */ - int* temp = (int*) malloc(sizeof(int) * size_uses[t->uses[j]] * 2 ); - memcpy( temp, usage_list[t->uses[j]], sizeof(int) * size_uses[t->uses[j]]); - free(usage_list[t->uses[j]]); - usage_list[t->uses[j]] = temp; - size_uses[t->uses[j]] *=2; + int* temp = (int*) malloc(sizeof(int) * size_uses[currentID] * 2 ); + memcpy( temp, usage_list[currentID], sizeof(int) * size_uses[currentID]); + free(usage_list[currentID]); + usage_list[currentID] = temp; + size_uses[currentID] *=2; } - usage_list[t->uses[j]][num_uses[t->uses[j]]++] = i; + + usage_list[currentID][num_uses[currentID]++] = i; } for(j = 0; j < t->nr_locks; j++) { @@ -999,16 +1007,23 @@ for(i = 0; i < s->count; i++) s->tasks[s->res[t->locks[j]].utask].wait_init +=1 ; deps_new_key[(t->unlocks - deps_new) + t->nr_unlocks] = i; t->nr_unlocks++; - if(num_uses[t->locks[j]] == size_uses[t->locks[j]]) + int currentID = t->locks[j]; + struct res *current = &s->res[t->locks[j]]; + while( current->parent >= 0 && current->utask == s->res[current->parent].utask ) + { + currentID = current->parent; + current = &s->res[current->parent]; + } + if(num_uses[currentID] == size_uses[currentID]) { /* Stretch. */ - int* temp = (int*) malloc(sizeof(int) * size_uses[t->locks[j]] * 2 ); - memcpy( temp, usage_list[t->locks[j]], sizeof(int) * size_uses[t->locks[j]]); - free(usage_list[t->locks[j]]); - usage_list[t->locks[j]] = temp; - size_uses[t->locks[j]] *=2; + int* temp = (int*) malloc(sizeof(int) * size_uses[currentID] * 2 ); + memcpy( temp, usage_list[currentID], sizeof(int) * size_uses[currentID]); + free(usage_list[currentID]); + usage_list[currentID] = temp; + size_uses[currentID] *=2; } - usage_list[t->locks[j]][num_uses[t->locks[j]]++] = i; + usage_list[currentID][num_uses[currentID]++] = i; } } @@ -1036,16 +1051,19 @@ for(i = 0; i < s->count_res; i++ ) int parent = s->res[ID].parent; struct res *resource = &s->res[ res[i] ]; -/* if(s->res[ID].task == s->res[parent].task) - continue;*/ + if(s->res[ID].task == s->res[parent].task) + continue; /* Loop through children if there are any. */ if(numChildren > 0) { /* Do unload task stuff first. */ + /* Set the pointer for this unload task */ if(!tasks_assigned[resource->utask]) { s->tasks[resource->utask].unlocks = &deps_new[s->count_deps]; s->tasks[resource->utask].nr_unlocks = 0; } + + /* Do we need to extend the array?*/ if(s->count_deps + numChildren > s->size_deps) { qsched_task_t *temp1, *temp2; @@ -1069,12 +1087,15 @@ for(i = 0; i < s->count_res; i++ ) deps_new = temp1; deps_new_key = temp2; } + + /* Loop over the children.*/ for(j = 0; j < numChildren; j++) { struct res *child = &s->res[ res[sorted[ ID ]+j] ]; - + /* If the childs unload task is not our unload task*/ if( child->utask != resource->utask ) { + /* Make our unload task unlock the childs unlock task.*/ s->tasks[resource->utask].unlocks[ s->tasks[resource->utask].nr_unlocks ] = child->utask; s->tasks[child->utask].wait_init += 1; deps_new_key[s->count_deps] = resource->utask; @@ -1088,6 +1109,7 @@ for(i = 0; i < s->count_res; i++ ) } /* Do load task stuff. */ + /* Set the pointer for this load task */ if(!tasks_assigned[resource->task]) { s->tasks[resource->task].unlocks = &deps_new[s->count_deps]; s->tasks[resource->task].nr_unlocks = 0; @@ -1097,8 +1119,11 @@ for(i = 0; i < s->count_res; i++ ) for(j = 0; j < numChildren; j++) { struct res *child = &s->res[ res[sorted[ ID ]+j] ]; + + /* If the child's utask is the same as the parents'. */ if( child->utask == resource->utask ) { + /* Extend if needed.*/ if( s->size_deps < s->count_deps + num_uses[res[ sorted[ ID ] + j ] ] ) { qsched_task_t *temp1, *temp2; @@ -1121,7 +1146,8 @@ for(i = 0; i < s->count_res; i++ ) free(deps_new_key); deps_new = temp1; deps_new_key = temp2; - } + } + /* The parent unlocks all the tasks that use the child resources. */ for(k = 0; k < num_uses[ res[ sorted[ ID ] + j ] ]; k++) { s->tasks[resource->task].unlocks[ s->tasks[resource->task].nr_unlocks ] = usage_list[ res[ sorted[ ID ] +j ] ][k]; @@ -1134,8 +1160,7 @@ for(i = 0; i < s->count_res; i++ ) } } -if(s->res[ID].task == s->res[parent].task) - continue; + if( s->size_deps < s->count_deps + 1 + num_uses[ res[ i ] ]) { qsched_task_t *temp1, *temp2;