Commit a5adf9bc authored by Aidan Chalk

Fixed test_matadd and the errors it found to do with send/recv tasks becoming visible to user code (rather than being solely contained in the library). This exposes what I believe are severe issues with load balancing and/or movement of data in cases where the initial layout would already be load balanced. A python script is still needed to generate task plots from this.
parent c8305199
@@ -77,5 +77,5 @@ test_qr_mpi_cblas_LDFLAGS = $(MPI_THREAD_LIBS)
 test_matadd_mpi_SOURCES = test_matadd_mpi.c
 test_matadd_mpi_CFLAGS = $(AM_CFLAGS) -DWITH_MPI
-test_madadd_mpi_LDADD = ../src/.libs/libquickschedMPI.a $(METIS_LIBS)
+test_matadd_mpi_LDADD = ../src/.libs/libquickschedMPI.a $(METIS_LIBS)
 test_matadd_mpi_LDFLAGS = $(MPI_THREAD_LIBS)
@@ -32,7 +32,7 @@
 #include <fenv.h>
 #include <mpi.h>
-#define NO_TASK_TIMERS
+#define TASK_TIMERS
 #define NO_LOAD_BALANCE_EXACT
 /* Local includes. */
@@ -61,7 +61,7 @@ void addMatrixTile(double *A, double *B, double *C){
 //Matrix is matsize*matsize tiles of TILESIZE*TILESIZE
-void test_matadd(int matsize,int nr_threads`){
+void test_matadd(int matsize,int nr_threads){
 struct qsched s;
@@ -92,9 +92,9 @@ void test_matadd(int matsize,int nr_threads`){
 local_B = malloc(sizeof(double) * numRows * matsize * TILESIZE*TILESIZE);
 local_C = malloc(sizeof(double) * numRows * matsize * TILESIZE*TILESIZE);
 if(local_A == NULL || local_B == NULL || local_C == NULL) error("Failed to allocate arrays");
-for(int i = 0; i < numRows*matsize*TILESIZE*TILESIZE){
-local_A = 2.0 * ((double)rand()) / RAND_MAX - 1.0;
-local_B = 2.0 * ((double)rand()) / RAND_MAX - 1.0;
+for(int i = 0; i < numRows*matsize*TILESIZE*TILESIZE; i++){
+local_A[i] = 2.0 * ((double)rand()) / RAND_MAX - 1.0;
+local_B[i] = 2.0 * ((double)rand()) / RAND_MAX - 1.0;
 }
 bzero(local_C,sizeof(double)*numRows*matsize*TILESIZE*TILESIZE);
@@ -111,24 +111,24 @@ void test_matadd(int matsize,int nr_threads`){
 for(int i = 0; i < numRows*matsize; i++){
 local_A_rid[i] = qsched_addres(&s, qsched_owner_none, sizeof(double) * TILESIZE * TILESIZE, (void**) &local_A_tiles[i]);
-memcpy(local_A_tiles[i], local_A[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
+memcpy(local_A_tiles[i], &local_A[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
 local_B_rid[i] = qsched_addres(&s, qsched_owner_none, sizeof(double) * TILESIZE * TILESIZE, (void**) &local_B_tiles[i]);
-memcpy(local_B_tiles[i], local_B[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
+memcpy(local_B_tiles[i], &local_B[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
 local_C_rid[i] = qsched_addres(&s, qsched_owner_none, sizeof(double) * TILESIZE * TILESIZE, (void**) &local_C_tiles[i]);
-memcpy(local_B_tiles[i], local_B[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
+memcpy(local_C_tiles[i], &local_C[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
 }
 message("Synchronizing resources.");
 qsched_sync_resources(&s);
 //Generate tasks
 qsched_task_t /* *local_tid = NULL,*/ tid_new = -1;
-long long int MPI_data[3];
+long long int MPI_Data[3];
 // local_tid = malloc(sizeof(qsched_task_t)*numRows*matsize);
 for(int i = 0; i < numRows*matsize; i++){
 MPI_Data[0] = local_A_rid[i];
 MPI_Data[1] = local_B_rid[i];
 MPI_Data[2] = local_C_rid[i];
-tid_new = qsched_addtask(&s, add_task, task_flag_none, MPI_data,
+tid_new = qsched_addtask(&s, add_task, task_flag_none, MPI_Data,
 sizeof(long long int) * 3, 200);
 qsched_addlock(&s, tid_new, local_C_rid[i]);
 qsched_adduse(&s, tid_new, local_A_rid[i]);
@@ -156,6 +156,33 @@ void test_matadd(int matsize,int nr_threads`){
 // Print off a hello world message
 printf("Hello world from processor rank = %i, count_ranks = %i\n",
 s.rank, s.count_ranks);
+#ifdef TASK_TIMERS
+//Each rank wants to loop through the tasks they executed and output the data, then synchronize.
+int j;
+FILE *file;
+if(s.rank == 0)
+{
+file = fopen("task_timers.tks", "w");
+fclose(file);
+}
+for(int i = 0; i < s.count_ranks; i++)
+{
+if(i == s.rank)
+{
+file = fopen("task_timers.tks", "a");
+for(j = 0; j < s.task_ranks[s.count_ranks]; j++)
+{
+if(s.tasks[j].node_executed == s.rank)
+{
+struct task *t = &s.tasks[j];
+fprintf(file, "%lli %i %llu %llu %i %i\n", t->id, t->type, t->task_start, t->task_finish, t->node_executed, t->thread_executed);
+}
+}
+fclose(file);
+}
+MPI_Barrier(s.comm);
+}
+#endif
 }
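
Each line written to task_timers.tks is "id type start finish node thread", matching the fprintf format in the block above, with the ranks appending their tasks in turn. As a starting point for the task-plot script the commit message asks for, here is a minimal C sketch (illustrative only: the 64-rank cap and the summary it prints are assumptions, not part of this commit) that reads the file back and reports per-rank task counts plus the overall tick range:

#include <stdio.h>

/* Minimal reader for task_timers.tks as written above:
   one "id type start finish node thread" record per line. */
int main(void) {
    FILE *file = fopen("task_timers.tks", "r");
    if (file == NULL) { fprintf(stderr, "Could not open task_timers.tks\n"); return 1; }
    long long id;
    int type, node, thread, counts[64] = {0}, seen = 0;
    unsigned long long start, finish, first = 0, last = 0;
    while (fscanf(file, "%lli %i %llu %llu %i %i",
                  &id, &type, &start, &finish, &node, &thread) == 6) {
        if (node >= 0 && node < 64) counts[node]++;   /* assumes fewer than 64 ranks */
        if (!seen || start < first) { first = start; seen = 1; }
        if (finish > last) last = finish;
    }
    fclose(file);
    for (int r = 0; r < 64; r++)
        if (counts[r] > 0) printf("rank %i ran %i tasks\n", r, counts[r]);
    printf("tick range: %llu to %llu\n", first, last);
    return 0;
}

Turning these records into an actual per-thread timeline plot (task_start/task_finish against thread_executed, one row per rank) is the piece the commit message says is still missing.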
@@ -147,7 +147,7 @@ void* qsched_getresdata( struct qsched *s, qsched_res_t res_id )
 }
 }
 if(s->res[getindex(res_id, s)].data == NULL)
-error("Data is NULL");
+error("Data is NULL for resource %lli",res_id);
 return s->res[getindex(res_id, s)].data;
 }
 #endif
@@ -3150,6 +3150,7 @@ for(i = 0; i < node_count; i++)
 if( METIS_PartGraphKway(&node_count, &one, edgelist_pos, edgelist_new, nodelist, NULL, edgelist_vwgt, &temp_count_ranks, NULL, NULL,options, objval, nodeIDs) != METIS_OK)
 error("Failed to partition\n");
 /*qsched_partition_random(s, nodelist, edgelist_new, node_count, nodeIDs );*/
+/*qsched_partition_roundrobin(s, nodelist, edgelist_new, node_count, nodeIDs);*/
 }
 toc = getticks();
 message("METIS_PartGraphKway took %lli (= %.3fms) ticks\n", toc-tic, (float)(toc-tic)/2.67e6);
@@ -3490,7 +3491,6 @@ if(tid == NULL)
 }
 /* Sort the tasks topologically. */
 for ( j = 0 , k = 0 ; k < count ; k++ )
 if ( s->tasks[k].wait == 0 ) {
 tid[j] = s->tasks[k].id;
@@ -3702,7 +3702,9 @@ void *qsched_pthread_run ( void *in ) {
 #ifndef WITH_MPI
 s->fun( t->type , &s->data[ t->data ] );
 #else
+if(t->type != task_type_send && t->type != task_type_recv){
 s->fun( s, t->type , &s->data[ t->data ] );
+}
 #endif
 /* Mark that task as done. */
 qsched_done( s , t );
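
The new guard keeps the library-internal send/recv task types away from the user-supplied runner, which in the test codes is a switch over the known task types that errors on anything else. A minimal sketch of such a runner for the matrix-add test, assuming the MPI calling convention s->fun( s, t->type , &s->data[ t->data ] ) visible above and the three resource handles packed into MPI_Data earlier in the commit (the function name and exact error text are illustrative, and the qsched/test headers are assumed to be included):

/* Illustrative runner matching s->fun( s, t->type , &s->data[ t->data ] ):
   data carries the A, B and C resource handles packed into MPI_Data. */
void matadd_runner(struct qsched *s, int type, void *data) {
    long long int *rid = (long long int *)data;
    switch (type) {
        case add_task:
            addMatrixTile((double *)qsched_getresdata(s, rid[0]),
                          (double *)qsched_getresdata(s, rid[1]),
                          (double *)qsched_getresdata(s, rid[2]));
            break;
        default:
            /* Without the guard above, send/recv tasks would land here. */
            error("Unknown task type.");
    }
}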