diff --git a/examples/Makefile.am b/examples/Makefile.am
index be88cc27d0485962b88be2dec2593928e9a6f699..5e968e45169ff93ab471379c4db1390dc3632050 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -77,5 +77,5 @@ test_qr_mpi_cblas_LDFLAGS = $(MPI_THREAD_LIBS)
 
 test_matadd_mpi_SOURCES = test_matadd_mpi.c
 test_matadd_mpi_CFLAGS = $(AM_CFLAGS) -DWITH_MPI
-test_madadd_mpi_LDADD = ../src/.libs/libquickschedMPI.a $(METIS_LIBS)
+test_matadd_mpi_LDADD = ../src/.libs/libquickschedMPI.a $(METIS_LIBS)
 test_matadd_mpi_LDFLAGS = $(MPI_THREAD_LIBS)
diff --git a/examples/test_matadd_mpi.c b/examples/test_matadd_mpi.c
index f7f8e707cd9e417d1ceffe9490887b74dd00587a..0a90c108428db9781eb62e71736ef5ced48ee119 100644
--- a/examples/test_matadd_mpi.c
+++ b/examples/test_matadd_mpi.c
@@ -32,7 +32,7 @@
 #include <fenv.h>
 #include <mpi.h>
 
-#define NO_TASK_TIMERS
+#define TASK_TIMERS
 #define NO_LOAD_BALANCE_EXACT
 
 /* Local includes. */
@@ -61,7 +61,7 @@ void addMatrixTile(double *A, double *B, double *C){
 
 
 //Matrix is matsize*matsize tiles of TILESIZE*TILESIZE
-void test_matadd(int matsize,int nr_threads`){
+void test_matadd(int matsize,int nr_threads){
 
     struct qsched s;
 
@@ -92,9 +92,9 @@ void test_matadd(int matsize,int nr_threads`){
     local_B = malloc(sizeof(double) * numRows * matsize * TILESIZE*TILESIZE);
     local_C = malloc(sizeof(double) * numRows * matsize * TILESIZE*TILESIZE);
     if(local_A == NULL || local_B == NULL || local_C == NULL) error("Failed to allocate arrays");
-    for(int i = 0; i < numRows*matsize*TILESIZE*TILESIZE){
-        local_A = 2.0 * ((double)rand()) / RAND_MAX - 1.0;
-        local_B = 2.0 * ((double)rand()) / RAND_MAX - 1.0;
+    for(int i = 0; i < numRows*matsize*TILESIZE*TILESIZE; i++){
+        local_A[i] = 2.0 * ((double)rand()) / RAND_MAX - 1.0;
+        local_B[i] = 2.0 * ((double)rand()) / RAND_MAX - 1.0;
     }
     bzero(local_C,sizeof(double)*numRows*matsize*TILESIZE*TILESIZE);
 
@@ -111,24 +111,24 @@ void test_matadd(int matsize,int nr_threads`){
     for(int i = 0; i < numRows*matsize; i++){
         local_A_rid[i] = qsched_addres(&s, qsched_owner_none, sizeof(double) * TILESIZE * TILESIZE, (void**) &local_A_tiles[i]);
-        memcpy(local_A_tiles[i], local_A[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
+        memcpy(local_A_tiles[i], &local_A[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
         local_B_rid[i] = qsched_addres(&s, qsched_owner_none, sizeof(double) * TILESIZE * TILESIZE, (void**) &local_B_tiles[i]);
-        memcpy(local_B_tiles[i], local_B[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
+        memcpy(local_B_tiles[i], &local_B[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
         local_C_rid[i] = qsched_addres(&s, qsched_owner_none, sizeof(double) * TILESIZE * TILESIZE, (void**) &local_C_tiles[i]);
-        memcpy(local_B_tiles[i], local_B[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
+        memcpy(local_C_tiles[i], &local_C[i*TILESIZE*TILESIZE],sizeof(double)*TILESIZE*TILESIZE);
     }
 
     message("Synchronizing resources.");
     qsched_sync_resources(&s);
 
     //Generate tasks
     qsched_task_t /* *local_tid = NULL,*/ tid_new = -1;
-    long long int MPI_data[3];
+    long long int MPI_Data[3];
     // local_tid = malloc(sizeof(qsched_task_t)*numRows*matsize);
     for(int i = 0; i < numRows*matsize; i++){
         MPI_Data[0] = local_A_rid[i];
         MPI_Data[1] = local_B_rid[i];
         MPI_Data[2] = local_C_rid[i];
-        tid_new = qsched_addtask(&s, add_task, task_flag_none, MPI_data,
+        tid_new = qsched_addtask(&s, add_task, task_flag_none, MPI_Data,
                                  sizeof(long long int) * 3, 200);
         qsched_addlock(&s, tid_new, local_C_rid[i]);
         qsched_adduse(&s, tid_new, local_A_rid[i]);
@@ -156,6 +156,33 @@ void test_matadd(int matsize,int nr_threads`){
 
     // Print off a hello world message
     printf("Hello world from processor rank = %i, count_ranks = %i\n",
            s.rank, s.count_ranks);
+#ifdef TASK_TIMERS
+    //Each rank wants to loop through the tasks they executed and output the data, then synchronize.
+    int j;
+    FILE *file;
+    if(s.rank == 0)
+    {
+        file = fopen("task_timers.tks", "w");
+        fclose(file);
+    }
+    for(int i = 0; i < s.count_ranks; i++)
+    {
+        if(i == s.rank)
+        {
+            file = fopen("task_timers.tks", "a");
+            for(j = 0; j < s.task_ranks[s.count_ranks]; j++)
+            {
+                if(s.tasks[j].node_executed == s.rank)
+                {
+                    struct task *t = &s.tasks[j];
+                    fprintf(file, "%lli %i %llu %llu %i %i\n", t->id, t->type, t->task_start, t->task_finish, t->node_executed, t->thread_executed);
+                }
+            }
+            fclose(file);
+        }
+        MPI_Barrier(s.comm);
+    }
+#endif
 
 }
diff --git a/src/qsched.c b/src/qsched.c
index ffc74f5a13646d80b1cbe6e113ac9dcf3e7a3562..6edfb7fe4e324042f2e06309011073b371d06075 100644
--- a/src/qsched.c
+++ b/src/qsched.c
@@ -147,7 +147,7 @@ void* qsched_getresdata( struct qsched *s, qsched_res_t res_id )
         }
     }
     if(s->res[getindex(res_id, s)].data == NULL)
-        error("Data is NULL");
+        error("Data is NULL for resource %lli",res_id);
     return s->res[getindex(res_id, s)].data;
 }
 #endif
@@ -3150,6 +3150,7 @@ for(i = 0; i < node_count; i++)
     if( METIS_PartGraphKway(&node_count, &one, edgelist_pos, edgelist_new, nodelist, NULL, edgelist_vwgt, &temp_count_ranks, NULL, NULL,options, objval, nodeIDs) != METIS_OK)
         error("Failed to partition\n");
     /*qsched_partition_random(s, nodelist, edgelist_new, node_count, nodeIDs );*/
+    /*qsched_partition_roundrobin(s, nodelist, edgelist_new, node_count, nodeIDs);*/
 }
 toc = getticks();
 message("METIS_PartGraphKway took %lli (= %.3fms) ticks\n", toc-tic, (float)(toc-tic)/2.67e6);
@@ -3490,7 +3491,6 @@ if(tid == NULL)
     }
 
 /* Sort the tasks topologically. */
-
 for ( j = 0 , k = 0 ; k < count ; k++ )
     if ( s->tasks[k].wait == 0 ) {
         tid[j] = s->tasks[k].id;
@@ -3702,7 +3702,9 @@ void *qsched_pthread_run ( void *in ) {
 #ifndef WITH_MPI
         s->fun( t->type , &s->data[ t->data ] );
 #else
+        if(t->type != task_type_send && t->type != task_type_recv){
         s->fun( s, t->type , &s->data[ t->data ] );
+        }
 #endif
         /* Mark that task as done. */
         qsched_done( s , t );
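
Note on the TASK_TIMERS output: each line of task_timers.tks written by the block added above holds six whitespace-separated fields in the format "%lli %i %llu %llu %i %i", namely task id, task type, start tick, finish tick, executing rank, and executing thread. A minimal stand-alone reader for that file might look like the sketch below; the program itself (and its name) is hypothetical and not part of this patch, and the 2.67e6 ticks-per-millisecond factor is taken from the timing message in qsched.c, so it is an assumption about the host clock rate that may need adjusting for other machines.

/* task_timers_read.c: hypothetical helper, not part of the patch.
 * Parses the six-column task_timers.tks lines produced above and prints
 * each task's duration in milliseconds. */
#include <stdio.h>

int main(int argc, char *argv[]) {
    const char *fname = (argc > 1) ? argv[1] : "task_timers.tks";
    FILE *f = fopen(fname, "r");
    if (f == NULL) {
        fprintf(stderr, "Could not open %s\n", fname);
        return 1;
    }
    long long int id;
    int type, node, thread;
    unsigned long long start, finish;
    while (fscanf(f, "%lli %i %llu %llu %i %i",
                  &id, &type, &start, &finish, &node, &thread) == 6) {
        /* Ticks-to-ms conversion assumes the 2.67e6 ticks/ms used in qsched.c's
         * own timing messages. */
        printf("task %lli (type %i): %.3f ms on rank %i, thread %i\n",
               id, type, (double)(finish - start) / 2.67e6, node, thread);
    }
    fclose(f);
    return 0;
}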