Skip to content
Snippets Groups Projects
Commit d1ff684d authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

use flags for pthread/yield.

parent 4797042d
No related branches found
No related tags found
No related merge requests found
...@@ -18,6 +18,9 @@ ...@@ -18,6 +18,9 @@
******************************************************************************/ ******************************************************************************/
/* Config parameters. */
#include "../config.h"
/* Standard includes. */ /* Standard includes. */
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
...@@ -61,21 +64,41 @@ void matmul ( int m , int n , int k , double *a , int lda , double *b , int ldb ...@@ -61,21 +64,41 @@ void matmul ( int m , int n , int k , double *a , int lda , double *b , int ldb
void test2 ( int m , int n , int k , int nr_threads ) { void test2 ( int m , int n , int k , int nr_threads ) {
int i, j, kk, qid, data[3], *d; int i, j, kk, data[3];
qsched_task_t tid; qsched_task_t tid;
qsched_res_t rid; qsched_res_t rid;
struct qsched s; struct qsched s;
struct task *t;
double *a, *b, *c, *res, err = 0.0, irm = 1.0/RAND_MAX; double *a, *b, *c, *res, err = 0.0, irm = 1.0/RAND_MAX;
ticks tic_task, toc_task, tic_ref, toc_ref; ticks tic_task, toc_task, tic_ref, toc_ref;
/* Task callback given to qsched_run below; carries out one task of the requested type. */
void runner ( int type , void *data ) {

    /* View the opaque payload as an array of ints (presumably the index
       triple handed to qsched_addtask when the task was created). */
    int *blk = (int *)data;

    if ( type == 1 ) {

        /* Starting addresses of the blocks of a, b and c picked out by the payload. */
        double *a_blk = &a[ blk[2]*32*m*32 + blk[0]*32 ];
        double *b_blk = &b[ k*32*blk[1]*32 + blk[2]*32 ];
        double *c_blk = &c[ blk[0]*32 + m*32*blk[1]*32 ];

        /* One 32 x 32 x 32 product using the leading dimensions of the full matrices. */
        matmul( 32 , 32 , 32 , a_blk , m*32 , b_blk , k*32 , c_blk , m*32 );

        }

    /* Type 1 is the only task type this callback handles. */
    else {
        error( "Unknown task type." );
        }

    }
/* Tell the user something about the test. */ /* Tell the user something about the test. */
message( "computing a tiled matrix multiplication of the form " message( "computing a tiled matrix multiplication of the form "
"C_ij = A_i: * B_:j, with tasks for each k where C_ij += A_ik*B_kj." ); "C_ij = A_i: * B_:j, with tasks for each k where C_ij += A_ik*B_kj." );
/* Init the sched. */ /* Init the sched. */
bzero( &s , sizeof(struct qsched) ); bzero( &s , sizeof(struct qsched) );
qsched_init( &s , nr_threads , m * n ); qsched_init( &s , nr_threads, qsched_flag_none );
/* Allocate the matrices. */ /* Allocate the matrices. */
if ( ( a = (double *)malloc( sizeof(double) * m * k * 32 * 32 ) ) == NULL || if ( ( a = (double *)malloc( sizeof(double) * m * k * 32 * 32 ) ) == NULL ||
...@@ -99,50 +122,18 @@ void test2 ( int m , int n , int k , int nr_threads ) { ...@@ -99,50 +122,18 @@ void test2 ( int m , int n , int k , int nr_threads ) {
data[0] = i; data[1] = j; data[0] = i; data[1] = j;
for ( kk = 0 ; kk < k ; kk++ ) { for ( kk = 0 ; kk < k ; kk++ ) {
data[2] = kk; data[2] = kk;
tid = qsched_newtask( &s , 1 , task_flag_none , data , 3*sizeof(int) , 1 ); tid = qsched_addtask( &s , 1 , task_flag_none , data , 3*sizeof(int) , 1 );
qsched_addlock( &s , tid , rid ); qsched_addlock( &s , tid , rid );
} }
} }
/* Prepare the sched for execution. */
qsched_prepare( &s );
/* Parallel loop. */
tic_task = getticks();
#pragma omp parallel private(t,qid,d)
{
/* Get the ID of this runner. */
if ( ( qid = omp_get_thread_num() ) < nr_threads ) {
/* Main loop. */
while ( 1 ) {
/* Get a task, break if unsuccessful. */
if ( ( t = qsched_gettask( &s , qid ) ) == NULL )
break;
/* Decode and execute the task. */
switch ( t->type ) {
case 1:
d = qsched_getdata( &s , t );
// message( "thread %i working on block [ %i , %i ] with k=%i, lock[0]=%i." , qid , d[0] , d[1] , d[2] , t->locks[0] ); fflush(stdout);
matmul( 32 , 32 , 32 , &a[ d[2]*32*m*32 + d[0]*32 ] , m*32 , &b[ k*32*d[1]*32 + d[2]*32 ] , k*32 , &c[ d[0]*32 + m*32*d[1]*32 ] , m*32 );
break;
default:
error( "Unknown task type." );
}
/* Clean up afterwards. */ /* Run the scheduler. */
qsched_done( &s , t ); tic_task = getticks();
qsched_run( &s , nr_threads , runner );
} /* main loop. */
} /* valid queue? */
}
toc_task = getticks(); toc_task = getticks();
/* Verify the result. */ /* Verify the result. */
tic_ref = getticks(); tic_ref = getticks();
matmul( m*32 , n*32 , k*32 , a , m*32 , b , k*32 , res , m*32 ); matmul( m*32 , n*32 , k*32 , a , m*32 , b , k*32 , res , m*32 );
...@@ -178,21 +169,39 @@ void test2 ( int m , int n , int k , int nr_threads ) { ...@@ -178,21 +169,39 @@ void test2 ( int m , int n , int k , int nr_threads ) {
void test1 ( int m , int n , int k , int nr_threads ) { void test1 ( int m , int n , int k , int nr_threads ) {
int i, j, qid, data[2], *d; int i, j, data[2];
qsched_task_t tid; qsched_task_t tid;
qsched_res_t rid; qsched_res_t rid;
struct qsched s; struct qsched s;
struct task *t;
double *a, *b, *c, *res, err = 0.0, irm = 1.0/RAND_MAX; double *a, *b, *c, *res, err = 0.0, irm = 1.0/RAND_MAX;
ticks tic_task, toc_task, tic_ref, toc_ref; ticks tic_task, toc_task, tic_ref, toc_ref;
/* Task callback given to qsched_run below; carries out one task of the requested type. */
void runner ( int type , void *data ) {

    /* View the opaque payload as an array of ints (presumably the index
       pair handed to qsched_addtask when the task was created). */
    int *blk = (int *)data;

    if ( type == 1 ) {

        /* Starting addresses of the strips of a and b and of the block of c. */
        double *a_blk = &a[ blk[0]*32 ];
        double *b_blk = &b[ k*32*blk[1]*32 ];
        double *c_blk = &c[ blk[0]*32 + m*32*blk[1]*32 ];

        /* A single call covers the whole inner dimension k*32 for this block of c. */
        matmul( 32 , 32 , k*32 , a_blk , m*32 , b_blk , k*32 , c_blk , m*32 );

        }

    /* Type 1 is the only task type this callback handles. */
    else {
        error( "Unknown task type." );
        }

    }
/* Tell the user something about the test. */ /* Tell the user something about the test. */
message( "computing a tiled matrix multiplication of the form " message( "computing a tiled matrix multiplication of the form "
"C_ij = A_i: * B_:j, with a single task per C_ij." ); "C_ij = A_i: * B_:j, with a single task per C_ij." );
/* Init the sched. */ /* Init the sched. */
bzero( &s , sizeof(struct qsched) ); bzero( &s , sizeof(struct qsched) );
qsched_init( &s , nr_threads , m * n ); qsched_init( &s , nr_threads , qsched_flag_none );
/* Allocate the matrices. */ /* Allocate the matrices. */
if ( ( a = (double *)malloc( sizeof(double) * m * k * 32 * 32 ) ) == NULL || if ( ( a = (double *)malloc( sizeof(double) * m * k * 32 * 32 ) ) == NULL ||
...@@ -214,49 +223,17 @@ void test1 ( int m , int n , int k , int nr_threads ) { ...@@ -214,49 +223,17 @@ void test1 ( int m , int n , int k , int nr_threads ) {
for ( j = 0 ; j < n ; j++ ) { for ( j = 0 ; j < n ; j++ ) {
data[0] = i; data[1] = j; data[0] = i; data[1] = j;
rid = qsched_addres( &s , -1 ); rid = qsched_addres( &s , -1 );
tid = qsched_newtask( &s , 1 , task_flag_none , data , 2*sizeof(int) , 1 ); tid = qsched_addtask( &s , 1 , task_flag_none , data , 2*sizeof(int) , 1 );
qsched_addlock( &s , tid , rid ); qsched_addlock( &s , tid , rid );
} }
/* Prepare the sched for execution. */
qsched_prepare( &s );
/* Parallel loop. */ /* Run the scheduler. */
tic_task = getticks(); tic_task = getticks();
#pragma omp parallel private(t,qid,d) qsched_run( &s , nr_threads , runner );
{
/* Get the ID of this runner. */
if ( ( qid = omp_get_thread_num() ) < nr_threads ) {
/* Main loop. */
while ( 1 ) {
/* Get a task, break if unsuccessful. */
if ( ( t = qsched_gettask( &s , qid ) ) == NULL )
break;
/* Decode and execute the task. */
switch ( t->type ) {
case 1:
d = qsched_getdata( &s , t );
// message( "thread %i working on block [ %i , %i ]." , qid , d[0] , d[1] ); fflush(stdout);
matmul( 32 , 32 , k*32 , &a[ d[0]*32 ] , m*32 , &b[ k*32*d[1]*32 ] , k*32 , &c[ d[0]*32 + m*32*d[1]*32 ] , m*32 );
break;
default:
error( "Unknown task type." );
}
/* Clean up afterwards. */
qsched_done( &s , t );
} /* main loop. */
} /* valid thread. */
}
toc_task = getticks(); toc_task = getticks();
/* Verify the result. */ /* Verify the result. */
tic_ref = getticks(); tic_ref = getticks();
matmul( m*32 , n*32 , k*32 , a , m*32 , b , k*32 , res , m*32 ); matmul( m*32 , n*32 , k*32 , a , m*32 , b , k*32 , res , m*32 );
......
...@@ -18,6 +18,9 @@ ...@@ -18,6 +18,9 @@
******************************************************************************/ ******************************************************************************/
/* Config parameters. */
#include "../config.h"
/* Standard includes. */ /* Standard includes. */
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
...@@ -134,7 +137,7 @@ void cell_split ( struct cell *c , struct qsched *s ) { ...@@ -134,7 +137,7 @@ void cell_split ( struct cell *c , struct qsched *s ) {
c->res = qsched_addres( s , qsched_res_none ); c->res = qsched_addres( s , qsched_res_none );
/* Attach a center-of-mass task to the cell. */ /* Attach a center-of-mass task to the cell. */
c->com_tid = qsched_newtask( s , task_type_com , task_flag_none , &c , sizeof(struct cell *) , 1 ); c->com_tid = qsched_addtask( s , task_type_com , task_flag_none , &c , sizeof(struct cell *) , 1 );
/* Does this cell need to be split? */ /* Does this cell need to be split? */
if ( count > cell_maxparts ) { if ( count > cell_maxparts ) {
...@@ -520,7 +523,7 @@ void create_tasks ( struct qsched *s , struct cell *ci , struct cell *cj ) { ...@@ -520,7 +523,7 @@ void create_tasks ( struct qsched *s , struct cell *ci , struct cell *cj ) {
data[0] = ci; data[1] = NULL; data[0] = ci; data[1] = NULL;
/* Create the task. */ /* Create the task. */
tid = qsched_newtask( s , task_type_self , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count*ci->count/2 ); tid = qsched_addtask( s , task_type_self , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count*ci->count/2 );
/* Add the resource. */ /* Add the resource. */
qsched_addlock( s , tid , ci->res ); qsched_addlock( s , tid , ci->res );
...@@ -560,13 +563,13 @@ void create_tasks ( struct qsched *s , struct cell *ci , struct cell *cj ) { ...@@ -560,13 +563,13 @@ void create_tasks ( struct qsched *s , struct cell *ci , struct cell *cj ) {
/* Interact ci's parts with cj as a cell. */ /* Interact ci's parts with cj as a cell. */
data[0] = ci; data[1] = cj; data[0] = ci; data[1] = cj;
tid = qsched_newtask( s , task_type_pair_pc , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count ); tid = qsched_addtask( s , task_type_pair_pc , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count );
qsched_addlock( s , tid , ci->res ); qsched_addlock( s , tid , ci->res );
qsched_addunlock( s , cj->com_tid , tid ); qsched_addunlock( s , cj->com_tid , tid );
/* Interact cj's parts with ci as a cell. */ /* Interact cj's parts with ci as a cell. */
data[0] = cj; data[1] = ci; data[0] = cj; data[1] = ci;
tid = qsched_newtask( s , task_type_pair_pc , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count ); tid = qsched_addtask( s , task_type_pair_pc , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count );
qsched_addlock( s , tid , cj->res ); qsched_addlock( s , tid , cj->res );
qsched_addunlock( s , ci->com_tid , tid ); qsched_addunlock( s , ci->com_tid , tid );
...@@ -579,7 +582,7 @@ void create_tasks ( struct qsched *s , struct cell *ci , struct cell *cj ) { ...@@ -579,7 +582,7 @@ void create_tasks ( struct qsched *s , struct cell *ci , struct cell *cj ) {
data[0] = ci; data[1] = cj; data[0] = ci; data[1] = cj;
/* Create the task. */ /* Create the task. */
tid = qsched_newtask( s , task_type_pair , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count*cj->count ); tid = qsched_addtask( s , task_type_pair , task_flag_none , data , sizeof(struct cell *) * 2 , ci->count*cj->count );
/* Add the resources. */ /* Add the resources. */
qsched_addlock( s , tid , ci->res ); qsched_addlock( s , tid , ci->res );
...@@ -614,8 +617,33 @@ void test_bh ( int N , int nr_threads ) { ...@@ -614,8 +617,33 @@ void test_bh ( int N , int nr_threads ) {
struct part *parts; struct part *parts;
struct qsched s; struct qsched s;
/* Task callback given to qsched_run below; dispatches one task to its interaction kernel. */
void runner ( int type , void *data ) {

    /* View the opaque payload as an array of cell pointers. */
    struct cell **cells = (struct cell **)data;

    /* Pick the kernel matching the task type; only the pair kernels use the second cell. */
    if ( type == task_type_self ) {
        iact_self( cells[0] );
        }
    else if ( type == task_type_pair ) {
        iact_pair( cells[0] , cells[1] );
        }
    else if ( type == task_type_pair_pc ) {
        iact_pair_pc( cells[0] , cells[1] );
        }
    else if ( type == task_type_com ) {
        comp_com( cells[0] );
        }
    else {
        error( "Unknown task type." );
        }

    }
/* Initialize the scheduler. */ /* Initialize the scheduler. */
qsched_init( &s , nr_threads , 1000 ); qsched_init( &s , nr_threads , qsched_flag_yield );
/* Init and fill the particle array. */ /* Init and fill the particle array. */
if ( ( parts = (struct part *)malloc( sizeof(struct part) * N ) ) == NULL ) if ( ( parts = (struct part *)malloc( sizeof(struct part) * N ) ) == NULL )
...@@ -642,61 +670,16 @@ void test_bh ( int N , int nr_threads ) { ...@@ -642,61 +670,16 @@ void test_bh ( int N , int nr_threads ) {
/* Create the tasks. */ /* Create the tasks. */
create_tasks( &s , root , NULL ); create_tasks( &s , root , NULL );
/* Prepare the scheduler. */ /* Execute the tasks. */
qsched_prepare( &s ); qsched_run( &s , nr_threads , runner );
/* Parallel loop. */
#pragma omp parallel
{
int qid;
struct cell **d;
struct task *t;
/* Get the ID of this runner. */
if ( ( qid = omp_get_thread_num() ) < nr_threads ) {
/* Main loop. */
while ( 1 ) {
/* Get a task, break if unsucessful. */
if ( ( t = qsched_gettask( &s , qid ) ) == NULL )
break;
/* Get the task's data. */
d = qsched_getdata( &s , t );
/* Decode and execute the task. */
switch ( t->type ) {
case task_type_self:
iact_self( d[0] );
break;
case task_type_pair:
iact_pair( d[0] , d[1] );
break;
case task_type_pair_pc:
iact_pair_pc( d[0] , d[1] );
break;
case task_type_com:
comp_com( d[0] );
break;
default:
error( "Unknown task type." );
}
/* Clean up afterwards. */
qsched_done( &s , t );
} /* main loop. */
} /* valid thread. */
} /* parallel loop. */
/* Dump the tasks. */ /* Dump the tasks. */
for ( k = 0 ; k < s.count ; k++ ) for ( k = 0 ; k < s.count ; k++ )
printf( " %i %i %lli %lli\n" , s.tasks[k].type , s.tasks[k].qid , s.tasks[k].tic , s.tasks[k].toc ); printf( " %i %i %lli %lli\n" , s.tasks[k].type , s.tasks[k].qid , s.tasks[k].tic , s.tasks[k].toc );
/* Clean up. */
qsched_free( &s );
} }
...@@ -740,8 +723,11 @@ int main ( int argc , char *argv[] ) { ...@@ -740,8 +723,11 @@ int main ( int argc , char *argv[] ) {
message( "Computing the N-body problem over %i particles using %i threads." , message( "Computing the N-body problem over %i particles using %i threads." ,
N , nr_threads ); N , nr_threads );
/* Run the test. */
test_bh( N , nr_threads ); test_bh( N , nr_threads );
return 0;
} }
...@@ -18,6 +18,9 @@ ...@@ -18,6 +18,9 @@
******************************************************************************/ ******************************************************************************/
/* Config parameters. */
#include "../config.h"
/* Standard includes. */ /* Standard includes. */
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
...@@ -402,6 +405,37 @@ void test_qr ( int m , int n , int nr_threads ) { ...@@ -402,6 +405,37 @@ void test_qr ( int m , int n , int nr_threads ) {
int data[3]; int data[3];
enum task_types { task_DGEQRF , task_DLARFT , task_DTSQRF , task_DSSRFT }; enum task_types { task_DGEQRF , task_DLARFT , task_DTSQRF , task_DSSRFT };
/* Runner function to pass to the scheduler. */
/* Nested callback handed to qsched_run further down in test_qr: it receives a
   task type and that task's payload and calls the matching tile kernel.
   A, tau and m are not declared here; they are taken from the enclosing
   function's scope. */
void runner ( int type , void *data ) {
/* Decode the task data. */
/* The payload is read as three ints. The locals i, j and k declared here hide
   the enclosing function's variables of the same names, so the callback does
   not write to them. */
int *idata = (int *)data;
int i = idata[0], j = idata[1], k = idata[2];
/* Scratch buffer local to each call; only DGEQRF and DTSQRF are given it. */
double buff[ 2*32*32 ];
/* Decode and execute the task. */
switch ( type ) {
case task_DGEQRF:
DGEQRF( &A[ j*m*32*32 + i*32 ] , &tau[ j*m*32 + i*32 ] , 32 , 32 , 32*m , buff );
break;
case task_DLARFT:
DLARFT( &A[ j*m*32*32 + i*32 ] , &A[ i*m*32*32 + i*32 ] , &tau[ i*m*32 + i*32 ] , 32 , 32 , 32*m );
break;
case task_DTSQRF:
DTSQRF( &A[ j*m*32*32 + j*32 ] , &A[ j*m*32*32 + i*32 ] , &tau[ j*m*32 + i*32 ] , 32 , 32 , 32 , 32*m , buff );
break;
case task_DSSRFT:
/* NOTE(review): the first A offset below uses k*m*32, whereas every other A
   offset in this function uses a *m*32*32 stride -- confirm this is intended. */
DSSRFT( &A[ k*m*32 + i*32 ] , &A[ j*m*32*32 + k*32 ] , &A[ j*m*32*32 + i*32 ] , &tau[ k*m*32 + i*32 ] , 32 , 32 , 32*m );
break;
default:
error( "Unknown task type." );
}
}
/* Allocate and fill the original matrix. */ /* Allocate and fill the original matrix. */
if ( ( A = (double *)malloc( sizeof(double) * m * n * 32 * 32 ) ) == NULL || if ( ( A = (double *)malloc( sizeof(double) * m * n * 32 * 32 ) ) == NULL ||
...@@ -423,7 +457,7 @@ void test_qr ( int m , int n , int nr_threads ) { ...@@ -423,7 +457,7 @@ void test_qr ( int m , int n , int nr_threads ) {
printf( "];\n" ); */ printf( "];\n" ); */
/* Initialize the scheduler. */ /* Initialize the scheduler. */
qsched_init( &s , nr_threads , m*n ); qsched_init( &s , nr_threads , qsched_flag_none );
/* Allocate and init the task ID and resource ID matrix. */ /* Allocate and init the task ID and resource ID matrix. */
if ( ( tid = (qsched_task_t *)malloc( sizeof(qsched_task_t) * m * n ) ) == NULL || if ( ( tid = (qsched_task_t *)malloc( sizeof(qsched_task_t) * m * n ) ) == NULL ||
...@@ -439,7 +473,7 @@ void test_qr ( int m , int n , int nr_threads ) { ...@@ -439,7 +473,7 @@ void test_qr ( int m , int n , int nr_threads ) {
/* Add kth corner task. */ /* Add kth corner task. */
data[0] = k; data[1] = k; data[2] = k; data[0] = k; data[1] = k; data[2] = k;
tid_new = qsched_newtask( &s , task_DGEQRF , task_flag_none , data , sizeof(int)*3 , 2 ); tid_new = qsched_addtask( &s , task_DGEQRF , task_flag_none , data , sizeof(int)*3 , 2 );
qsched_addlock( &s , tid_new , rid[ k*m + k ] ); qsched_addlock( &s , tid_new , rid[ k*m + k ] );
if ( tid[ k*m + k ] != -1 ) if ( tid[ k*m + k ] != -1 )
qsched_addunlock( &s , tid[ k*m + k ] , tid_new ); qsched_addunlock( &s , tid[ k*m + k ] , tid_new );
...@@ -448,7 +482,7 @@ void test_qr ( int m , int n , int nr_threads ) { ...@@ -448,7 +482,7 @@ void test_qr ( int m , int n , int nr_threads ) {
/* Add column tasks on kth row. */ /* Add column tasks on kth row. */
for ( j = k+1 ; j < n ; j++ ) { for ( j = k+1 ; j < n ; j++ ) {
data[0] = k; data[1] = j; data[2] = k; data[0] = k; data[1] = j; data[2] = k;
tid_new = qsched_newtask( &s , task_DLARFT , task_flag_none , data , sizeof(int)*3 , 3 ); tid_new = qsched_addtask( &s , task_DLARFT , task_flag_none , data , sizeof(int)*3 , 3 );
qsched_addlock( &s , tid_new , rid[ j*m + k ] ); qsched_addlock( &s , tid_new , rid[ j*m + k ] );
qsched_adduse( &s , tid_new , rid[ k*m + k ] ); qsched_adduse( &s , tid_new , rid[ k*m + k ] );
qsched_addunlock( &s , tid[ k*m + k ] , tid_new ); qsched_addunlock( &s , tid[ k*m + k ] , tid_new );
...@@ -462,7 +496,7 @@ void test_qr ( int m , int n , int nr_threads ) { ...@@ -462,7 +496,7 @@ void test_qr ( int m , int n , int nr_threads ) {
/* Add the row tasks for the kth column. */ /* Add the row tasks for the kth column. */
data[0] = i; data[1] = k; data[2] = k; data[0] = i; data[1] = k; data[2] = k;
tid_new = qsched_newtask( &s , task_DTSQRF , task_flag_none , data , sizeof(int)*3 , 3 ); tid_new = qsched_addtask( &s , task_DTSQRF , task_flag_none , data , sizeof(int)*3 , 3 );
qsched_addlock( &s , tid_new , rid[ k*m + i ] ); qsched_addlock( &s , tid_new , rid[ k*m + i ] );
qsched_adduse( &s , tid_new , rid[ k*m + k ] ); qsched_adduse( &s , tid_new , rid[ k*m + k ] );
qsched_addunlock( &s , tid[ k*m + (i-1) ] , tid_new ); qsched_addunlock( &s , tid[ k*m + (i-1) ] , tid_new );
...@@ -473,7 +507,7 @@ void test_qr ( int m , int n , int nr_threads ) { ...@@ -473,7 +507,7 @@ void test_qr ( int m , int n , int nr_threads ) {
/* Add the inner tasks. */ /* Add the inner tasks. */
for ( j = k+1 ; j < n ; j++ ) { for ( j = k+1 ; j < n ; j++ ) {
data[0] = i; data[1] = j; data[2] = k; data[0] = i; data[1] = j; data[2] = k;
tid_new = qsched_newtask( &s , task_DSSRFT , task_flag_none , data , sizeof(int)*3 , 5 ); tid_new = qsched_addtask( &s , task_DSSRFT , task_flag_none , data , sizeof(int)*3 , 5 );
qsched_addlock( &s , tid_new , rid[ j*m + i ] ); qsched_addlock( &s , tid_new , rid[ j*m + i ] );
qsched_adduse( &s , tid_new , rid[ k*m + i ] ); qsched_adduse( &s , tid_new , rid[ k*m + i ] );
qsched_adduse( &s , tid_new , rid[ j*m + k ] ); qsched_adduse( &s , tid_new , rid[ j*m + k ] );
...@@ -488,57 +522,9 @@ void test_qr ( int m , int n , int nr_threads ) { ...@@ -488,57 +522,9 @@ void test_qr ( int m , int n , int nr_threads ) {
} /* build the tasks. */ } /* build the tasks. */
/* Prepare the scheduler. */ /* Execute the tasks. */
qsched_prepare( &s ); qsched_run( &s , nr_threads , runner );
/* Parallel loop. */
#pragma omp parallel
{
int *d, qid;
double buff[ 2*32*32 ];
struct task *t;
/* Get the ID of this runner. */
if ( ( qid = omp_get_thread_num() ) < nr_threads ) {
/* Main loop. */
while ( 1 ) {
/* Get a task, break if unsuccessful. */
if ( ( t = qsched_gettask( &s , qid ) ) == NULL )
break;
/* Get the task's data. */
d = qsched_getdata( &s , t );
i = d[0]; j = d[1]; k = d[2];
/* Decode and execute the task. */
switch ( t->type ) {
case task_DGEQRF:
DGEQRF( &A[ j*m*32*32 + i*32 ] , &tau[ j*m*32 + i*32 ] , 32 , 32 , 32*m , buff );
break;
case task_DLARFT:
DLARFT( &A[ j*m*32*32 + i*32 ] , &A[ i*m*32*32 + i*32 ] , &tau[ i*m*32 + i*32 ] , 32 , 32 , 32*m );
break;
case task_DTSQRF:
DTSQRF( &A[ j*m*32*32 + j*32 ] , &A[ j*m*32*32 + i*32 ] , &tau[ j*m*32 + i*32 ] , 32 , 32 , 32 , 32*m , buff );
break;
case task_DSSRFT:
DSSRFT( &A[ k*m*32 + i*32 ] , &A[ j*m*32*32 + k*32 ] , &A[ j*m*32*32 + i*32 ] , &tau[ k*m*32 + i*32 ] , 32 , 32 , 32*m );
break;
default:
error( "Unknown task type." );
}
/* Clean up afterwards. */
qsched_done( &s , t );
} /* main loop. */
} /* valid thread. */
} /* parallel loop. */
/* Dump A. */ /* Dump A. */
/* message( "A = [" ); /* message( "A = [" );
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment