Skip to content
Snippets Groups Projects
Select Git revision
  • 80d1e8b6508b741f03b4d8d29cefd105f436b770
  • master default protected
  • MPI_qsched
  • resource_reuse
  • task_timers
  • openMP_locks
  • fortran_bindings
  • fortran_with_timers
  • paper_updates
  • aidan
  • ompss_fixes
  • paper_fixes
  • FMM
  • autotools-update
  • paper_simulated_memcpy
  • gpu_friendly_bh
  • unbiased_gravity
  • test_bh_2
  • cleanup
  • merge_from_svn
20 results

test.c

Blame
  • test.c 10.31 KiB
    /*******************************************************************************
     * This file is part of QuickSched.
     * Coypright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
     * 
     * This program is free software: you can redistribute it and/or modify
     * it under the terms of the GNU Lesser General Public License as published
     * by the Free Software Foundation, either version 3 of the License, or
     * (at your option) any later version.
     * 
     * This program is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     * GNU General Public License for more details.
     * 
     * You should have received a copy of the GNU Lesser General Public License
     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
     * 
     ******************************************************************************/
    
    
    /* Config parameters. */
    #include "../config.h"
    
    /* Standard includes. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>
    #include <math.h>
    #include <omp.h>
    
    /* Local includes. */
    #include "quicksched.h"
    
    
    /**
     * @brief Matrix multiplication kernel.
     */
     
    void matmul ( int m , int n , int k , double *a , int lda , double *b , int ldb , double *c , int ldc ) {
    
        int ii, jj, kk;
        double acc;
        
        // message( "matmul: m=%i, n=%i, k=%i, lda=%i, ldb=%i, ldc=%i." ,
        //     m , n , k , lda , ldb , ldc ); fflush(stdout);
        
        for ( ii = 0 ; ii < m ; ii++ )
            for ( jj = 0 ; jj < n ; jj++ ) {
                for ( acc = 0.0, kk = 0 ; kk < k ; kk++ )
                    acc += a[ ii + lda*kk ] * b[ kk + ldb*jj ];
                c[ ii + ldc*jj ] += acc;
                }
    
        }
        
        
    /**
     * @brief First test: Just tasks, no dependencies or conflicts.
     *
     * Computes a tiled matrix multiplication of the form
     * C_ij = A_i: * B_:j, with tasks C_ij += A_ik*B_kj.
     */
     
    void test2 ( int m , int n , int k , int nr_threads ) { 
    
        int i, j, kk, data[3];
        qsched_task_t tid;
        qsched_res_t rid;
        struct qsched s;
        double *a, *b, *c, *res, err = 0.0, irm = 1.0/RAND_MAX;
        ticks tic_task, toc_task, tic_ref, toc_ref;
        
        
        /* Runner function to pass to the scheduler. */
        void runner ( int type , void *data ) {
        
            /* Decode the task data. */
            int *d = (int *)data;
            
            /* Decode and execute the task. */
            switch ( type ) {
                case 1:
                    // message( "thread %i working on block [ %i , %i ] with k=%i, lock[0]=%i." , qid , d[0] , d[1] , d[2] , t->locks[0] ); fflush(stdout);
                    matmul( 32 , 32 , 32 , &a[ d[2]*32*m*32 + d[0]*32 ] , m*32 , &b[ k*32*d[1]*32 + d[2]*32 ] , k*32 , &c[ d[0]*32 + m*32*d[1]*32 ] , m*32 );
                    break;
                default:
                    error( "Unknown task type." );
                }
    
            }
            
        
        
        /* Tell the user something about the test. */
        message( "computing a tiled matrix multiplication of the form "
                 "C_ij = A_i: * B_:j, with tasks for each k where C_ij += A_ik*B_kj." );
        
        /* Init the sched. */
        bzero( &s , sizeof(struct qsched) );
        qsched_init( &s , nr_threads, qsched_flag_none );
    
        /* Allocate the matrices. */
        if ( ( a = (double *)malloc( sizeof(double) * m * k * 32 * 32 ) ) == NULL ||
             ( b = (double *)malloc( sizeof(double) * k * n * 32 * 32 ) ) == NULL ||
             ( c = (double *)malloc( sizeof(double) * m * n * 32 * 32 ) ) == NULL ||
             ( res = (double *)malloc( sizeof(double) * m * n * 32 * 32 ) ) == NULL )
            error( "Failed to allocate matrices." );
            
        /* Fill the matrices. */
        for ( i = 0 ; i < m * k * 32 * 32 ; i++ )
            a[i] = rand() * irm;
        for ( i = 0 ; i < k * n * 32 * 32 ; i++ )
            b[i] = rand() * irm;
        bzero( c , sizeof(double) * m * n * 32 * 32 );
        bzero( res , sizeof(double) * m * n * 32 * 32 );
        
        /* Build a task for each tile of the matrix c. */
        for ( i = 0 ; i < m ; i++ )
            for ( j = 0 ; j < n ; j++ ) {
                rid = qsched_addres( &s , qsched_owner_none , qsched_res_none );
                data[0] = i; data[1] = j;
                for ( kk = 0 ; kk < k ; kk++ ) {
                    data[2] = kk;
                    tid = qsched_addtask( &s , 1 , task_flag_none , data , 3*sizeof(int) , 1 );
                    qsched_addlock( &s , tid , rid );
                    }
                }
                
    
        /* Run the scheduler. */
        tic_task = getticks();
        qsched_run( &s , nr_threads , runner );
        toc_task = getticks();
                
    
        /* Verify the result. */
        tic_ref = getticks();
        matmul( m*32 , n*32 , k*32 , a , m*32 , b , k*32 , res , m*32 );
        toc_ref = getticks();
        for ( i = 0 ; i < m * n * 32 * 32 ; i++ )
            err += ( res[i] - c[i] ) * ( res[i] - c[i] );
        message( "Frob. norm of error is %.3e." , sqrt( err ) );
        message( "tasks took %lli ticks." , toc_task - tic_task );
        message( "ref.  took %lli ticks." , toc_ref - tic_ref );
        
        /* Dump the tasks. */
        /* for ( k = 0 ; k < s.count ; k++ ) {
            d = (int *)&s.data[ s.tasks[k].data ];
            printf( " %i %i %i %i %lli %lli\n" , k , s.tasks[k].qid , d[0] , d[1] , s.tasks[k].tic , s.tasks[k].toc );
            } */
        
        /* Clean up. */
        qsched_free( &s );
        free( a );
        free( b );
        free( c );
        free( res );
        
        }
        
        
    /**
     * @brief First test: Just tasks, no dependencies or conflicts.
     *
     * Computes a tiled matrix multiplication of the form
     * C_ij = A_i: * B_:j, with a single task per C_ij.
     */
     
    void test1 ( int m , int n , int k , int nr_threads ) { 
    
        int i, j, data[2];
        qsched_task_t tid;
        qsched_res_t rid;
        struct qsched s;
        double *a, *b, *c, *res, err = 0.0, irm = 1.0/RAND_MAX;
        ticks tic_task, toc_task, tic_ref, toc_ref;
        
        
        /* Runner function to pass to the scheduler. */
        void runner ( int type , void *data ) {
        
            /* Decode the task data. */
            int *d = (int *)data;
            
            /* Decode and execute the task. */
            switch ( type ) {
                case 1:
                    matmul( 32 , 32 , k*32 , &a[ d[0]*32 ] , m*32 , &b[ k*32*d[1]*32 ] , k*32 , &c[ d[0]*32 + m*32*d[1]*32 ] , m*32 );
                    break;
                default:
                    error( "Unknown task type." );
                }
    
            }
            
        
        /* Tell the user something about the test. */
        message( "computing a tiled matrix multiplication of the form "
                 "C_ij = A_i: * B_:j, with a single task per C_ij." );
        
        /* Init the sched. */
        bzero( &s , sizeof(struct qsched) );
        qsched_init( &s , nr_threads , qsched_flag_none );
    
        /* Allocate the matrices. */
        if ( ( a = (double *)malloc( sizeof(double) * m * k * 32 * 32 ) ) == NULL ||
             ( b = (double *)malloc( sizeof(double) * k * n * 32 * 32 ) ) == NULL ||
             ( c = (double *)malloc( sizeof(double) * m * n * 32 * 32 ) ) == NULL ||
             ( res = (double *)malloc( sizeof(double) * m * n * 32 * 32 ) ) == NULL )
            error( "Failed to allocate matrices." );
            
        /* Fill the matrices. */
        for ( i = 0 ; i < m * k * 32 * 32 ; i++ )
            a[i] = rand() * irm;
        for ( i = 0 ; i < k * n * 32 * 32 ; i++ )
            b[i] = rand() * irm;
        bzero( c , sizeof(double) * m * n * 32 * 32 );
        bzero( res , sizeof(double) * m * n * 32 * 32 );
        
        /* Build a task for each tile of the matrix c. */
        for ( i = 0 ; i < m ; i++ )
            for ( j = 0 ; j < n ; j++ ) {
                data[0] = i; data[1] = j;
                rid = qsched_addres( &s , qsched_owner_none , qsched_res_none );
                tid = qsched_addtask( &s , 1 , task_flag_none , data , 2*sizeof(int) , 1 );
                qsched_addlock( &s , tid , rid );
                }
                
                
        /* Run the scheduler. */
        tic_task = getticks();
        qsched_run( &s , nr_threads , runner );
        toc_task = getticks();
                
    
        /* Verify the result. */
        tic_ref = getticks();
        matmul( m*32 , n*32 , k*32 , a , m*32 , b , k*32 , res , m*32 );
        toc_ref = getticks();
        for ( i = 0 ; i < m * n * 32 * 32 ; i++ )
            err += ( res[i] - c[i] ) * ( res[i] - c[i] );
        message( "Frob. norm of error is %.3e." , sqrt( err ) );
        message( "tasks took %lli ticks." , toc_task - tic_task );
        message( "ref.  took %lli ticks." , toc_ref - tic_ref );
        
        /* Dump the tasks. */
        /* for ( k = 0 ; k < s.count ; k++ ) {
            d = (int *)&s.data[ s.tasks[k].data ];
            printf( " %i %i %i %i %lli %lli\n" , k , s.tasks[k].qid , d[0] , d[1] , s.tasks[k].tic , s.tasks[k].toc );
            } */
        
        /* Clean up. */
        qsched_free( &s );
        free( a );
        free( b );
        free( c );
        free( res );
        
        }
        
        
    /**
     * @brief Main function.
     */
     
    int main ( int argc , char *argv[] ) {
    
        int c, nr_threads;
        int M = 4, N = 4, K = 4;
        
        /* Get the number of threads. */
        #pragma omp parallel shared(nr_threads)
        {
            if ( omp_get_thread_num() == 0 )
                nr_threads = omp_get_num_threads();
        }
        
        /* Parse the options */
        while ( ( c = getopt( argc , argv  , "m:n:k:t:" ) ) != -1 )
            switch( c ) {
    	        case 'k':
    	            if ( sscanf( optarg , "%d" , &K ) != 1 )
    	                error( "Error parsing dimension M." );
    	            break;
    	        case 'm':
    	            if ( sscanf( optarg , "%d" , &M ) != 1 )
    	                error( "Error parsing dimension M." );
    	            break;
    	        case 'n':
    	            if ( sscanf( optarg , "%d" , &N ) != 1 )
    	                error( "Error parsing dimension M." );
    	            break;
    	        case 't':
    	            if ( sscanf( optarg , "%d" , &nr_threads ) != 1 )
    	                error( "Error parsing number of threads." );
    	            omp_set_num_threads( nr_threads );
    	            break;
    	        case '?':
                    fprintf( stderr , "Usage: %s [-t nr_threads] [-m M] [-n N] [-k K]\n" , argv[0] );
                    fprintf( stderr , "Computes tests with nr_threads threads for the multiplication\n"
                                      "of a matrix of size MxK and of size KxN tiles of size 32x32.\n" );
    	            exit( EXIT_FAILURE );
    	        }
                
        /* Dump arguments. */
        message( "multiplying two matrices of size %ix%i and %ix%i using %i threads." ,
            32*M , 32*K , 32*K , 32*N , nr_threads );
        
        /* Call the first test. */
        test1( M , N , K , nr_threads );
    
        /* Call the second test. */
        test2( M , N , K , nr_threads );
    
        }