diff --git a/examples/Makefile.am b/examples/Makefile.am
index 2054d83875d03916ef0245f0a86de63815da2dca..be88cc27d0485962b88be2dec2593928e9a6f699 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -29,7 +29,7 @@ MPI_THREAD_LIBS = @MPI_THREAD_LIBS@
 MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS)
 
 # Set-up the library
-bin_PROGRAMS = test test_bh test_bh_sorted test_fmm_sorted test_bh_mpi test_qr_mpi test_qr
+bin_PROGRAMS = test test_bh test_bh_sorted test_fmm_sorted test_bh_mpi test_qr_mpi test_qr test_matadd_mpi
 if HAVECBLAS
 bin_PROGRAMS += test_qr_mpi_cblas
 endif
@@ -74,3 +74,8 @@ test_qr_mpi_cblas_SOURCES = test_qr_mpi.c
 test_qr_mpi_cblas_CFLAGS = $(AM_CFLAGS) -DWITH_MPI -DWITH_CBLAS_LIB
 test_qr_mpi_cblas_LDADD = ../src/.libs/libquickschedMPI.a -llapacke -llapacke -lblas -lcblas $(METIS_LIBS)
 test_qr_mpi_cblas_LDFLAGS = $(MPI_THREAD_LIBS)
+
+test_matadd_mpi_SOURCES = test_matadd_mpi.c
+test_matadd_mpi_CFLAGS = $(AM_CFLAGS) -DWITH_MPI
+test_matadd_mpi_LDADD = ../src/.libs/libquickschedMPI.a $(METIS_LIBS)
+test_matadd_mpi_LDFLAGS = $(MPI_THREAD_LIBS)
diff --git a/examples/test_matadd_mpi.c b/examples/test_matadd_mpi.c
new file mode 100644
index 0000000000000000000000000000000000000000..f7f8e707cd9e417d1ceffe9490887b74dd00587a
--- /dev/null
+++ b/examples/test_matadd_mpi.c
@@ -0,0 +1,248 @@
+/*******************************************************************************
+ * This file is part of QuickSched.
+ * Copyright (c) 2018 Aidan Chalk (aidan.chalk@stfc.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+* *****************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Standard includes. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <math.h>
+#include <float.h>
+#include <limits.h>
+#include <omp.h>
+#include <fenv.h>
+#include <mpi.h>
+
+#define NO_TASK_TIMERS
+#define NO_LOAD_BALANCE_EXACT
+
+/* Local includes. */
+#include "quicksched.h"
+#include "res.h"
+
+
+#define TILESIZE 64
+
+/**
+ * @brief Computes the matrix addition of two tiles, C=A+B.
+ *
+ * Every tile holds TILESIZE*TILESIZE doubles. The sum is element-wise, so
+ * the result does not depend on the storage order inside a tile.
+ *
+ * @param A Pointer to the tile of A
+ * @param B Pointer to the tile of B
+ * @param C Pointer to the tile of the output matrix, C
+ *
+*/
+void addMatrixTile(double *A, double *B, double *C){
+
+  for(int i = 0; i < TILESIZE*TILESIZE; i++){
+    C[i] = A[i] + B[i];
+  }
+
+}
+
+
+/**
+ * @brief Adds two tiled matrices using one QuickSched task per tile.
+ *
+ * The matrix is matsize*matsize tiles of TILESIZE*TILESIZE doubles. The rows
+ * of tiles are split over the MPI ranks. Each rank fills its tiles of A and B
+ * with random values, registers every tile of A, B and C as a resource and
+ * adds one task per tile that computes C = A + B.
+ *
+ * @param matsize Number of tiles along each dimension of the matrix.
+ * @param nr_threads Number of threads handed to the scheduler.
+ */
+void test_matadd(int matsize, int nr_threads){
+
+  struct qsched s;
+  const int add_task = 206;
+  int MpiThreadLevel;
+  double *local_A, *local_B, *local_C;
+
+  /* Initialize the MPI environment. */
+  MPI_Init_thread(NULL, NULL, MPI_THREAD_MULTIPLE, &MpiThreadLevel);
+
+  if(MpiThreadLevel != MPI_THREAD_MULTIPLE)
+    error("We didn't get thread multiple!");
+
+  qsched_init(&s, nr_threads, qsched_flag_yield | qsched_flag_pthread, MPI_COMM_WORLD);
+
+  srand(s.rank);
+
+  /* Split the rows of tiles over the ranks. When the split is uneven the
+     share is rounded up and the trailing ranks take whatever is left. */
+  int numRows = matsize / s.count_ranks;
+  if(numRows*s.count_ranks != matsize){
+    numRows = numRows + 1;
+    const int startRow = s.rank * numRows;
+    if(startRow + numRows > matsize){
+      numRows = matsize-startRow;
+    }
+  }
+  /* Ranks that start beyond the last row own nothing. */
+  if(numRows < 0) numRows = 0;
+
+  /* Sizes are kept in size_t so large matrices do not overflow an int. */
+  const size_t numTiles = (size_t)numRows * (size_t)matsize;
+  const size_t tileElems = (size_t)TILESIZE * TILESIZE;
+  const size_t tileBytes = sizeof(double) * tileElems;
+  /* Never request zero bytes: malloc(0) may legitimately return NULL. */
+  const size_t allocTiles = numTiles > 0 ? numTiles : 1;
+
+  local_A = malloc(tileBytes * allocTiles);
+  local_B = malloc(tileBytes * allocTiles);
+  local_C = malloc(tileBytes * allocTiles);
+  if(local_A == NULL || local_B == NULL || local_C == NULL) error("Failed to allocate arrays");
+  for(size_t i = 0; i < numTiles*tileElems; i++){
+    local_A[i] = 2.0 * ((double)rand()) / RAND_MAX - 1.0;
+    local_B[i] = 2.0 * ((double)rand()) / RAND_MAX - 1.0;
+  }
+  memset(local_C, 0, tileBytes * numTiles);
+
+  /* Initialise resources. */
+  qsched_res_t *local_A_rid = NULL, *local_B_rid = NULL, *local_C_rid = NULL;
+  double **local_A_tiles, **local_B_tiles, **local_C_tiles;
+
+  local_A_rid = malloc(sizeof(qsched_res_t) * allocTiles);
+  local_B_rid = malloc(sizeof(qsched_res_t) * allocTiles);
+  local_C_rid = malloc(sizeof(qsched_res_t) * allocTiles);
+  local_A_tiles = malloc(sizeof(double*) * allocTiles);
+  local_B_tiles = malloc(sizeof(double*) * allocTiles);
+  local_C_tiles = malloc(sizeof(double*) * allocTiles);
+  if(local_A_rid == NULL || local_B_rid == NULL || local_C_rid == NULL ||
+     local_A_tiles == NULL || local_B_tiles == NULL || local_C_tiles == NULL)
+    error("Failed to allocate arrays");
+
+  /* Register one resource per tile and copy the tile into the storage
+     handed back through the last argument of qsched_addres. */
+  for(size_t i = 0; i < numTiles; i++){
+    local_A_rid[i] = qsched_addres(&s, qsched_owner_none, tileBytes, (void**) &local_A_tiles[i]);
+    memcpy(local_A_tiles[i], &local_A[i*tileElems], tileBytes);
+    local_B_rid[i] = qsched_addres(&s, qsched_owner_none, tileBytes, (void**) &local_B_tiles[i]);
+    memcpy(local_B_tiles[i], &local_B[i*tileElems], tileBytes);
+    local_C_rid[i] = qsched_addres(&s, qsched_owner_none, tileBytes, (void**) &local_C_tiles[i]);
+    memcpy(local_C_tiles[i], &local_C[i*tileElems], tileBytes);
+  }
+
+  /* The tiles have been copied out, so the staging buffers can go. */
+  free(local_A);
+  free(local_B);
+  free(local_C);
+
+  message("Synchronizing resources.");
+  qsched_sync_resources(&s);
+
+  /* Generate tasks: one per tile, locking C and using A and B. */
+  qsched_task_t tid_new = -1;
+  long long int MPI_data[3];
+  for(size_t i = 0; i < numTiles; i++){
+    MPI_data[0] = local_A_rid[i];
+    MPI_data[1] = local_B_rid[i];
+    MPI_data[2] = local_C_rid[i];
+    tid_new = qsched_addtask(&s, add_task, task_flag_none, MPI_data,
+                             sizeof(long long int) * 3, 200);
+    qsched_addlock(&s, tid_new, local_C_rid[i]);
+    qsched_adduse(&s, tid_new, local_A_rid[i]);
+    qsched_adduse(&s, tid_new, local_B_rid[i]);
+  }
+
+  /* Runner function to pass to the scheduler. */
+  void runner(struct qsched *s, int type, void * data) {
+    /* Decode the task data: the resource ids of the A, B and C tiles. */
+    long long int* idata = (long long int*)data;
+    long long int a = idata[0];
+    long long int b = idata[1];
+    long long int c = idata[2];
+
+    /* Fetch the tile data from the scheduler and execute the task. */
+    double *A = (double*)qsched_getresdata(s, a);
+    double *B = (double*)qsched_getresdata(s, b);
+    double *C = (double*)qsched_getresdata(s, c);
+    addMatrixTile(A,B,C);
+  }
+  qsched_run_MPI(&s, nr_threads, runner);
+
+  /* Print off a hello world message. */
+  printf("Hello world from processor rank = %i, count_ranks = %i\n",
+         s.rank, s.count_ranks);
+
+  /* Release the bookkeeping arrays. */
+  free(local_A_rid);
+  free(local_B_rid);
+  free(local_C_rid);
+  free(local_A_tiles);
+  free(local_B_tiles);
+  free(local_C_tiles);
+  /* NOTE(review): neither MPI_Finalize nor a scheduler teardown is called
+     here - confirm whether the library is expected to handle this. */
+}
+
+
+/**
+ * @brief Parses the command line and runs the tiled matrix addition test.
+ *
+ * Options: -m sets the matrix size in tiles, -t the number of threads and
+ * -r the number of runs (parsed but currently unused). -n and -k take an
+ * argument and are ignored.
+ */
+int main(int argc, char* argv[]) {
+
+  int c, nr_threads=1;
+  int M = 4, runs = 1;
+
+  /* Parse the options */
+  while ((c = getopt(argc, argv, "m:n:k:r:t:")) != -1) switch (c) {
+      case 'm':
+        if (sscanf(optarg, "%d", &M) != 1) error("Error parsing dimension.");
+        break;
+      case 'r':
+        if (sscanf(optarg, "%d", &runs) != 1)
+          error("Error parsing number of runs.");
+        break;
+      case 't':
+        if (sscanf(optarg, "%d", &nr_threads) != 1)
+          error("Error parsing number of threads.");
+        break;
+      case '?':
+        fprintf(stderr, "Usage: %s [-t nr_threads] [-m M]\n",
+                argv[0]);
+        fprintf(stderr, "Computes the addition of a pair of MxM tiled\n"
+                        "matrix using nr_threads threads.\n");
+        exit(EXIT_FAILURE);
+      default:
+        /* -n and -k are accepted but ignored. */
+        break;
+    }
+
+  /* Reject sizes the test cannot work with. */
+  if (M <= 0) error("The matrix dimension must be positive.");
+  if (nr_threads <= 0) error("The number of threads must be positive.");
+
+  /* Dump arguments. */
+  message("Computing the sum of two %ix%i tiled matrices using %i threads.\n",M,M,nr_threads);
+
+  test_matadd(M,nr_threads);
+
+  return 0;
+}