From ef54e445cac33e9e3c23c4e846e54fb8ee10be95 Mon Sep 17 00:00:00 2001 From: Aidan Chalk <d74ksy@cosma-e.cosma> Date: Wed, 17 May 2017 16:21:28 +0100 Subject: [PATCH] Fixing mess I made --- examples/test_qr.c | 4 ++-- examples/test_qr_ompss.c | 31 +++++++++++++------------------ 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/examples/test_qr.c b/examples/test_qr.c index f941e41..5a911ff 100644 --- a/examples/test_qr.c +++ b/examples/test_qr.c @@ -585,8 +585,8 @@ void test_qr(int m, int n, int K, int nr_threads, int runs, double* matrix) { sizeof(int) * 3, 5); qsched_addlock(&s, tid_new, rid[j * m + i]); qsched_adduse(&s, tid_new, rid[k * m + i]); - qsched_adduse(&s, tid_new, rid[j * m + k]); - // qsched_addunlock(&s, tid[k * m + i], tid_new); + qsched_addlock(&s, tid_new, rid[j * m + k]); + qsched_addunlock(&s, tid[k * m + i], tid_new); qsched_addunlock(&s, tid[j * m + i - 1], tid_new); if (tid[j * m + i] != -1) qsched_addunlock(&s, tid[j * m + i], tid_new); diff --git a/examples/test_qr_ompss.c b/examples/test_qr_ompss.c index 475e892..e3693c4 100644 --- a/examples/test_qr_ompss.c +++ b/examples/test_qr_ompss.c @@ -62,7 +62,7 @@ int nr_timers = 0; double* columnToTile(double* columnMatrix, int size, int m, int n, int tilesize) { double* TileMatrix; - TileMatrix = malloc(sizeof(double) * size); + TileMatrix = (double *)malloc(sizeof(double) * size); if (TileMatrix == NULL) error("failed to allocate TileMatrix"); int i, j, k, l; @@ -130,7 +130,8 @@ double* tileToColumn(double* tileMatrix, int size, int m, int n, int tilesize) { * * */ -#pragma omp task inout(cornerTile[0]) out(tauMatrix[0]) +#pragma omp task inout(cornerTile[0]) +// out(tauMatrix[0]) void DGEQRF(double* restrict cornerTile, int tileSize, double* restrict tauMatrix, int k, int tauNum) { int i, j, n; @@ -215,7 +216,8 @@ void DGEQRF(double* restrict cornerTile, int tileSize, * * */ -#pragma omp task in(cornerTile[0]) inout(rowTile[0]) in(tauMatrix[0]) +#pragma omp task in(cornerTile[0]) inout(rowTile[0]) +// in(tauMatrix[0]) void DLARFT(double* restrict cornerTile, double* restrict rowTile, int tileSize, int jj, int kk, double* restrict tauMatrix, int tauNum) { int i, j, n; @@ -269,7 +271,8 @@ void DLARFT(double* restrict cornerTile, double* restrict rowTile, int tileSize, * * */ -#pragma omp task inout(cornerTile[0]) inout(columnTile[0]) out(tauMatrix[0]) +#pragma omp task inout(cornerTile[0]) inout(columnTile[0]) +// out(tauMatrix[0]) void DTSQRF(double* restrict cornerTile, double* restrict columnTile, int tilesize, int ii, int kk, double* restrict tauMatrix, int tauNum) { @@ -362,7 +365,8 @@ void DTSQRF(double* restrict cornerTile, double* restrict columnTile, * * */ -#pragma omp task inout(cornerTile[0]) in(columnTile[0]) inout(rowTile[0]) in(tauMatrix[0]) +#pragma omp task inout(cornerTile[0]) in(columnTile[0]) inout(rowTile[0]) +// in(tauMatrix[0]) void DSSRFT(double* restrict cornerTile, double* restrict columnTile, double* restrict rowTile, int tilesize, int ii, int jj, int kk, double* restrict tauMatrix, int tauNum) { @@ -449,12 +453,10 @@ void test_qr(int m, int n, int K, int nr_threads, int runs) { for (k = 0; k < m && k < n; k++) { /* Add kth corner task. */ - // #pragma omp task inout( tid[ k*m + k ] ) DGEQRF(&A[(k * m + k) * K * K], K, tau, k, m); /* Add column tasks on kth row. */ for (j = k + 1; j < n; j++) { - // #pragma omp task inout( tid[ j*m + k ] ) in( tid[ k*m + k ] ) DLARFT(&A[(k * m + k) * K * K], &A[(j * m + k) * K * K], K, j, k, tau, m); } @@ -463,14 +465,11 @@ void test_qr(int m, int n, int K, int nr_threads, int runs) { for (i = k + 1; i < m; i++) { /* Add the row taks for the kth column. */ - // #pragma omp task inout( tid[ k*m + i ] ) in( tid[ k*m + k ] ) DTSQRF(&A[(k * m + k) * K * K], &A[(k * m + i) * K * K], K, i, k, tau, m); /* Add the inner tasks. */ for (j = k + 1; j < n; j++) { - // #pragma omp task inout( tid[ j*m + i ] ) in( tid[ k*m + i ] , tid[ - // j*m + k ] ) DSSRFT(&A[(j * m + i) * K * K], &A[(k * m + i) * K * K], &A[(j * m + k) * K * K], K, i, j, k, tau, m); } @@ -491,7 +490,7 @@ void test_qr(int m, int n, int K, int nr_threads, int runs) { /* Dump the tasks. */ /* for ( k = 0 ; k < nr_timers ; k++ ) printf( "%i %i %lli %lli\n" , timers[k].threadID , timers[k].type , - timers[k].tic , timers[k].toc ); */ + timers[k].tic , timers[k].toc ); */ } /** @@ -500,15 +499,9 @@ void test_qr(int m, int n, int K, int nr_threads, int runs) { int main(int argc, char* argv[]) { - int c, nr_threads; + int c, nr_threads = 1; int M = 4, N = 4, runs = 1, K = 32; -/* Get the number of threads. */ -#pragma omp parallel shared(nr_threads) - { - if (omp_get_thread_num() == 0) nr_threads = omp_get_num_threads(); - } - /* Parse the options */ while ((c = getopt(argc, argv, "m:n:k:r:t:")) != -1) switch (c) { case 'm': @@ -548,4 +541,6 @@ int main(int argc, char* argv[]) { error("Failed to allocate timers."); test_qr(M, N, K, nr_threads, runs); + abort(); + return 0; } -- GitLab