Skip to content
Snippets Groups Projects
Commit ef54e445 authored by Aidan Chalk
Browse files

Fixing mess I made

parent 13a409be
No related branches found
No related tags found
1 merge request: !6 Resource reuse
...@@ -585,8 +585,8 @@ void test_qr(int m, int n, int K, int nr_threads, int runs, double* matrix) { ...@@ -585,8 +585,8 @@ void test_qr(int m, int n, int K, int nr_threads, int runs, double* matrix) {
sizeof(int) * 3, 5); sizeof(int) * 3, 5);
qsched_addlock(&s, tid_new, rid[j * m + i]); qsched_addlock(&s, tid_new, rid[j * m + i]);
qsched_adduse(&s, tid_new, rid[k * m + i]); qsched_adduse(&s, tid_new, rid[k * m + i]);
qsched_adduse(&s, tid_new, rid[j * m + k]); qsched_addlock(&s, tid_new, rid[j * m + k]);
// qsched_addunlock(&s, tid[k * m + i], tid_new); qsched_addunlock(&s, tid[k * m + i], tid_new);
qsched_addunlock(&s, tid[j * m + i - 1], tid_new); qsched_addunlock(&s, tid[j * m + i - 1], tid_new);
if (tid[j * m + i] != -1) qsched_addunlock(&s, tid[j * m + i], tid_new); if (tid[j * m + i] != -1) qsched_addunlock(&s, tid[j * m + i], tid_new);
......
...@@ -62,7 +62,7 @@ int nr_timers = 0; ...@@ -62,7 +62,7 @@ int nr_timers = 0;
double* columnToTile(double* columnMatrix, int size, int m, int n, double* columnToTile(double* columnMatrix, int size, int m, int n,
int tilesize) { int tilesize) {
double* TileMatrix; double* TileMatrix;
TileMatrix = malloc(sizeof(double) * size); TileMatrix = (double *)malloc(sizeof(double) * size);
if (TileMatrix == NULL) error("failed to allocate TileMatrix"); if (TileMatrix == NULL) error("failed to allocate TileMatrix");
int i, j, k, l; int i, j, k, l;
...@@ -130,7 +130,8 @@ double* tileToColumn(double* tileMatrix, int size, int m, int n, int tilesize) { ...@@ -130,7 +130,8 @@ double* tileToColumn(double* tileMatrix, int size, int m, int n, int tilesize) {
* *
* *
*/ */
#pragma omp task inout(cornerTile[0]) out(tauMatrix[0]) #pragma omp task inout(cornerTile[0])
// out(tauMatrix[0])
void DGEQRF(double* restrict cornerTile, int tileSize, void DGEQRF(double* restrict cornerTile, int tileSize,
double* restrict tauMatrix, int k, int tauNum) { double* restrict tauMatrix, int k, int tauNum) {
int i, j, n; int i, j, n;
...@@ -215,7 +216,8 @@ void DGEQRF(double* restrict cornerTile, int tileSize, ...@@ -215,7 +216,8 @@ void DGEQRF(double* restrict cornerTile, int tileSize,
* *
* *
*/ */
#pragma omp task in(cornerTile[0]) inout(rowTile[0]) in(tauMatrix[0]) #pragma omp task in(cornerTile[0]) inout(rowTile[0])
// in(tauMatrix[0])
void DLARFT(double* restrict cornerTile, double* restrict rowTile, int tileSize, void DLARFT(double* restrict cornerTile, double* restrict rowTile, int tileSize,
int jj, int kk, double* restrict tauMatrix, int tauNum) { int jj, int kk, double* restrict tauMatrix, int tauNum) {
int i, j, n; int i, j, n;
...@@ -269,7 +271,8 @@ void DLARFT(double* restrict cornerTile, double* restrict rowTile, int tileSize, ...@@ -269,7 +271,8 @@ void DLARFT(double* restrict cornerTile, double* restrict rowTile, int tileSize,
* *
* *
*/ */
#pragma omp task inout(cornerTile[0]) inout(columnTile[0]) out(tauMatrix[0]) #pragma omp task inout(cornerTile[0]) inout(columnTile[0])
// out(tauMatrix[0])
void DTSQRF(double* restrict cornerTile, double* restrict columnTile, void DTSQRF(double* restrict cornerTile, double* restrict columnTile,
int tilesize, int ii, int kk, double* restrict tauMatrix, int tilesize, int ii, int kk, double* restrict tauMatrix,
int tauNum) { int tauNum) {
...@@ -362,7 +365,8 @@ void DTSQRF(double* restrict cornerTile, double* restrict columnTile, ...@@ -362,7 +365,8 @@ void DTSQRF(double* restrict cornerTile, double* restrict columnTile,
* *
* *
*/ */
#pragma omp task inout(cornerTile[0]) in(columnTile[0]) inout(rowTile[0]) in(tauMatrix[0]) #pragma omp task inout(cornerTile[0]) in(columnTile[0]) inout(rowTile[0])
// in(tauMatrix[0])
void DSSRFT(double* restrict cornerTile, double* restrict columnTile, void DSSRFT(double* restrict cornerTile, double* restrict columnTile,
double* restrict rowTile, int tilesize, int ii, int jj, int kk, double* restrict rowTile, int tilesize, int ii, int jj, int kk,
double* restrict tauMatrix, int tauNum) { double* restrict tauMatrix, int tauNum) {
...@@ -449,12 +453,10 @@ void test_qr(int m, int n, int K, int nr_threads, int runs) { ...@@ -449,12 +453,10 @@ void test_qr(int m, int n, int K, int nr_threads, int runs) {
for (k = 0; k < m && k < n; k++) { for (k = 0; k < m && k < n; k++) {
/* Add kth corner task. */ /* Add kth corner task. */
// #pragma omp task inout( tid[ k*m + k ] )
DGEQRF(&A[(k * m + k) * K * K], K, tau, k, m); DGEQRF(&A[(k * m + k) * K * K], K, tau, k, m);
/* Add column tasks on kth row. */ /* Add column tasks on kth row. */
for (j = k + 1; j < n; j++) { for (j = k + 1; j < n; j++) {
// #pragma omp task inout( tid[ j*m + k ] ) in( tid[ k*m + k ] )
DLARFT(&A[(k * m + k) * K * K], &A[(j * m + k) * K * K], K, j, k, tau, DLARFT(&A[(k * m + k) * K * K], &A[(j * m + k) * K * K], K, j, k, tau,
m); m);
} }
...@@ -463,14 +465,11 @@ void test_qr(int m, int n, int K, int nr_threads, int runs) { ...@@ -463,14 +465,11 @@ void test_qr(int m, int n, int K, int nr_threads, int runs) {
for (i = k + 1; i < m; i++) { for (i = k + 1; i < m; i++) {
/* Add the row tasks for the kth column. */ /* Add the row tasks for the kth column. */
// #pragma omp task inout( tid[ k*m + i ] ) in( tid[ k*m + k ] )
DTSQRF(&A[(k * m + k) * K * K], &A[(k * m + i) * K * K], K, i, k, tau, DTSQRF(&A[(k * m + k) * K * K], &A[(k * m + i) * K * K], K, i, k, tau,
m); m);
/* Add the inner tasks. */ /* Add the inner tasks. */
for (j = k + 1; j < n; j++) { for (j = k + 1; j < n; j++) {
// #pragma omp task inout( tid[ j*m + i ] ) in( tid[ k*m + i ] , tid[
// j*m + k ] )
DSSRFT(&A[(j * m + i) * K * K], &A[(k * m + i) * K * K], DSSRFT(&A[(j * m + i) * K * K], &A[(k * m + i) * K * K],
&A[(j * m + k) * K * K], K, i, j, k, tau, m); &A[(j * m + k) * K * K], K, i, j, k, tau, m);
} }
...@@ -491,7 +490,7 @@ void test_qr(int m, int n, int K, int nr_threads, int runs) { ...@@ -491,7 +490,7 @@ void test_qr(int m, int n, int K, int nr_threads, int runs) {
/* Dump the tasks. */ /* Dump the tasks. */
/* for ( k = 0 ; k < nr_timers ; k++ ) /* for ( k = 0 ; k < nr_timers ; k++ )
printf( "%i %i %lli %lli\n" , timers[k].threadID , timers[k].type , printf( "%i %i %lli %lli\n" , timers[k].threadID , timers[k].type ,
timers[k].tic , timers[k].toc ); */ timers[k].tic , timers[k].toc ); */
} }
/** /**
...@@ -500,15 +499,9 @@ void test_qr(int m, int n, int K, int nr_threads, int runs) { ...@@ -500,15 +499,9 @@ void test_qr(int m, int n, int K, int nr_threads, int runs) {
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
int c, nr_threads; int c, nr_threads = 1;
int M = 4, N = 4, runs = 1, K = 32; int M = 4, N = 4, runs = 1, K = 32;
/* Get the number of threads. */
#pragma omp parallel shared(nr_threads)
{
if (omp_get_thread_num() == 0) nr_threads = omp_get_num_threads();
}
/* Parse the options */ /* Parse the options */
while ((c = getopt(argc, argv, "m:n:k:r:t:")) != -1) switch (c) { while ((c = getopt(argc, argv, "m:n:k:r:t:")) != -1) switch (c) {
case 'm': case 'm':
...@@ -548,4 +541,6 @@ int main(int argc, char* argv[]) { ...@@ -548,4 +541,6 @@ int main(int argc, char* argv[]) {
error("Failed to allocate timers."); error("Failed to allocate timers.");
test_qr(M, N, K, nr_threads, runs); test_qr(M, N, K, nr_threads, runs);
abort();
return 0;
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment