Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Q
QuickSched
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
QuickSched
Commits
7a0f32e7
Commit
7a0f32e7
authored
12 years ago
by
Pedro Gonnet
Browse files
Options
Downloads
Patches
Plain Diff
example file.
parent
c8f33233
No related branches found
No related tags found
Loading
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
examples/test.c
+300
-0
300 additions, 0 deletions
examples/test.c
with
300 additions
and
0 deletions
examples/test.c
0 → 100644
+
300
−
0
View file @
7a0f32e7
/*******************************************************************************
* This file is part of QuickSched.
* Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
/* Standard includes. */
#include
<stdio.h>
#include
<stdlib.h>
#include
<string.h>
#include
<math.h>
#include
<omp.h>
/* Local includes. */
#include
"quicksched.h"
/* Error macro: print the message together with its source location (file,
   function, line) to stderr, then abort. The do { } while (0) wrapper makes
   the expansion a single statement, so "if (x) error("..."); else ..." is
   valid. */
#define error(s) do { fprintf( stderr , "%s:%s():%i: %s\n" , __FILE__ , __func__ , __LINE__ , (s) ); abort(); } while (0)
/**
 * @brief Matrix multiplication kernel.
 *
 * Accumulates the product of an m x k matrix and a k x n matrix into an
 * m x n matrix: C += A * B. C is not cleared first, so the caller must
 * initialise it. Element (row, col) of each matrix is read at index
 * row + ld * col, i.e. the storage is column-major.
 *
 * @param m Number of rows of A and C.
 * @param n Number of columns of B and C.
 * @param k Number of columns of A and rows of B.
 * @param a First operand (read only).
 * @param lda Leading dimension of @c a.
 * @param b Second operand (read only).
 * @param ldb Leading dimension of @c b.
 * @param c Matrix the product is added to.
 * @param ldc Leading dimension of @c c.
 */
void matmul(int m, int n, int k, const double *a, int lda, const double *b,
            int ldb, double *c, int ldc)
{
    int ii, jj, kk;
    double acc;

    for (ii = 0; ii < m; ii++) {
        for (jj = 0; jj < n; jj++) {
            /* Dot product of row ii of A with column jj of B. */
            acc = 0.0;
            for (kk = 0; kk < k; kk++) {
                acc += a[ii + lda * kk] * b[kk + ldb * jj];
            }
            /* Accumulate, do not overwrite: callers sum several products
               into the same tile of C. */
            c[ii + ldc * jj] += acc;
        }
    }
}
/**
* @brief First test: Just tasks, no dependencies or conflicts.
*
* Computes a tiled matrix multiplication of the form
* C_ij = A_i: * B_:j, with k taskx C_ij += A_ik*B_kj.
*/
void
test2
(
int
m
,
int
n
,
int
k
,
int
nr_threads
)
{
int
i
,
j
,
kk
,
qid
,
data
[
3
],
*
d
,
tid
,
rid
;
struct
sched
s
;
struct
task
*
t
;
double
*
a
,
*
b
,
*
c
,
*
res
,
err
=
0
.
0
,
irm
=
1
.
0
/
RAND_MAX
;
ticks
tic_task
,
toc_task
,
tic_ref
,
toc_ref
;
/* Init the sched. */
bzero
(
&
s
,
sizeof
(
struct
sched
)
);
sched_init
(
&
s
,
nr_threads
,
m
*
n
);
/* Allocate the matrices. */
if
(
(
a
=
(
double
*
)
malloc
(
sizeof
(
double
)
*
m
*
k
*
32
*
32
)
)
==
NULL
||
(
b
=
(
double
*
)
malloc
(
sizeof
(
double
)
*
k
*
n
*
32
*
32
)
)
==
NULL
||
(
c
=
(
double
*
)
malloc
(
sizeof
(
double
)
*
m
*
n
*
32
*
32
)
)
==
NULL
||
(
res
=
(
double
*
)
malloc
(
sizeof
(
double
)
*
m
*
n
*
32
*
32
)
)
==
NULL
)
error
(
"Failed to allocate matrices."
);
/* Fill the matrices. */
for
(
i
=
0
;
i
<
m
*
k
*
32
*
32
;
i
++
)
a
[
i
]
=
rand
()
*
irm
;
for
(
i
=
0
;
i
<
k
*
n
*
32
*
32
;
i
++
)
b
[
i
]
=
rand
()
*
irm
;
bzero
(
c
,
sizeof
(
double
)
*
m
*
n
*
32
*
32
);
bzero
(
res
,
sizeof
(
double
)
*
m
*
n
*
32
*
32
);
/* Build a task for each tile of the matrix c. */
for
(
i
=
0
;
i
<
m
;
i
++
)
for
(
j
=
0
;
j
<
n
;
j
++
)
{
rid
=
sched_addres
(
&
s
);
data
[
0
]
=
i
;
data
[
1
]
=
j
;
for
(
kk
=
0
;
kk
<
k
;
kk
++
)
{
data
[
2
]
=
kk
;
tid
=
sched_newtask
(
&
s
,
1
,
0
,
0
,
data
,
3
*
sizeof
(
int
)
);
sched_addlock
(
&
s
,
tid
,
rid
);
}
}
/* Prepare the sched for execution. */
sched_prepare
(
&
s
);
/* Parallel loop. */
tic_task
=
getticks
();
#pragma omp parallel private(t,qid,d)
{
/* Get the ID of this runner. */
if
(
(
qid
=
omp_get_thread_num
()
)
<
nr_threads
)
{
/* Main loop. */
while
(
1
)
{
/* Get a task, break if unsucessful. */
if
(
(
t
=
sched_gettask
(
&
s
,
qid
)
)
==
NULL
)
break
;
/* Decode and execute the task. */
switch
(
t
->
type
)
{
case
1
:
d
=
sched_getdata
(
&
s
,
t
);
// printf( "test2[%02i]: working on block [ %i , %i ] with k=%i, lock[0]=%i.\n" , qid , d[0] , d[1] , d[2] , t->locks[0] ); fflush(stdout);
matmul
(
32
,
32
,
32
,
&
a
[
d
[
2
]
*
32
*
m
*
32
+
d
[
0
]
*
32
]
,
m
*
32
,
&
b
[
k
*
32
*
d
[
1
]
*
32
+
d
[
2
]
*
32
]
,
k
*
32
,
&
c
[
d
[
0
]
*
32
+
m
*
32
*
d
[
1
]
*
32
]
,
m
*
32
);
break
;
default:
error
(
"Unknown task type."
);
}
/* Clean up afterwards. */
sched_done
(
&
s
,
t
);
}
/* main loop. */
}
/* valid queue? */
}
toc_task
=
getticks
();
/* Verify the result. */
tic_ref
=
getticks
();
matmul
(
m
*
32
,
n
*
32
,
k
*
32
,
a
,
m
*
32
,
b
,
k
*
32
,
res
,
m
*
32
);
toc_ref
=
getticks
();
for
(
i
=
0
;
i
<
m
*
n
*
32
*
32
;
i
++
)
err
+=
(
res
[
i
]
-
c
[
i
]
)
*
(
res
[
i
]
-
c
[
i
]
);
printf
(
"test2: Frob. norm of error is %.3e.
\n
"
,
sqrt
(
err
)
);
printf
(
"test2: tasks took %lli ticks.
\n
"
,
toc_task
-
tic_task
);
printf
(
"test2: ref. took %lli ticks.
\n
"
,
toc_ref
-
tic_ref
);
/* Dump the tasks. */
/* for ( k = 0 ; k < s.count ; k++ ) {
d = (int *)&s.data[ s.tasks[k].data ];
printf( " %i %i %i %i %lli %lli\n" , k , s.tasks[k].qid , d[0] , d[1] , s.tasks[k].tic , s.tasks[k].toc );
} */
/* Clean up. */
sched_free
(
&
s
);
free
(
a
);
free
(
b
);
free
(
c
);
free
(
res
);
}
/**
* @brief First test: Just tasks, no dependencies or conflicts.
*
* Computes a tiled matrix multiplication of the form
* C_ij = A_i: * B_:j, with a single task per C_ij.
*/
void
test1
(
int
m
,
int
n
,
int
k
,
int
nr_threads
)
{
int
i
,
j
,
qid
,
data
[
2
],
*
d
,
tid
,
rid
;
struct
sched
s
;
struct
task
*
t
;
double
*
a
,
*
b
,
*
c
,
*
res
,
err
=
0
.
0
,
irm
=
1
.
0
/
RAND_MAX
;
ticks
tic_task
,
toc_task
,
tic_ref
,
toc_ref
;
/* Init the sched. */
bzero
(
&
s
,
sizeof
(
struct
sched
)
);
sched_init
(
&
s
,
nr_threads
,
m
*
n
);
/* Allocate the matrices. */
if
(
(
a
=
(
double
*
)
malloc
(
sizeof
(
double
)
*
m
*
k
*
32
*
32
)
)
==
NULL
||
(
b
=
(
double
*
)
malloc
(
sizeof
(
double
)
*
k
*
n
*
32
*
32
)
)
==
NULL
||
(
c
=
(
double
*
)
malloc
(
sizeof
(
double
)
*
m
*
n
*
32
*
32
)
)
==
NULL
||
(
res
=
(
double
*
)
malloc
(
sizeof
(
double
)
*
m
*
n
*
32
*
32
)
)
==
NULL
)
error
(
"Failed to allocate matrices."
);
/* Fill the matrices. */
for
(
i
=
0
;
i
<
m
*
k
*
32
*
32
;
i
++
)
a
[
i
]
=
rand
()
*
irm
;
for
(
i
=
0
;
i
<
k
*
n
*
32
*
32
;
i
++
)
b
[
i
]
=
rand
()
*
irm
;
bzero
(
c
,
sizeof
(
double
)
*
m
*
n
*
32
*
32
);
bzero
(
res
,
sizeof
(
double
)
*
m
*
n
*
32
*
32
);
/* Build a task for each tile of the matrix c. */
for
(
i
=
0
;
i
<
m
;
i
++
)
for
(
j
=
0
;
j
<
n
;
j
++
)
{
data
[
0
]
=
i
;
data
[
1
]
=
j
;
rid
=
sched_addres
(
&
s
);
tid
=
sched_newtask
(
&
s
,
1
,
0
,
0
,
data
,
2
*
sizeof
(
int
)
);
sched_addlock
(
&
s
,
tid
,
rid
);
}
/* Prepare the sched for execution. */
sched_prepare
(
&
s
);
/* Parallel loop. */
tic_task
=
getticks
();
#pragma omp parallel private(t,qid,d)
{
/* Get the ID of this runner. */
if
(
(
qid
=
omp_get_thread_num
()
)
<
nr_threads
)
{
/* Main loop. */
while
(
1
)
{
/* Get a task, break if unsucessful. */
if
(
(
t
=
sched_gettask
(
&
s
,
qid
)
)
==
NULL
)
break
;
/* Decode and execute the task. */
switch
(
t
->
type
)
{
case
1
:
d
=
sched_getdata
(
&
s
,
t
);
// printf( "test1[%02i]: working on block [ %i , %i ].\n" , qid , d[0] , d[1] ); fflush(stdout);
matmul
(
32
,
32
,
k
*
32
,
&
a
[
d
[
0
]
*
32
]
,
m
*
32
,
&
b
[
k
*
32
*
d
[
1
]
*
32
]
,
k
*
32
,
&
c
[
d
[
0
]
*
32
+
m
*
32
*
d
[
1
]
*
32
]
,
m
*
32
);
break
;
default:
error
(
"Unknown task type."
);
}
/* Clean up afterwards. */
sched_done
(
&
s
,
t
);
}
/* main loop. */
}
/* valid thread. */
}
toc_task
=
getticks
();
/* Verify the result. */
tic_ref
=
getticks
();
matmul
(
m
*
32
,
n
*
32
,
k
*
32
,
a
,
m
*
32
,
b
,
k
*
32
,
res
,
m
*
32
);
toc_ref
=
getticks
();
for
(
i
=
0
;
i
<
m
*
n
*
32
*
32
;
i
++
)
err
+=
(
res
[
i
]
-
c
[
i
]
)
*
(
res
[
i
]
-
c
[
i
]
);
printf
(
"test1: Frob. norm of error is %.3e.
\n
"
,
sqrt
(
err
)
);
printf
(
"test1: tasks took %lli ticks.
\n
"
,
toc_task
-
tic_task
);
printf
(
"test1: ref. took %lli ticks.
\n
"
,
toc_ref
-
tic_ref
);
/* Dump the tasks. */
/* for ( k = 0 ; k < s.count ; k++ ) {
d = (int *)&s.data[ s.tasks[k].data ];
printf( " %i %i %i %i %lli %lli\n" , k , s.tasks[k].qid , d[0] , d[1] , s.tasks[k].tic , s.tasks[k].toc );
} */
/* Clean up. */
sched_free
(
&
s
);
free
(
a
);
free
(
b
);
free
(
c
);
free
(
res
);
}
/**
 * @brief Program entry point.
 *
 * Queries the size of the OpenMP thread team, then runs test1() followed by
 * test2() on a problem of 4 x 4 x 4 tiles. The command line is not used.
 */
int main(int argc, char *argv[])
{
    /* Problem size, in tiles, shared by both tests. */
    int tiles_m = 4;
    int tiles_n = 4;
    int tiles_k = 4;
    int nr_threads;

    /* Get the number of threads: one thread of the team records its size. */
    #pragma omp parallel shared(nr_threads)
    {
        #pragma omp single
        nr_threads = omp_get_num_threads();
    }

    /* Run the two tests in order. */
    test1(tiles_m, tiles_n, tiles_k, nr_threads);
    test2(tiles_m, tiles_n, tiles_k, nr_threads);

    return 0;
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment