Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
swiftmpistepsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
swiftmpistepsim
Commits
2709a75b
Commit
2709a75b
authored
5 years ago
by
Peter W. Draper
Browse files
Options
Downloads
Patches
Plain Diff
Start a proxy cell exchange simulation
parent
33317da6
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
Makefile
+5
-2
5 additions, 2 deletions
Makefile
swiftmpiproxies.c
+259
-0
259 additions, 0 deletions
swiftmpiproxies.c
testdata/EAGLE_25-mpiuse_report-step436-4ranks-proxies.dat
+96
-0
96 additions, 0 deletions
testdata/EAGLE_25-mpiuse_report-step436-4ranks-proxies.dat
with
360 additions
and
2 deletions
Makefile
+
5
−
2
View file @
2709a75b
CFLAGS
=
-g
-O0
-Wall
CFLAGS
=
-g
-O0
-Wall
all
:
swiftmpistepsim swiftmpiproxies
all
:
swiftmpistepsim
swiftmpistepsim
:
swiftmpistepsim.c mpiuse.c mpiuse.h atomic.h cycle.h clocks.h clocks.c
swiftmpistepsim
:
swiftmpistepsim.c mpiuse.c mpiuse.h atomic.h cycle.h clocks.h clocks.c
$(
CC
)
$(
CFLAGS
)
-o
swiftmpistepsim swiftmpistepsim.c mpiuse.c clocks.c
-I
/usr/include/mpi
-lmpi
-lpthread
$(
CC
)
$(
CFLAGS
)
-o
swiftmpistepsim swiftmpistepsim.c mpiuse.c clocks.c
-I
/usr/include/mpi
-lmpi
-lpthread
swiftmpiproxies
:
swiftmpiproxies.c mpiuse.c mpiuse.h atomic.h cycle.h clocks.h clocks.c
$(
CC
)
$(
CFLAGS
)
-o
swiftmpiproxies swiftmpiproxies.c mpiuse.c clocks.c
-I
/usr/include/mpi
-lmpi
-lpthread
clean
:
clean
:
rm
swiftmpistepsim
rm
swiftmpistepsim
rm
swiftmpiproxies
This diff is collapsed.
Click to expand it.
swiftmpiproxies.c
0 → 100644
+
259
−
0
View file @
2709a75b
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2020 Peter W. Draper
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#include
<limits.h>
#include
<mpi.h>
#include
<pthread.h>
#include
<stdio.h>
#include
<stdlib.h>
#include
<unistd.h>
#include
"atomic.h"
#include
"clocks.h"
#include
"error.h"
#include
"mpiuse.h"
/* Global: the MPI rank of this process, visible to all. Holds -1 until the
 * rank has been queried. */
int myrank = -1;

/* Verbosity flag, non-zero when verbose output was requested. */
static int verbose = 0;

/* Task type identifiers for sends and recvs, these must agree with the
 * values written in the log. */
static const int task_type_send = 23;
static const int task_type_recv = 24;

/* Task subtype identifiers for the count and pcells exchanges, these must
 * agree with the values written in the log. */
static const int task_subtype_count = 29;
static const int task_subtype_pcells = 35;

/* Proxy tag arithmetic. From proxy.h, must match log. A tag is formed from a
 * rank multiplied by the shift plus one of the per-exchange offsets. */
#define proxy_tag_shift 8
#define proxy_tag_count 0
#define proxy_tag_cells 6

/* Our queues of communications. Two are kept so that the pcell sends and
 * the pcell recvs are held apart, each with a count of the entries used. */
struct mpiuse_log_entry **send_pcells;
int nr_send_pcells = 0;

struct mpiuse_log_entry **recv_pcells;
int nr_recv_pcells = 0;
/**
* @brief Pick out the relevant logging data for our rank, i.e. all
* activations of sends and recvs. We ignore the original completion logs,
* those are not relevant.
*/
static
void
pick_logs
(
void
)
{
size_t
nlogs
=
mpiuse_nr_logs
();
/* Duplicate of logs. */
send_pcells
=
(
struct
mpiuse_log_entry
**
)
calloc
(
nlogs
,
sizeof
(
struct
mpiuse_log_entry
*
));
nr_send_pcells
=
0
;
recv_pcells
=
(
struct
mpiuse_log_entry
**
)
calloc
(
nlogs
,
sizeof
(
struct
mpiuse_log_entry
*
));
nr_recv_pcells
=
0
;
for
(
int
k
=
0
;
k
<
nlogs
;
k
++
)
{
struct
mpiuse_log_entry
*
log
=
mpiuse_get_log
(
k
);
if
(
log
->
rank
==
myrank
&&
log
->
activation
)
{
log
->
data
=
NULL
;
if
(
log
->
type
==
task_type_send
)
{
if
(
log
->
subtype
==
task_subtype_pcells
)
{
send_pcells
[
nr_send_pcells
]
=
log
;
nr_send_pcells
++
;
}
else
if
(
log
->
subtype
!=
task_subtype_count
)
{
error
(
"task subtype '%d' is not a known value"
,
log
->
subtype
);
}
}
else
if
(
log
->
type
==
task_type_recv
)
{
if
(
log
->
subtype
==
task_subtype_pcells
)
{
recv_pcells
[
nr_recv_pcells
]
=
log
;
nr_recv_pcells
++
;
}
else
if
(
log
->
subtype
!=
task_subtype_count
)
{
error
(
"task subtype '%d' is not a known value"
,
log
->
subtype
);
}
}
else
{
error
(
"task type '%d' is not a known send or recv task"
,
log
->
type
);
}
}
}
}
/**
 * @brief Print the usage help to stderr.
 *
 * Only the options that the command-line parser accepts are listed.
 *
 * @param argv the command-line arguments, argv[0] is shown as the program
 *             name.
 */
static void usage(char *argv[]) {
  fprintf(stderr, "Usage: %s [-v] SWIFT_mpiuse-log-file.dat\n", argv[0]);
  fprintf(stderr, " options: -v verbose\n");
  fflush(stderr);
}
/**
* @brief main function.
*/
int
main
(
int
argc
,
char
*
argv
[])
{
/* Initiate MPI. */
int
prov
=
0
;
int
res
=
MPI_Init_thread
(
&
argc
,
&
argv
,
MPI_THREAD_MULTIPLE
,
&
prov
);
if
(
res
!=
MPI_SUCCESS
)
error
(
"Call to MPI_Init_thread failed with error %i."
,
res
);
int
nr_nodes
=
0
;
res
=
MPI_Comm_size
(
MPI_COMM_WORLD
,
&
nr_nodes
);
if
(
res
!=
MPI_SUCCESS
)
error
(
"MPI_Comm_size failed with error %i."
,
res
);
res
=
MPI_Comm_rank
(
MPI_COMM_WORLD
,
&
myrank
);
if
(
res
!=
MPI_SUCCESS
)
error
(
"Call to MPI_Comm_rank failed with error %i."
,
res
);
/* Handle the command-line, we expect a mpiuse data file to read and various
* options. */
int
opt
;
while
((
opt
=
getopt
(
argc
,
argv
,
"v"
))
!=
-
1
)
{
switch
(
opt
)
{
case
'v'
:
verbose
=
1
;
break
;
default:
if
(
myrank
==
0
)
usage
(
argv
);
return
1
;
}
}
if
(
optind
>=
argc
)
{
if
(
myrank
==
0
)
usage
(
argv
);
return
1
;
}
char
*
infile
=
argv
[
optind
];
/* Start time across the ranks. */
MPI_Barrier
(
MPI_COMM_WORLD
);
clocks_set_cpufreq
(
0
);
/* Now we read the SWIFT MPI logger output that defines the communcations we
* will undertake. Note this has all ranks for a single step, SWIFT outputs
* one MPI log per rank per step, so you need to combine all ranks from a
* step. XXX also extract proxy related communications XXX */
mpiuse_log_restore
(
infile
);
int
nranks
=
mpiuse_nr_ranks
();
/* This should match the expected size. */
if
(
nr_nodes
!=
nranks
)
error
(
"The number of MPI ranks %d does not match the expected value %d"
,
nranks
,
nr_nodes
);
/* Each rank has its own queues of requests, so extract them. */
pick_logs
();
/* And run our version of the proxy exchanges. */
MPI_Request
req_send_counts
[
nr_send_pcells
];
MPI_Request
req_recv_counts
[
nr_send_pcells
];
MPI_Request
req_pcells_out
[
nr_send_pcells
];
int
pcells_size
[
nr_send_pcells
];
/* XXX note in SWIFT we use the threadpool to launch these. That may
* matter. */
for
(
int
k
=
0
;
k
<
nr_send_pcells
;
k
++
)
{
struct
mpiuse_log_entry
*
log
=
send_pcells
[
k
];
/* Need to regenerate the tags for each other communication type. */
int
basetag
=
log
->
rank
*
proxy_tag_shift
;
/* Start Isend counts of pcells. Really just the size of the buffer we're
* about to send, SWIFT sends the count. */
int
size
=
log
->
size
;
res
=
MPI_Isend
(
&
size
,
1
,
MPI_INT
,
log
->
otherrank
,
basetag
+
proxy_tag_count
,
MPI_COMM_WORLD
,
&
req_send_counts
[
k
]);
if
(
res
!=
MPI_SUCCESS
)
error
(
"Counts MPI_Isend failed."
);
/* Start Isend of pcells. */
log
->
data
=
calloc
(
log
->
size
,
1
);
res
=
MPI_Isend
(
log
->
data
,
log
->
size
,
MPI_BYTE
,
log
->
otherrank
,
basetag
+
proxy_tag_cells
,
MPI_COMM_WORLD
,
&
req_pcells_out
[
k
]);
if
(
res
!=
MPI_SUCCESS
)
error
(
"Pcell MPI_Isend failed."
);
/* Start Irecv counts of pcells from other rank. */
basetag
=
log
->
rank
*
proxy_tag_shift
;
res
=
MPI_Irecv
(
&
pcells_size
[
k
],
1
,
MPI_INT
,
log
->
otherrank
,
basetag
+
proxy_tag_count
,
MPI_COMM_WORLD
,
&
req_recv_counts
[
k
]);
if
(
res
!=
MPI_SUCCESS
)
error
(
"Counts MPI_Irecv failed."
);
}
/* Now wait for any of the counts irecvs to complete and then create the
* irecv for the pcells. */
void
*
pcells_in
[
nr_send_pcells
];
MPI_Request
req_pcells_in
[
nr_send_pcells
];
for
(
int
k
=
0
;
k
<
nr_send_pcells
;
k
++
)
{
int
pid
=
MPI_UNDEFINED
;
MPI_Status
status
;
res
=
MPI_Waitany
(
nr_send_pcells
,
req_recv_counts
,
&
pid
,
&
status
);
if
(
res
!=
MPI_SUCCESS
||
pid
==
MPI_UNDEFINED
)
error
(
"MPI_Waitany failed."
);
struct
mpiuse_log_entry
*
log
=
send_pcells
[
pid
];
int
basetag
=
log
->
rank
*
proxy_tag_shift
;
pcells_in
[
pid
]
=
calloc
(
pcells_size
[
pid
],
1
);
res
=
MPI_Irecv
(
pcells_in
[
pid
],
pcells_size
[
pid
],
MPI_BYTE
,
log
->
otherrank
,
basetag
+
proxy_tag_cells
,
MPI_COMM_WORLD
,
&
req_pcells_in
[
pid
]);
if
(
res
!=
MPI_SUCCESS
)
error
(
"Pcell MPI_Irecv failed."
);
}
/* Waitall for all Isend counts to complete. */
res
=
MPI_Waitall
(
nr_send_pcells
,
req_send_counts
,
MPI_STATUSES_IGNORE
);
if
(
res
!=
MPI_SUCCESS
)
error
(
"Waitall for counts Isend failed."
);
/* Now wait for the pcell irecvs to complete, so we receive the pcells,
* which would be unpacked in SWIFT. */
for
(
int
k
=
0
;
k
<
nr_send_pcells
;
k
++
)
{
int
pid
=
MPI_UNDEFINED
;
MPI_Status
status
;
res
=
MPI_Waitany
(
nr_send_pcells
,
req_pcells_in
,
&
pid
,
&
status
);
if
(
res
!=
MPI_SUCCESS
||
pid
==
MPI_UNDEFINED
)
error
(
"MPI_Waitany failed."
);
/* XXX check the data received is correct? */
}
/* Waitall for Isend of pcells to complete. */
res
=
MPI_Waitall
(
nr_send_pcells
,
req_pcells_out
,
MPI_STATUSES_IGNORE
);
if
(
res
!=
MPI_SUCCESS
)
error
(
"Waitall for pcells Isend failed."
);
/* Shutdown MPI. */
res
=
MPI_Finalize
();
if
(
res
!=
MPI_SUCCESS
)
error
(
"call to MPI_Finalize failed with error %i."
,
res
);
if
(
myrank
==
0
)
message
(
"Bye"
);
return
0
;
}
This diff is collapsed.
Click to expand it.
testdata/EAGLE_25-mpiuse_report-step436-4ranks-proxies.dat
0 → 100644
+
96
−
0
View file @
2709a75b
4850098056 694601348570 0 436 0 1 send 23 unknown 29 1 0 4 4
4850121226 694601371740 0 436 0 3 send 23 unknown 29 1 0 4 8
4850137570 694601388084 0 436 0 2 send 23 unknown 29 1 0 4 12
4903711386 694654961900 0 436 0 1 send 23 unknown 35 1 6 67144000 67144012
4903771972 694655022486 0 436 0 1 recv 24 unknown 29 1 8 4 67144016
4934108062 694685358576 0 436 0 2 send 23 unknown 35 1 6 112140224 179284240
4934167322 694685417836 0 436 0 2 recv 24 unknown 29 1 16 4 179284244
4934438236 694685688750 0 436 0 3 send 23 unknown 35 1 6 112768320 292052564
4934474960 694685725474 0 436 0 3 recv 24 unknown 29 1 24 4 292052568
4934698274 694685948788 30926302 436 0 1 recv 24 unknown 29 0 8 -4 292052564
4935788496 694687039010 0 436 0 1 recv 24 unknown 35 1 14 50257088 342309652
4985676110 694736926624 51508788 436 0 2 recv 24 unknown 29 0 16 -4 342309648
4986972416 694738222930 0 436 0 2 recv 24 unknown 35 1 22 76386688 418696336
4986993512 694738244026 52518552 436 0 3 recv 24 unknown 29 0 24 -4 418696332
4987726376 694738976890 0 436 0 3 recv 24 unknown 35 1 30 58033472 476729804
4987735264 694738985778 137637208 436 0 1 send 23 unknown 29 0 0 -4 476729800
4987735548 694738986062 137614322 436 0 3 send 23 unknown 29 0 0 -4 476729796
4987735802 694738986316 137598232 436 0 2 send 23 unknown 29 0 0 -4 476729792
4987749628 694739000142 51961132 436 0 1 recv 24 unknown 35 0 14 -50257088 426472704
5223473824 694974724338 236501408 436 0 2 recv 24 unknown 35 0 22 -76386688 350086016
5564522798 695315773312 576796422 436 0 3 recv 24 unknown 35 0 30 -58033472 292052544
5764020152 695515270666 860308766 436 0 1 send 23 unknown 35 0 6 -67144000 224908544
5764021262 695515271776 829583026 436 0 3 send 23 unknown 35 0 6 -112768320 112140224
5764021514 695515272028 829913452 436 0 2 send 23 unknown 35 0 6 -112140224 0
4849661950 694605451240 0 436 1 0 send 23 unknown 29 1 8 4 4
4849686448 694605475738 0 436 1 2 send 23 unknown 29 1 8 4 8
4849702938 694605492228 0 436 1 3 send 23 unknown 29 1 8 4 12
4888801436 694644590726 0 436 1 0 send 23 unknown 35 1 14 50257088 50257100
4888872168 694644661458 0 436 1 0 recv 24 unknown 29 1 0 4 50257104
4916161986 694671951276 0 436 1 2 send 23 unknown 35 1 14 87092544 137349648
4916219058 694672008348 0 436 1 2 recv 24 unknown 29 1 16 4 137349652
4926742762 694682532052 0 436 1 3 send 23 unknown 35 1 14 102003776 239353428
4926795992 694682585282 0 436 1 3 recv 24 unknown 29 1 24 4 239353432
4927024564 694682813854 38152396 436 1 0 recv 24 unknown 29 0 0 -4 239353428
4928120732 694683910022 0 436 1 0 recv 24 unknown 35 1 6 67144000 306497428
4991934490 694747723780 75715432 436 1 2 recv 24 unknown 29 0 16 -4 306497424
4993226760 694749016050 0 436 1 2 recv 24 unknown 35 1 22 103360320 409857744
4993246450 694749035740 66450458 436 1 3 recv 24 unknown 29 0 24 -4 409857740
4994297096 694750086386 0 436 1 3 recv 24 unknown 35 1 30 96912704 506770444
4994306838 694750096128 144603900 436 1 3 send 23 unknown 29 0 8 -4 506770440
4994307102 694750096392 144645152 436 1 0 send 23 unknown 29 0 8 -4 506770436
4994307370 694750096660 144620922 436 1 2 send 23 unknown 29 0 8 -4 506770432
4994319476 694750108766 66198744 436 1 0 recv 24 unknown 35 0 6 -67144000 439626432
5435638176 695191427466 442411416 436 1 2 recv 24 unknown 35 0 22 -103360320 336266112
5853839496 695609628786 859542400 436 1 3 recv 24 unknown 35 0 30 -96912704 239353408
6179956632 695935745922 1253213870 436 1 3 send 23 unknown 35 0 14 -102003776 137349632
6179957364 695935746654 1291155928 436 1 0 send 23 unknown 35 0 14 -50257088 87092544
6179957614 695935746904 1263795628 436 1 2 send 23 unknown 35 0 14 -87092544 0
4865683574 694616573774 0 436 2 1 send 23 unknown 29 1 16 4 4
4865694860 694616585060 0 436 2 0 send 23 unknown 29 1 16 4 8
4865719040 694616609240 0 436 2 3 send 23 unknown 29 1 16 4 12
4929671098 694680561298 0 436 2 0 send 23 unknown 35 1 22 76386688 76386700
4929728618 694680618818 0 436 2 0 recv 24 unknown 29 1 0 4 76386704
4946100866 694696991066 0 436 2 1 send 23 unknown 35 1 22 103360320 179747024
4946157516 694697047716 0 436 2 1 recv 24 unknown 29 1 8 4 179747028
4966431694 694717321894 0 436 2 3 send 23 unknown 35 1 22 131407808 311154836
4966473568 694717363768 0 436 2 3 recv 24 unknown 29 1 24 4 311154840
4966735668 694717625868 262100 436 2 3 recv 24 unknown 29 0 24 -4 311154836
4967822066 694718712266 0 436 2 3 recv 24 unknown 35 1 30 77125888 388280724
5039897426 694790787626 110168808 436 2 0 recv 24 unknown 29 0 0 -4 388280720
5041296494 694792186694 0 436 2 0 recv 24 unknown 35 1 6 112140224 500420944
5047476558 694798366758 101319042 436 2 1 recv 24 unknown 29 0 8 -4 500420940
5049229182 694800119382 0 436 2 1 recv 24 unknown 35 1 14 87092544 587513484
5049237980 694800128180 183543120 436 2 0 send 23 unknown 29 0 16 -4 587513480
5049238262 694800128462 183554688 436 2 1 send 23 unknown 29 0 16 -4 587513476
5049238548 694800128748 183519508 436 2 3 send 23 unknown 29 0 16 -4 587513472
5049252308 694800142508 81430242 436 2 3 recv 24 unknown 35 0 30 -77125888 510387584
5409565588 695160455788 368269094 436 2 0 recv 24 unknown 35 0 6 -112140224 398247360
5767826276 695518716476 718597094 436 2 1 recv 24 unknown 35 0 14 -87092544 311154816
6027888698 695778778898 1098217600 436 2 0 send 23 unknown 35 0 22 -76386688 234768128
6027889152 695778779352 1081788286 436 2 1 send 23 unknown 35 0 22 -103360320 131407808
6027889418 695778779618 1061457724 436 2 3 send 23 unknown 35 0 22 -131407808 0
4854204482 694605090028 0 436 3 1 send 23 unknown 29 1 24 4 4
4854234092 694605119638 0 436 3 2 send 23 unknown 29 1 24 4 8
4854255180 694605140726 0 436 3 0 send 23 unknown 29 1 24 4 12
4899877272 694650762818 0 436 3 0 send 23 unknown 35 1 30 58033472 58033484
4899949318 694650834864 0 436 3 0 recv 24 unknown 29 1 0 4 58033488
4912874760 694663760306 0 436 3 2 send 23 unknown 35 1 30 77125888 135159376
4912937056 694663822602 0 436 3 2 recv 24 unknown 29 1 16 4 135159380
4928374362 694679259908 0 436 3 1 send 23 unknown 35 1 30 96912704 232072084
4928425744 694679311290 0 436 3 1 recv 24 unknown 29 1 8 4 232072088
4928643938 694679529484 15706882 436 3 2 recv 24 unknown 29 0 16 -4 232072084
4930353406 694681238952 0 436 3 2 recv 24 unknown 35 1 22 131407808 363479892
4930363348 694681248894 30414030 436 3 0 recv 24 unknown 29 0 0 -4 363479888
4931947584 694682833130 0 436 3 0 recv 24 unknown 35 1 6 112768320 476248208
4931953158 694682838704 3527414 436 3 1 recv 24 unknown 29 0 8 -4 476248204
4934140284 694685025830 0 436 3 1 recv 24 unknown 35 1 14 102003776 578251980
4934154460 694685040006 79949978 436 3 1 send 23 unknown 29 0 24 -4 578251976
4934154790 694685040336 79899610 436 3 0 send 23 unknown 29 0 24 -4 578251972
4934155106 694685040652 79921014 436 3 2 send 23 unknown 29 0 24 -4 578251968
5084820146 694835705692 154466740 436 3 2 recv 24 unknown 35 0 22 -131407808 446844160
5647753010 695398638556 715805426 436 3 0 recv 24 unknown 35 0 6 -112768320 334075840
6028108176 695778993722 1093967892 436 3 1 recv 24 unknown 35 0 14 -102003776 232072064
6407169688 696158055234 1478795326 436 3 1 send 23 unknown 35 0 30 -96912704 135159360
6407171042 696158056588 1507293770 436 3 0 send 23 unknown 35 0 30 -58033472 77125888
6407171290 696158056836 1494296530 436 3 2 send 23 unknown 35 0 30 -77125888 0
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment