Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
swiftmpistepsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
swiftmpistepsim
Commits
47807adb
Commit
47807adb
authored
5 years ago
by
Peter W. Draper
Browse files
Options
Downloads
Patches
Plain Diff
Add more analysis of failure and more and less chat
parent
0342d542
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
swiftmpiproxies.c
+63
-23
63 additions, 23 deletions
swiftmpiproxies.c
with
63 additions
and
23 deletions
swiftmpiproxies.c
+
63
−
23
View file @
47807adb
...
...
@@ -88,11 +88,40 @@ static int datacheck_test(int offset, size_t size, void *data) {
unsigned
char
*
p
=
(
unsigned
char
*
)
data
;
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
if
(
p
[
i
]
!=
fill
)
return
0
;
if
(
p
[
i
]
!=
fill
)
{
if
(
verbose
)
{
message
(
"%d: %d != %d"
,
offset
,
p
[
i
],
fill
);
fflush
(
stdout
);
}
return
0
;
}
}
return
1
;
}
/**
 * @brief check a data area reporting some statistics about the content.
 *
 * Assumes datacheck_test() has already failed. The data is only read; the
 * sum, mean, minimum and maximum of its bytes are reported using message().
 * An empty or NULL data area is reported as such, without statistics.
 *
 * @param size size of data in bytes.
 * @param data the data to check.
 */
static void datacheck_fulltest(size_t size, void *data) {

  /* Nothing to analyse: avoids a 0/0 mean and a meaningless min > max. */
  if (size == 0 || data == NULL) {
    message("no data to report statistics for");
    return;
  }

  /* Treat the area as raw bytes, it is never modified here. */
  const unsigned char *p = (const unsigned char *)data;

  double sum = 0.0;
  unsigned char pmin = 255;
  unsigned char pmax = 0;
  for (size_t i = 0; i < size; i++) {
    sum += p[i];
    if (p[i] > pmax) pmax = p[i];
    if (p[i] < pmin) pmin = p[i];
  }

  /* The unsigned char values are promoted to int, so %d is correct. */
  message("sum: %.2f, mean: %.2f, min: %d, max: %d", sum, sum / (double)size,
          pmin, pmax);
}
/**
* @brief Pick out the relevant logging data for our rank, i.e. all
* activations of sends and recvs. We ignore the original completion logs,
...
...
@@ -172,7 +201,7 @@ int main(int argc, char *argv[]) {
if
(
res
!=
MPI_SUCCESS
)
error
(
"Call to MPI_Comm_rank failed with error %i."
,
res
);
/* Handle the command-line, we expect a mpiuse data file to read and
/* Handle the command-line, we expect a mpiuse data file to read and
* various options. */
int
opt
;
while
((
opt
=
getopt
(
argc
,
argv
,
"vn:"
))
!=
-
1
)
{
...
...
@@ -201,7 +230,7 @@ int main(int argc, char *argv[]) {
/* Now we read the SWIFT MPI logger output that defines the communications we
* will undertake. Note this has all ranks for a single step, SWIFT outputs
* one MPI log per rank per step, so you need to combine all ranks from a
* step.
XXX also extract proxy related communications XXX
*/
* step. */
mpiuse_log_restore
(
infile
);
int
nranks
=
mpiuse_nr_ranks
();
...
...
@@ -226,27 +255,27 @@ int main(int argc, char *argv[]) {
if
(
myrank
==
0
)
message
(
"*** Proxy simulation exchange loop: %d ***"
,
nloop
);
/* XXX note in SWIFT we use the threadpool to launch these. That may
* matter. */
/* Note in SWIFT we use the threadpool to launch these. */
for
(
int
k
=
0
;
k
<
nr_send_pcells
;
k
++
)
{
struct
mpiuse_log_entry
*
log
=
send_pcells
[
k
];
/* Need to regenerate the tags for each other communication type. */
int
basetag
=
myrank
*
proxy_tag_shift
;
/* Start Isend counts of pcells. Really just the size of the buffer
we're
* about to send, SWIFT sends the count. */
/* Start Isend counts of pcells. Really just the size of the buffer
*
we're
about to send, SWIFT sends the count. */
int
size
=
log
->
size
;
res
=
MPI_Isend
(
&
size
,
1
,
MPI_INT
,
log
->
otherrank
,
basetag
+
proxy_tag_count
,
MPI_COMM_WORLD
,
&
req_send_counts
[
k
]);
res
=
MPI_Isend
(
&
size
,
1
,
MPI_INT
,
log
->
otherrank
,
basetag
+
proxy_tag_count
,
MPI_COMM_WORLD
,
&
req_send_counts
[
k
]);
if
(
res
!=
MPI_SUCCESS
)
error
(
"Counts MPI_Isend failed."
);
/* Start Isend of pcells. */
/* Start Isend of pcells, filling the data with a pattern for checking
* on arrival. */
log
->
data
=
calloc
(
log
->
size
,
1
);
/* Fill data with a pattern for checking on arrival. */
datacheck_fill
(
0
,
log
->
size
,
log
->
data
);
res
=
MPI_Isend
(
log
->
data
,
log
->
size
,
MPI_BYTE
,
log
->
otherrank
,
basetag
+
proxy_tag_cells
,
MPI_COMM_WORLD
,
&
req_pcells_out
[
k
]);
...
...
@@ -254,6 +283,7 @@ int main(int argc, char *argv[]) {
/* Start Irecv counts of pcells from other rank. */
basetag
=
log
->
otherrank
*
proxy_tag_shift
;
res
=
MPI_Irecv
(
&
pcells_size
[
k
],
1
,
MPI_INT
,
log
->
otherrank
,
basetag
+
proxy_tag_count
,
MPI_COMM_WORLD
,
&
req_recv_counts
[
k
]);
...
...
@@ -268,6 +298,7 @@ int main(int argc, char *argv[]) {
for
(
int
k
=
0
;
k
<
nr_send_pcells
;
k
++
)
{
int
pid
=
MPI_UNDEFINED
;
MPI_Status
status
;
res
=
MPI_Waitany
(
nr_send_pcells
,
req_recv_counts
,
&
pid
,
&
status
);
if
(
res
!=
MPI_SUCCESS
||
pid
==
MPI_UNDEFINED
)
error
(
"MPI_Waitany failed."
);
...
...
@@ -280,36 +311,42 @@ int main(int argc, char *argv[]) {
/* Fill data with a pattern for checking when overwritten. */
datacheck_fill
(
1
,
pcells_size
[
pid
],
pcells_in
[
pid
]);
res
=
MPI_Irecv
(
pcells_in
[
pid
],
pcells_size
[
pid
],
MPI_BYTE
,
log
->
otherrank
,
basetag
+
proxy_tag_cells
,
MPI_COMM_WORLD
,
&
req_pcells_in
[
pid
]);
res
=
MPI_Irecv
(
pcells_in
[
pid
],
pcells_size
[
pid
],
MPI_BYTE
,
log
->
otherrank
,
basetag
+
proxy_tag_cells
,
MPI_COMM_WORLD
,
&
req_pcells_in
[
pid
]);
if
(
res
!=
MPI_SUCCESS
)
error
(
"Pcell MPI_Irecv failed."
);
}
message
(
"All proxy cell counts have arrived"
);
message
(
"All proxy cell counts have arrived
, pcells irecvs are launched
"
);
/* Waitall for all Isend counts to complete. */
res
=
MPI_Waitall
(
nr_send_pcells
,
req_send_counts
,
MPI_STATUSES_IGNORE
);
if
(
res
!=
MPI_SUCCESS
)
error
(
"Waitall for counts Isend failed."
);
message
(
"All sends of counts have completed"
);
/* Now wait for the pcell irecvs to complete, so we receive the pcells,
* which would be unpacked in SWIFT. */
for
(
int
k
=
0
;
k
<
nr_send_pcells
;
k
++
)
{
int
pid
=
MPI_UNDEFINED
;
MPI_Status
status
;
res
=
MPI_Waitany
(
nr_send_pcells
,
req_pcells_in
,
&
pid
,
&
status
);
if
(
res
!=
MPI_SUCCESS
||
pid
==
MPI_UNDEFINED
)
error
(
"MPI_Waitany failed."
);
/* Check the data received is correct. */
if
(
!
datacheck_test
(
0
,
pcells_size
[
pid
],
pcells_in
[
pid
]))
{
if
(
!
datacheck_test
(
1
,
pcells_size
[
pid
],
pcells_in
[
pid
]))
{
error
(
"Received data is not modified"
);
message
(
"Received data is not correct"
);
datacheck_fulltest
(
pcells_size
[
pid
],
pcells_in
[
pid
]);
if
(
datacheck_test
(
1
,
pcells_size
[
pid
],
pcells_in
[
pid
]))
{
error
(
"Received data is not modified on receive"
);
}
else
{
error
(
"Received data is corrupt"
);
}
}
else
{
message
(
"Received data is correct"
);
if
(
verbose
)
message
(
"Received data is correct"
);
}
free
(
pcells_in
[
pid
]);
pcells_in
[
pid
]
=
NULL
;
...
...
@@ -319,14 +356,17 @@ int main(int argc, char *argv[]) {
/* Waitall for Isend of pcells to complete. */
res
=
MPI_Waitall
(
nr_send_pcells
,
req_pcells_out
,
MPI_STATUSES_IGNORE
);
if
(
res
!=
MPI_SUCCESS
)
error
(
"Waitall for pcells Isend failed."
);
message
(
"All sends of pcells have completed"
);
/* Check data is unmodified. */
/* Check data is unmodified
while being offloaded
. */
for
(
int
k
=
0
;
k
<
nr_send_pcells
;
k
++
)
{
struct
mpiuse_log_entry
*
log
=
send_pcells
[
k
];
if
(
!
datacheck_test
(
0
,
log
->
size
,
log
->
data
))
{
datacheck_fulltest
(
log
->
size
,
log
->
data
);
error
(
"Sent data has been corrupted"
);
}
else
{
message
(
"Sent data is correct"
);
if
(
verbose
)
message
(
"Sent data is correct"
);
}
free
(
log
->
data
);
log
->
data
=
NULL
;
...
...
@@ -339,7 +379,7 @@ int main(int argc, char *argv[]) {
if
(
res
!=
MPI_SUCCESS
)
error
(
"call to MPI_Finalize failed with error %i."
,
res
);
if
(
myrank
==
0
)
message
(
"
Bye
"
);
if
(
myrank
==
0
)
message
(
"
All done, no errors detected
"
);
return
0
;
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment