SWIFT / SWIFTsim

Commit 2ec982df
authored Sep 18, 2019 by Matthieu Schaller

Move the updated synchronised redistribute function to the new engine_redistribute.c file.

parent b52e6c84

Changes 2

src/engine.c (diff collapsed)

src/engine_redistribute.c
...
@@ -29,7 +29,6 @@
 #include "memswap.h"

 #ifdef WITH_MPI
 /**
  * Do the exchange of one type of particles with all the other nodes.
  *
...
@@ -44,6 +43,8 @@
  * @param mpi_type the MPI_Datatype for these particles.
  * @param nr_nodes the number of nodes to exchange with.
  * @param nodeID the id of this node.
+ * @param syncredist whether to use slower more memory friendly synchronous
+ *                   exchanges.
  *
  * @result new particle data constructed from all the exchanges with the
  *         given alignment.
...
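A note on the counts argument documented above: from the index expressions used further down in this diff (ind_send = nodeID * nr_nodes + k, ind_recv = k * nr_nodes + nodeID), counts appears to be an nr_nodes x nr_nodes exchange matrix flattened row by row, where entry [i][j] is the number of particles node i sends to node j. A minimal standalone sketch of that layout, with invented numbers (not SWIFT code):

#include <stdio.h>

int main(void) {

  /* Toy exchange matrix: counts[i * nr_nodes + j] is the number of
   * particles node i sends to node j. The values below are made up. */
  const int nr_nodes = 4;
  const int nodeID = 2; /* pretend this rank is node 2 */
  int counts[4 * 4] = {0};

  counts[nodeID * nr_nodes + 1] = 10; /* node 2 sends 10 particles to node 1 */
  counts[3 * nr_nodes + nodeID] = 7;  /* node 3 sends 7 particles to node 2  */

  for (int k = 0; k < nr_nodes; k++) {
    const int ind_send = nodeID * nr_nodes + k; /* what we send to node k  */
    const int ind_recv = k * nr_nodes + nodeID; /* what node k sends to us */
    printf("node %d: send %d, recv %d\n", k, counts[ind_send],
           counts[ind_recv]);
  }
  return 0;
}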
@@ -51,7 +52,7 @@
 static void *engine_do_redistribute(const char *label, int *counts, char *parts,
                                     size_t new_nr_parts, size_t sizeofparts,
                                     size_t alignsize, MPI_Datatype mpi_type,
-                                    int nr_nodes, int nodeID) {
+                                    int nr_nodes, int nodeID, int syncredist) {

   /* Allocate a new particle array with some extra margin */
   char *parts_new = NULL;
...
@@ -60,100 +61,178 @@ static void *engine_do_redistribute(const char *label, int *counts, char *parts,
           sizeofparts * new_nr_parts * engine_redistribute_alloc_margin) != 0)
     error("Failed to allocate new particle data.");

-  /* Prepare MPI requests for the asynchronous communications */
-  MPI_Request *reqs;
-  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * nr_nodes)) ==
-      NULL)
-    error("Failed to allocate MPI request list.");
-
-  /* Only send and receive only "chunk" particles per request. So we need to
-   * loop as many times as necessary here. Make 2Gb/sizeofparts so we only
-   * send 2Gb packets. */
-  const int chunk = INT_MAX / sizeofparts;
-  int sent = 0;
-  int recvd = 0;
-
-  int activenodes = 1;
-  while (activenodes) {
-
-    for (int k = 0; k < 2 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
-
-    /* Emit the sends and recvs for the data. */
-    size_t offset_send = sent;
-    size_t offset_recv = recvd;
-    activenodes = 0;
-
-    for (int k = 0; k < nr_nodes; k++) {
-
-      /* Indices in the count arrays of the node of interest */
-      const int ind_send = nodeID * nr_nodes + k;
-      const int ind_recv = k * nr_nodes + nodeID;
-
-      /* Are we sending any data this loop? */
-      int sending = counts[ind_send] - sent;
-      if (sending > 0) {
-        activenodes++;
-        if (sending > chunk) sending = chunk;
-
-        /* If the send and receive is local then just copy. */
-        if (k == nodeID) {
-          int receiving = counts[ind_recv] - recvd;
-          if (receiving > chunk) receiving = chunk;
-          memcpy(&parts_new[offset_recv * sizeofparts],
-                 &parts[offset_send * sizeofparts], sizeofparts * receiving);
-        } else {
-          /* Otherwise send it. */
-          int res = MPI_Isend(&parts[offset_send * sizeofparts], sending,
-                              mpi_type, k, ind_send, MPI_COMM_WORLD,
-                              &reqs[2 * k + 0]);
-          if (res != MPI_SUCCESS)
-            mpi_error(res, "Failed to isend parts to node %i.", k);
-        }
-      }
-
-      /* If we're sending to this node, then move past it to next. */
-      if (counts[ind_send] > 0) offset_send += counts[ind_send];
-
-      /* Are we receiving any data from this node? Note already done if coming
-       * from this node. */
-      if (k != nodeID) {
-        int receiving = counts[ind_recv] - recvd;
-        if (receiving > 0) {
-          activenodes++;
-          if (receiving > chunk) receiving = chunk;
-          int res = MPI_Irecv(&parts_new[offset_recv * sizeofparts], receiving,
-                              mpi_type, k, ind_recv, MPI_COMM_WORLD,
-                              &reqs[2 * k + 1]);
-          if (res != MPI_SUCCESS)
-            mpi_error(res, "Failed to emit irecv of parts from node %i.", k);
-        }
-      }
-
-      /* If we're receiving from this node, then move past it to next. */
-      if (counts[ind_recv] > 0) offset_recv += counts[ind_recv];
-    }
-
-    /* Wait for all the sends and recvs to tumble in. */
-    MPI_Status stats[2 * nr_nodes];
-    int res;
-    if ((res = MPI_Waitall(2 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
-      for (int k = 0; k < 2 * nr_nodes; k++) {
-        char buff[MPI_MAX_ERROR_STRING];
-        MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
-        message("request from source %i, tag %i has error '%s'.",
-                stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff);
-      }
-      error("Failed during waitall for part data.");
-    }
-
-    /* Move to next chunks. */
-    sent += chunk;
-    recvd += chunk;
-  }
-
-  /* Free temps. */
-  free(reqs);
+  if (syncredist) {
+
+    /* Slow synchronous redistribute,. */
+
+    size_t offset_send = 0, offset_recv = 0;
+
+    /* Only send and receive only "chunk" particles per request.
+     * Fixing the message size to 2GB. */
+    const int chunk = INT_MAX / sizeofparts;
+    int res = 0;
+    for (int k = 0; k < nr_nodes; k++) {
+      int kk = k;
+
+      /* Rank 0 decides the index of sending node */
+      MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+      int ind_recv = kk * nr_nodes + nodeID;
+
+      if (nodeID == kk) {
+
+        /* Send out our particles. */
+        offset_send = 0;
+        for (int j = 0; j < nr_nodes; j++) {
+
+          int ind_send = kk * nr_nodes + j;
+
+          /* Just copy our own parts */
+          if (counts[ind_send] > 0) {
+            if (j == nodeID) {
+              memcpy(&parts_new[offset_recv * sizeofparts],
+                     &parts[offset_send * sizeofparts],
+                     sizeofparts * counts[ind_recv]);
+              offset_send += counts[ind_send];
+              offset_recv += counts[ind_recv];
+            } else {
+              for (int i = 0, n = 0; i < counts[ind_send]; n++) {
+
+                /* Count and index, with chunk parts at most. */
+                size_t sendc = min(chunk, counts[ind_send] - i);
+                size_t sendo = offset_send + i;
+
+                res = MPI_Send(&parts[sendo * sizeofparts], sendc, mpi_type, j,
+                               n, MPI_COMM_WORLD);
+                if (res != MPI_SUCCESS) {
+                  mpi_error(res, "Failed to send parts to node %i from %i.", j,
+                            nodeID);
+                }
+                i += sendc;
+              }
+              offset_send += counts[ind_send];
+            }
+          }
+        }
+      } else {
+        /* Listen for sends from kk. */
+        if (counts[ind_recv] > 0) {
+          for (int i = 0, n = 0; i < counts[ind_recv]; n++) {
+            /* Count and index, with +chunk parts at most. */
+            size_t recvc = min(chunk, counts[ind_recv] - i);
+            size_t recvo = offset_recv + i;
+
+            MPI_Status status;
+            res = MPI_Recv(&parts_new[recvo * sizeofparts], recvc, mpi_type, kk,
+                           n, MPI_COMM_WORLD, &status);
+            if (res != MPI_SUCCESS) {
+              mpi_error(res, "Failed to recv of parts from node %i to %i.", kk,
+                        nodeID);
+            }
+            i += recvc;
+          }
+          offset_recv += counts[ind_recv];
+        }
+      }
+    }
+
+  } else {
+    /* Asynchronous redistribute, can take a lot of memory. */
+
+    /* Prepare MPI requests for the asynchronous communications */
+    MPI_Request *reqs;
+    if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * nr_nodes)) ==
+        NULL)
+      error("Failed to allocate MPI request list.");
+
+    /* Only send and receive only "chunk" particles per request. So we need to
+     * loop as many times as necessary here. Make 2Gb/sizeofparts so we only
+     * send 2Gb packets. */
+    const int chunk = INT_MAX / sizeofparts;
+    int sent = 0;
+    int recvd = 0;
+
+    int activenodes = 1;
+    while (activenodes) {
+
+      for (int k = 0; k < 2 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
+
+      /* Emit the sends and recvs for the data. */
+      size_t offset_send = sent;
+      size_t offset_recv = recvd;
+      activenodes = 0;
+
+      for (int k = 0; k < nr_nodes; k++) {
+
+        /* Indices in the count arrays of the node of interest */
+        const int ind_send = nodeID * nr_nodes + k;
+        const int ind_recv = k * nr_nodes + nodeID;
+
+        /* Are we sending any data this loop? */
+        int sending = counts[ind_send] - sent;
+        if (sending > 0) {
+          activenodes++;
+          if (sending > chunk) sending = chunk;
+
+          /* If the send and receive is local then just copy. */
+          if (k == nodeID) {
+            int receiving = counts[ind_recv] - recvd;
+            if (receiving > chunk) receiving = chunk;
+            memcpy(&parts_new[offset_recv * sizeofparts],
+                   &parts[offset_send * sizeofparts], sizeofparts * receiving);
+          } else {
+            /* Otherwise send it. */
+            int res = MPI_Isend(&parts[offset_send * sizeofparts], sending,
+                                mpi_type, k, ind_send, MPI_COMM_WORLD,
+                                &reqs[2 * k + 0]);
+            if (res != MPI_SUCCESS)
+              mpi_error(res, "Failed to isend parts to node %i.", k);
+          }
+        }
+
+        /* If we're sending to this node, then move past it to next. */
+        if (counts[ind_send] > 0) offset_send += counts[ind_send];
+
+        /* Are we receiving any data from this node? Note already done if
+         * coming from this node. */
+        if (k != nodeID) {
+          int receiving = counts[ind_recv] - recvd;
+          if (receiving > 0) {
+            activenodes++;
+            if (receiving > chunk) receiving = chunk;
+            int res = MPI_Irecv(&parts_new[offset_recv * sizeofparts],
+                                receiving, mpi_type, k, ind_recv,
+                                MPI_COMM_WORLD, &reqs[2 * k + 1]);
+            if (res != MPI_SUCCESS)
+              mpi_error(res, "Failed to emit irecv of parts from node %i.", k);
+          }
+        }
+
+        /* If we're receiving from this node, then move past it to next. */
+        if (counts[ind_recv] > 0) offset_recv += counts[ind_recv];
+      }
+
+      /* Wait for all the sends and recvs to tumble in. */
+      MPI_Status stats[2 * nr_nodes];
+      int res;
+      if ((res = MPI_Waitall(2 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
+        for (int k = 0; k < 2 * nr_nodes; k++) {
+          char buff[MPI_MAX_ERROR_STRING];
+          MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
+          message("request from source %i, tag %i has error '%s'.",
+                  stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff);
+        }
+        error("Failed during waitall for part data.");
+      }
+
+      /* Move to next chunks. */
+      sent += chunk;
+      recvd += chunk;
+    }
+
+    /* Free temps. */
+    free(reqs);
+  }

   /* And return new memory. */
   return parts_new;
 }
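A side note on the synchronous branch added above: it is a round-robin exchange in which rank 0 broadcasts which node is the current sender and all other ranks only post matching blocking receives, so at most one incoming transfer per rank is in flight at any time (hence "slower but more memory friendly"). A stripped-down, standalone sketch of just that communication pattern, with a toy integer payload instead of particle data (not SWIFT code):

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[]) {

  MPI_Init(&argc, &argv);
  int nr_nodes, nodeID;
  MPI_Comm_size(MPI_COMM_WORLD, &nr_nodes);
  MPI_Comm_rank(MPI_COMM_WORLD, &nodeID);

  for (int k = 0; k < nr_nodes; k++) {

    /* Rank 0 decides the index of the sending node, as in the diff above. */
    int kk = k;
    MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (nodeID == kk) {
      /* The chosen sender pushes its (toy) payload to every other rank. */
      for (int j = 0; j < nr_nodes; j++)
        if (j != nodeID) MPI_Send(&kk, 1, MPI_INT, j, 0, MPI_COMM_WORLD);
    } else {
      /* Everyone else listens only to the single active sender. */
      int payload = -1;
      MPI_Recv(&payload, 1, MPI_INT, kk, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
      printf("rank %d got payload from sender %d\n", nodeID, payload);
    }
  }

  MPI_Finalize();
  return 0;
}

In the real function the payload is the particle buffer, and the inner chunk loop repeats MPI_Send/MPI_Recv with increasing tags n until counts[ind_send] particles have been transferred.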
...
@@ -430,7 +509,8 @@ static void engine_redistribute_relink_mapper(void *map_data, int num_elements,
  * 3) The particles to send are placed in a temporary buffer in which the
  *    part-gpart links are preserved.
  * 4) Each node allocates enough space for the new particles.
- * 5) (Asynchronous) communications are issued to transfer the data.
+ * 5) Asynchronous or synchronous communications are issued to transfer the
+ *    data.
  *
  *
  * @param e The #engine.
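Step 3 and the relink mapper named in the hunk header are about keeping the two particle arrays consistent after both have been repacked. A toy illustration of that idea with invented types and field names (the real struct part/gpart layout in SWIFT is more involved and is not shown in this diff):

#include <stddef.h>
#include <stdio.h>

/* Invented toy types: each hydro particle remembers the index of its
 * gravity partner and vice versa. */
struct toy_gpart { ptrdiff_t part_index; };
struct toy_part { ptrdiff_t gpart_index; };

/* After both arrays have been reordered, rebuild the back-links so each
 * pair points at the other's *new* position. */
static void toy_relink(struct toy_part *parts, struct toy_gpart *gparts,
                       size_t nr_parts) {
  for (size_t k = 0; k < nr_parts; k++) {
    const ptrdiff_t g = parts[k].gpart_index;
    if (g >= 0) gparts[g].part_index = (ptrdiff_t)k;
  }
}

int main(void) {
  struct toy_part parts[2] = {{1}, {0}};    /* part 0 <-> gpart 1, part 1 <-> gpart 0 */
  struct toy_gpart gparts[2] = {{-1}, {-1}};
  toy_relink(parts, gparts, 2);
  printf("gpart 0 -> part %td, gpart 1 -> part %td\n", gparts[0].part_index,
         gparts[1].part_index);
  return 0;
}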
...
@@ -895,7 +975,7 @@ void engine_redistribute(struct engine *e) {
   /* SPH particles. */
   void *new_parts = engine_do_redistribute(
       "parts", counts, (char *)s->parts, nr_parts_new, sizeof(struct part),
-      part_align, part_mpi_type, nr_nodes, nodeID);
+      part_align, part_mpi_type, nr_nodes, nodeID, e->syncredist);
   swift_free("parts", s->parts);
   s->parts = (struct part *)new_parts;
   s->nr_parts = nr_parts_new;
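The call above passes sizeof(struct part) as sizeofparts, which is what the chunking rule inside engine_do_redistribute() divides into INT_MAX. A small sketch of the resulting message-count arithmetic; the helper name and the example numbers are mine, not SWIFT's:

#include <limits.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical helper: how many messages of at most ~2 GB each are needed
 * to move `count` particles of `sizeofparts` bytes, given the per-message
 * limit chunk = INT_MAX / sizeofparts used in engine_do_redistribute(). */
static size_t redistribute_num_messages(size_t count, size_t sizeofparts) {
  const size_t chunk = (size_t)INT_MAX / sizeofparts; /* particles per message */
  return (count + chunk - 1) / chunk;                 /* ceil(count / chunk)   */
}

int main(void) {
  /* Invented example: 10 million particles of 800 bytes each. */
  printf("messages needed: %zu\n", redistribute_num_messages(10000000, 800));
  return 0;
}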
...
@@ -904,32 +984,35 @@ void engine_redistribute(struct engine *e) {
   /* Extra SPH particle properties. */
   new_parts = engine_do_redistribute(
       "xparts", counts, (char *)s->xparts, nr_parts_new, sizeof(struct xpart),
-      xpart_align, xpart_mpi_type, nr_nodes, nodeID);
+      xpart_align, xpart_mpi_type, nr_nodes, nodeID, e->syncredist);
   swift_free("xparts", s->xparts);
   s->xparts = (struct xpart *)new_parts;

   /* Gravity particles. */
-  new_parts = engine_do_redistribute(
-      "gparts", g_counts, (char *)s->gparts, nr_gparts_new,
-      sizeof(struct gpart), gpart_align, gpart_mpi_type, nr_nodes, nodeID);
+  new_parts = engine_do_redistribute(
+      "gparts", g_counts, (char *)s->gparts, nr_gparts_new,
+      sizeof(struct gpart), gpart_align, gpart_mpi_type, nr_nodes, nodeID,
+      e->syncredist);
   swift_free("gparts", s->gparts);
   s->gparts = (struct gpart *)new_parts;
   s->nr_gparts = nr_gparts_new;
   s->size_gparts = engine_redistribute_alloc_margin * nr_gparts_new;

   /* Star particles. */
-  new_parts = engine_do_redistribute(
-      "sparts", s_counts, (char *)s->sparts, nr_sparts_new,
-      sizeof(struct spart), spart_align, spart_mpi_type, nr_nodes, nodeID);
+  new_parts = engine_do_redistribute(
+      "sparts", s_counts, (char *)s->sparts, nr_sparts_new,
+      sizeof(struct spart), spart_align, spart_mpi_type, nr_nodes, nodeID,
+      e->syncredist);
   swift_free("sparts", s->sparts);
   s->sparts = (struct spart *)new_parts;
   s->nr_sparts = nr_sparts_new;
   s->size_sparts = engine_redistribute_alloc_margin * nr_sparts_new;

   /* Black holes particles. */
-  new_parts = engine_do_redistribute(
-      "bparts", b_counts, (char *)s->bparts, nr_bparts_new,
-      sizeof(struct bpart), bpart_align, bpart_mpi_type, nr_nodes, nodeID);
+  new_parts = engine_do_redistribute(
+      "bparts", b_counts, (char *)s->bparts, nr_bparts_new,
+      sizeof(struct bpart), bpart_align, bpart_mpi_type, nr_nodes, nodeID,
+      e->syncredist);
   swift_free("bparts", s->bparts);
   s->bparts = (struct bpart *)new_parts;
   s->nr_bparts = nr_bparts_new;
...