Commit 038c0da4 authored by Pedro Gonnet

Pack/unpack the cell ti_end_min values on the fly; this makes the final communication step a lot leaner.

parent 916ac1aa
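
In short: rather than shipping whole particle arrays just to refresh time-step information, the new task_subtype_tend path flattens a cell tree's ti_end_min values into a flat int buffer, sends that, and unpacks it into the matching proxy tree on the receiving rank. A minimal sketch of the round trip using the functions added below; send_ti_ends and recv_ti_ends are hypothetical helpers, not part of the commit:

    /* Hypothetical sender-side helper: flatten the tree and ship it. */
    void send_ti_ends(struct cell *c, int dest, int tag, MPI_Request *req) {
      int *buff = malloc(sizeof(int) * c->pcell_size); /* set by cell_pack() */
      cell_pack_ti_ends(c, buff);
      MPI_Isend(buff, c->pcell_size, MPI_INT, dest, tag, MPI_COMM_WORLD, req);
      /* buff must stay allocated until the request completes, then be freed. */
    }

    /* Hypothetical receiver-side helper: same-shaped proxy tree, same count. */
    void recv_ti_ends(struct cell *c, int src, int tag) {
      int *buff = malloc(sizeof(int) * c->pcell_size);
      MPI_Recv(buff, c->pcell_size, MPI_INT, src, tag, MPI_COMM_WORLD,
               MPI_STATUS_IGNORE);
      cell_unpack_ti_ends(c, buff); /* same depth-first order as the pack */
      free(buff);
    }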
@@ -128,6 +128,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
   }
 
   /* Return the total number of unpacked cells. */
+  c->pcell_size = count;
   return count;
 }
@@ -213,6 +214,39 @@ int cell_pack(struct cell *c, struct pcell *pc) {
       pc->progeny[k] = -1;
 
   /* Return the number of packed cells used. */
+  c->pcell_size = count;
   return count;
 }
+
+int cell_pack_ti_ends(struct cell *c, int *ti_ends) {
+
+  /* Pack this cell's data. */
+  ti_ends[0] = c->ti_end_min;
+
+  /* Fill in the progeny, depth-first recursion. */
+  int count = 1;
+  for (int k = 0; k < 8; k++)
+    if (c->progeny[k] != NULL) {
+      count += cell_pack_ti_ends(c->progeny[k], &ti_ends[count]);
+    }
+
+  /* Return the number of packed values. */
+  return count;
+}
+
+int cell_unpack_ti_ends(struct cell *c, int *ti_ends) {
+
+  /* Unpack this cell's data. */
+  c->ti_end_min = ti_ends[0];
+
+  /* Fill in the progeny, depth-first recursion. */
+  int count = 1;
+  for (int k = 0; k < 8; k++)
+    if (c->progeny[k] != NULL) {
+      count += cell_unpack_ti_ends(c->progeny[k], &ti_ends[count]);
+    }
+
+  /* Return the number of unpacked values. */
+  return count;
+}
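The two functions above are exact mirrors: both walk progeny[0..7] depth-first and consume one int per cell, so a buffer packed on one rank unpacks correctly on any rank whose proxy tree has the same topology, and both return the number of slots consumed so the recursion can offset into the buffer. A hypothetical self-check of that symmetry (assumes <assert.h> and <stdlib.h>, and that c->pcell_size was set by an earlier cell_pack()):

    void check_ti_ends_roundtrip(struct cell *c) {
      int *buff = malloc(sizeof(int) * c->pcell_size);
      const int packed = cell_pack_ti_ends(c, buff);
      const int unpacked = cell_unpack_ti_ends(c, buff);
      /* Pack and unpack must agree with each other and with the tree size. */
      assert(packed == unpacked && packed == c->pcell_size);
      free(buff);
    }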
@@ -185,6 +185,8 @@ int cell_glocktree(struct cell *c);
 void cell_gunlocktree(struct cell *c);
 int cell_pack(struct cell *c, struct pcell *pc);
 int cell_unpack(struct pcell *pc, struct cell *c, struct space *s);
+int cell_pack_ti_ends(struct cell *c, int *ti_ends);
+int cell_unpack_ti_ends(struct cell *c, int *ti_ends);
 int cell_getsize(struct cell *c);
 int cell_link_parts(struct cell *c, struct part *parts);
 int cell_link_gparts(struct cell *c, struct gpart *gparts);
@@ -641,7 +641,7 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
       t_rho = scheduler_addtask(s, task_type_send, task_subtype_none,
                                 3 * ci->tag + 1, 0, ci, cj, 0);
       if (!(e->policy & engine_policy_fixdt))
-        t_ti = scheduler_addtask(s, task_type_send, task_subtype_none,
+        t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend,
                                  3 * ci->tag + 2, 0, ci, cj, 0);
 
     /* The send_rho task depends on the cell's ghost task. */
@@ -700,7 +700,7 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
     t_rho = scheduler_addtask(s, task_type_recv, task_subtype_none,
                               3 * c->tag + 1, 0, c, NULL, 0);
     if (!(e->policy & engine_policy_fixdt))
-      t_ti = scheduler_addtask(s, task_type_recv, task_subtype_none,
+      t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend,
                                3 * c->tag + 2, 0, c, NULL, 0);
   }
 
   c->recv_xv = t_xv;
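
Note the tag arithmetic shared by both hunks: each cell owns a base tag, and its message streams are kept apart by a fixed stride. Only the +1 and +2 slots appear in this excerpt; the +0 slot for the xv send is an assumption:

    /* Per-cell MPI tag layout (the +0 entry is inferred, not shown above):
     *   3 * cell->tag + 0  ->  xv   (particle data)
     *   3 * cell->tag + 1  ->  rho  (densities)
     *   3 * cell->tag + 2  ->  tend (packed ti_end_min values, this commit)
     */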
@@ -2241,9 +2241,9 @@ void engine_init_particles(struct engine *e) {
 
   /* Add MPI tasks if need be */
   if (e->policy & engine_policy_mpi) {
     mask |= 1 << task_type_send;
     mask |= 1 << task_type_recv;
+    submask |= 1 << task_subtype_tend;
   }
 
   /* Now, launch the calculation */
@@ -2371,9 +2371,9 @@ void engine_step(struct engine *e) {
 
   /* Add MPI tasks if need be */
   if (e->policy & engine_policy_mpi) {
     mask |= 1 << task_type_send;
     mask |= 1 << task_type_recv;
+    submask |= 1 << task_subtype_tend;
   }
 
   /* Send off the runners. */
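Both launch sites now enable a subtype bit as well as the type bits. A hedged sketch of the gating this implies; the real test lives in the scheduler's task-fetch path, and task_is_eligible is a hypothetical name:

    /* A task runs only if both its type bit and its subtype bit are enabled. */
    static int task_is_eligible(const struct task *t, unsigned int mask,
                                unsigned int submask) {
      return (mask & (1 << t->type)) && (submask & (1 << t->subtype));
    }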
@@ -1114,9 +1114,17 @@ void *runner_main(void *data) {
           runner_do_kick_fixdt(r, ci, 1);
           break;
         case task_type_send:
+          if (t->subtype == task_subtype_tend) {
+            free(t->buff);
+          }
           break;
         case task_type_recv:
-          runner_do_recv_cell(r, ci, 1);
+          if (t->subtype == task_subtype_tend) {
+            cell_unpack_ti_ends(ci, t->buff);
+            free(t->buff);
+          } else {
+            runner_do_recv_cell(r, ci, 1);
+          }
           break;
         case task_type_grav_external:
           runner_do_grav_external(r, t->ci, 1);
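Read together with the scheduler changes below, the t->buff lifetime is symmetric; a commented summary reconstructed from this commit's hunks:

    /*
     * send + task_subtype_tend:
     *   scheduler_enqueue(): t->buff = malloc(...); cell_pack_ti_ends(t->ci,
     *                        t->buff); MPI_Isend(t->buff, ...)
     *   runner_main():       free(t->buff) once the send has completed
     *
     * recv + task_subtype_tend:
     *   scheduler_enqueue(): t->buff = malloc(...); MPI_Irecv(t->buff, ...)
     *   runner_main():       cell_unpack_ti_ends(ci, t->buff); free(t->buff)
     */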
@@ -69,8 +69,8 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta,
     struct task **unlocks_new;
     int *unlock_ind_new;
     s->size_unlocks *= 2;
-    if ((unlocks_new = (struct task **)malloc(sizeof(struct task *) *
-                                              s->size_unlocks)) == NULL ||
+    if ((unlocks_new = (struct task **)malloc(
+             sizeof(struct task *) *s->size_unlocks)) == NULL ||
         (unlock_ind_new = (int *)malloc(sizeof(int) * s->size_unlocks)) == NULL)
       error("Failed to re-allocate unlocks.");
     memcpy(unlocks_new, s->unlocks, sizeof(struct task *) * s->nr_unlocks);
@@ -98,11 +98,13 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta,
 void scheduler_splittasks(struct scheduler *s) {
 
-  const int pts[7][8] = {
-      {-1, 12, 10, 9, 4, 3, 1, 0}, {-1, -1, 11, 10, 5, 4, 2, 1},
-      {-1, -1, -1, 12, 7, 6, 4, 3}, {-1, -1, -1, -1, 8, 7, 5, 4},
-      {-1, -1, -1, -1, -1, 12, 10, 9}, {-1, -1, -1, -1, -1, -1, 11, 10},
-      {-1, -1, -1, -1, -1, -1, -1, 12}};
+  const int pts[7][8] = {{-1, 12, 10, 9, 4, 3, 1, 0},
+                         {-1, -1, 11, 10, 5, 4, 2, 1},
+                         {-1, -1, -1, 12, 7, 6, 4, 3},
+                         {-1, -1, -1, -1, 8, 7, 5, 4},
+                         {-1, -1, -1, -1, -1, 12, 10, 9},
+                         {-1, -1, -1, -1, -1, -1, 11, 10},
+                         {-1, -1, -1, -1, -1, -1, -1, 12}};
   const float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788,
                                0.4025, 0.1897, 0.4025, 0.1897, 0.4025,
                                0.5788, 0.4025, 0.5788};
@@ -707,8 +709,7 @@ void scheduler_reset(struct scheduler *s, int size) {
   if (s->tasks_ind != NULL) free(s->tasks_ind);
 
   /* Allocate the new lists. */
-  if ((s->tasks = (struct task *)malloc(sizeof(struct task) * size)) ==
-          NULL ||
+  if ((s->tasks = (struct task *)malloc(sizeof(struct task) *size)) == NULL ||
       (s->tasks_ind = (int *)malloc(sizeof(int) * size)) == NULL)
     error("Failed to allocate task lists.");
 }
@@ -975,8 +976,14 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
       break;
     case task_type_recv:
 #ifdef WITH_MPI
-      err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type,
-                      t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+      if (t->subtype == task_subtype_tend) {
+        t->buff = malloc(sizeof(int) * t->ci->pcell_size);
+        err = MPI_Irecv(t->buff, t->ci->pcell_size, MPI_INT, t->ci->nodeID,
+                        t->flags, MPI_COMM_WORLD, &t->req);
+      } else {
+        err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type,
+                        t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+      }
       if (err != MPI_SUCCESS) {
         mpi_error(err, "Failed to emit irecv for particle data.");
       }
@@ -990,8 +997,15 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
       break;
     case task_type_send:
 #ifdef WITH_MPI
-      err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type,
-                      t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+      if (t->subtype == task_subtype_tend) {
+        t->buff = malloc(sizeof(int) * t->ci->pcell_size);
+        cell_pack_ti_ends(t->ci, t->buff);
+        err = MPI_Isend(t->buff, t->ci->pcell_size, MPI_INT, t->cj->nodeID,
+                        t->flags, MPI_COMM_WORLD, &t->req);
+      } else {
+        err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type,
+                        t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req);
+      }
       if (err != MPI_SUCCESS) {
         mpi_error(err, "Failed to emit isend for particle data.");
       }
@@ -1215,7 +1229,7 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks,
 
   /* Init the unlocks. */
   if ((s->unlocks = (struct task **)malloc(
-           sizeof(struct task *) * scheduler_init_nr_unlocks)) == NULL ||
+           sizeof(struct task *) *scheduler_init_nr_unlocks)) == NULL ||
       (s->unlock_ind =
           (int *)malloc(sizeof(int) * scheduler_init_nr_unlocks)) == NULL)
     error("Failed to allocate unlocks.");
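One subtlety worth spelling out: the receiver can post MPI_Irecv with an exact count, no probe needed, because the tree size is recorded on both ranks: cell_pack() stores pcell_size on the sender and cell_unpack() now stores it on the receiver (the first two hunks of this commit). Restating the recv-side sizing from the scheduler_enqueue hunk above:

    /* t->ci->pcell_size was recorded when the proxy tree was first unpacked,
     * so both ranks agree on the message length: one int per cell. */
    t->buff = malloc(sizeof(int) * t->ci->pcell_size);
    err = MPI_Irecv(t->buff, t->ci->pcell_size, MPI_INT, t->ci->nodeID,
                    t->flags, MPI_COMM_WORLD, &t->req);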
@@ -82,6 +82,8 @@ struct task {
 
   struct cell *ci, *cj;
+  void *buff;
+
 #ifdef WITH_MPI
   MPI_Request req;
 #endif