From 038c0da4aa14bec17d783d7009c4968eb9765f99 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet <gonnet@google.com> Date: Mon, 20 Jun 2016 21:07:45 +0200 Subject: [PATCH] pack/unpack the cell ti_end_min values on the fly, makes the final communication step a lot leaner. --- src/cell.c | 34 ++++++++++++++++++++++++++++++++++ src/cell.h | 2 ++ src/engine.c | 8 ++++---- src/runner.c | 10 +++++++++- src/scheduler.c | 42 ++++++++++++++++++++++++++++-------------- src/task.h | 2 ++ 6 files changed, 79 insertions(+), 19 deletions(-) diff --git a/src/cell.c b/src/cell.c index e152cd6136..c9e35c1fb9 100644 --- a/src/cell.c +++ b/src/cell.c @@ -128,6 +128,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { } /* Return the total number of unpacked cells. */ + c->pcell_size = count; return count; } @@ -213,6 +214,39 @@ int cell_pack(struct cell *c, struct pcell *pc) { pc->progeny[k] = -1; /* Return the number of packed cells used. */ + c->pcell_size = count; + return count; +} + +int cell_pack_ti_ends(struct cell *c, int *ti_ends) { + + /* Pack this cell's data. */ + ti_ends[0] = c->ti_end_min; + + /* Fill in the progeny, depth-first recursion. */ + int count = 1; + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) { + count += cell_pack_ti_ends(c->progeny[k], &ti_ends[count]); + } + + /* Return the number of packed values. */ + return count; +} + +int cell_unpack_ti_ends(struct cell *c, int *ti_ends) { + + /* Unpack this cell's data. */ + c->ti_end_min = ti_ends[0]; + + /* Fill in the progeny, depth-first recursion. */ + int count = 1; + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) { + count += cell_unpack_ti_ends(c->progeny[k], &ti_ends[count]); + } + + /* Return the number of packed values. */ return count; } diff --git a/src/cell.h b/src/cell.h index 45ee3e8ad9..24b679caf4 100644 --- a/src/cell.h +++ b/src/cell.h @@ -185,6 +185,8 @@ int cell_glocktree(struct cell *c); void cell_gunlocktree(struct cell *c); int cell_pack(struct cell *c, struct pcell *pc); int cell_unpack(struct pcell *pc, struct cell *c, struct space *s); +int cell_pack_ti_ends(struct cell *c, int *ti_ends); +int cell_unpack_ti_ends(struct cell *c, int *ti_ends); int cell_getsize(struct cell *c); int cell_link_parts(struct cell *c, struct part *parts); int cell_link_gparts(struct cell *c, struct gpart *gparts); diff --git a/src/engine.c b/src/engine.c index 3f266e47b9..cd8eb6d3c8 100644 --- a/src/engine.c +++ b/src/engine.c @@ -641,7 +641,7 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, t_rho = scheduler_addtask(s, task_type_send, task_subtype_none, 3 * ci->tag + 1, 0, ci, cj, 0); if (!(e->policy & engine_policy_fixdt)) - t_ti = scheduler_addtask(s, task_type_send, task_subtype_none, + t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend, 3 * ci->tag + 2, 0, ci, cj, 0); /* The send_rho task depends on the cell's ghost task. */ @@ -700,7 +700,7 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv, t_rho = scheduler_addtask(s, task_type_recv, task_subtype_none, 3 * c->tag + 1, 0, c, NULL, 0); if (!(e->policy & engine_policy_fixdt)) - t_ti = scheduler_addtask(s, task_type_recv, task_subtype_none, + t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend, 3 * c->tag + 2, 0, c, NULL, 0); } c->recv_xv = t_xv; @@ -2241,9 +2241,9 @@ void engine_init_particles(struct engine *e) { /* Add MPI tasks if need be */ if (e->policy & engine_policy_mpi) { - mask |= 1 << task_type_send; mask |= 1 << task_type_recv; + submask |= 1 << task_subtype_tend; } /* Now, launch the calculation */ @@ -2371,9 +2371,9 @@ void engine_step(struct engine *e) { /* Add MPI tasks if need be */ if (e->policy & engine_policy_mpi) { - mask |= 1 << task_type_send; mask |= 1 << task_type_recv; + submask |= 1 << task_subtype_tend; } /* Send off the runners. */ diff --git a/src/runner.c b/src/runner.c index 7843fde0a5..249479546b 100644 --- a/src/runner.c +++ b/src/runner.c @@ -1114,9 +1114,17 @@ void *runner_main(void *data) { runner_do_kick_fixdt(r, ci, 1); break; case task_type_send: + if (t->subtype == task_subtype_tend) { + free(t->buff); + } break; case task_type_recv: - runner_do_recv_cell(r, ci, 1); + if (t->subtype == task_subtype_tend) { + cell_unpack_ti_ends(ci, t->buff); + free(t->buff); + } else { + runner_do_recv_cell(r, ci, 1); + } break; case task_type_grav_external: runner_do_grav_external(r, t->ci, 1); diff --git a/src/scheduler.c b/src/scheduler.c index 9e542af551..8447436529 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -69,8 +69,8 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta, struct task **unlocks_new; int *unlock_ind_new; s->size_unlocks *= 2; - if ((unlocks_new = (struct task **)malloc(sizeof(struct task *) * - s->size_unlocks)) == NULL || + if ((unlocks_new = (struct task **)malloc( + sizeof(struct task *) *s->size_unlocks)) == NULL || (unlock_ind_new = (int *)malloc(sizeof(int) * s->size_unlocks)) == NULL) error("Failed to re-allocate unlocks."); memcpy(unlocks_new, s->unlocks, sizeof(struct task *) * s->nr_unlocks); @@ -98,11 +98,13 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta, void scheduler_splittasks(struct scheduler *s) { - const int pts[7][8] = { - {-1, 12, 10, 9, 4, 3, 1, 0}, {-1, -1, 11, 10, 5, 4, 2, 1}, - {-1, -1, -1, 12, 7, 6, 4, 3}, {-1, -1, -1, -1, 8, 7, 5, 4}, - {-1, -1, -1, -1, -1, 12, 10, 9}, {-1, -1, -1, -1, -1, -1, 11, 10}, - {-1, -1, -1, -1, -1, -1, -1, 12}}; + const int pts[7][8] = {{-1, 12, 10, 9, 4, 3, 1, 0}, + {-1, -1, 11, 10, 5, 4, 2, 1}, + {-1, -1, -1, 12, 7, 6, 4, 3}, + {-1, -1, -1, -1, 8, 7, 5, 4}, + {-1, -1, -1, -1, -1, 12, 10, 9}, + {-1, -1, -1, -1, -1, -1, 11, 10}, + {-1, -1, -1, -1, -1, -1, -1, 12}}; const float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.1897, 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.5788}; @@ -707,8 +709,7 @@ void scheduler_reset(struct scheduler *s, int size) { if (s->tasks_ind != NULL) free(s->tasks_ind); /* Allocate the new lists. */ - if ((s->tasks = (struct task *)malloc(sizeof(struct task) * size)) == - NULL || + if ((s->tasks = (struct task *)malloc(sizeof(struct task) *size)) == NULL || (s->tasks_ind = (int *)malloc(sizeof(int) * size)) == NULL) error("Failed to allocate task lists."); } @@ -975,8 +976,14 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_recv: #ifdef WITH_MPI - err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type, - t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + if (t->subtype == task_subtype_tend) { + t->buff = malloc(sizeof(int) * t->ci->pcell_size); + err = MPI_Irecv(t->buff, t->ci->pcell_size, MPI_INT, t->ci->nodeID, + t->flags, MPI_COMM_WORLD, &t->req); + } else { + err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type, + t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + } if (err != MPI_SUCCESS) { mpi_error(err, "Failed to emit irecv for particle data."); } @@ -990,8 +997,15 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_send: #ifdef WITH_MPI - err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type, - t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + if (t->subtype == task_subtype_tend) { + t->buff = malloc(sizeof(int) * t->ci->pcell_size); + cell_pack_ti_ends(t->ci, t->buff); + err = MPI_Isend(t->buff, t->ci->pcell_size, MPI_INT, t->cj->nodeID, + t->flags, MPI_COMM_WORLD, &t->req); + } else { + err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type, + t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + } if (err != MPI_SUCCESS) { mpi_error(err, "Failed to emit isend for particle data."); } @@ -1215,7 +1229,7 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks, /* Init the unlocks. */ if ((s->unlocks = (struct task **)malloc( - sizeof(struct task *) * scheduler_init_nr_unlocks)) == NULL || + sizeof(struct task *) *scheduler_init_nr_unlocks)) == NULL || (s->unlock_ind = (int *)malloc(sizeof(int) * scheduler_init_nr_unlocks)) == NULL) error("Failed to allocate unlocks."); diff --git a/src/task.h b/src/task.h index 84146fa7ca..8dea503e42 100644 --- a/src/task.h +++ b/src/task.h @@ -82,6 +82,8 @@ struct task { struct cell *ci, *cj; + void *buff; + #ifdef WITH_MPI MPI_Request req; #endif -- GitLab