diff --git a/src/engine.c b/src/engine.c index 41103b06253eed6de3046d5efc82de2a3c242290..91e69aaba5ece90dd36f41895fae4fc88cb8d327 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1420,14 +1420,6 @@ void engine_maketasks(struct engine *e) { /* Re-set the scheduler. */ scheduler_reset(sched, s->tot_cells * engine_maxtaskspercell); - /* Add the space sorting tasks. */ - for (int i = 0; i < e->nr_threads; i++) { - scheduler_addtask(sched, task_type_part_sort, task_subtype_none, i, 0, NULL, - NULL, 0); - scheduler_addtask(sched, task_type_gpart_sort, task_subtype_none, i, 0, - NULL, NULL, 0); - } - /* Construct the firt hydro loop over neighbours */ engine_make_hydroloop_tasks(e); @@ -2676,15 +2668,6 @@ void engine_init(struct engine *e, struct space *s, scheduler_init(&e->sched, e->s, nr_tasks, nr_queues, scheduler_flag_steal, e->nodeID, &e->threadpool); - /* Create the sorting tasks. */ - for (int i = 0; i < e->nr_threads; i++) { - scheduler_addtask(&e->sched, task_type_part_sort, task_subtype_none, i, 0, - NULL, NULL, 0); - - scheduler_addtask(&e->sched, task_type_gpart_sort, task_subtype_none, i, 0, - NULL, NULL, 0); - } - scheduler_ranktasks(&e->sched); /* Allocate and init the threads. */ diff --git a/src/runner.c b/src/runner.c index fe86e65b879ba77300d1e140246d6e0b77c08893..7a2bcd8563c62250f44183aef3f4b1457623633e 100644 --- a/src/runner.c +++ b/src/runner.c @@ -1395,12 +1395,6 @@ void *runner_main(void *data) { case task_type_grav_external: runner_do_grav_external(r, t->ci, 1); break; - case task_type_part_sort: - space_do_parts_sort(); - break; - case task_type_gpart_sort: - space_do_gparts_sort(); - break; case task_type_comm_root: break; default: diff --git a/src/scheduler.c b/src/scheduler.c index 2b61c4ea73ae403b70d96e294e82fc2b3486575d..ca455d91ad58e3b83c88885e78d8402fa9f3691f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -123,11 +123,6 @@ void scheduler_splittasks(struct scheduler *s) { break; } - /* Skip sorting tasks. */ - if (t->type == task_type_part_sort) continue; - - if (t->type == task_type_gpart_sort) continue; - /* Empty task? */ if (t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) { t->type = task_type_none; diff --git a/src/space.c b/src/space.c index 75c66d7a7476aa88315deab82e9bb18669315b67..a36906f6818c6bab0dbef717626fe537289f1715 100644 --- a/src/space.c +++ b/src/space.c @@ -50,9 +50,6 @@ #include "threadpool.h" #include "tools.h" -/* Shared sort structure. */ -struct parallel_sort space_sort_struct; - /* Split size. */ int space_splitsize = space_splitsize_default; int space_subsize = space_subsize_default; @@ -548,7 +545,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { for (size_t k = 0; k < nr_parts; k++) if (s->parts[k].gpart != NULL) s->parts[k].gpart->part = &s->parts[k]; - /* Verify space_sort_struct. */ + /* Verify sort_struct. */ /* for ( k = 1 ; k < nr_parts ; k++ ) { if ( ind[k-1] > ind[k] ) { error( "Sort failed!" ); @@ -681,31 +678,34 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, const ticks tic = getticks(); - /*Populate the global parallel_sort structure with the input data */ - space_sort_struct.parts = s->parts; - space_sort_struct.xparts = s->xparts; - space_sort_struct.ind = ind; - space_sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads; - if ((space_sort_struct.stack = malloc(sizeof(struct qstack) * - space_sort_struct.stack_size)) == NULL) + /* Populate a parallel_sort structure with the input data */ + struct parallel_sort sort_struct; + sort_struct.parts = s->parts; + sort_struct.xparts = s->xparts; + sort_struct.ind = ind; + sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads; + if ((sort_struct.stack = malloc(sizeof(struct qstack) * + sort_struct.stack_size)) == NULL) error("Failed to allocate sorting stack."); - for (int i = 0; i < space_sort_struct.stack_size; i++) - space_sort_struct.stack[i].ready = 0; + for (int i = 0; i < sort_struct.stack_size; i++) + sort_struct.stack[i].ready = 0; /* Add the first interval. */ - space_sort_struct.stack[0].i = 0; - space_sort_struct.stack[0].j = N - 1; - space_sort_struct.stack[0].min = min; - space_sort_struct.stack[0].max = max; - space_sort_struct.stack[0].ready = 1; - space_sort_struct.first = 0; - space_sort_struct.last = 1; - space_sort_struct.waiting = 1; - - /* Launch the sorting tasks. */ - engine_launch(s->e, s->e->nr_threads, (1 << task_type_part_sort), 0); - - /* Verify space_sort_struct. */ + sort_struct.stack[0].i = 0; + sort_struct.stack[0].j = N - 1; + sort_struct.stack[0].min = min; + sort_struct.stack[0].max = max; + sort_struct.stack[0].ready = 1; + sort_struct.first = 0; + sort_struct.last = 1; + sort_struct.waiting = 1; + + /* Launch the sorting tasks with a stride of zero such that the same + map data is passed to each thread. */ + threadpool_map(&s->e->threadpool, space_parts_sort_mapper, + &sort_struct, s->e->threadpool.num_threads, 0, NULL); + + /* Verify sort_struct. */ /* for (int i = 1; i < N; i++) if (ind[i - 1] > ind[i]) error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1, @@ -714,37 +714,40 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, message("Sorting succeeded."); */ /* Clean up. */ - free(space_sort_struct.stack); + free(sort_struct.stack); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); } -void space_do_parts_sort() { +void space_parts_sort_mapper(void *map_data, void *extra_data) { + + /* Unpack the mapping data. */ + struct parallel_sort *sort_struct = (struct parallel_sort *)map_data; /* Pointers to the sorting data. */ - int *ind = space_sort_struct.ind; - struct part *parts = space_sort_struct.parts; - struct xpart *xparts = space_sort_struct.xparts; + int *ind = sort_struct->ind; + struct part *parts = sort_struct->parts; + struct xpart *xparts = sort_struct->xparts; /* Main loop. */ - while (space_sort_struct.waiting) { + while (sort_struct->waiting) { /* Grab an interval off the queue. */ int qid = - atomic_inc(&space_sort_struct.first) % space_sort_struct.stack_size; + atomic_inc(&sort_struct->first) % sort_struct->stack_size; /* Wait for the entry to be ready, or for the sorting do be done. */ - while (!space_sort_struct.stack[qid].ready) - if (!space_sort_struct.waiting) return; + while (!sort_struct->stack[qid].ready) + if (!sort_struct->waiting) return; /* Get the stack entry. */ - ptrdiff_t i = space_sort_struct.stack[qid].i; - ptrdiff_t j = space_sort_struct.stack[qid].j; - int min = space_sort_struct.stack[qid].min; - int max = space_sort_struct.stack[qid].max; - space_sort_struct.stack[qid].ready = 0; + ptrdiff_t i = sort_struct->stack[qid].i; + ptrdiff_t j = sort_struct->stack[qid].j; + int min = sort_struct->stack[qid].min; + int max = sort_struct->stack[qid].max; + sort_struct->stack[qid].ready = 0; /* Loop over sub-intervals. */ while (1) { @@ -773,7 +776,7 @@ void space_do_parts_sort() { } } - /* Verify space_sort_struct. */ + /* Verify sort_struct. */ /* for (int k = i; k <= jj; k++) if (ind[k] > pivot) { message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k, @@ -792,18 +795,18 @@ void space_do_parts_sort() { /* Recurse on the left? */ if (jj > i && pivot > min) { - qid = atomic_inc(&space_sort_struct.last) % - space_sort_struct.stack_size; - while (space_sort_struct.stack[qid].ready) + qid = atomic_inc(&sort_struct->last) % + sort_struct->stack_size; + while (sort_struct->stack[qid].ready) ; - space_sort_struct.stack[qid].i = i; - space_sort_struct.stack[qid].j = jj; - space_sort_struct.stack[qid].min = min; - space_sort_struct.stack[qid].max = pivot; - if (atomic_inc(&space_sort_struct.waiting) >= - space_sort_struct.stack_size) + sort_struct->stack[qid].i = i; + sort_struct->stack[qid].j = jj; + sort_struct->stack[qid].min = min; + sort_struct->stack[qid].max = pivot; + if (atomic_inc(&sort_struct->waiting) >= + sort_struct->stack_size) error("Qstack overflow."); - space_sort_struct.stack[qid].ready = 1; + sort_struct->stack[qid].ready = 1; } /* Recurse on the right? */ @@ -817,18 +820,18 @@ void space_do_parts_sort() { /* Recurse on the right? */ if (pivot + 1 < max) { - qid = atomic_inc(&space_sort_struct.last) % - space_sort_struct.stack_size; - while (space_sort_struct.stack[qid].ready) + qid = atomic_inc(&sort_struct->last) % + sort_struct->stack_size; + while (sort_struct->stack[qid].ready) ; - space_sort_struct.stack[qid].i = jj + 1; - space_sort_struct.stack[qid].j = j; - space_sort_struct.stack[qid].min = pivot + 1; - space_sort_struct.stack[qid].max = max; - if (atomic_inc(&space_sort_struct.waiting) >= - space_sort_struct.stack_size) + sort_struct->stack[qid].i = jj + 1; + sort_struct->stack[qid].j = j; + sort_struct->stack[qid].min = pivot + 1; + sort_struct->stack[qid].max = max; + if (atomic_inc(&sort_struct->waiting) >= + sort_struct->stack_size) error("Qstack overflow."); - space_sort_struct.stack[qid].ready = 1; + sort_struct->stack[qid].ready = 1; } /* Recurse on the left? */ @@ -841,7 +844,7 @@ void space_do_parts_sort() { } /* loop over sub-intervals. */ - atomic_dec(&space_sort_struct.waiting); + atomic_dec(&sort_struct->waiting); } /* main loop. */ } @@ -862,30 +865,33 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max, const ticks tic = getticks(); - /*Populate the global parallel_sort structure with the input data */ - space_sort_struct.gparts = s->gparts; - space_sort_struct.ind = ind; - space_sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads; - if ((space_sort_struct.stack = malloc(sizeof(struct qstack) * - space_sort_struct.stack_size)) == NULL) + /*Populate a global parallel_sort structure with the input data */ + struct parallel_sort sort_struct; + sort_struct.gparts = s->gparts; + sort_struct.ind = ind; + sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads; + if ((sort_struct.stack = malloc(sizeof(struct qstack) * + sort_struct.stack_size)) == NULL) error("Failed to allocate sorting stack."); - for (int i = 0; i < space_sort_struct.stack_size; i++) - space_sort_struct.stack[i].ready = 0; + for (int i = 0; i < sort_struct.stack_size; i++) + sort_struct.stack[i].ready = 0; /* Add the first interval. */ - space_sort_struct.stack[0].i = 0; - space_sort_struct.stack[0].j = N - 1; - space_sort_struct.stack[0].min = min; - space_sort_struct.stack[0].max = max; - space_sort_struct.stack[0].ready = 1; - space_sort_struct.first = 0; - space_sort_struct.last = 1; - space_sort_struct.waiting = 1; - - /* Launch the sorting tasks. */ - engine_launch(s->e, s->e->nr_threads, (1 << task_type_gpart_sort), 0); - - /* Verify space_sort_struct. */ + sort_struct.stack[0].i = 0; + sort_struct.stack[0].j = N - 1; + sort_struct.stack[0].min = min; + sort_struct.stack[0].max = max; + sort_struct.stack[0].ready = 1; + sort_struct.first = 0; + sort_struct.last = 1; + sort_struct.waiting = 1; + + /* Launch the sorting tasks with a stride of zero such that the same + map data is passed to each thread. */ + threadpool_map(&s->e->threadpool, space_gparts_sort_mapper, + &sort_struct, s->e->threadpool.num_threads, 0, NULL); + + /* Verify sort_struct. */ /* for (int i = 1; i < N; i++) if (ind[i - 1] > ind[i]) error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1, @@ -894,36 +900,39 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max, message("Sorting succeeded."); */ /* Clean up. */ - free(space_sort_struct.stack); + free(sort_struct.stack); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); } -void space_do_gparts_sort() { +void space_gparts_sort_mapper(void *map_data, void *extra_data) { + + /* Unpack the mapping data. */ + struct parallel_sort *sort_struct = (struct parallel_sort *)map_data; /* Pointers to the sorting data. */ - int *ind = space_sort_struct.ind; - struct gpart *gparts = space_sort_struct.gparts; + int *ind = sort_struct->ind; + struct gpart *gparts = sort_struct->gparts; /* Main loop. */ - while (space_sort_struct.waiting) { + while (sort_struct->waiting) { /* Grab an interval off the queue. */ int qid = - atomic_inc(&space_sort_struct.first) % space_sort_struct.stack_size; + atomic_inc(&sort_struct->first) % sort_struct->stack_size; /* Wait for the entry to be ready, or for the sorting do be done. */ - while (!space_sort_struct.stack[qid].ready) - if (!space_sort_struct.waiting) return; + while (!sort_struct->stack[qid].ready) + if (!sort_struct->waiting) return; /* Get the stack entry. */ - ptrdiff_t i = space_sort_struct.stack[qid].i; - ptrdiff_t j = space_sort_struct.stack[qid].j; - int min = space_sort_struct.stack[qid].min; - int max = space_sort_struct.stack[qid].max; - space_sort_struct.stack[qid].ready = 0; + ptrdiff_t i = sort_struct->stack[qid].i; + ptrdiff_t j = sort_struct->stack[qid].j; + int min = sort_struct->stack[qid].min; + int max = sort_struct->stack[qid].max; + sort_struct->stack[qid].ready = 0; /* Loop over sub-intervals. */ while (1) { @@ -949,7 +958,7 @@ void space_do_gparts_sort() { } } - /* Verify space_sort_struct. */ + /* Verify sort_struct. */ /* for (int k = i; k <= jj; k++) if (ind[k] > pivot) { message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k, @@ -968,18 +977,18 @@ void space_do_gparts_sort() { /* Recurse on the left? */ if (jj > i && pivot > min) { - qid = atomic_inc(&space_sort_struct.last) % - space_sort_struct.stack_size; - while (space_sort_struct.stack[qid].ready) + qid = atomic_inc(&sort_struct->last) % + sort_struct->stack_size; + while (sort_struct->stack[qid].ready) ; - space_sort_struct.stack[qid].i = i; - space_sort_struct.stack[qid].j = jj; - space_sort_struct.stack[qid].min = min; - space_sort_struct.stack[qid].max = pivot; - if (atomic_inc(&space_sort_struct.waiting) >= - space_sort_struct.stack_size) + sort_struct->stack[qid].i = i; + sort_struct->stack[qid].j = jj; + sort_struct->stack[qid].min = min; + sort_struct->stack[qid].max = pivot; + if (atomic_inc(&sort_struct->waiting) >= + sort_struct->stack_size) error("Qstack overflow."); - space_sort_struct.stack[qid].ready = 1; + sort_struct->stack[qid].ready = 1; } /* Recurse on the right? */ @@ -993,18 +1002,18 @@ void space_do_gparts_sort() { /* Recurse on the right? */ if (pivot + 1 < max) { - qid = atomic_inc(&space_sort_struct.last) % - space_sort_struct.stack_size; - while (space_sort_struct.stack[qid].ready) + qid = atomic_inc(&sort_struct->last) % + sort_struct->stack_size; + while (sort_struct->stack[qid].ready) ; - space_sort_struct.stack[qid].i = jj + 1; - space_sort_struct.stack[qid].j = j; - space_sort_struct.stack[qid].min = pivot + 1; - space_sort_struct.stack[qid].max = max; - if (atomic_inc(&space_sort_struct.waiting) >= - space_sort_struct.stack_size) + sort_struct->stack[qid].i = jj + 1; + sort_struct->stack[qid].j = j; + sort_struct->stack[qid].min = pivot + 1; + sort_struct->stack[qid].max = max; + if (atomic_inc(&sort_struct->waiting) >= + sort_struct->stack_size) error("Qstack overflow."); - space_sort_struct.stack[qid].ready = 1; + sort_struct->stack[qid].ready = 1; } /* Recurse on the left? */ @@ -1017,7 +1026,7 @@ void space_do_gparts_sort() { } /* loop over sub-intervals. */ - atomic_dec(&space_sort_struct.waiting); + atomic_dec(&sort_struct->waiting); } /* main loop. */ } diff --git a/src/space.h b/src/space.h index ff046562c720b092d89db6672c355672a275adf4..e70e77f15f0b1ea898fb328746b484a6e61f6738 100644 --- a/src/space.h +++ b/src/space.h @@ -127,7 +127,6 @@ struct parallel_sort { unsigned int stack_size; volatile unsigned int first, last, waiting; }; -extern struct parallel_sort space_sort_struct; /* function prototypes. */ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, @@ -151,6 +150,8 @@ void space_map_parts_xparts(struct space *s, struct cell *c)); void space_map_cells_post(struct space *s, int full, void (*fun)(struct cell *c, void *data), void *data); +void space_parts_sort_mapper(void *map_data, void *extra_data); +void space_gparts_sort_mapper(void *map_data, void *extra_data); void space_rebuild(struct space *s, double h_max, int verbose); void space_recycle(struct space *s, struct cell *c); void space_split(struct space *s, struct cell *cells, int verbose); diff --git a/src/task.c b/src/task.c index 964916ce5e0881fa9ad5afb25edac38ab06cf689..f7a04454b70688830f9577999c789cb3be7ee619 100644 --- a/src/task.c +++ b/src/task.c @@ -47,10 +47,9 @@ /* Task type names. */ const char *taskID_names[task_type_count] = { - "none", "sort", "self", "pair", "sub", - "init", "ghost", "drift", "kick", "send", - "recv", "grav_pp", "grav_mm", "grav_up", "grav_down", - "grav_external", "part_sort", "gpart_sort", "comm_root "}; + "none", "sort", "self", "pair", "sub", "init", + "ghost", "drift", "kick", "send", "recv", "grav_pp", + "grav_mm", "grav_up", "grav_down", "grav_external", "comm_root "}; const char *subtaskID_names[task_type_count] = {"none", "density", "force", "grav"}; @@ -83,9 +82,7 @@ float task_overlap(const struct task *ta, const struct task *tb) { /* First check if any of the two tasks are of a type that don't use cells. */ if (ta == NULL || tb == NULL || ta->type == task_type_none || - ta->type == task_type_part_sort || ta->type == task_type_gpart_sort || - tb->type == task_type_none || tb->type == task_type_part_sort || - tb->type == task_type_gpart_sort) + tb->type == task_type_none) return 0.0f; /* Compute the union of the cell data. */ diff --git a/src/task.h b/src/task.h index 503d2e35e0afcc4598c5ca20487309504398c4c1..8193ce970fd67779aa6c8d62e0be5e0b5ea7cd83 100644 --- a/src/task.h +++ b/src/task.h @@ -49,8 +49,6 @@ enum task_types { task_type_grav_up, task_type_grav_down, task_type_grav_external, - task_type_part_sort, - task_type_gpart_sort, task_type_comm_root, task_type_count }; diff --git a/src/threadpool.h b/src/threadpool.h index 729543fd59bc5cf0a8110bd2b54ef78a0d7c7de1..6e97cf73083f04d26c7e53871f4059162190e5bd 100644 --- a/src/threadpool.h +++ b/src/threadpool.h @@ -45,7 +45,7 @@ struct threadpool { void *map_data, *map_extra_data; volatile size_t map_data_count, map_data_size, map_data_stride; volatile threadpool_map_function map_function; - + /* Counter for the number of threads that are done. */ volatile int num_threads_done; };