diff --git a/src/engine.c b/src/engine.c index 1ce6aeec95a57e93416a9b9796954fc2ece2d5de..e4d77df941a70a592ff7e68d50780bf341910939 100644 --- a/src/engine.c +++ b/src/engine.c @@ -754,9 +754,12 @@ void engine_maketasks(struct engine *e) { scheduler_reset(sched, s->tot_cells * engine_maxtaskspercell); /* Add the space sorting tasks. */ - for (int i = 0; i < e->nr_threads; i++) + for (int i = 0; i < e->nr_threads; i++) { scheduler_addtask(sched, task_type_part_sort, task_subtype_none, i, 0, NULL, NULL, 0); + scheduler_addtask(sched, task_type_gpart_sort, task_subtype_none, i, 0, + NULL, NULL, 0); + } /* Run through the highest level of cells and add pairs. */ for (int i = 0; i < cdim[0]; i++) @@ -2049,9 +2052,13 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, s->nr_queues = nr_queues; /* Create the sorting tasks. */ - for (int i = 0; i < e->nr_threads; i++) - scheduler_addtask(&e->sched, task_type_part_sort, task_subtype_none, i, 0, NULL, - NULL, 0); + for (int i = 0; i < e->nr_threads; i++) { + scheduler_addtask(&e->sched, task_type_part_sort, task_subtype_none, i, 0, + NULL, NULL, 0); + + scheduler_addtask(&e->sched, task_type_gpart_sort, task_subtype_none, i, 0, + NULL, NULL, 0); + } scheduler_ranktasks(&e->sched); diff --git a/src/runner.c b/src/runner.c index 7591091b2b54ae5ce7fbea5be4e1462f783e24ef..5a7f84c040011cf669be3b81405c88b6057750f3 100644 --- a/src/runner.c +++ b/src/runner.c @@ -1060,6 +1060,9 @@ void *runner_main(void *data) { case task_type_part_sort: space_do_parts_sort(); break; + case task_type_gpart_sort: + space_do_gparts_sort(); + break; case task_type_split_cell: space_do_split(e->s, t->ci); break; diff --git a/src/scheduler.c b/src/scheduler.c index d8002e8da69e3dac0c1637cbb33b78190ba17398..58cfcb7aec7ffe994e396393e4d72b2196c8fff0 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -129,6 +129,8 @@ void scheduler_splittasks(struct scheduler *s) { /* Skip sorting tasks. 
*/ if (t->type == task_type_part_sort) continue; + if (t->type == task_type_gpart_sort) continue; + /* Empty task? */ if (t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) { t->type = task_type_none; diff --git a/src/space.c b/src/space.c index 941bbc8f48f520b809fd1ca7026e88e690c6f88a..9a255e38ca9afb9c93833d03bbb8c088bdbefb7b 100644 --- a/src/space.c +++ b/src/space.c @@ -487,7 +487,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { #endif /* Sort the parts according to their cells. */ - space_gparts_sort(s->gparts, gind, nr_gparts, 0, s->nr_cells - 1); + space_parts_sort(s, ind, nr_gparts, 0, s->nr_cells - 1, verbose); /* Re-link the parts. */ for (int k = 0; k < nr_gparts; k++) @@ -554,7 +554,6 @@ void space_split(struct space *s, struct cell *cells, int verbose) { * @param max highest index. * @param verbose Are we talkative ? */ - void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max, int verbose) { @@ -725,103 +724,140 @@ void space_do_parts_sort() { } /* main loop. */ } -void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min, - int max) { - - struct qstack { - volatile size_t i, j; - volatile int min, max; - volatile int ready; - }; - struct qstack *qstack; - int qstack_size = 2 * (max - min) + 10; - volatile unsigned int first, last, waiting; - - int pivot; - ptrdiff_t i, ii, j, jj, temp_i; - int qid; - struct gpart temp_p; - - /* for ( int k = 0 ; k < N ; k++ ) - if ( ind[k] > max || ind[k] < min ) - error( "ind[%i]=%i is not in [%i,%i]." , k , ind[k] , min , max ); */ - - /* Allocate the stack. */ - if ((qstack = malloc(sizeof(struct qstack) * qstack_size)) == NULL) - error("Failed to allocate qstack."); - - /* Init the interval stack. 
*/ - qstack[0].i = 0; - qstack[0].j = N - 1; - qstack[0].min = min; - qstack[0].max = max; - qstack[0].ready = 1; - for (i = 1; i < qstack_size; i++) qstack[i].ready = 0; - first = 0; - last = 1; - waiting = 1; +/** + * @brief Sort the g-particles according to the given indices by launching + * the gpart_sort tasks on the engine. + * + * @param s The #space. + * @param ind The indices with respect to which the g-particles are sorted. + * @param N The number of g-particles. + * @param min Lowest index. + * @param max Highest index. + * @param verbose Are we talkative ? + */ +void space_gparts_sort(struct space *s, size_t *ind, size_t N, int min, int max, + int verbose) { + + ticks tic = getticks(); + + /* Populate the global parallel_sort structure with the input data. */ + space_sort_struct.gparts = s->gparts; + space_sort_struct.ind = ind; + space_sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads; + if ((space_sort_struct.stack = malloc(sizeof(struct qstack) * + space_sort_struct.stack_size)) == NULL) + error("Failed to allocate sorting stack."); + for (int i = 0; i < space_sort_struct.stack_size; i++) + space_sort_struct.stack[i].ready = 0; + + /* Add the first interval. */ + space_sort_struct.stack[0].i = 0; + space_sort_struct.stack[0].j = N - 1; + space_sort_struct.stack[0].min = min; + space_sort_struct.stack[0].max = max; + space_sort_struct.stack[0].ready = 1; + space_sort_struct.first = 0; + space_sort_struct.last = 1; + space_sort_struct.waiting = 1; + + /* Launch the sorting tasks. */ + engine_launch(s->e, s->e->nr_threads, (1 << task_type_gpart_sort), 0); + + /* Verify space_sort_struct. */ + /* for (int i = 1; i < N; i++) + if (ind[i - 1] > ind[i]) + error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1, + ind[i - 1], i, + ind[i], min, max); + message("Sorting succeeded."); */ + + /* Clean up. 
*/ + free(space_sort_struct.stack); + + if (verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + +void space_do_gparts_sort() { + + /* Pointers to the sorting data. */ + size_t *ind = space_sort_struct.ind; + struct gpart *gparts = space_sort_struct.gparts; /* Main loop. */ - while (waiting > 0) { + while (space_sort_struct.waiting) { /* Grab an interval off the queue. */ - qid = (first++) % qstack_size; + int qid = + atomic_inc(&space_sort_struct.first) % space_sort_struct.stack_size; + + /* Wait for the entry to be ready, or for the sorting to be done. */ + while (!space_sort_struct.stack[qid].ready) + if (!space_sort_struct.waiting) return; /* Get the stack entry. */ - i = qstack[qid].i; - j = qstack[qid].j; - min = qstack[qid].min; - max = qstack[qid].max; - qstack[qid].ready = 0; + ptrdiff_t i = space_sort_struct.stack[qid].i; + ptrdiff_t j = space_sort_struct.stack[qid].j; + int min = space_sort_struct.stack[qid].min; + int max = space_sort_struct.stack[qid].max; + space_sort_struct.stack[qid].ready = 0; /* Loop over sub-intervals. */ while (1) { /* Bring beer. */ - pivot = (min + max) / 2; + const int pivot = (min + max) / 2; + /* message("Working on interval [%i,%i] with min=%i, max=%i, pivot=%i.", + i, j, min, max, pivot); */ /* One pass of QuickSort's partitioning. */ - ii = i; - jj = j; + ptrdiff_t ii = i; + ptrdiff_t jj = j; while (ii < jj) { while (ii <= j && ind[ii] <= pivot) ii++; while (jj >= i && ind[jj] > pivot) jj--; if (ii < jj) { - temp_i = ind[ii]; + size_t temp_i = ind[ii]; ind[ii] = ind[jj]; ind[jj] = temp_i; - temp_p = gparts[ii]; + struct gpart temp_p = gparts[ii]; gparts[ii] = gparts[jj]; gparts[jj] = temp_p; } } /* Verify space_sort_struct. */ - /* for ( int k = i ; k <= jj ; k++ ) - if ( ind[k] > pivot ) { - message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, - N=%i." , k , ind[k] , pivot , i , j , N ); - error( "Partition failed (<=pivot)." 
); - } - for ( int k = jj+1 ; k <= j ; k++ ) - if ( ind[k] <= pivot ) { - message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, - N=%i." , k , ind[k] , pivot , i , j , N ); - error( "Partition failed (>pivot)." ); - } */ + /* for (int k = i; k <= jj; k++) + if (ind[k] > pivot) { + message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k, + ind[k], pivot, i, j); + error("Partition failed (<=pivot)."); + } + for (int k = jj + 1; k <= j; k++) + if (ind[k] <= pivot) { + message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k, + ind[k], pivot, i, j); + error("Partition failed (>pivot)."); + } */ /* Split-off largest interval. */ if (jj - i > j - jj + 1) { /* Recurse on the left? */ if (jj > i && pivot > min) { - qid = (last++) % qstack_size; - qstack[qid].i = i; - qstack[qid].j = jj; - qstack[qid].min = min; - qstack[qid].max = pivot; - qstack[qid].ready = 1; - if ((waiting++) >= qstack_size) error("Qstack overflow."); + qid = atomic_inc(&space_sort_struct.last) % + space_sort_struct.stack_size; + while (space_sort_struct.stack[qid].ready) + ; + space_sort_struct.stack[qid].i = i; + space_sort_struct.stack[qid].j = jj; + space_sort_struct.stack[qid].min = min; + space_sort_struct.stack[qid].max = pivot; + if (atomic_inc(&space_sort_struct.waiting) >= + space_sort_struct.stack_size) + error("Qstack overflow."); + space_sort_struct.stack[qid].ready = 1; } /* Recurse on the right? */ @@ -835,13 +871,18 @@ void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min, /* Recurse on the right? 
*/ if (pivot + 1 < max) { - qid = (last++) % qstack_size; - qstack[qid].i = jj + 1; - qstack[qid].j = j; - qstack[qid].min = pivot + 1; - qstack[qid].max = max; - qstack[qid].ready = 1; - if ((waiting++) >= qstack_size) error("Qstack overflow."); + qid = atomic_inc(&space_sort_struct.last) % + space_sort_struct.stack_size; + while (space_sort_struct.stack[qid].ready) + ; + space_sort_struct.stack[qid].i = jj + 1; + space_sort_struct.stack[qid].j = j; + space_sort_struct.stack[qid].min = pivot + 1; + space_sort_struct.stack[qid].max = max; + if (atomic_inc(&space_sort_struct.waiting) >= + space_sort_struct.stack_size) + error("Qstack overflow."); + space_sort_struct.stack[qid].ready = 1; } /* Recurse on the left? */ @@ -854,18 +895,9 @@ void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min, } /* loop over sub-intervals. */ - waiting--; + atomic_dec(&space_sort_struct.waiting); } /* main loop. */ - - /* Verify space_sort_struct. */ - /* for ( i = 1 ; i < N ; i++ ) - if ( ind[i-1] > ind[i] ) - error( "Sorting failed (ind[%i]=%i,ind[%i]=%i)." , i-1 , ind[i-1] , i - , ind[i] ); */ - - /* Clean up. */ - free(qstack); } /** diff --git a/src/space.h b/src/space.h index 91485ff7e2ebe9da8ab927748589ae9f71320803..db9463e03084fa52dc94ae58aae31e668faee547 100644 --- a/src/space.h +++ b/src/space.h @@ -116,6 +116,7 @@ struct qstack { }; struct parallel_sort { struct part *parts; + struct gpart *gparts; struct xpart *xparts; size_t *ind; struct qstack *stack; @@ -127,8 +128,8 @@ extern struct parallel_sort space_sort_struct; /* function prototypes. 
*/ void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max, int verbose); -void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min, - int max); +void space_gparts_sort(struct space *s, size_t *ind, size_t N, int min, int max, + int verbose); struct cell *space_getcell(struct space *s); int space_getsid(struct space *s, struct cell **ci, struct cell **cj, double *shift); @@ -150,5 +151,6 @@ void space_recycle(struct space *s, struct cell *c); void space_split(struct space *s, struct cell *cells, int verbose); void space_do_split(struct space *s, struct cell *c); void space_do_parts_sort(); +void space_do_gparts_sort(); void space_link_cleanup(struct space *s); #endif /* SWIFT_SPACE_H */ diff --git a/src/task.c b/src/task.c index 6e9a715760c9a32ede0191bdb0595c39b995439d..91c202ad96b14bb9417f7b52f8e8d8b9c83496a8 100644 --- a/src/task.c +++ b/src/task.c @@ -43,10 +43,10 @@ /* Task type names. */ const char *taskID_names[task_type_count] = { - "none", "sort", "self", "pair", "sub", "init", - "ghost", "drift", "kick", "send", "recv", "grav_pp", - "grav_mm", "grav_up", "grav_down", "part_sort", "gpart_sort", - "split_cell", "rewait"}; + "none", "sort", "self", "pair", "sub", + "init", "ghost", "drift", "kick", "send", + "recv", "grav_pp", "grav_mm", "grav_up", "grav_down", + "part_sort", "gpart_sort", "split_cell", "rewait"}; const char *subtaskID_names[task_type_count] = {"none", "density", "force", "grav"}; @@ -79,9 +79,10 @@ float task_overlap(const struct task *ta, const struct task *tb) { /* First check if any of the two tasks are of a type that don't use cells. 
*/ if (ta == NULL || tb == NULL || ta->type == task_type_none || - ta->type == task_type_part_sort || ta->type == task_type_split_cell || - ta->type == task_type_rewait || tb->type == task_type_none || - tb->type == task_type_part_sort || tb->type == task_type_split_cell || + ta->type == task_type_part_sort || ta->type == task_type_gpart_sort || + ta->type == task_type_split_cell || ta->type == task_type_rewait || + tb->type == task_type_none || tb->type == task_type_part_sort || + tb->type == task_type_gpart_sort || tb->type == task_type_split_cell || tb->type == task_type_rewait) return 0.0f;