Commit e7d42bcd authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Parallel version of g-particle sorting

parent eb8ebccc
......@@ -754,9 +754,12 @@ void engine_maketasks(struct engine *e) {
scheduler_reset(sched, s->tot_cells * engine_maxtaskspercell);
/* Add the space sorting tasks. */
for (int i = 0; i < e->nr_threads; i++)
for (int i = 0; i < e->nr_threads; i++) {
scheduler_addtask(sched, task_type_part_sort, task_subtype_none, i, 0, NULL,
NULL, 0);
scheduler_addtask(sched, task_type_gpart_sort, task_subtype_none, i, 0,
NULL, NULL, 0);
}
/* Run through the highest level of cells and add pairs. */
for (int i = 0; i < cdim[0]; i++)
......@@ -2049,9 +2052,13 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
s->nr_queues = nr_queues;
/* Create the sorting tasks. */
for (int i = 0; i < e->nr_threads; i++)
scheduler_addtask(&e->sched, task_type_part_sort, task_subtype_none, i, 0, NULL,
NULL, 0);
for (int i = 0; i < e->nr_threads; i++) {
scheduler_addtask(&e->sched, task_type_part_sort, task_subtype_none, i, 0,
NULL, NULL, 0);
scheduler_addtask(&e->sched, task_type_gpart_sort, task_subtype_none, i, 0,
NULL, NULL, 0);
}
scheduler_ranktasks(&e->sched);
......
......@@ -1060,6 +1060,9 @@ void *runner_main(void *data) {
case task_type_part_sort:
space_do_parts_sort();
break;
case task_type_gpart_sort:
space_do_gparts_sort();
break;
case task_type_split_cell:
space_do_split(e->s, t->ci);
break;
......
......@@ -129,6 +129,8 @@ void scheduler_splittasks(struct scheduler *s) {
/* Skip sorting tasks. */
if (t->type == task_type_part_sort) continue;
if (t->type == task_type_gpart_sort) continue;
/* Empty task? */
if (t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) {
t->type = task_type_none;
......
......@@ -487,7 +487,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
#endif
/* Sort the parts according to their cells. */
space_gparts_sort(s->gparts, gind, nr_gparts, 0, s->nr_cells - 1);
space_parts_sort(s, ind, nr_gparts, 0, s->nr_cells - 1, verbose);
/* Re-link the parts. */
for (int k = 0; k < nr_gparts; k++)
......@@ -554,7 +554,6 @@ void space_split(struct space *s, struct cell *cells, int verbose) {
* @param max highest index.
* @param verbose Are we talkative ?
*/
void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
int verbose) {
......@@ -725,103 +724,140 @@ void space_do_parts_sort() {
} /* main loop. */
}
void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min,
int max) {
struct qstack {
volatile size_t i, j;
volatile int min, max;
volatile int ready;
};
struct qstack *qstack;
int qstack_size = 2 * (max - min) + 10;
volatile unsigned int first, last, waiting;
int pivot;
ptrdiff_t i, ii, j, jj, temp_i;
int qid;
struct gpart temp_p;
/* for ( int k = 0 ; k < N ; k++ )
if ( ind[k] > max || ind[k] < min )
error( "ind[%i]=%i is not in [%i,%i]." , k , ind[k] , min , max ); */
/* Allocate the stack. */
if ((qstack = malloc(sizeof(struct qstack) * qstack_size)) == NULL)
error("Failed to allocate qstack.");
/* Init the interval stack. */
qstack[0].i = 0;
qstack[0].j = N - 1;
qstack[0].min = min;
qstack[0].max = max;
qstack[0].ready = 1;
for (i = 1; i < qstack_size; i++) qstack[i].ready = 0;
first = 0;
last = 1;
waiting = 1;
/**
 * @brief Sort the g-particles according to the given indices.
 *
 * This is the driver of the threaded sort: it publishes the g-particle and
 * index arrays through the global #space_sort_struct, seeds the shared
 * interval stack with the full range and then calls engine_launch() with the
 * task_type_gpart_sort mask.
 *
 * NOTE(review): the stack is freed right after engine_launch() returns, so
 * that call is presumably blocking until all sorting tasks are done -- confirm
 * against engine_launch().
 *
 * @param s The #space.
 * @param ind The indices with respect to which the gparts are sorted.
 * @param N The number of gparts.
 * @param min Lowest index.
 * @param max Highest index.
 * @param verbose Are we talkative ?
 */
void space_gparts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
                       int verbose) {

  const ticks tic = getticks();

  /* Publish the arrays to be sorted through the global sorting structure. */
  space_sort_struct.gparts = s->gparts;
  space_sort_struct.ind = ind;

  /* Size and allocate the shared interval stack. */
  space_sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads;
  space_sort_struct.stack =
      malloc(sizeof(struct qstack) * space_sort_struct.stack_size);
  if (space_sort_struct.stack == NULL) {
    error("Failed to allocate sorting stack.");
  }

  /* No entry holds a valid interval yet. */
  for (int k = 0; k < space_sort_struct.stack_size; k++) {
    space_sort_struct.stack[k].ready = 0;
  }

  /* Seed the stack with the interval covering the whole array. */
  space_sort_struct.stack[0].i = 0;
  space_sort_struct.stack[0].j = N - 1;
  space_sort_struct.stack[0].min = min;
  space_sort_struct.stack[0].max = max;
  space_sort_struct.stack[0].ready = 1;

  /* One entry is queued: it sits at position 0, the next free slot is 1. */
  space_sort_struct.first = 0;
  space_sort_struct.last = 1;
  space_sort_struct.waiting = 1;

  /* Hand the work over to the g-particle sorting tasks. */
  engine_launch(s->e, s->e->nr_threads, (1 << task_type_gpart_sort), 0);

  /* Debugging aid: check that the indices ended up in ascending order. */
  /* for (int i = 1; i < N; i++)
    if (ind[i - 1] > ind[i])
      error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1,
            ind[i - 1], i,
            ind[i], min, max);
  message("Sorting succeeded."); */

  /* The interval stack is no longer needed. */
  free(space_sort_struct.stack);

  /* Report the elapsed time if asked to. */
  if (verbose) {
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
  }
}
void space_do_gparts_sort() {
/* Pointers to the sorting data. */
size_t *ind = space_sort_struct.ind;
struct gpart *gparts = space_sort_struct.gparts;
/* Main loop. */
while (waiting > 0) {
while (space_sort_struct.waiting) {
/* Grab an interval off the queue. */
qid = (first++) % qstack_size;
int qid =
atomic_inc(&space_sort_struct.first) % space_sort_struct.stack_size;
/* Wait for the entry to be ready, or for the sorting to be done. */
while (!space_sort_struct.stack[qid].ready)
if (!space_sort_struct.waiting) return;
/* Get the stack entry. */
i = qstack[qid].i;
j = qstack[qid].j;
min = qstack[qid].min;
max = qstack[qid].max;
qstack[qid].ready = 0;
ptrdiff_t i = space_sort_struct.stack[qid].i;
ptrdiff_t j = space_sort_struct.stack[qid].j;
int min = space_sort_struct.stack[qid].min;
int max = space_sort_struct.stack[qid].max;
space_sort_struct.stack[qid].ready = 0;
/* Loop over sub-intervals. */
while (1) {
/* Bring beer. */
pivot = (min + max) / 2;
const int pivot = (min + max) / 2;
/* message("Working on interval [%i,%i] with min=%i, max=%i, pivot=%i.",
i, j, min, max, pivot); */
/* One pass of QuickSort's partitioning. */
ii = i;
jj = j;
ptrdiff_t ii = i;
ptrdiff_t jj = j;
while (ii < jj) {
while (ii <= j && ind[ii] <= pivot) ii++;
while (jj >= i && ind[jj] > pivot) jj--;
if (ii < jj) {
temp_i = ind[ii];
size_t temp_i = ind[ii];
ind[ii] = ind[jj];
ind[jj] = temp_i;
temp_p = gparts[ii];
struct gpart temp_p = gparts[ii];
gparts[ii] = gparts[jj];
gparts[jj] = temp_p;
}
}
/* Verify space_sort_struct. */
/* for ( int k = i ; k <= jj ; k++ )
if ( ind[k] > pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
N=%i." , k , ind[k] , pivot , i , j , N );
error( "Partition failed (<=pivot)." );
}
for ( int k = jj+1 ; k <= j ; k++ )
if ( ind[k] <= pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
N=%i." , k , ind[k] , pivot , i , j , N );
error( "Partition failed (>pivot)." );
} */
/* for (int k = i; k <= jj; k++)
if (ind[k] > pivot) {
message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k,
ind[k], pivot, i, j);
error("Partition failed (<=pivot).");
}
for (int k = jj + 1; k <= j; k++)
if (ind[k] <= pivot) {
message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k,
ind[k], pivot, i, j);
error("Partition failed (>pivot).");
} */
/* Split-off largest interval. */
if (jj - i > j - jj + 1) {
/* Recurse on the left? */
if (jj > i && pivot > min) {
qid = (last++) % qstack_size;
qstack[qid].i = i;
qstack[qid].j = jj;
qstack[qid].min = min;
qstack[qid].max = pivot;
qstack[qid].ready = 1;
if ((waiting++) >= qstack_size) error("Qstack overflow.");
qid = atomic_inc(&space_sort_struct.last) %
space_sort_struct.stack_size;
while (space_sort_struct.stack[qid].ready)
;
space_sort_struct.stack[qid].i = i;
space_sort_struct.stack[qid].j = jj;
space_sort_struct.stack[qid].min = min;
space_sort_struct.stack[qid].max = pivot;
if (atomic_inc(&space_sort_struct.waiting) >=
space_sort_struct.stack_size)
error("Qstack overflow.");
space_sort_struct.stack[qid].ready = 1;
}
/* Recurse on the right? */
......@@ -835,13 +871,18 @@ void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min,
/* Recurse on the right? */
if (pivot + 1 < max) {
qid = (last++) % qstack_size;
qstack[qid].i = jj + 1;
qstack[qid].j = j;
qstack[qid].min = pivot + 1;
qstack[qid].max = max;
qstack[qid].ready = 1;
if ((waiting++) >= qstack_size) error("Qstack overflow.");
qid = atomic_inc(&space_sort_struct.last) %
space_sort_struct.stack_size;
while (space_sort_struct.stack[qid].ready)
;
space_sort_struct.stack[qid].i = jj + 1;
space_sort_struct.stack[qid].j = j;
space_sort_struct.stack[qid].min = pivot + 1;
space_sort_struct.stack[qid].max = max;
if (atomic_inc(&space_sort_struct.waiting) >=
space_sort_struct.stack_size)
error("Qstack overflow.");
space_sort_struct.stack[qid].ready = 1;
}
/* Recurse on the left? */
......@@ -854,18 +895,9 @@ void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min,
} /* loop over sub-intervals. */
waiting--;
atomic_dec(&space_sort_struct.waiting);
} /* main loop. */
/* Verify space_sort_struct. */
/* for ( i = 1 ; i < N ; i++ )
if ( ind[i-1] > ind[i] )
error( "Sorting failed (ind[%i]=%i,ind[%i]=%i)." , i-1 , ind[i-1] , i
, ind[i] ); */
/* Clean up. */
free(qstack);
}
/**
......
......@@ -116,6 +116,7 @@ struct qstack {
};
struct parallel_sort {
struct part *parts;
struct gpart *gparts;
struct xpart *xparts;
size_t *ind;
struct qstack *stack;
......@@ -127,8 +128,8 @@ extern struct parallel_sort space_sort_struct;
/* function prototypes. */
void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
int verbose);
void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min,
int max);
void space_gparts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
int verbose);
struct cell *space_getcell(struct space *s);
int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
double *shift);
......@@ -150,5 +151,6 @@ void space_recycle(struct space *s, struct cell *c);
void space_split(struct space *s, struct cell *cells, int verbose);
void space_do_split(struct space *s, struct cell *c);
void space_do_parts_sort();
void space_do_gparts_sort();
void space_link_cleanup(struct space *s);
#endif /* SWIFT_SPACE_H */
......@@ -43,10 +43,10 @@
/* Task type names. */
const char *taskID_names[task_type_count] = {
"none", "sort", "self", "pair", "sub", "init",
"ghost", "drift", "kick", "send", "recv", "grav_pp",
"grav_mm", "grav_up", "grav_down", "part_sort", "gpart_sort",
"split_cell", "rewait"};
"none", "sort", "self", "pair", "sub",
"init", "ghost", "drift", "kick", "send",
"recv", "grav_pp", "grav_mm", "grav_up", "grav_down",
"part_sort", "gpart_sort", "split_cell", "rewait"};
/* Sub-task type names.
   NOTE(review): the array is dimensioned with task_type_count, not a
   sub-type count; only the four names below are given, so any further
   entries are implicitly NULL -- confirm this is intended. */
const char *subtaskID_names[task_type_count] = {
    "none",
    "density",
    "force",
    "grav",
};
......@@ -79,9 +79,10 @@ float task_overlap(const struct task *ta, const struct task *tb) {
/* First check whether either of the two tasks is of a type that doesn't
use cells. */
if (ta == NULL || tb == NULL || ta->type == task_type_none ||
ta->type == task_type_part_sort || ta->type == task_type_split_cell ||
ta->type == task_type_rewait || tb->type == task_type_none ||
tb->type == task_type_part_sort || tb->type == task_type_split_cell ||
ta->type == task_type_part_sort || ta->type == task_type_gpart_sort ||
ta->type == task_type_split_cell || ta->type == task_type_rewait ||
tb->type == task_type_none || tb->type == task_type_part_sort ||
tb->type == task_type_gpart_sort || tb->type == task_type_split_cell ||
tb->type == task_type_rewait)
return 0.0f;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment