Commit 7222e223 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

merge with master.

parents 3ce7876b 9f1fd7ec
......@@ -172,7 +172,7 @@ void engine_redistribute(struct engine *e) {
dest[k] = cells[cid].nodeID;
counts[nodeID * nr_nodes + dest[k]] += 1;
}
parts_sort(s->parts, s->xparts, dest, s->nr_parts, 0, nr_nodes - 1);
space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1);
/* Get all the counts from all the nodes. */
if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM,
......@@ -994,6 +994,11 @@ void engine_maketasks(struct engine *e) {
/* Re-set the scheduler. */
scheduler_reset(sched, s->tot_cells * engine_maxtaskspercell);
/* Add the space sorting tasks. */
for (i = 0; i < e->nr_threads; i++)
scheduler_addtask(sched, task_type_psort, task_subtype_none, i, 0, NULL,
NULL, 0);
/* Run through the highest level of cells and add pairs. */
for (i = 0; i < cdim[0]; i++)
for (j = 0; j < cdim[1]; j++)
......@@ -1710,15 +1715,6 @@ void engine_launch(struct engine *e, int nr_runners, unsigned int mask) {
error("Error while waiting for barrier.");
}
void hassorted(struct cell *c) {
if (c->sorted) error("Spurious sorted flags.");
if (c->split)
for (int k = 0; k < 8; k++)
if (c->progeny[k] != NULL) hassorted(c->progeny[k]);
}
/**
* @brief Let the #engine loose to compute the forces.
*
......@@ -2172,13 +2168,18 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
s->nr_queues = nr_queues;
/* Append a kick1 task to each cell. */
scheduler_reset(&e->sched, s->tot_cells);
scheduler_reset(&e->sched, s->tot_cells + e->nr_threads);
for (k = 0; k < s->nr_cells; k++)
s->cells[k].kick1 =
scheduler_addtask(&e->sched, task_type_kick1, task_subtype_none, 0, 0,
&s->cells[k], NULL, 0);
scheduler_ranktasks(&e->sched);
/* Create the sorting tasks. */
for (i = 0; i < e->nr_threads; i++)
scheduler_addtask(&e->sched, task_type_psort, task_subtype_none, i, 0, NULL,
NULL, 0);
/* Allocate and init the threads. */
if ((e->runners =
(struct runner *)malloc(sizeof(struct runner) * nr_threads)) == NULL)
......
......@@ -129,6 +129,7 @@ struct engine {
void engine_barrier(struct engine *e, int tid);
void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
int nr_queues, int nr_nodes, int nodeID, int policy);
void engine_launch(struct engine *e, int nr_runners, unsigned int mask);
void engine_prepare(struct engine *e);
void engine_step(struct engine *e);
void engine_maketasks(struct engine *e);
......
......@@ -1220,8 +1220,6 @@ void *runner_main(void *data) {
super = ci->super;
else if (cj != NULL && cj->super != NULL && cj->super->owner == r->qid)
super = cj->super;
/* else
super = NULL; */
}
/* Different types of tasks... */
......@@ -1290,7 +1288,10 @@ void *runner_main(void *data) {
case task_type_grav_down:
runner_dograv_down(r, t->ci);
break;
case task_type_rewait:
case task_type_psort:
space_do_parts_sort();
break;
case task_type_rewait:
for (struct task *t2 = (struct task *)t->ci;
t2 != (struct task *)t->cj; t2++) {
for (k = 0; k < t2->nr_unlock_tasks; k++)
......
......@@ -43,6 +43,9 @@
#include "lock.h"
#include "runner.h"
/* Shared sort structure. */
struct parallel_sort space_sort_struct;
/* Split size. */
int space_splitsize = space_splitsize_default;
int space_subsize = space_subsize_default;
......@@ -389,7 +392,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
/* Sort the parts according to their cells. */
// tic = getticks();
parts_sort(s->parts, s->xparts, ind, nr_parts, 0, s->nr_cells - 1);
space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1);
// message( "parts_sort took %.3f ms." , (double)(getticks() - tic) / CPU_TPS
// * 1000 );
......@@ -397,7 +400,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
for (k = 0; k < nr_parts; k++)
if (s->parts[k].gpart != NULL) s->parts[k].gpart->part = &s->parts[k];
/* Verify sort. */
/* Verify space_sort_struct. */
/* for ( k = 1 ; k < nr_parts ; k++ ) {
if ( ind[k-1] > ind[k] ) {
error( "Sort failed!" );
......@@ -473,88 +476,97 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
* @brief Sort the particles and condensed particles according to the given
*indices.
*
* @param parts The list of #part
* @param xparts The list of reduced particles
* @param ind The indices with respect to which the parts are sorted.
* @param N The number of parts
* @param min Lowest index.
* @param max highest index.
*/
void parts_sort(struct part *parts, struct xpart *xparts, int *ind, int N,
int min, int max) {
struct qstack {
volatile int i, j, min, max;
volatile int ready;
};
struct qstack *qstack;
unsigned int qstack_size = 2 * (max - min) + 10;
volatile unsigned int first, last, waiting;
int pivot;
int i, ii, j, jj, temp_i, qid;
struct part temp_p;
struct xpart temp_xp;
/* for ( int k = 0 ; k < N ; k++ )
if ( ind[k] > max || ind[k] < min )
error( "ind[%i]=%i is not in [%i,%i]." , k , ind[k] , min , max ); */
void space_parts_sort(struct space *s, int *ind, int N, int min, int max) {
// Populate a parallel_sort structure with the input data.
struct parallel_sort sort;
space_sort_struct.parts = s->parts;
space_sort_struct.xparts = s->xparts;
space_sort_struct.ind = ind;
space_sort_struct.stack_size = 2 * (max - min) + 10;
if ((space_sort_struct.stack = malloc(sizeof(struct qstack) *
space_sort_struct.stack_size)) == NULL)
error("Failed to allocate sorting stack.");
for (int i = 0; i < space_sort_struct.stack_size; i++)
space_sort_struct.stack[i].ready = 0;
// Add the first interval.
space_sort_struct.stack[0].i = 0;
space_sort_struct.stack[0].j = N - 1;
space_sort_struct.stack[0].min = min;
space_sort_struct.stack[0].max = max;
space_sort_struct.stack[0].ready = 1;
space_sort_struct.first = 0;
space_sort_struct.last = 1;
space_sort_struct.waiting = 1;
// Launch the sorting tasks.
engine_launch(s->e, s->e->nr_threads, (1 << task_type_psort));
/* Verify space_sort_struct. */
/* for (int i = 1; i < N; i++)
if (ind[i - 1] > ind[i])
error("Sorting failed (ind[%i]=%i,ind[%i]=%i).", i - 1, ind[i - 1], i,
ind[i]); */
// Clean up.
free(space_sort_struct.stack);
}
/* Allocate the stack. */
if ((qstack = malloc(sizeof(struct qstack) * qstack_size)) == NULL)
error("Failed to allocate qstack.");
void space_do_parts_sort() {
/* Init the interval stack. */
qstack[0].i = 0;
qstack[0].j = N - 1;
qstack[0].min = min;
qstack[0].max = max;
qstack[0].ready = 1;
for (i = 1; i < qstack_size; i++) qstack[i].ready = 0;
first = 0;
last = 1;
waiting = 1;
/* Pointers to the sorting data. */
int *ind = space_sort_struct.ind;
struct part *parts = space_sort_struct.parts;
struct xpart *xparts = space_sort_struct.xparts;
/* Main loop. */
while (waiting > 0) {
while (space_sort_struct.waiting > 0) {
/* Grab an interval off the queue. */
qid = (first++) % qstack_size;
int qid =
atomic_inc(&space_sort_struct.first) % space_sort_struct.stack_size;
/* Get the stack entry. */
i = qstack[qid].i;
j = qstack[qid].j;
min = qstack[qid].min;
max = qstack[qid].max;
qstack[qid].ready = 0;
while (!space_sort_struct.stack[qid].ready)
if (!space_sort_struct.waiting) return;
int i = space_sort_struct.stack[qid].i;
int j = space_sort_struct.stack[qid].j;
int min = space_sort_struct.stack[qid].min;
int max = space_sort_struct.stack[qid].max;
space_sort_struct.stack[qid].ready = 0;
/* Loop over sub-intervals. */
while (1) {
/* Bring beer. */
pivot = (min + max) / 2;
const int pivot = (min + max) / 2;
/* One pass of QuickSort's partitioning. */
ii = i;
jj = j;
int ii = i;
int jj = j;
while (ii < jj) {
while (ii <= j && ind[ii] <= pivot) ii++;
while (jj >= i && ind[jj] > pivot) jj--;
if (ii < jj) {
temp_i = ind[ii];
int temp_i = ind[ii];
ind[ii] = ind[jj];
ind[jj] = temp_i;
temp_p = parts[ii];
struct part temp_p = parts[ii];
parts[ii] = parts[jj];
parts[jj] = temp_p;
temp_xp = xparts[ii];
struct xpart temp_xp = xparts[ii];
xparts[ii] = xparts[jj];
xparts[jj] = temp_xp;
}
}
/* Verify sort. */
/* Verify space_sort_struct. */
/* for ( int k = i ; k <= jj ; k++ )
if ( ind[k] > pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
......@@ -573,13 +585,16 @@ void parts_sort(struct part *parts, struct xpart *xparts, int *ind, int N,
/* Recurse on the left? */
if (jj > i && pivot > min) {
qid = (last++) % qstack_size;
qstack[qid].i = i;
qstack[qid].j = jj;
qstack[qid].min = min;
qstack[qid].max = pivot;
qstack[qid].ready = 1;
if (waiting++ >= qstack_size) error("Qstack overflow.");
qid = atomic_inc(&space_sort_struct.last) %
space_sort_struct.stack_size;
space_sort_struct.stack[qid].i = i;
space_sort_struct.stack[qid].j = jj;
space_sort_struct.stack[qid].min = min;
space_sort_struct.stack[qid].max = pivot;
space_sort_struct.stack[qid].ready = 1;
if (atomic_inc(&space_sort_struct.waiting) >=
space_sort_struct.stack_size)
error("Qstack overflow.");
}
/* Recurse on the right? */
......@@ -592,14 +607,17 @@ void parts_sort(struct part *parts, struct xpart *xparts, int *ind, int N,
} else {
/* Recurse on the right? */
if (pivot + 1 < max) {
qid = (last++) % qstack_size;
qstack[qid].i = jj + 1;
qstack[qid].j = j;
qstack[qid].min = pivot + 1;
qstack[qid].max = max;
qstack[qid].ready = 1;
if ((waiting++) >= qstack_size) error("Qstack overflow.");
if (jj + 1 < j && pivot + 1 < max) {
qid = atomic_inc(&space_sort_struct.last) %
space_sort_struct.stack_size;
space_sort_struct.stack[qid].i = jj + 1;
space_sort_struct.stack[qid].j = j;
space_sort_struct.stack[qid].min = pivot + 1;
space_sort_struct.stack[qid].max = max;
space_sort_struct.stack[qid].ready = 1;
if (atomic_inc(&space_sort_struct.waiting) >=
space_sort_struct.stack_size)
error("Qstack overflow.");
}
/* Recurse on the left? */
......@@ -612,18 +630,9 @@ void parts_sort(struct part *parts, struct xpart *xparts, int *ind, int N,
} /* loop over sub-intervals. */
waiting--;
atomic_dec(&space_sort_struct.waiting);
} /* main loop. */
/* Verify sort. */
/* for ( i = 1 ; i < N ; i++ )
if ( ind[i-1] > ind[i] )
error( "Sorting failed (ind[%i]=%i,ind[%i]=%i)." , i-1 , ind[i-1] , i
, ind[i] ); */
/* Clean up. */
free(qstack);
}
void gparts_sort(struct gpart *gparts, int *ind, int N, int min, int max) {
......@@ -694,7 +703,7 @@ void gparts_sort(struct gpart *gparts, int *ind, int N, int min, int max) {
}
}
/* Verify sort. */
/* Verify space_sort_struct. */
/* for ( int k = i ; k <= jj ; k++ )
if ( ind[k] > pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
......@@ -756,7 +765,7 @@ void gparts_sort(struct gpart *gparts, int *ind, int N, int min, int max) {
} /* main loop. */
/* Verify sort. */
/* Verify space_sort_struct. */
/* for ( i = 1 ; i < N ; i++ )
if ( ind[i-1] > ind[i] )
error( "Sorting failed (ind[%i]=%i,ind[%i]=%i)." , i-1 , ind[i-1] , i
......
......@@ -111,9 +111,23 @@ struct space {
int nr_parts_foreign, size_parts_foreign;
};
/* Interval stack necessary for parallel particle sorting. */
struct qstack {
volatile int i, j, min, max;
volatile int ready;
};
struct parallel_sort {
struct part *parts;
struct xpart *xparts;
int *ind;
struct qstack *stack;
unsigned int stack_size;
volatile unsigned int first, last, waiting;
};
extern struct parallel_sort space_sort_struct;
/* function prototypes. */
void parts_sort(struct part *parts, struct xpart *xparts, int *ind, int N,
int min, int max);
void space_parts_sort(struct space *s, int *ind, int N, int min, int max);
void gparts_sort(struct gpart *gparts, int *ind, int N, int min, int max);
struct cell *space_getcell(struct space *s);
int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
......@@ -130,5 +144,6 @@ void space_map_cells_post(struct space *s, int full,
void space_rebuild(struct space *s, double h_max, int verbose);
void space_recycle(struct space *s, struct cell *c);
void space_split(struct space *s, struct cell *c);
void space_do_parts_sort();
#endif /* SWIFT_SPACE_H */
......@@ -46,7 +46,7 @@ const char *taskID_names[task_type_count] = {
"none", "sort", "self", "pair", "sub",
"ghost", "kick1", "kick2", "send", "recv",
"link", "grav_pp", "grav_mm", "grav_up", "grav_down",
"rewait"};
"psort", "rewait"};
/**
* @brief Unlock the cell held by this task.
......@@ -252,4 +252,4 @@ void task_addunlock_old(struct task *ta, struct task *tb) {
ta->nr_unlock_tasks += 1;
lock_unlock_blind(&ta->lock);
}
\ No newline at end of file
}
......@@ -44,6 +44,7 @@ enum task_types {
task_type_grav_mm,
task_type_grav_up,
task_type_grav_down,
task_type_psort,
task_type_rewait,
task_type_count
};
......@@ -92,4 +93,4 @@ void task_addunlock(struct task *ta, struct task *tb);
void task_unlock(struct task *t);
int task_lock(struct task *t);
#endif /* SWIFT_TASK_H */
\ No newline at end of file
#endif /* SWIFT_TASK_H */
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment