Commit 1a631e1d authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge branch 'master' into parallel_scheduler

Conflicts:
	src/runner.c
	src/space.c
	src/task.c
	src/task.h
parents de72d25a 0689a0cd
/******************************************************************************* /*******************************************************************************
* This file is part of SWIFT. * This file is part of SWIFT.
* Copyright (c) 2013 Matthieu Schaller (matthieu.schaller@durham.ac.uk), * Copyright (c) 2013- 2015:
* Pedro Gonnet (pedro.gonnet@durham.ac.uk). * Matthieu Schaller (matthieu.schaller@durham.ac.uk),
* Pedro Gonnet (pedro.gonnet@durham.ac.uk),
* Peter W. Draper (p.w.draper@durham.ac.uk).
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published * it under the terms of the GNU Lesser General Public License as published
...@@ -20,8 +22,10 @@ ...@@ -20,8 +22,10 @@
#include <stdio.h> #include <stdio.h>
#include "config.h"
#include "const.h" #include "const.h"
#include "part.h" #include "part.h"
#include "debug.h"
/** /**
* @brief Looks for the particle with the given id and prints its information to * @brief Looks for the particle with the given id and prints its information to
...@@ -98,3 +102,140 @@ void printParticle_single(struct part *p) { ...@@ -98,3 +102,140 @@ void printParticle_single(struct part *p) {
p->rho_dh, p->density.div_v, p->u, p->force.u_dt, p->force.balsara, p->rho_dh, p->density.div_v, p->u, p->force.u_dt, p->force.balsara,
p->force.POrho2, p->force.v_sig, p->dt); p->force.POrho2, p->force.v_sig, p->dt);
} }
#ifdef HAVE_METIS
/**
* @brief Dump the METIS graph in standard format, simple format and weights
* only, to a file.
*
* @description The standard format output can be read into the METIS
* command-line tools. The simple format is just the cell connectivity (this
* should not change between calls). The weights format is the standard one,
* minus the cell connectivity.
*
* The output filenames are generated from the prefix and the sequence number
* of calls. So the first is called <prefix>_std_001.dat, <prefix>_simple_001.dat,
* <prefix>_weights_001.dat, etc.
*
* @param prefix base output filename
* @param nvertices the number of vertices
* @param nvertexweights the number vertex weights
* @param cellconruns first part of cell connectivity info (CSR)
* @param cellcon second part of cell connectivity info (CSR)
* @param vertexweights weights of vertices
* @param vertexsizes size of vertices
* @param edgeweights weights of edges
*/
void dumpMETISGraph(const char *prefix, idx_t nvertices, idx_t nvertexweights,
idx_t *cellconruns, idx_t *cellcon, idx_t *vertexweights,
idx_t *vertexsizes, idx_t *edgeweights) {
FILE *stdfile = NULL;
FILE *simplefile = NULL;
FILE *weightfile = NULL;
char fname[200];
idx_t i;
idx_t j;
int haveedgeweight = 0;
int havevertexsize = 0;
int havevertexweight = 0;
static int nseq = 0;
nseq++;
if (vertexweights != NULL) {
for (i = 0; i < nvertices * nvertexweights; i++) {
if (vertexweights[i] != 1) {
havevertexweight = 1;
break;
}
}
}
if (vertexsizes != NULL) {
for (i = 0; i < nvertices; i++) {
if (vertexsizes[i] != 1) {
havevertexsize = 1;
break;
}
}
}
if (edgeweights != NULL) {
for (i = 0; i < cellconruns[nvertices]; i++) {
if (edgeweights[i] != 1) {
haveedgeweight = 1;
break;
}
}
}
/* Open output files. */
sprintf(fname, "%s_std_%03d.dat", prefix, nseq);
stdfile = fopen( fname, "w" );
sprintf(fname, "%s_simple_%03d.dat", prefix, nseq);
simplefile = fopen( fname, "w" );
if (havevertexweight || havevertexsize || haveedgeweight) {
sprintf(fname, "%s_weights_%03d.dat", prefix, nseq);
weightfile = fopen( fname, "w" );
}
/* Write the header lines. */
fprintf(stdfile, "%" PRIDX " %" PRIDX, nvertices, cellconruns[nvertices] / 2);
fprintf(simplefile, "%" PRIDX " %" PRIDX, nvertices, cellconruns[nvertices] / 2);
if (havevertexweight || havevertexsize || haveedgeweight) {
fprintf(weightfile, "%" PRIDX " %" PRIDX, nvertices, cellconruns[nvertices] / 2);
fprintf(stdfile, " %d%d%d", havevertexsize, havevertexweight, haveedgeweight);
fprintf(weightfile, " %d%d%d", havevertexsize, havevertexweight, haveedgeweight);
if (havevertexweight) {
fprintf(stdfile, " %d", (int)nvertexweights);
fprintf(weightfile, " %d", (int)nvertexweights);
}
}
/* Write the rest of the graph. */
for (i = 0; i < nvertices; i++) {
fprintf(stdfile, "\n");
fprintf(simplefile, "\n");
if (weightfile != NULL) {
fprintf(weightfile, "\n");
}
if (havevertexsize) {
fprintf(stdfile, " %" PRIDX, vertexsizes[i]);
fprintf(weightfile, " %" PRIDX, vertexsizes[i]);
}
if (havevertexweight) {
for (j = 0; j < nvertexweights; j++) {
fprintf(stdfile, " %" PRIDX, vertexweights[i * nvertexweights + j]);
fprintf(weightfile, " %" PRIDX, vertexweights[i * nvertexweights + j]);
}
}
for (j = cellconruns[i]; j < cellconruns[i + 1]; j++) {
fprintf(stdfile, " %" PRIDX, cellcon[j] + 1);
fprintf(simplefile, " %" PRIDX, cellcon[j] + 1);
if (haveedgeweight) {
fprintf(stdfile, " %" PRIDX, edgeweights[j]);
fprintf(weightfile, " %" PRIDX, edgeweights[j]);
}
}
}
fprintf(stdfile, "\n");
fprintf(simplefile, "\n");
if (weightfile != NULL) {
fprintf(weightfile, "\n");
}
fclose(stdfile);
fclose(simplefile);
if (weightfile != NULL) {
fclose(weightfile);
}
}
#endif
...@@ -27,4 +27,11 @@ void printParticle(struct part *parts, long long int i, int N); ...@@ -27,4 +27,11 @@ void printParticle(struct part *parts, long long int i, int N);
void printgParticle(struct gpart *parts, long long int i, int N); void printgParticle(struct gpart *parts, long long int i, int N);
void printParticle_single(struct part *p); void printParticle_single(struct part *p);
#ifdef HAVE_METIS
#include "metis.h"
void dumpMETISGraph(const char *prefix, idx_t nvtxs, idx_t ncon,
idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *vsize,
idx_t *adjwgt);
#endif
#endif /* SWIFT_DEBUG_H */ #endif /* SWIFT_DEBUG_H */
...@@ -169,6 +169,9 @@ void engine_redistribute(struct engine *e) { ...@@ -169,6 +169,9 @@ void engine_redistribute(struct engine *e) {
} }
const int cid = cell_getid(cdim, parts[k].x[0] * ih[0], const int cid = cell_getid(cdim, parts[k].x[0] * ih[0],
parts[k].x[1] * ih[1], parts[k].x[2] * ih[2]); parts[k].x[1] * ih[1], parts[k].x[2] * ih[2]);
/* if (cid < 0 || cid >= s->nr_cells)
error("Bad cell id %i for part %i at [%.3e,%.3e,%.3e].",
cid, k, parts[k].x[0], parts[k].x[1], parts[k].x[2]); */
dest[k] = cells[cid].nodeID; dest[k] = cells[cid].nodeID;
counts[nodeID * nr_nodes + dest[k]] += 1; counts[nodeID * nr_nodes + dest[k]] += 1;
} }
...@@ -303,7 +306,8 @@ void engine_repartition(struct engine *e) { ...@@ -303,7 +306,8 @@ void engine_repartition(struct engine *e) {
int nr_nodes = e->nr_nodes, nodeID = e->nodeID; int nr_nodes = e->nr_nodes, nodeID = e->nodeID;
float wscale = 1e-3, vscale = 1e-3, wscale_buff; float wscale = 1e-3, vscale = 1e-3, wscale_buff;
idx_t wtot = 0; idx_t wtot = 0;
const idx_t wmax = 1e9 / e->nr_nodes; idx_t wmax = 1e9 / e->nr_nodes;
idx_t wmin;
/* Clear the repartition flag. */ /* Clear the repartition flag. */
e->forcerepart = 0; e->forcerepart = 0;
...@@ -486,6 +490,24 @@ void engine_repartition(struct engine *e) { ...@@ -486,6 +490,24 @@ void engine_repartition(struct engine *e) {
/* As of here, only one node needs to compute the partition. */ /* As of here, only one node needs to compute the partition. */
if (nodeID == 0) { if (nodeID == 0) {
/* Final rescale of all weights to avoid a large range. Large ranges have
* been seen to cause an incomplete graph. */
wmin = wmax;
wmax = 0.0;
for (k = 0; k < 26 * nr_cells; k++) {
wmax = weights_e[k] > wmax ? weights_e[k] : wmax;
wmin = weights_e[k] < wmin ? weights_e[k] : wmin;
}
if ((wmax - wmin) > engine_maxmetisweight) {
wscale = engine_maxmetisweight / (wmax - wmin);
for (k = 0; k < 26 * nr_cells; k++) {
weights_e[k] = (weights_e[k] - wmin) * wscale + 1;
}
for (k = 0; k < nr_cells; k++) {
weights_v[k] = (weights_v[k] - wmin) * wscale + 1;
}
}
/* Check that the edge weights are fully symmetric. */ /* Check that the edge weights are fully symmetric. */
/* for ( cid = 0 ; cid < nr_cells ; cid++ ) /* for ( cid = 0 ; cid < nr_cells ; cid++ )
for ( k = 0 ; k < 26 ; k++ ) { for ( k = 0 ; k < 26 ; k++ ) {
...@@ -544,16 +566,47 @@ void engine_repartition(struct engine *e) { ...@@ -544,16 +566,47 @@ void engine_repartition(struct engine *e) {
/* Call METIS. */ /* Call METIS. */
idx_t one = 1, idx_nr_cells = nr_cells, idx_nr_nodes = nr_nodes; idx_t one = 1, idx_nr_cells = nr_cells, idx_nr_nodes = nr_nodes;
idx_t objval; idx_t objval;
/* Dump graph in METIS format */
/*dumpMETISGraph("metis_graph", idx_nr_cells, one, offsets, inds,
weights_v, NULL, weights_e);*/
if (METIS_PartGraphRecursive(&idx_nr_cells, &one, offsets, inds, weights_v, if (METIS_PartGraphRecursive(&idx_nr_cells, &one, offsets, inds, weights_v,
NULL, weights_e, &idx_nr_nodes, NULL, NULL, NULL, weights_e, &idx_nr_nodes, NULL, NULL,
options, &objval, nodeIDs) != METIS_OK) options, &objval, nodeIDs) != METIS_OK)
error("Call to METIS_PartGraphKway failed."); error("Call to METIS_PartGraphRecursive failed.");
/* Dump the 3d array of cell IDs. */ /* Dump the 3d array of cell IDs. */
/* printf( "engine_repartition: nodeIDs = reshape( [" ); /* printf( "engine_repartition: nodeIDs = reshape( [" );
for ( i = 0 ; i < cdim[0]*cdim[1]*cdim[2] ; i++ ) for ( i = 0 ; i < cdim[0]*cdim[1]*cdim[2] ; i++ )
printf( "%i " , (int)nodeIDs[ i ] ); printf( "%i " , (int)nodeIDs[ i ] );
printf("] ,%i,%i,%i);\n",cdim[0],cdim[1],cdim[2]); */ printf("] ,%i,%i,%i);\n",cdim[0],cdim[1],cdim[2]); */
/* Check that the nodeIDs are ok. */
for (k = 0; k < nr_cells; k++)
if (nodeIDs[k] < 0 || nodeIDs[k] >= nr_nodes)
error("Got bad nodeID %"PRIDX" for cell %i.", nodeIDs[k], k);
/* Check that the partition is complete and all nodes have some work. */
int present[nr_nodes];
int failed = 0;
for (i = 0; i < nr_nodes; i++) present[i] = 0;
for (i = 0; i < nr_cells; i++) present[nodeIDs[i]]++;
for (i = 0; i < nr_nodes; i++) {
if (! present[i]) {
failed = 1;
message("Node %d is not present after repartition", i);
}
}
/* If partition failed continue with the current one, but make this
* clear. */
if (failed) {
message("WARNING: METIS repartition has failed, continuing with "
"the current partition, load balance will not be optimal");
for (k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID;
}
} }
/* Broadcast the result of the partition. */ /* Broadcast the result of the partition. */
...@@ -2168,7 +2221,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, ...@@ -2168,7 +2221,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
s->nr_queues = nr_queues; s->nr_queues = nr_queues;
/* Append a kick1 task to each cell. */ /* Append a kick1 task to each cell. */
scheduler_reset(&e->sched, s->tot_cells + e->nr_threads); scheduler_reset(&e->sched, 2 * s->tot_cells + e->nr_threads);
for (k = 0; k < s->nr_cells; k++) for (k = 0; k < s->nr_cells; k++)
s->cells[k].kick1 = s->cells[k].kick1 =
scheduler_addtask(&e->sched, task_type_kick1, task_subtype_none, 0, 0, scheduler_addtask(&e->sched, task_type_kick1, task_subtype_none, 0, 0,
......
...@@ -47,6 +47,8 @@ ...@@ -47,6 +47,8 @@
#define engine_maxproxies 64 #define engine_maxproxies 64
#define engine_tasksreweight 10 #define engine_tasksreweight 10
#define engine_maxmetisweight 10000.0f
/* The rank of the engine as a global variable (for messages). */ /* The rank of the engine as a global variable (for messages). */
extern int engine_rank; extern int engine_rank;
......
...@@ -1291,6 +1291,9 @@ void *runner_main(void *data) { ...@@ -1291,6 +1291,9 @@ void *runner_main(void *data) {
case task_type_psort: case task_type_psort:
space_do_parts_sort(); space_do_parts_sort();
break; break;
case task_type_split_cell:
space_split(e->s, t->ci);
break;
case task_type_rewait: case task_type_rewait:
for (struct task *t2 = (struct task *)t->ci; for (struct task *t2 = (struct task *)t->ci;
t2 != (struct task *)t->cj; t2++) { t2 != (struct task *)t->cj; t2++) {
......
...@@ -130,6 +130,9 @@ void scheduler_splittasks(struct scheduler *s) { ...@@ -130,6 +130,9 @@ void scheduler_splittasks(struct scheduler *s) {
break; break;
} }
/* Skip sorting tasks. */
if (t->type == task_type_psort) continue;
/* Empty task? */ /* Empty task? */
if (t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) { if (t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) {
t->type = task_type_none; t->type = task_type_none;
...@@ -1069,7 +1072,7 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) { ...@@ -1069,7 +1072,7 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) {
} }
} }
/* Task definitely done. */ /* Task definitely done, signal any sleeping runners. */
if (!t->implicit) { if (!t->implicit) {
t->toc = getticks(); t->toc = getticks();
pthread_mutex_lock(&s->sleep_mutex); pthread_mutex_lock(&s->sleep_mutex);
......
...@@ -466,7 +466,11 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { ...@@ -466,7 +466,11 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
/* At this point, we have the upper-level cells, old or new. Now make /* At this point, we have the upper-level cells, old or new. Now make
sure that the parts in each cell are ok. */ sure that the parts in each cell are ok. */
// tic = getticks(); // tic = getticks();
for (k = 0; k < s->nr_cells; k++) space_split(s, &cells[k]); // for (k = 0; k < s->nr_cells; k++) space_split(s, &cells[k]);
for (k = 0; k < s->nr_cells; k++)
scheduler_addtask(&s->e->sched, task_type_split_cell, task_subtype_none,
k, 0, &cells[k], NULL, 0);
engine_launch(s->e, s->e->nr_threads, 1 << task_type_split_cell);
// message( "space_split took %.3f ms." , (double)(getticks() - tic) / CPU_TPS // message( "space_split took %.3f ms." , (double)(getticks() - tic) / CPU_TPS
// * 1000 ); // * 1000 );
...@@ -476,6 +480,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { ...@@ -476,6 +480,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
* @brief Sort the particles and condensed particles according to the given * @brief Sort the particles and condensed particles according to the given
*indices. *indices.
* *
* @param s The #space.
* @param ind The indices with respect to which the parts are sorted. * @param ind The indices with respect to which the parts are sorted.
* @param N The number of parts * @param N The number of parts
* @param min Lowest index. * @param min Lowest index.
...@@ -483,11 +488,11 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { ...@@ -483,11 +488,11 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
*/ */
void space_parts_sort(struct space *s, int *ind, int N, int min, int max) { void space_parts_sort(struct space *s, int *ind, int N, int min, int max) {
// Populate a parallel_sort structure with the input data. // Populate the global parallel_sort structure with the input data.
space_sort_struct.parts = s->parts; space_sort_struct.parts = s->parts;
space_sort_struct.xparts = s->xparts; space_sort_struct.xparts = s->xparts;
space_sort_struct.ind = ind; space_sort_struct.ind = ind;
space_sort_struct.stack_size = 2 * (max - min) + 10; space_sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads;
if ((space_sort_struct.stack = malloc(sizeof(struct qstack) * if ((space_sort_struct.stack = malloc(sizeof(struct qstack) *
space_sort_struct.stack_size)) == NULL) space_sort_struct.stack_size)) == NULL)
error("Failed to allocate sorting stack."); error("Failed to allocate sorting stack.");
...@@ -510,8 +515,9 @@ void space_parts_sort(struct space *s, int *ind, int N, int min, int max) { ...@@ -510,8 +515,9 @@ void space_parts_sort(struct space *s, int *ind, int N, int min, int max) {
/* Verify space_sort_struct. */ /* Verify space_sort_struct. */
/* for (int i = 1; i < N; i++) /* for (int i = 1; i < N; i++)
if (ind[i - 1] > ind[i]) if (ind[i - 1] > ind[i])
error("Sorting failed (ind[%i]=%i,ind[%i]=%i).", i - 1, ind[i - 1], i, error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1, ind[i - 1], i,
ind[i]); */ ind[i], min, max);
message("Sorting succeeded."); */
// Clean up. // Clean up.
free(space_sort_struct.stack); free(space_sort_struct.stack);
...@@ -525,15 +531,17 @@ void space_do_parts_sort() { ...@@ -525,15 +531,17 @@ void space_do_parts_sort() {
struct xpart *xparts = space_sort_struct.xparts; struct xpart *xparts = space_sort_struct.xparts;
/* Main loop. */ /* Main loop. */
while (space_sort_struct.waiting > 0) { while (space_sort_struct.waiting) {
/* Grab an interval off the queue. */ /* Grab an interval off the queue. */
int qid = int qid =
atomic_inc(&space_sort_struct.first) % space_sort_struct.stack_size; atomic_inc(&space_sort_struct.first) % space_sort_struct.stack_size;
/* Get the stack entry. */ /* Wait for the entry to be ready, or for the sorting do be done. */
while (!space_sort_struct.stack[qid].ready) while (!space_sort_struct.stack[qid].ready)
if (!space_sort_struct.waiting) return; if (!space_sort_struct.waiting) return;
/* Get the stack entry. */
int i = space_sort_struct.stack[qid].i; int i = space_sort_struct.stack[qid].i;
int j = space_sort_struct.stack[qid].j; int j = space_sort_struct.stack[qid].j;
int min = space_sort_struct.stack[qid].min; int min = space_sort_struct.stack[qid].min;
...@@ -545,6 +553,8 @@ void space_do_parts_sort() { ...@@ -545,6 +553,8 @@ void space_do_parts_sort() {
/* Bring beer. */ /* Bring beer. */
const int pivot = (min + max) / 2; const int pivot = (min + max) / 2;
/* message("Working on interval [%i,%i] with min=%i, max=%i, pivot=%i.",
i, j, min, max, pivot); */
/* One pass of QuickSort's partitioning. */ /* One pass of QuickSort's partitioning. */
int ii = i; int ii = i;
...@@ -566,18 +576,18 @@ void space_do_parts_sort() { ...@@ -566,18 +576,18 @@ void space_do_parts_sort() {
} }
/* Verify space_sort_struct. */ /* Verify space_sort_struct. */
/* for ( int k = i ; k <= jj ; k++ ) /* for (int k = i; k <= jj; k++)
if ( ind[k] > pivot ) { if (ind[k] > pivot) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k,
N=%i." , k , ind[k] , pivot , i , j , N ); ind[k], pivot, i, j);
error( "Partition failed (<=pivot)." ); error("Partition failed (<=pivot).");
} }
for ( int k = jj+1 ; k <= j ; k++ ) for (int k = jj + 1; k <= j; k++)
if ( ind[k] <= pivot ) { if (ind[k] <= pivot) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k,
N=%i." , k , ind[k] , pivot , i , j , N ); ind[k], pivot, i, j);
error( "Partition failed (>pivot)." ); error("Partition failed (>pivot).");
} */ } */
/* Split-off largest interval. */ /* Split-off largest interval. */
if (jj - i > j - jj + 1) { if (jj - i > j - jj + 1) {
...@@ -586,14 +596,15 @@ void space_do_parts_sort() { ...@@ -586,14 +596,15 @@ void space_do_parts_sort() {
if (jj > i && pivot > min) { if (jj > i && pivot > min) {
qid = atomic_inc(&space_sort_struct.last) % qid = atomic_inc(&space_sort_struct.last) %
space_sort_struct.stack_size; space_sort_struct.stack_size;
while (space_sort_struct.stack[qid].ready);
space_sort_struct.stack[qid].i = i; space_sort_struct.stack[qid].i = i;
space_sort_struct.stack[qid].j = jj; space_sort_struct.stack[qid].j = jj;
space_sort_struct.stack[qid].min = min; space_sort_struct.stack[qid].min = min;
space_sort_struct.stack[qid].max = pivot; space_sort_struct.stack[qid].max = pivot;
space_sort_struct.stack[qid].ready = 1;
if (atomic_inc(&space_sort_struct.waiting) >= if (atomic_inc(&space_sort_struct.waiting) >=
space_sort_struct.stack_size) space_sort_struct.stack_size)
error("Qstack overflow."); error("Qstack overflow.");
space_sort_struct.stack[qid].ready = 1;
} }
/* Recurse on the right? */ /* Recurse on the right? */
...@@ -606,17 +617,18 @@ void space_do_parts_sort() { ...@@ -606,17 +617,18 @@ void space_do_parts_sort() {
} else { } else {
/* Recurse on the right? */ /* Recurse on the right? */
if (jj + 1 < j && pivot + 1 < max) { if (pivot + 1 < max) {
qid = atomic_inc(&space_sort_struct.last) % qid = atomic_inc(&space_sort_struct.last) %
space_sort_struct.stack_size; space_sort_struct.stack_size;
while (space_sort_struct.stack[qid].ready);
space_sort_struct.stack[qid].i = jj + 1; space_sort_struct.stack[qid].i = jj + 1;
space_sort_struct.stack[qid].j = j; space_sort_struct.stack[qid].j = j;
space_sort_struct.stack[qid].min = pivot + 1; space_sort_struct.stack[qid].min = pivot + 1;
space_sort_struct.stack[qid].max = max; space_sort_struct.stack[qid].max = max;
space_sort_struct.stack[qid].ready = 1;
if (atomic_inc(&space_sort_struct.waiting) >= if (atomic_inc(&space_sort_struct.waiting) >=
space_sort_struct.stack_size) space_sort_struct.stack_size)
error("Qstack overflow."); error("Qstack overflow.");
space_sort_struct.stack[qid].ready = 1;
} }
/* Recurse on the left? */ /* Recurse on the left? */
......
...@@ -46,7 +46,7 @@ const char *taskID_names[task_type_count] = { ...@@ -46,7 +46,7 @@ const char *taskID_names[task_type_count] = {
"none", "sort", "self", "pair", "sub", "none", "sort", "self", "pair", "sub",
"ghost", "kick1", "kick2", "send", "recv", "ghost", "kick1", "kick2", "send", "recv",
"link", "grav_pp", "grav_mm", "grav_up", "grav_down", "link", "grav_pp", "grav_mm", "grav_up", "grav_down",
"psort", "rewait"}; "psort", "split_cell", "rewait"};
/** /**
* @brief Unlock the cell held by this task. * @brief Unlock the cell held by this task.
......
...@@ -45,6 +45,7 @@ enum task_types { ...@@ -45,6 +45,7 @@ enum task_types {
task_type_grav_up, task_type_grav_up,
task_type_grav_down, task_type_grav_down,
task_type_psort, task_type_psort,
task_type_split_cell,
task_type_rewait, task_type_rewait,
task_type_count task_type_count
}; };
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment