Commit 1a631e1d authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge branch 'master' into parallel_scheduler

Conflicts:
	src/runner.c
	src/space.c
	src/task.c
	src/task.h
parents de72d25a 0689a0cd
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2013 Matthieu Schaller (matthieu.schaller@durham.ac.uk),
* Pedro Gonnet (pedro.gonnet@durham.ac.uk).
* Copyright (c) 2013- 2015:
* Matthieu Schaller (matthieu.schaller@durham.ac.uk),
* Pedro Gonnet (pedro.gonnet@durham.ac.uk),
* Peter W. Draper (p.w.draper@durham.ac.uk).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
......@@ -20,8 +22,10 @@
#include <stdio.h>
#include "config.h"
#include "const.h"
#include "part.h"
#include "debug.h"
/**
* @brief Looks for the particle with the given id and prints its information to
......@@ -98,3 +102,140 @@ void printParticle_single(struct part *p) {
p->rho_dh, p->density.div_v, p->u, p->force.u_dt, p->force.balsara,
p->force.POrho2, p->force.v_sig, p->dt);
}
#ifdef HAVE_METIS
/**
 * @brief Dump the METIS graph in standard format, simple format and weights
 * only, to a file.
 *
 * @description The standard format output can be read into the METIS
 * command-line tools. The simple format is just the cell connectivity (this
 * should not change between calls). The weights format is the standard one,
 * minus the cell connectivity.
 *
 * The output filenames are generated from the prefix and the sequence number
 * of calls. So the first is called <prefix>_std_001.dat, <prefix>_simple_001.dat,
 * <prefix>_weights_001.dat, etc.
 *
 * Note: uses a static sequence counter, so this function is not thread-safe
 * and is intended for debugging dumps from a single thread.
 *
 * @param prefix base output filename
 * @param nvertices the number of vertices
 * @param nvertexweights the number vertex weights
 * @param cellconruns first part of cell connectivity info (CSR)
 * @param cellcon second part of cell connectivity info (CSR)
 * @param vertexweights weights of vertices
 * @param vertexsizes size of vertices
 * @param edgeweights weights of edges
 */
void dumpMETISGraph(const char *prefix, idx_t nvertices, idx_t nvertexweights,
                    idx_t *cellconruns, idx_t *cellcon, idx_t *vertexweights,
                    idx_t *vertexsizes, idx_t *edgeweights) {
  FILE *stdfile = NULL;
  FILE *simplefile = NULL;
  FILE *weightfile = NULL;
  char fname[200];
  idx_t i;
  idx_t j;
  int haveedgeweight = 0;
  int havevertexsize = 0;
  int havevertexweight = 0;

  /* Sequence number for this dump (not thread-safe). */
  static int nseq = 0;
  nseq++;

  /* Weights/sizes are only worth dumping if any of them differ from the
   * METIS default of 1. */
  if (vertexweights != NULL) {
    for (i = 0; i < nvertices * nvertexweights; i++) {
      if (vertexweights[i] != 1) {
        havevertexweight = 1;
        break;
      }
    }
  }

  if (vertexsizes != NULL) {
    for (i = 0; i < nvertices; i++) {
      if (vertexsizes[i] != 1) {
        havevertexsize = 1;
        break;
      }
    }
  }

  if (edgeweights != NULL) {
    /* cellconruns[nvertices] is the CSR total number of edges. */
    for (i = 0; i < cellconruns[nvertices]; i++) {
      if (edgeweights[i] != 1) {
        haveedgeweight = 1;
        break;
      }
    }
  }

  /* Open output files, checking each open so we never write through a NULL
   * stream (undefined behavior). snprintf guards against a long prefix
   * overflowing fname. */
  snprintf(fname, sizeof(fname), "%s_std_%03d.dat", prefix, nseq);
  if ((stdfile = fopen(fname, "w")) == NULL) {
    fprintf(stderr, "dumpMETISGraph: failed to open '%s'\n", fname);
    return;
  }
  snprintf(fname, sizeof(fname), "%s_simple_%03d.dat", prefix, nseq);
  if ((simplefile = fopen(fname, "w")) == NULL) {
    fprintf(stderr, "dumpMETISGraph: failed to open '%s'\n", fname);
    fclose(stdfile);
    return;
  }
  if (havevertexweight || havevertexsize || haveedgeweight) {
    snprintf(fname, sizeof(fname), "%s_weights_%03d.dat", prefix, nseq);
    if ((weightfile = fopen(fname, "w")) == NULL) {
      fprintf(stderr, "dumpMETISGraph: failed to open '%s'\n", fname);
      fclose(stdfile);
      fclose(simplefile);
      return;
    }
  }

  /* Write the header lines. METIS expects "<nvertices> <nedges>" where
   * nedges counts each undirected edge once, hence the divide by 2. */
  fprintf(stdfile, "%" PRIDX " %" PRIDX, nvertices, cellconruns[nvertices] / 2);
  fprintf(simplefile, "%" PRIDX " %" PRIDX, nvertices, cellconruns[nvertices] / 2);
  if (weightfile != NULL) {
    fprintf(weightfile, "%" PRIDX " %" PRIDX, nvertices, cellconruns[nvertices] / 2);

    /* The fmt flags: vertex sizes, vertex weights, edge weights. */
    fprintf(stdfile, " %d%d%d", havevertexsize, havevertexweight, haveedgeweight);
    fprintf(weightfile, " %d%d%d", havevertexsize, havevertexweight, haveedgeweight);

    if (havevertexweight) {
      fprintf(stdfile, " %d", (int)nvertexweights);
      fprintf(weightfile, " %d", (int)nvertexweights);
    }
  }

  /* Write the rest of the graph: one line per vertex with optional size,
   * optional weights, then the (1-based) adjacency list. */
  for (i = 0; i < nvertices; i++) {
    fprintf(stdfile, "\n");
    fprintf(simplefile, "\n");
    if (weightfile != NULL) {
      fprintf(weightfile, "\n");
    }

    /* weightfile is guaranteed open here: it is created whenever any of the
     * have* flags is set. */
    if (havevertexsize) {
      fprintf(stdfile, " %" PRIDX, vertexsizes[i]);
      fprintf(weightfile, " %" PRIDX, vertexsizes[i]);
    }

    if (havevertexweight) {
      for (j = 0; j < nvertexweights; j++) {
        fprintf(stdfile, " %" PRIDX, vertexweights[i * nvertexweights + j]);
        fprintf(weightfile, " %" PRIDX, vertexweights[i * nvertexweights + j]);
      }
    }

    for (j = cellconruns[i]; j < cellconruns[i + 1]; j++) {
      /* +1 converts 0-based cell ids to METIS' 1-based vertex numbering. */
      fprintf(stdfile, " %" PRIDX, cellcon[j] + 1);
      fprintf(simplefile, " %" PRIDX, cellcon[j] + 1);
      if (haveedgeweight) {
        fprintf(stdfile, " %" PRIDX, edgeweights[j]);
        fprintf(weightfile, " %" PRIDX, edgeweights[j]);
      }
    }
  }
  fprintf(stdfile, "\n");
  fprintf(simplefile, "\n");
  if (weightfile != NULL) {
    fprintf(weightfile, "\n");
  }

  fclose(stdfile);
  fclose(simplefile);
  if (weightfile != NULL) {
    fclose(weightfile);
  }
}
#endif
......@@ -27,4 +27,11 @@ void printParticle(struct part *parts, long long int i, int N);
void printgParticle(struct gpart *parts, long long int i, int N);
void printParticle_single(struct part *p);
#ifdef HAVE_METIS
#include "metis.h"
void dumpMETISGraph(const char *prefix, idx_t nvtxs, idx_t ncon,
idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *vsize,
idx_t *adjwgt);
#endif
#endif /* SWIFT_DEBUG_H */
......@@ -169,6 +169,9 @@ void engine_redistribute(struct engine *e) {
}
const int cid = cell_getid(cdim, parts[k].x[0] * ih[0],
parts[k].x[1] * ih[1], parts[k].x[2] * ih[2]);
/* if (cid < 0 || cid >= s->nr_cells)
error("Bad cell id %i for part %i at [%.3e,%.3e,%.3e].",
cid, k, parts[k].x[0], parts[k].x[1], parts[k].x[2]); */
dest[k] = cells[cid].nodeID;
counts[nodeID * nr_nodes + dest[k]] += 1;
}
......@@ -303,7 +306,8 @@ void engine_repartition(struct engine *e) {
int nr_nodes = e->nr_nodes, nodeID = e->nodeID;
float wscale = 1e-3, vscale = 1e-3, wscale_buff;
idx_t wtot = 0;
const idx_t wmax = 1e9 / e->nr_nodes;
idx_t wmax = 1e9 / e->nr_nodes;
idx_t wmin;
/* Clear the repartition flag. */
e->forcerepart = 0;
......@@ -486,6 +490,24 @@ void engine_repartition(struct engine *e) {
/* As of here, only one node needs to compute the partition. */
if (nodeID == 0) {
/* Final rescale of all weights to avoid a large range. Large ranges have
* been seen to cause an incomplete graph. */
wmin = wmax;
wmax = 0.0;
for (k = 0; k < 26 * nr_cells; k++) {
wmax = weights_e[k] > wmax ? weights_e[k] : wmax;
wmin = weights_e[k] < wmin ? weights_e[k] : wmin;
}
if ((wmax - wmin) > engine_maxmetisweight) {
wscale = engine_maxmetisweight / (wmax - wmin);
for (k = 0; k < 26 * nr_cells; k++) {
weights_e[k] = (weights_e[k] - wmin) * wscale + 1;
}
for (k = 0; k < nr_cells; k++) {
weights_v[k] = (weights_v[k] - wmin) * wscale + 1;
}
}
/* Check that the edge weights are fully symmetric. */
/* for ( cid = 0 ; cid < nr_cells ; cid++ )
for ( k = 0 ; k < 26 ; k++ ) {
......@@ -544,16 +566,47 @@ void engine_repartition(struct engine *e) {
/* Call METIS. */
idx_t one = 1, idx_nr_cells = nr_cells, idx_nr_nodes = nr_nodes;
idx_t objval;
/* Dump graph in METIS format */
/*dumpMETISGraph("metis_graph", idx_nr_cells, one, offsets, inds,
weights_v, NULL, weights_e);*/
if (METIS_PartGraphRecursive(&idx_nr_cells, &one, offsets, inds, weights_v,
NULL, weights_e, &idx_nr_nodes, NULL, NULL,
options, &objval, nodeIDs) != METIS_OK)
error("Call to METIS_PartGraphKway failed.");
error("Call to METIS_PartGraphRecursive failed.");
/* Dump the 3d array of cell IDs. */
/* printf( "engine_repartition: nodeIDs = reshape( [" );
for ( i = 0 ; i < cdim[0]*cdim[1]*cdim[2] ; i++ )
printf( "%i " , (int)nodeIDs[ i ] );
printf("] ,%i,%i,%i);\n",cdim[0],cdim[1],cdim[2]); */
/* Check that the nodeIDs are ok. */
for (k = 0; k < nr_cells; k++)
if (nodeIDs[k] < 0 || nodeIDs[k] >= nr_nodes)
error("Got bad nodeID %"PRIDX" for cell %i.", nodeIDs[k], k);
/* Check that the partition is complete and all nodes have some work. */
int present[nr_nodes];
int failed = 0;
for (i = 0; i < nr_nodes; i++) present[i] = 0;
for (i = 0; i < nr_cells; i++) present[nodeIDs[i]]++;
for (i = 0; i < nr_nodes; i++) {
if (! present[i]) {
failed = 1;
message("Node %d is not present after repartition", i);
}
}
/* If partition failed continue with the current one, but make this
* clear. */
if (failed) {
message("WARNING: METIS repartition has failed, continuing with "
"the current partition, load balance will not be optimal");
for (k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID;
}
}
/* Broadcast the result of the partition. */
......@@ -2168,7 +2221,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
s->nr_queues = nr_queues;
/* Append a kick1 task to each cell. */
scheduler_reset(&e->sched, s->tot_cells + e->nr_threads);
scheduler_reset(&e->sched, 2 * s->tot_cells + e->nr_threads);
for (k = 0; k < s->nr_cells; k++)
s->cells[k].kick1 =
scheduler_addtask(&e->sched, task_type_kick1, task_subtype_none, 0, 0,
......
......@@ -47,6 +47,8 @@
#define engine_maxproxies 64
#define engine_tasksreweight 10
#define engine_maxmetisweight 10000.0f
/* The rank of the engine as a global variable (for messages). */
extern int engine_rank;
......
......@@ -1291,6 +1291,9 @@ void *runner_main(void *data) {
case task_type_psort:
space_do_parts_sort();
break;
case task_type_split_cell:
space_split(e->s, t->ci);
break;
case task_type_rewait:
for (struct task *t2 = (struct task *)t->ci;
t2 != (struct task *)t->cj; t2++) {
......
......@@ -130,6 +130,9 @@ void scheduler_splittasks(struct scheduler *s) {
break;
}
/* Skip sorting tasks. */
if (t->type == task_type_psort) continue;
/* Empty task? */
if (t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) {
t->type = task_type_none;
......@@ -1069,7 +1072,7 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) {
}
}
/* Task definitely done. */
/* Task definitely done, signal any sleeping runners. */
if (!t->implicit) {
t->toc = getticks();
pthread_mutex_lock(&s->sleep_mutex);
......
......@@ -466,7 +466,11 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
/* At this point, we have the upper-level cells, old or new. Now make
sure that the parts in each cell are ok. */
// tic = getticks();
for (k = 0; k < s->nr_cells; k++) space_split(s, &cells[k]);
// for (k = 0; k < s->nr_cells; k++) space_split(s, &cells[k]);
for (k = 0; k < s->nr_cells; k++)
scheduler_addtask(&s->e->sched, task_type_split_cell, task_subtype_none,
k, 0, &cells[k], NULL, 0);
engine_launch(s->e, s->e->nr_threads, 1 << task_type_split_cell);
// message( "space_split took %.3f ms." , (double)(getticks() - tic) / CPU_TPS
// * 1000 );
......@@ -476,6 +480,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
* @brief Sort the particles and condensed particles according to the given
*indices.
*
* @param s The #space.
* @param ind The indices with respect to which the parts are sorted.
* @param N The number of parts
* @param min Lowest index.
......@@ -483,11 +488,11 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
*/
void space_parts_sort(struct space *s, int *ind, int N, int min, int max) {
// Populate a parallel_sort structure with the input data.
// Populate the global parallel_sort structure with the input data.
space_sort_struct.parts = s->parts;
space_sort_struct.xparts = s->xparts;
space_sort_struct.ind = ind;
space_sort_struct.stack_size = 2 * (max - min) + 10;
space_sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads;
if ((space_sort_struct.stack = malloc(sizeof(struct qstack) *
space_sort_struct.stack_size)) == NULL)
error("Failed to allocate sorting stack.");
......@@ -510,8 +515,9 @@ void space_parts_sort(struct space *s, int *ind, int N, int min, int max) {
/* Verify space_sort_struct. */
/* for (int i = 1; i < N; i++)
if (ind[i - 1] > ind[i])
error("Sorting failed (ind[%i]=%i,ind[%i]=%i).", i - 1, ind[i - 1], i,
ind[i]); */
error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1, ind[i - 1], i,
ind[i], min, max);
message("Sorting succeeded."); */
// Clean up.
free(space_sort_struct.stack);
......@@ -525,15 +531,17 @@ void space_do_parts_sort() {
struct xpart *xparts = space_sort_struct.xparts;
/* Main loop. */
while (space_sort_struct.waiting > 0) {
while (space_sort_struct.waiting) {
/* Grab an interval off the queue. */
int qid =
atomic_inc(&space_sort_struct.first) % space_sort_struct.stack_size;
/* Get the stack entry. */
/* Wait for the entry to be ready, or for the sorting do be done. */
while (!space_sort_struct.stack[qid].ready)
if (!space_sort_struct.waiting) return;
/* Get the stack entry. */
int i = space_sort_struct.stack[qid].i;
int j = space_sort_struct.stack[qid].j;
int min = space_sort_struct.stack[qid].min;
......@@ -545,6 +553,8 @@ void space_do_parts_sort() {
/* Bring beer. */
const int pivot = (min + max) / 2;
/* message("Working on interval [%i,%i] with min=%i, max=%i, pivot=%i.",
i, j, min, max, pivot); */
/* One pass of QuickSort's partitioning. */
int ii = i;
......@@ -566,18 +576,18 @@ void space_do_parts_sort() {
}
/* Verify space_sort_struct. */
/* for ( int k = i ; k <= jj ; k++ )
if ( ind[k] > pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
N=%i." , k , ind[k] , pivot , i , j , N );
error( "Partition failed (<=pivot)." );
}
for ( int k = jj+1 ; k <= j ; k++ )
if ( ind[k] <= pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
N=%i." , k , ind[k] , pivot , i , j , N );
error( "Partition failed (>pivot)." );
} */
/* for (int k = i; k <= jj; k++)
if (ind[k] > pivot) {
message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k,
ind[k], pivot, i, j);
error("Partition failed (<=pivot).");
}
for (int k = jj + 1; k <= j; k++)
if (ind[k] <= pivot) {
message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k,
ind[k], pivot, i, j);
error("Partition failed (>pivot).");
} */
/* Split-off largest interval. */
if (jj - i > j - jj + 1) {
......@@ -586,14 +596,15 @@ void space_do_parts_sort() {
if (jj > i && pivot > min) {
qid = atomic_inc(&space_sort_struct.last) %
space_sort_struct.stack_size;
while (space_sort_struct.stack[qid].ready);
space_sort_struct.stack[qid].i = i;
space_sort_struct.stack[qid].j = jj;
space_sort_struct.stack[qid].min = min;
space_sort_struct.stack[qid].max = pivot;
space_sort_struct.stack[qid].ready = 1;
if (atomic_inc(&space_sort_struct.waiting) >=
space_sort_struct.stack_size)
error("Qstack overflow.");
space_sort_struct.stack[qid].ready = 1;
}
/* Recurse on the right? */
......@@ -606,17 +617,18 @@ void space_do_parts_sort() {
} else {
/* Recurse on the right? */
if (jj + 1 < j && pivot + 1 < max) {
if (pivot + 1 < max) {
qid = atomic_inc(&space_sort_struct.last) %
space_sort_struct.stack_size;
while (space_sort_struct.stack[qid].ready);
space_sort_struct.stack[qid].i = jj + 1;
space_sort_struct.stack[qid].j = j;
space_sort_struct.stack[qid].min = pivot + 1;
space_sort_struct.stack[qid].max = max;
space_sort_struct.stack[qid].ready = 1;
if (atomic_inc(&space_sort_struct.waiting) >=
space_sort_struct.stack_size)
error("Qstack overflow.");
space_sort_struct.stack[qid].ready = 1;
}
/* Recurse on the left? */
......
......@@ -46,7 +46,7 @@ const char *taskID_names[task_type_count] = {
"none", "sort", "self", "pair", "sub",
"ghost", "kick1", "kick2", "send", "recv",
"link", "grav_pp", "grav_mm", "grav_up", "grav_down",
"psort", "rewait"};
"psort", "split_cell", "rewait"};
/**
* @brief Unlock the cell held by this task.
......
......@@ -45,6 +45,7 @@ enum task_types {
task_type_grav_up,
task_type_grav_down,
task_type_psort,
task_type_split_cell,
task_type_rewait,
task_type_count
};
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment