Commit decf122a authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

Add a number of system sanity checks that run between the individual stages of engine_step.


Former-commit-id: 70bc02755eb2c0d36163afa582ef8338bb9b042e
parent f02d42e2
...@@ -773,7 +773,7 @@ int main ( int argc , char *argv[] ) { ...@@ -773,7 +773,7 @@ int main ( int argc , char *argv[] ) {
/* Initialize the engine with this space. */ /* Initialize the engine with this space. */
tic = getticks(); tic = getticks();
message( "nr_nodes is %i." , nr_nodes ); message( "nr_nodes is %i." , nr_nodes );
engine_init( &e , &s , dt_max , nr_threads , nr_queues , nr_nodes , myrank , ENGINE_POLICY | engine_policy_steal ); engine_init( &e , &s , dt_max , nr_threads , nr_queues , nr_nodes , myrank , ENGINE_POLICY | engine_policy_steal | engine_policy_paranoid );
if ( myrank == 0 ) if ( myrank == 0 )
message( "engine_init took %.3f ms." , ((double)(getticks() - tic)) / CPU_TPS * 1000 ); fflush(stdout); message( "engine_init took %.3f ms." , ((double)(getticks() - tic)) / CPU_TPS * 1000 ); fflush(stdout);
......
...@@ -23,8 +23,26 @@ ...@@ -23,8 +23,26 @@
#include "const.h" #include "const.h"
#include "part.h" #include "part.h"
#include "lock.h"
#include "multipole.h"
#include "space.h"
#include "cell.h"
/**
 * @brief Write a one-line summary of a cell to the standard output.
 *
 * The line contains the cell's address, its three `loc` and three `h`
 * components, and its `depth`, `split` and `maxdepth` fields.
 *
 * @param c The #cell to describe; it is dereferenced unconditionally.
 */
void print_cell(struct cell *c) {
  fprintf(stdout,
          "## Cell 0x%0zx: loc=[%.3e,%.3e,%.3e], h=[%.3e,%.3e,%.3e], depth=%i, split=%i, maxdepth=%i.\n",
          (size_t)c,                           /* address of the cell */
          c->loc[0], c->loc[1], c->loc[2],     /* loc components */
          c->h[0], c->h[1], c->h[2],           /* h components */
          c->depth, c->split, c->maxdepth);
}
/** /**
* @brief Looks for the particle with the given id and prints its information to the standard output. * @brief Looks for the particle with the given id and prints its information to the standard output.
* *
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
void print_cell(struct cell *c);
void printParticle(struct part *parts, long long int i, int N); void printParticle(struct part *parts, long long int i, int N);
void printgParticle(struct gpart *parts, long long int i, int N); void printgParticle(struct gpart *parts, long long int i, int N);
void printParticle_single ( struct part *p ); void printParticle_single ( struct part *p );
...@@ -78,6 +78,81 @@ ...@@ -78,6 +78,81 @@
int engine_rank; int engine_rank;
/**
 * @brief Check if a single particle is OK.
 *
 * A particle fails the check if the pointer is NULL, if its `mass` or `h`
 * field is exactly zero, or if all three components of its position `x`
 * are exactly zero. A message is emitted for every failure; the particle
 * itself is printed only when there is a particle to print.
 *
 * @param p The #part to inspect, may be NULL.
 *
 * @return Zero if all checks passed, non-zero otherwise.
 */
int engine_check_part(struct part *p) {

  /* Handle NULL on its own so that the particle printer is never handed
     a pointer it cannot dereference. */
  if (p == NULL) {
    message("Bad particle data.");
    return 1;
  }

  /* Zero mass or zero h. */
  if (p->mass == 0.0f || p->h == 0.0f) {
    message("Bad particle data.");
    printParticle_single(p);
    return 1;
  }

  /* Position sitting exactly on the origin. */
  if (p->x[0] == 0.0 && p->x[1] == 0.0 && p->x[2] == 0.0) {
    message("Bad particle location.");
    printParticle_single(p);
    return 1;
  }

  return 0;
}
/**
 * @brief Check if a cell's data is reasonable, also check if its particles
 *        are OK.
 *
 * Three checks are made: the cell must hold at least one particle, every
 * particle in `c->parts[0 .. c->count-1]` must pass engine_check_part(), and,
 * if the cell is split, the counts of its non-NULL progeny must sum to the
 * cell's own count. On any failure the cell is printed with print_cell() and
 * the problem is reported through error().
 *
 * The signature matches the callback passed to space_map_cells_post() in
 * engine_check(), hence the extra pointer argument.
 *
 * @param c The #cell to check.
 * @param data Unused.
 *
 * This function does not return a value; failures are reported via error().
 */
void engine_check_cell(struct cell *c, void *data) {

  /* The extra pointer is not needed by this check. */
  (void)data;

  /* Check the cell data. */
  if (c->count == 0) {
    print_cell(c);
    error("Empty cell.");
  }

  /* Check the particles. */
  for (int k = 0; k < c->count; k++) {
    if (engine_check_part(&c->parts[k])) {
      print_cell(c);
      error("Bad particle in cell.");
    }
  }

  /* Check that the progeny, if any, contain all the particles. */
  if (c->split) {
    int count = 0;
    for (int k = 0; k < 8; k++) {
      if (c->progeny[k] != NULL) {
        count += c->progeny[k]->count;
      }
    }
    if (count != c->count) {
      print_cell(c);
      error("Progeny cell counts don't add up.");
    }
  }
}
/**
* @brief Runs a series of checks to make sure we have no bad particles.
*/
void engine_check(struct engine *e) {
/* Check all particles directly. */
struct space *s = e->s;
for (int k = 0; k < s->nr_parts; k++) {
if (engine_check_part(&s->parts[k])) {
error("Bad particle s->parts[%i], aborting.", k);
}
}
/* Check each cell in the space. */
space_map_cells_post(s, 1, &engine_check_cell, NULL);
}
/** /**
* @brief Link a density/force task to a cell. * @brief Link a density/force task to a cell.
* *
...@@ -240,7 +315,6 @@ void engine_redistribute ( struct engine *e ) { ...@@ -240,7 +315,6 @@ void engine_redistribute ( struct engine *e ) {
MPI_Error_string( stats[k].MPI_ERROR , buff , &res ); MPI_Error_string( stats[k].MPI_ERROR , buff , &res );
message( "request %i has error '%s'." , k , buff ); message( "request %i has error '%s'." , k , buff );
} }
message( "counts is [ %i %i %i %i ]." , counts[0] , counts[1] , counts[2] , counts[3] );
error( "Failed during waitall for part data." ); error( "Failed during waitall for part data." );
} }
...@@ -1722,6 +1796,11 @@ void engine_step ( struct engine *e ) { ...@@ -1722,6 +1796,11 @@ void engine_step ( struct engine *e ) {
TIMER_TIC2 TIMER_TIC2
if (e->policy & engine_policy_paranoid) {
message("Checking system sanity...");
engine_check(e);
}
/* Get the maximum dt. */ /* Get the maximum dt. */
if ( e->policy & engine_policy_multistep ) { if ( e->policy & engine_policy_multistep ) {
dt_step = 2.0f*dt; dt_step = 2.0f*dt;
...@@ -1755,10 +1834,20 @@ void engine_step ( struct engine *e ) { ...@@ -1755,10 +1834,20 @@ void engine_step ( struct engine *e ) {
// printParticle(parts, k); // printParticle(parts, k);
// printParticle( e->s->parts , 3392063069037 , e->s->nr_parts ); // printParticle( e->s->parts , 3392063069037 , e->s->nr_parts );
if (e->policy & engine_policy_paranoid) {
message("Checking system sanity...");
engine_check(e);
}
/* Re-distribute the particles amongst the nodes? */ /* Re-distribute the particles amongst the nodes? */
if ( e->forcerepart ) if ( e->forcerepart )
engine_repartition( e ); engine_repartition( e );
if (e->policy & engine_policy_paranoid) {
message("Checking system sanity...");
engine_check(e);
}
/* Prepare the space. */ /* Prepare the space. */
engine_prepare( e ); engine_prepare( e );
...@@ -1779,6 +1868,12 @@ void engine_step ( struct engine *e ) { ...@@ -1779,6 +1868,12 @@ void engine_step ( struct engine *e ) {
(1 << task_type_grav_up) | (1 << task_type_grav_up) |
(1 << task_type_grav_down) | (1 << task_type_grav_down) |
(1 << task_type_link) ); (1 << task_type_link) );
if (e->policy & engine_policy_paranoid) {
message("Checking system sanity...");
engine_check(e);
}
TIMER_TOC(timer_runners); TIMER_TOC(timer_runners);
// engine_single_force( e->s->dim , 8328423931905 , e->s->parts , e->s->nr_parts , e->s->periodic ); // engine_single_force( e->s->dim , 8328423931905 , e->s->parts , e->s->nr_parts , e->s->periodic );
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#define engine_policy_cputight 64 #define engine_policy_cputight 64
#define engine_policy_mpi 128 #define engine_policy_mpi 128
#define engine_policy_setaffinity 256 #define engine_policy_setaffinity 256
#define engine_policy_paranoid 512
#define engine_queue_scale 1.2 #define engine_queue_scale 1.2
#define engine_maxtaskspercell 128 #define engine_maxtaskspercell 128
......
...@@ -870,7 +870,7 @@ void space_map_parts ( struct space *s , void (*fun)( struct part *p , struct ce ...@@ -870,7 +870,7 @@ void space_map_parts ( struct space *s , void (*fun)( struct part *p , struct ce
/** /**
* @brief Map a function to all particles in a aspace. * @brief Map a function to all particles in a space.
* *
* @param s The #space we are working in. * @param s The #space we are working in.
* @param full Map to all cells, including cells with sub-cells. * @param full Map to all cells, including cells with sub-cells.
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment