Commit 1dcced1a authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

'Solved' bug by increasing size of unlock table. Still get wrong accelerations.

parent 4338bd27
......@@ -318,7 +318,7 @@ int main(int argc, char *argv[]) {
tic = getticks();
if (myrank == 0) message("nr_nodes is %i.", nr_nodes);
engine_init(&e, &s, dt_max, nr_threads, nr_queues, nr_nodes, myrank,
ENGINE_POLICY | engine_policy_steal, 0, 1.);
ENGINE_POLICY | engine_policy_steal, 0, 1., 1e-7, 0.01);
if (myrank == 0)
message("engine_init took %.3f ms.",
((double)(getticks() - tic)) / CPU_TPS * 1000);
......@@ -396,7 +396,7 @@ int main(int argc, char *argv[]) {
/* Take a step. */
engine_step(&e);
if (j == 2) break;
if (j == 5) break;
if (with_outputs && j % 100 == 0) {
......
......@@ -1065,7 +1065,7 @@ void engine_maketasks(struct engine *e) {
error("Failed to allocate cell-task links.");
e->nr_links = 0;
space_link_cleanup(s);
//space_link_cleanup(s); // MATTHIEU
/* /\* Add the gravity up/down tasks at the top-level cells and push them
* down. *\/ */
......@@ -1760,16 +1760,16 @@ void engine_launch(struct engine *e, int nr_runners, unsigned int mask) {
pthread_cond_broadcast(&e->sched.sleep_cond);
pthread_mutex_unlock(&e->sched.sleep_mutex);
message("Before barrier");
fflush(stdout);
//message("Before barrier");
//fflush(stdout);
/* Sit back and wait for the runners to come home. */
while (e->barrier_launch || e->barrier_running)
if (pthread_cond_wait(&e->barrier_cond, &e->barrier_mutex) != 0)
error("Error while waiting for barrier.");
message("After barrier");
fflush(stdout);
//message("After barrier");
//fflush(stdout);
}
/* void hassorted(struct cell *c) { */
......@@ -1929,13 +1929,17 @@ void engine_step(struct engine *e) {
/* Drift everybody */
engine_launch(e, e->nr_threads, 1 << task_type_drift);
scheduler_print_tasks(&e->sched, "tasks_after_drift.dat");
//error("Done drift");
/* Re-distribute the particles amongst the nodes? */
if (e->forcerepart) engine_repartition(e);
/* Prepare the space. */
engine_prepare(e);
engine_maketasks(e);
//engine_maketasks(e);
// engine_marktasks(e);
......@@ -1947,11 +1951,15 @@ void engine_step(struct engine *e) {
TIMER_TIC;
engine_launch(e, e->nr_threads,
(1 << task_type_sort) | (1 << task_type_self) |
(1 << task_type_pair) | (1 << task_type_sub) |
(1 << task_type_init) | (1 << task_type_ghost) |
(1 << task_type_kick) | (1 << task_type_send) |
(1 << task_type_recv) | (1 << task_type_link));
(1 << task_type_pair) | (1 << task_type_sub) |
(1 << task_type_init) | (1 << task_type_ghost) |
(1 << task_type_kick) | (1 << task_type_send) |
(1 << task_type_recv) | (1 << task_type_link));
scheduler_print_tasks(&e->sched, "tasks_after.dat");
//error("done step");
TIMER_TOC(timer_runners);
TIMER_TOC2(timer_step);
......@@ -2122,7 +2130,8 @@ void engine_split(struct engine *e, int *grid) {
void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
int nr_queues, int nr_nodes, int nodeID, int policy,
float timeBegin, float timeEnd) {
float timeBegin, float timeEnd,
float dt_min, float dt_max) {
int k;
#if defined(HAVE_SETAFFINITY)
......@@ -2171,6 +2180,8 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
e->nr_links = 0;
e->timeBegin = timeBegin;
e->timeEnd = timeEnd;
e->dt_min = dt_min;
e->dt_max = dt_max;
engine_rank = nodeID;
/* Make the space link back to the engine. */
......
......@@ -135,7 +135,8 @@ struct engine {
void engine_barrier(struct engine *e, int tid);
void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
int nr_queues, int nr_nodes, int nodeID, int policy,
float timeBegin, float timeEnd);
float timeBegin, float timeEnd,
float dt_min, float dt_max);
void engine_prepare(struct engine *e);
void engine_print(struct engine *e);
void engine_init_particles(struct engine *e);
......
......@@ -43,6 +43,7 @@
#include "timers.h"
#include "timestep.h"
/* Include the right variant of the SPH interactions */
#ifdef LEGACY_GADGET2_SPH
#include "runner_iact_legacy.h"
......@@ -54,10 +55,12 @@
#define PRINT_PART \
if (p->id == 1000) { \
message( \
"p->id=%lld p->h=%f p->N_ngb=%f p->rho=%f p->t_beg=%f p->t_end=%f", \
p->id, p->h, p->density.wcount, p->rho, p->t_begin, p->t_end); \
"p->id=%lld p->h=%3.2f p->N_ngb=%3.2f p->rho=%3.2f p->t_beg=%3.2f p->t_end=%3.2f pos=[%3.2f %3.2f %3.2f] a=[%3.2f %3.2f %3.2f]", \
p->id, p->h, p->density.wcount, p->rho, p->t_begin, p->t_end, p->x[0], p->x[1], p->x[2], p->a[0], p->a[1], p->a[2]); \
}
/* Convert cell location to ID. */
#define cell_getid(cdim, i, j, k) \
((int)(k) + (cdim)[2] * ((int)(j) + (cdim)[1] * (int)(i)))
......@@ -821,14 +824,14 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) {
u = p->u *= expf(w);
/* Predict smoothing length */
// w = p->force.h_dt * ih * dt;
// if (fabsf(w) < 0.01f)
// h = p->h *=
// 1.0f +
// w * (1.0f + w * (0.5f + w * (1.0f / 6.0f + 1.0f / 24.0f * w)));
// else
// h = p->h *= expf(w);
w = p->force.h_dt * ih * dt;
if (fabsf(w) < 0.01f)
h = p->h *=
1.0f +
w * (1.0f + w * (0.5f + w * (1.0f / 6.0f + 1.0f / 24.0f * w)));
else
h = p->h *= expf(w);
/* Predict density */
w = -3.0f * p->force.h_dt * ih * dt;
if (fabsf(w) < 0.1f)
......@@ -864,14 +867,17 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) {
void runner_dokick(struct runner *r, struct cell *c, int timer) {
int k, count = 0, nr_parts = c->count, updated;
const float dt_max_timeline = r->e->timeEnd - r->e->timeBegin;
const float global_dt_min = r->e->dt_min, global_dt_max = r->e->dt_max;
const float t_current = r->e->time;
const int nr_parts = c->count;
int count = 0, updated;
float new_dt = 0.0f, new_dt_hydro = 0.0f, new_dt_grav = 0.0f,
current_dt = 0.0f;
float t_start, t_end, t_current = r->e->time, t_end_min = FLT_MAX,
t_end_max = 0., dt;
float dt_max_timeline = r->e->timeEnd - r->e->timeBegin, dt_timeline;
float dt_min = r->e->dt_min, dt_max = r->e->dt_max;
float h_max, dx_max;
float t_start, t_end, t_end_min = FLT_MAX, t_end_max = 0., dt;
float dt_timeline;
float h_max, dx_max, dt_min, dt_max;
double ekin = 0.0, epot = 0.0;
float mom[3] = {0.0f, 0.0f, 0.0f}, ang[3] = {0.0f, 0.0f, 0.0f};
float m, x[3], v_full[3];
......@@ -890,7 +896,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
dx_max = 0.0f;
/* Loop over the particles and kick the active ones. */
for (k = 0; k < nr_parts; k++) {
for (int k = 0; k < nr_parts; k++) {
/* Get a handle on the part. */
p = &parts[k];
......@@ -917,8 +923,8 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
if (current_dt > 0.0f) new_dt = fminf(new_dt, 2.0f * current_dt);
/* Limit timestep within the allowed range */
new_dt = fminf(new_dt, dt_max);
new_dt = fmaxf(new_dt, dt_min);
new_dt = fminf(new_dt, global_dt_max);
new_dt = fmaxf(new_dt, global_dt_min);
/* Put this timestep on the time line */
dt_timeline = dt_max_timeline;
......@@ -995,7 +1001,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) {
ang[2] = 0.0f;
/* Loop over the progeny. */
for (k = 0; k < 8; k++)
for (int k = 0; k < 8; k++)
if (c->progeny[k] != NULL) {
struct cell *cp = c->progeny[k];
runner_dokick(r, cp, 0);
......@@ -1109,7 +1115,7 @@ void *runner_main(void *data) {
/* Different types of tasks... */
switch (t->type) {
case task_type_self:
// message("self");
//message("self");
if (t->subtype == task_subtype_density)
runner_doself1_density(r, ci);
else if (t->subtype == task_subtype_force)
......@@ -1118,6 +1124,7 @@ void *runner_main(void *data) {
error("Unknown task subtype.");
break;
case task_type_pair:
//message("pair unlocking %d tasks", t->nr_unlock_tasks);
if (t->subtype == task_subtype_density)
runner_dopair1_density(r, ci, cj);
else if (t->subtype == task_subtype_force)
......@@ -1139,12 +1146,15 @@ void *runner_main(void *data) {
error("Unknown task subtype.");
break;
case task_type_init:
//message("init");
runner_doinit(r, ci);
break;
case task_type_ghost:
//message("ghost");
runner_doghost(r, ci);
break;
case task_type_drift:
runner_dodrift(r, ci, 1);
break;
case task_type_kick:
......
......@@ -26,6 +26,22 @@
#include "part.h"
#include "vector.h"
/* #define PRINT_PARTS \ */
/* void; */
/*if (pi->id == 1000) { \
message( \
"pi->id=%lld pi->h=%f pi->N_ngb=%f pi->rho=%f pi->t_beg=%f pi->t_end=%f pos=[%f %f %f]", \
pi->id, pi->h, pi->density.wcount, pi->rho, pi->t_begin, pi->t_end, pi->x[0], pi->x[1], pi->x[2]); \
} \
if (pj->id == 1000) { \
message( \
"pj->id=%lld pj->h=%f pj->N_ngb=%f pj->rho=%f pj->t_beg=%f pj->t_end=%f pos=[%f %f %f]", \
pj->id, pj->h, pj->density.wcount, pj->rho, pj->t_begin, pj->t_end, pj->x[0], pj->x[1], pj->x[2]); \
}\
fflush(stdout);
*/
/**
* @file runner_iact_legacy.h
* @brief SPH interaction functions following the Gadget-2 version of SPH.
......@@ -58,6 +74,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
float dv[3], curlvr[3];
int k;
//PRINT_PARTS;
/* Get the masses. */
mi = pi->mass;
mj = pj->mass;
......@@ -232,6 +250,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
float dv[3], curlvr[3];
int k;
//PRINT_PARTS;
/* Get the masses. */
mj = pj->mass;
......
......@@ -46,6 +46,10 @@
#include "kernel.h"
#include "timers.h"
#define to_check 5394
#define num_checks 11
struct task *check[num_checks];
/**
* @brief Add an unlock_task to the given task.
*
......@@ -65,7 +69,7 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta,
ta = ta->unlock_tasks[task_maxunlock];
/* Get the index of the next free task. */
int ind = atomic_inc(&ta->nr_unlock_tasks);
const int ind = atomic_inc(&ta->nr_unlock_tasks);
/* Is there room in this task? */
if (ind < task_maxunlock) {
......@@ -113,7 +117,7 @@ void scheduler_splittasks(struct scheduler *s) {
{-1, -1, -1, -1, -1, -1, -1, 12}};
float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.1897,
0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.5788};
/* Loop through the tasks... */
redo = 0;
t_old = t = NULL;
......@@ -521,131 +525,131 @@ void scheduler_splittasks(struct scheduler *s) {
} /* pair interaction? */
/* Gravity interaction? */
else if (t->type == task_type_grav_mm) {
/* Get a handle on the cells involved. */
ci = t->ci;
cj = t->cj;
/* Self-interaction? */
if (cj == NULL) {
/* Ignore this task if the cell has no gparts. */
if (ci->gcount == 0) t->type = task_type_none;
/* If the cell is split, recurse. */
else if (ci->split) {
/* Make a single sub-task? */
if (scheduler_dosub && ci->count < space_subsize / ci->count) {
t->type = task_type_sub;
t->subtype = task_subtype_grav;
}
/* Otherwise, just split the task. */
else {
/* Split this task into tasks on its progeny. */
t->type = task_type_none;
for (j = 0; j < 8; j++)
if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) {
if (t->type == task_type_none) {
t->type = task_type_grav_mm;
t->ci = ci->progeny[j];
t->cj = NULL;
} else
t = scheduler_addtask(s, task_type_grav_mm, task_subtype_none,
0, 0, ci->progeny[j], NULL, 0);
for (k = j + 1; k < 8; k++)
if (ci->progeny[k] != NULL && ci->progeny[k]->gcount > 0) {
if (t->type == task_type_none) {
t->type = task_type_grav_mm;
t->ci = ci->progeny[j];
t->cj = ci->progeny[k];
} else
t = scheduler_addtask(s, task_type_grav_mm,
task_subtype_none, 0, 0,
ci->progeny[j], ci->progeny[k], 0);
}
}
redo = (t->type != task_type_none);
}
}
/* Otherwise, just make a pp task out of it. */
else
t->type = task_type_grav_pp;
}
/* Nope, pair. */
else {
/* Make a sub-task? */
if (scheduler_dosub && ci->count < space_subsize / cj->count) {
t->type = task_type_sub;
t->subtype = task_subtype_grav;
}
/* Otherwise, split the task. */
else {
/* Get the opening angle theta. */
float dx[3], theta;
for (k = 0; k < 3; k++) {
dx[k] = fabsf(ci->loc[k] - cj->loc[k]);
if (s->space->periodic && dx[k] > 0.5 * s->space->dim[k])
dx[k] = -dx[k] + s->space->dim[k];
if (dx[k] > 0.0f) dx[k] -= ci->h[k];
}
theta =
(dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]) /
(ci->h[0] * ci->h[0] + ci->h[1] * ci->h[1] + ci->h[2] * ci->h[2]);
/* Ignore this task if the cell has no gparts. */
if (ci->gcount == 0 || cj->gcount == 0) t->type = task_type_none;
/* Split the interaction? */
else if (theta < const_theta_max * const_theta_max) {
/* Are both ci and cj split? */
if (ci->split && cj->split) {
/* Split this task into tasks on its progeny. */
t->type = task_type_none;
for (j = 0; j < 8; j++)
if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) {
for (k = 0; k < 8; k++)
if (cj->progeny[k] != NULL && cj->progeny[k]->gcount > 0) {
if (t->type == task_type_none) {
t->type = task_type_grav_mm;
t->ci = ci->progeny[j];
t->cj = cj->progeny[k];
} else
t = scheduler_addtask(
s, task_type_grav_mm, task_subtype_none, 0, 0,
ci->progeny[j], cj->progeny[k], 0);
}
}
redo = (t->type != task_type_none);
}
/* Otherwise, make a pp task out of it. */
else
t->type = task_type_grav_pp;
}
}
} /* gravity pair interaction? */
} /* gravity interaction? */
/* /\* Gravity interaction? *\/ */
/* else if (t->type == task_type_grav_mm) { */
/* /\* Get a handle on the cells involved. *\/ */
/* ci = t->ci; */
/* cj = t->cj; */
/* /\* Self-interaction? *\/ */
/* if (cj == NULL) { */
/* /\* Ignore this task if the cell has no gparts. *\/ */
/* if (ci->gcount == 0) t->type = task_type_none; */
/* /\* If the cell is split, recurse. *\/ */
/* else if (ci->split) { */
/* /\* Make a single sub-task? *\/ */
/* if (scheduler_dosub && ci->count < space_subsize / ci->count) { */
/* t->type = task_type_sub; */
/* t->subtype = task_subtype_grav; */
/* } */
/* /\* Otherwise, just split the task. *\/ */
/* else { */
/* /\* Split this task into tasks on its progeny. *\/ */
/* t->type = task_type_none; */
/* for (j = 0; j < 8; j++) */
/* if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) { */
/* if (t->type == task_type_none) { */
/* t->type = task_type_grav_mm; */
/* t->ci = ci->progeny[j]; */
/* t->cj = NULL; */
/* } else */
/* t = scheduler_addtask(s, task_type_grav_mm, task_subtype_none, */
/* 0, 0, ci->progeny[j], NULL, 0); */
/* for (k = j + 1; k < 8; k++) */
/* if (ci->progeny[k] != NULL && ci->progeny[k]->gcount > 0) { */
/* if (t->type == task_type_none) { */
/* t->type = task_type_grav_mm; */
/* t->ci = ci->progeny[j]; */
/* t->cj = ci->progeny[k]; */
/* } else */
/* t = scheduler_addtask(s, task_type_grav_mm, */
/* task_subtype_none, 0, 0, */
/* ci->progeny[j], ci->progeny[k], 0); */
/* } */
/* } */
/* redo = (t->type != task_type_none); */
/* } */
/* } */
/* /\* Otherwise, just make a pp task out of it. *\/ */
/* else */
/* t->type = task_type_grav_pp; */
/* } */
/* /\* Nope, pair. *\/ */
/* else { */
/* /\* Make a sub-task? *\/ */
/* if (scheduler_dosub && ci->count < space_subsize / cj->count) { */
/* t->type = task_type_sub; */
/* t->subtype = task_subtype_grav; */
/* } */
/* /\* Otherwise, split the task. *\/ */
/* else { */
/* /\* Get the opening angle theta. *\/ */
/* float dx[3], theta; */
/* for (k = 0; k < 3; k++) { */
/* dx[k] = fabsf(ci->loc[k] - cj->loc[k]); */
/* if (s->space->periodic && dx[k] > 0.5 * s->space->dim[k]) */
/* dx[k] = -dx[k] + s->space->dim[k]; */
/* if (dx[k] > 0.0f) dx[k] -= ci->h[k]; */
/* } */
/* theta = */
/* (dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]) / */
/* (ci->h[0] * ci->h[0] + ci->h[1] * ci->h[1] + ci->h[2] * ci->h[2]); */
/* /\* Ignore this task if the cell has no gparts. *\/ */
/* if (ci->gcount == 0 || cj->gcount == 0) t->type = task_type_none; */
/* /\* Split the interaction? *\/ */
/* else if (theta < const_theta_max * const_theta_max) { */
/* /\* Are both ci and cj split? *\/ */
/* if (ci->split && cj->split) { */
/* /\* Split this task into tasks on its progeny. *\/ */
/* t->type = task_type_none; */
/* for (j = 0; j < 8; j++) */
/* if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) { */
/* for (k = 0; k < 8; k++) */
/* if (cj->progeny[k] != NULL && cj->progeny[k]->gcount > 0) { */
/* if (t->type == task_type_none) { */
/* t->type = task_type_grav_mm; */
/* t->ci = ci->progeny[j]; */
/* t->cj = cj->progeny[k]; */
/* } else */
/* t = scheduler_addtask( */
/* s, task_type_grav_mm, task_subtype_none, 0, 0, */
/* ci->progeny[j], cj->progeny[k], 0); */
/* } */
/* } */
/* redo = (t->type != task_type_none); */
/* } */
/* /\* Otherwise, make a pp task out of it. *\/ */
/* else */
/* t->type = task_type_grav_pp; */
/* } */
/* } */
/* } /\* gravity pair interaction? *\/ */
/* } /\* gravity interaction? *\/ */
} /* loop over all tasks. */
}
......@@ -679,7 +683,7 @@ struct task *scheduler_addtask(struct scheduler *s, int type, int subtype,
/* Get a pointer to the new task. */
t = &s->tasks[ind];
if (t->type == task_type_sort) message("sort!");
//if (t->type == task_type_sort) message("sort!");
/* Copy the data. */
t->type = type;
......@@ -906,6 +910,8 @@ void scheduler_start(struct scheduler *s, unsigned int mask) {
int k, j, nr_tasks = s->nr_tasks, *tid = s->tasks_ind;
struct task *t, *tasks = s->tasks;
struct task *store = NULL;
int count = 0;
// ticks tic;
// message("begin");
......@@ -914,61 +920,102 @@ void scheduler_start(struct scheduler *s, unsigned int mask) {
/* Store the mask */
s->mask = mask;
FILE *file = fopen("tasks.dat", "w");
for (k = 0;k<num_checks; ++k)
check[k] = NULL;
/* Run through the tasks and set their waits. */
// tic = getticks();
for (k = nr_tasks - 1; k >= 0; k--) {
t = &tasks[tid[k]];
t->wait = 0;
t->wait = 1;
t->rid = -1;
if (!((1 << t->type) & s->mask) || t->skip) continue;
if(k==to_check) {
//message("LOOP1: task %d type=%s-%s unlock=%d wait=%d", k, taskID_names[t->type], subtaskID_names[t->subtype], t->nr_unlock_tasks, t->wait);
store = t;
}
if (!((1 << t->type) & mask) || t->skip) continue;
for (j = 0; j < t->nr_unlock_tasks; j++) {
atomic_inc(&t->unlock_tasks[j]->wait);
/* if(t->unlock_tasks[j] == store) { */
/* message("task %d type=%s-%s unlocks the pair unlock=%d wait=%d %p", k, taskID_names[t->type], subtaskID_names[t->subtype], t->nr_unlock_tasks, t->wait, t); */
/* message("Link index: %6li", t->nr_unlock_tasks == task_maxunlock + 1 ? t->unlock_tasks[task_maxunlock] - s->tasks : -1); */
/* check[count] = t; */
/* ++count; */
/* } */