diff --git a/examples/main.c b/examples/main.c index 5789f03c257cf24a2e48d608551b21de09b68764..2b772fc23d4f21dce8be59327d45283a74698493 100644 --- a/examples/main.c +++ b/examples/main.c @@ -318,7 +318,7 @@ int main(int argc, char *argv[]) { tic = getticks(); if (myrank == 0) message("nr_nodes is %i.", nr_nodes); engine_init(&e, &s, dt_max, nr_threads, nr_queues, nr_nodes, myrank, - ENGINE_POLICY | engine_policy_steal, 0, 1.); + ENGINE_POLICY | engine_policy_steal, 0, 1., 1e-7, 0.01); if (myrank == 0) message("engine_init took %.3f ms.", ((double)(getticks() - tic)) / CPU_TPS * 1000); @@ -396,7 +396,7 @@ int main(int argc, char *argv[]) { /* Take a step. */ engine_step(&e); - if (j == 2) break; + if (j == 5) break; if (with_outputs && j % 100 == 0) { diff --git a/src/engine.c b/src/engine.c index 45ea35266bee99e706fb44d6f7b900c261e43ce3..91f879472d894a7ae427f8512531fcc423595260 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1065,7 +1065,7 @@ void engine_maketasks(struct engine *e) { error("Failed to allocate cell-task links."); e->nr_links = 0; - space_link_cleanup(s); + //space_link_cleanup(s); // MATTHIEU /* /\* Add the gravity up/down tasks at the top-level cells and push them * down. *\/ */ @@ -1760,16 +1760,16 @@ void engine_launch(struct engine *e, int nr_runners, unsigned int mask) { pthread_cond_broadcast(&e->sched.sleep_cond); pthread_mutex_unlock(&e->sched.sleep_mutex); - message("Before barrier"); - fflush(stdout); + //message("Before barrier"); + //fflush(stdout); /* Sit back and wait for the runners to come home. */ while (e->barrier_launch || e->barrier_running) if (pthread_cond_wait(&e->barrier_cond, &e->barrier_mutex) != 0) error("Error while waiting for barrier."); - message("After barrier"); - fflush(stdout); + //message("After barrier"); + //fflush(stdout); } /* void hassorted(struct cell *c) { */ @@ -1929,13 +1929,17 @@ void engine_step(struct engine *e) { /* Drift everybody */ engine_launch(e, e->nr_threads, 1 << task_type_drift); + scheduler_print_tasks(&e->sched, "tasks_after_drift.dat"); + + //error("Done drift"); + /* Re-distribute the particles amongst the nodes? */ if (e->forcerepart) engine_repartition(e); /* Prepare the space. */ engine_prepare(e); - engine_maketasks(e); + //engine_maketasks(e); // engine_marktasks(e); @@ -1947,11 +1951,15 @@ void engine_step(struct engine *e) { TIMER_TIC; engine_launch(e, e->nr_threads, (1 << task_type_sort) | (1 << task_type_self) | - (1 << task_type_pair) | (1 << task_type_sub) | - (1 << task_type_init) | (1 << task_type_ghost) | - (1 << task_type_kick) | (1 << task_type_send) | - (1 << task_type_recv) | (1 << task_type_link)); + (1 << task_type_pair) | (1 << task_type_sub) | + (1 << task_type_init) | (1 << task_type_ghost) | + (1 << task_type_kick) | (1 << task_type_send) | + (1 << task_type_recv) | (1 << task_type_link)); + + scheduler_print_tasks(&e->sched, "tasks_after.dat"); + //error("done step"); + TIMER_TOC(timer_runners); TIMER_TOC2(timer_step); @@ -2122,7 +2130,8 @@ void engine_split(struct engine *e, int *grid) { void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, int nr_queues, int nr_nodes, int nodeID, int policy, - float timeBegin, float timeEnd) { + float timeBegin, float timeEnd, + float dt_min, float dt_max) { int k; #if defined(HAVE_SETAFFINITY) @@ -2171,6 +2180,8 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, e->nr_links = 0; e->timeBegin = timeBegin; e->timeEnd = timeEnd; + e->dt_min = dt_min; + e->dt_max = dt_max; engine_rank = nodeID; /* Make the space link back to the engine. */ diff --git a/src/engine.h b/src/engine.h index 025dc219afa80b673ef5628583b80d8858a0db15..20ba4f155d30f5fd7e0f10b59e387f61e017507f 100644 --- a/src/engine.h +++ b/src/engine.h @@ -135,7 +135,8 @@ struct engine { void engine_barrier(struct engine *e, int tid); void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, int nr_queues, int nr_nodes, int nodeID, int policy, - float timeBegin, float timeEnd); + float timeBegin, float timeEnd, + float dt_min, float dt_max); void engine_prepare(struct engine *e); void engine_print(struct engine *e); void engine_init_particles(struct engine *e); diff --git a/src/runner.c b/src/runner.c index 3b613dde252a5619636ae7c5af0a6cf5d7a10717..1beb2c3c3b849c90f4dce658c5c846685c834c5e 100644 --- a/src/runner.c +++ b/src/runner.c @@ -43,6 +43,7 @@ #include "timers.h" #include "timestep.h" + /* Include the right variant of the SPH interactions */ #ifdef LEGACY_GADGET2_SPH #include "runner_iact_legacy.h" @@ -54,10 +55,12 @@ #define PRINT_PART \ if (p->id == 1000) { \ message( \ - "p->id=%lld p->h=%f p->N_ngb=%f p->rho=%f p->t_beg=%f p->t_end=%f", \ - p->id, p->h, p->density.wcount, p->rho, p->t_begin, p->t_end); \ + "p->id=%lld p->h=%3.2f p->N_ngb=%3.2f p->rho=%3.2f p->t_beg=%3.2f p->t_end=%3.2f pos=[%3.2f %3.2f %3.2f] a=[%3.2f %3.2f %3.2f]", \ + p->id, p->h, p->density.wcount, p->rho, p->t_begin, p->t_end, p->x[0], p->x[1], p->x[2], p->a[0], p->a[1], p->a[2]); \ } + + /* Convert cell location to ID. */ #define cell_getid(cdim, i, j, k) \ ((int)(k) + (cdim)[2] * ((int)(j) + (cdim)[1] * (int)(i))) @@ -821,14 +824,14 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { u = p->u *= expf(w); /* Predict smoothing length */ - // w = p->force.h_dt * ih * dt; - // if (fabsf(w) < 0.01f) - // h = p->h *= - // 1.0f + - // w * (1.0f + w * (0.5f + w * (1.0f / 6.0f + 1.0f / 24.0f * w))); - // else - // h = p->h *= expf(w); - + w = p->force.h_dt * ih * dt; + if (fabsf(w) < 0.01f) + h = p->h *= + 1.0f + + w * (1.0f + w * (0.5f + w * (1.0f / 6.0f + 1.0f / 24.0f * w))); + else + h = p->h *= expf(w); + /* Predict density */ w = -3.0f * p->force.h_dt * ih * dt; if (fabsf(w) < 0.1f) @@ -864,14 +867,17 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { void runner_dokick(struct runner *r, struct cell *c, int timer) { - int k, count = 0, nr_parts = c->count, updated; + const float dt_max_timeline = r->e->timeEnd - r->e->timeBegin; + const float global_dt_min = r->e->dt_min, global_dt_max = r->e->dt_max; + const float t_current = r->e->time; + const int nr_parts = c->count; + + int count = 0, updated; float new_dt = 0.0f, new_dt_hydro = 0.0f, new_dt_grav = 0.0f, current_dt = 0.0f; - float t_start, t_end, t_current = r->e->time, t_end_min = FLT_MAX, - t_end_max = 0., dt; - float dt_max_timeline = r->e->timeEnd - r->e->timeBegin, dt_timeline; - float dt_min = r->e->dt_min, dt_max = r->e->dt_max; - float h_max, dx_max; + float t_start, t_end, t_end_min = FLT_MAX, t_end_max = 0., dt; + float dt_timeline; + float h_max, dx_max, dt_min, dt_max; double ekin = 0.0, epot = 0.0; float mom[3] = {0.0f, 0.0f, 0.0f}, ang[3] = {0.0f, 0.0f, 0.0f}; float m, x[3], v_full[3]; @@ -890,7 +896,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { dx_max = 0.0f; /* Loop over the particles and kick the active ones. */ - for (k = 0; k < nr_parts; k++) { + for (int k = 0; k < nr_parts; k++) { /* Get a handle on the part. */ p = &parts[k]; @@ -917,8 +923,8 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { if (current_dt > 0.0f) new_dt = fminf(new_dt, 2.0f * current_dt); /* Limit timestep within the allowed range */ - new_dt = fminf(new_dt, dt_max); - new_dt = fmaxf(new_dt, dt_min); + new_dt = fminf(new_dt, global_dt_max); + new_dt = fmaxf(new_dt, global_dt_min); /* Put this timestep on the time line */ dt_timeline = dt_max_timeline; @@ -995,7 +1001,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { ang[2] = 0.0f; /* Loop over the progeny. */ - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) { struct cell *cp = c->progeny[k]; runner_dokick(r, cp, 0); @@ -1109,7 +1115,7 @@ void *runner_main(void *data) { /* Different types of tasks... */ switch (t->type) { case task_type_self: - // message("self"); + //message("self"); if (t->subtype == task_subtype_density) runner_doself1_density(r, ci); else if (t->subtype == task_subtype_force) @@ -1118,6 +1124,7 @@ void *runner_main(void *data) { error("Unknown task subtype."); break; case task_type_pair: + //message("pair unlocking %d tasks", t->nr_unlock_tasks); if (t->subtype == task_subtype_density) runner_dopair1_density(r, ci, cj); else if (t->subtype == task_subtype_force) @@ -1139,12 +1146,15 @@ void *runner_main(void *data) { error("Unknown task subtype."); break; case task_type_init: + //message("init"); runner_doinit(r, ci); break; case task_type_ghost: + //message("ghost"); runner_doghost(r, ci); break; case task_type_drift: + runner_dodrift(r, ci, 1); break; case task_type_kick: diff --git a/src/runner_iact_legacy.h b/src/runner_iact_legacy.h index 5716cf4a7a84a9d9fcd4cef38a307e483389d4ab..79157e9284e12e66cb1083dc8c865eb39198af16 100644 --- a/src/runner_iact_legacy.h +++ b/src/runner_iact_legacy.h @@ -26,6 +26,22 @@ #include "part.h" #include "vector.h" +/* #define PRINT_PARTS \ */ +/* void; */ +/*if (pi->id == 1000) { \ + message( \ + "pi->id=%lld pi->h=%f pi->N_ngb=%f pi->rho=%f pi->t_beg=%f pi->t_end=%f pos=[%f %f %f]", \ + pi->id, pi->h, pi->density.wcount, pi->rho, pi->t_begin, pi->t_end, pi->x[0], pi->x[1], pi->x[2]); \ + } \ + if (pj->id == 1000) { \ + message( \ + "pj->id=%lld pj->h=%f pj->N_ngb=%f pj->rho=%f pj->t_beg=%f pj->t_end=%f pos=[%f %f %f]", \ + pj->id, pj->h, pj->density.wcount, pj->rho, pj->t_begin, pj->t_end, pj->x[0], pj->x[1], pj->x[2]); \ + }\ + fflush(stdout); +*/ + + /** * @file runner_iact_legacy.h * @brief SPH interaction functions following the Gadget-2 version of SPH. @@ -58,6 +74,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( float dv[3], curlvr[3]; int k; + //PRINT_PARTS; + /* Get the masses. */ mi = pi->mass; mj = pj->mass; @@ -232,6 +250,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( float dv[3], curlvr[3]; int k; + //PRINT_PARTS; + /* Get the masses. */ mj = pj->mass; diff --git a/src/scheduler.c b/src/scheduler.c index a5a7c3dd19ab3966408dfb4dae3cfde3cf74e442..a7468d8abb9f3bca43e4fd29c96fdc516dc7889e 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -46,6 +46,10 @@ #include "kernel.h" #include "timers.h" +#define to_check 5394 +#define num_checks 11 +struct task *check[num_checks]; + /** * @brief Add an unlock_task to the given task. * @@ -65,7 +69,7 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta, ta = ta->unlock_tasks[task_maxunlock]; /* Get the index of the next free task. */ - int ind = atomic_inc(&ta->nr_unlock_tasks); + const int ind = atomic_inc(&ta->nr_unlock_tasks); /* Is there room in this task? */ if (ind < task_maxunlock) { @@ -113,7 +117,7 @@ void scheduler_splittasks(struct scheduler *s) { {-1, -1, -1, -1, -1, -1, -1, 12}}; float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.1897, 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.5788}; - + /* Loop through the tasks... */ redo = 0; t_old = t = NULL; @@ -521,131 +525,131 @@ void scheduler_splittasks(struct scheduler *s) { } /* pair interaction? */ - /* Gravity interaction? */ - else if (t->type == task_type_grav_mm) { - - /* Get a handle on the cells involved. */ - ci = t->ci; - cj = t->cj; - - /* Self-interaction? */ - if (cj == NULL) { - - /* Ignore this task if the cell has no gparts. */ - if (ci->gcount == 0) t->type = task_type_none; - - /* If the cell is split, recurse. */ - else if (ci->split) { - - /* Make a single sub-task? */ - if (scheduler_dosub && ci->count < space_subsize / ci->count) { - - t->type = task_type_sub; - t->subtype = task_subtype_grav; - - } - - /* Otherwise, just split the task. */ - else { - - /* Split this task into tasks on its progeny. */ - t->type = task_type_none; - for (j = 0; j < 8; j++) - if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) { - if (t->type == task_type_none) { - t->type = task_type_grav_mm; - t->ci = ci->progeny[j]; - t->cj = NULL; - } else - t = scheduler_addtask(s, task_type_grav_mm, task_subtype_none, - 0, 0, ci->progeny[j], NULL, 0); - for (k = j + 1; k < 8; k++) - if (ci->progeny[k] != NULL && ci->progeny[k]->gcount > 0) { - if (t->type == task_type_none) { - t->type = task_type_grav_mm; - t->ci = ci->progeny[j]; - t->cj = ci->progeny[k]; - } else - t = scheduler_addtask(s, task_type_grav_mm, - task_subtype_none, 0, 0, - ci->progeny[j], ci->progeny[k], 0); - } - } - redo = (t->type != task_type_none); - } - - } - - /* Otherwise, just make a pp task out of it. */ - else - t->type = task_type_grav_pp; - - } - - /* Nope, pair. */ - else { - - /* Make a sub-task? */ - if (scheduler_dosub && ci->count < space_subsize / cj->count) { - - t->type = task_type_sub; - t->subtype = task_subtype_grav; - - } - - /* Otherwise, split the task. */ - else { - - /* Get the opening angle theta. */ - float dx[3], theta; - for (k = 0; k < 3; k++) { - dx[k] = fabsf(ci->loc[k] - cj->loc[k]); - if (s->space->periodic && dx[k] > 0.5 * s->space->dim[k]) - dx[k] = -dx[k] + s->space->dim[k]; - if (dx[k] > 0.0f) dx[k] -= ci->h[k]; - } - theta = - (dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]) / - (ci->h[0] * ci->h[0] + ci->h[1] * ci->h[1] + ci->h[2] * ci->h[2]); - - /* Ignore this task if the cell has no gparts. */ - if (ci->gcount == 0 || cj->gcount == 0) t->type = task_type_none; - - /* Split the interaction? */ - else if (theta < const_theta_max * const_theta_max) { - - /* Are both ci and cj split? */ - if (ci->split && cj->split) { - - /* Split this task into tasks on its progeny. */ - t->type = task_type_none; - for (j = 0; j < 8; j++) - if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) { - for (k = 0; k < 8; k++) - if (cj->progeny[k] != NULL && cj->progeny[k]->gcount > 0) { - if (t->type == task_type_none) { - t->type = task_type_grav_mm; - t->ci = ci->progeny[j]; - t->cj = cj->progeny[k]; - } else - t = scheduler_addtask( - s, task_type_grav_mm, task_subtype_none, 0, 0, - ci->progeny[j], cj->progeny[k], 0); - } - } - redo = (t->type != task_type_none); - - } - - /* Otherwise, make a pp task out of it. */ - else - t->type = task_type_grav_pp; - } - } - - } /* gravity pair interaction? */ - - } /* gravity interaction? */ + /* /\* Gravity interaction? *\/ */ + /* else if (t->type == task_type_grav_mm) { */ + + /* /\* Get a handle on the cells involved. *\/ */ + /* ci = t->ci; */ + /* cj = t->cj; */ + + /* /\* Self-interaction? *\/ */ + /* if (cj == NULL) { */ + + /* /\* Ignore this task if the cell has no gparts. *\/ */ + /* if (ci->gcount == 0) t->type = task_type_none; */ + + /* /\* If the cell is split, recurse. *\/ */ + /* else if (ci->split) { */ + + /* /\* Make a single sub-task? *\/ */ + /* if (scheduler_dosub && ci->count < space_subsize / ci->count) { */ + + /* t->type = task_type_sub; */ + /* t->subtype = task_subtype_grav; */ + + /* } */ + + /* /\* Otherwise, just split the task. *\/ */ + /* else { */ + + /* /\* Split this task into tasks on its progeny. *\/ */ + /* t->type = task_type_none; */ + /* for (j = 0; j < 8; j++) */ + /* if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) { */ + /* if (t->type == task_type_none) { */ + /* t->type = task_type_grav_mm; */ + /* t->ci = ci->progeny[j]; */ + /* t->cj = NULL; */ + /* } else */ + /* t = scheduler_addtask(s, task_type_grav_mm, task_subtype_none, */ + /* 0, 0, ci->progeny[j], NULL, 0); */ + /* for (k = j + 1; k < 8; k++) */ + /* if (ci->progeny[k] != NULL && ci->progeny[k]->gcount > 0) { */ + /* if (t->type == task_type_none) { */ + /* t->type = task_type_grav_mm; */ + /* t->ci = ci->progeny[j]; */ + /* t->cj = ci->progeny[k]; */ + /* } else */ + /* t = scheduler_addtask(s, task_type_grav_mm, */ + /* task_subtype_none, 0, 0, */ + /* ci->progeny[j], ci->progeny[k], 0); */ + /* } */ + /* } */ + /* redo = (t->type != task_type_none); */ + /* } */ + + /* } */ + + /* /\* Otherwise, just make a pp task out of it. *\/ */ + /* else */ + /* t->type = task_type_grav_pp; */ + + /* } */ + + /* /\* Nope, pair. *\/ */ + /* else { */ + + /* /\* Make a sub-task? *\/ */ + /* if (scheduler_dosub && ci->count < space_subsize / cj->count) { */ + + /* t->type = task_type_sub; */ + /* t->subtype = task_subtype_grav; */ + + /* } */ + + /* /\* Otherwise, split the task. *\/ */ + /* else { */ + + /* /\* Get the opening angle theta. *\/ */ + /* float dx[3], theta; */ + /* for (k = 0; k < 3; k++) { */ + /* dx[k] = fabsf(ci->loc[k] - cj->loc[k]); */ + /* if (s->space->periodic && dx[k] > 0.5 * s->space->dim[k]) */ + /* dx[k] = -dx[k] + s->space->dim[k]; */ + /* if (dx[k] > 0.0f) dx[k] -= ci->h[k]; */ + /* } */ + /* theta = */ + /* (dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]) / */ + /* (ci->h[0] * ci->h[0] + ci->h[1] * ci->h[1] + ci->h[2] * ci->h[2]); */ + + /* /\* Ignore this task if the cell has no gparts. *\/ */ + /* if (ci->gcount == 0 || cj->gcount == 0) t->type = task_type_none; */ + + /* /\* Split the interaction? *\/ */ + /* else if (theta < const_theta_max * const_theta_max) { */ + + /* /\* Are both ci and cj split? *\/ */ + /* if (ci->split && cj->split) { */ + + /* /\* Split this task into tasks on its progeny. *\/ */ + /* t->type = task_type_none; */ + /* for (j = 0; j < 8; j++) */ + /* if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) { */ + /* for (k = 0; k < 8; k++) */ + /* if (cj->progeny[k] != NULL && cj->progeny[k]->gcount > 0) { */ + /* if (t->type == task_type_none) { */ + /* t->type = task_type_grav_mm; */ + /* t->ci = ci->progeny[j]; */ + /* t->cj = cj->progeny[k]; */ + /* } else */ + /* t = scheduler_addtask( */ + /* s, task_type_grav_mm, task_subtype_none, 0, 0, */ + /* ci->progeny[j], cj->progeny[k], 0); */ + /* } */ + /* } */ + /* redo = (t->type != task_type_none); */ + + /* } */ + + /* /\* Otherwise, make a pp task out of it. *\/ */ + /* else */ + /* t->type = task_type_grav_pp; */ + /* } */ + /* } */ + + /* } /\* gravity pair interaction? *\/ */ + + /* } /\* gravity interaction? *\/ */ } /* loop over all tasks. */ } @@ -679,7 +683,7 @@ struct task *scheduler_addtask(struct scheduler *s, int type, int subtype, /* Get a pointer to the new task. */ t = &s->tasks[ind]; - if (t->type == task_type_sort) message("sort!"); + //if (t->type == task_type_sort) message("sort!"); /* Copy the data. */ t->type = type; @@ -906,6 +910,8 @@ void scheduler_start(struct scheduler *s, unsigned int mask) { int k, j, nr_tasks = s->nr_tasks, *tid = s->tasks_ind; struct task *t, *tasks = s->tasks; + struct task *store = NULL; + int count = 0; // ticks tic; // message("begin"); @@ -914,61 +920,102 @@ void scheduler_start(struct scheduler *s, unsigned int mask) { /* Store the mask */ s->mask = mask; - FILE *file = fopen("tasks.dat", "w"); - + for (k = 0;k<num_checks; ++k) + check[k] = NULL; + /* Run through the tasks and set their waits. */ // tic = getticks(); for (k = nr_tasks - 1; k >= 0; k--) { t = &tasks[tid[k]]; - t->wait = 0; + t->wait = 1; t->rid = -1; - if (!((1 << t->type) & s->mask) || t->skip) continue; + + if(k==to_check) { + + //message("LOOP1: task %d type=%s-%s unlock=%d wait=%d", k, taskID_names[t->type], subtaskID_names[t->subtype], t->nr_unlock_tasks, t->wait); + + store = t; + + } + + if (!((1 << t->type) & mask) || t->skip) continue; for (j = 0; j < t->nr_unlock_tasks; j++) { atomic_inc(&t->unlock_tasks[j]->wait); + + /* if(t->unlock_tasks[j] == store) { */ + /* message("task %d type=%s-%s unlocks the pair unlock=%d wait=%d %p", k, taskID_names[t->type], subtaskID_names[t->subtype], t->nr_unlock_tasks, t->wait, t); */ + /* message("Link index: %6li", t->nr_unlock_tasks == task_maxunlock + 1 ? t->unlock_tasks[task_maxunlock] - s->tasks : -1); */ + + /* check[count] = t; */ + /* ++count; */ + /* } */ + /* if(t->unlock_tasks[j] == &tasks[9563] ) { */ /* message("task %d %s %s unlocking task %d %s %s\n", */ - /* k, taskID_names[t->type], subtaskID_names[t->subtype], - */ - /* 9563, taskID_names[t->unlock_tasks[j]->type], - * subtaskID_names[t->unlock_tasks[j]->type]); */ + /* k, taskID_names[t->type], subtaskID_names[t->subtype], */ + /* 9563, taskID_names[t->unlock_tasks[j]->type], */ + /* subtaskID_names[t->unlock_tasks[j]->type]); */ /* } */ } } - for (k = nr_tasks - 1; k >= 0; k--) { - t = &tasks[tid[k]]; - // if (t->type == task_type_sort) - // message("%d %s %s %d %d %d\n", k, taskID_names[t->type], - // subtaskID_names[t->subtype], t->nr_unlock_tasks, t->wait, t->skip); - if (!((1 << t->type) & s->mask) || t->skip) continue; - fprintf(file, "%d %s %s %d %d\n", k, taskID_names[t->type], - subtaskID_names[t->subtype], t->nr_unlock_tasks, t->wait); - } - // message( "waiting tasks took %.3f ms." , (double)( getticks() - tic ) / // CPU_TPS * 1000 ); - fclose(file); + scheduler_print_tasks(s, "tasks_start.dat"); - // message("All waits set"); - fflush(stdout); + //message("All waits set nr_tasks=%d", nr_tasks); + //fflush(stdout); /* Don't enqueue link tasks directly. */ - s->mask &= ~(1 << task_type_link); + mask &= ~(1 << task_type_link); + s->mask = mask; - /* Loop over the tasks and enqueue whoever is ready. */ - // tic = getticks(); for (k = 0; k < nr_tasks; k++) { t = &tasks[tid[k]]; - if (((1 << t->type) & s->mask) && !t->skip) { - if (t->wait == 0) { - scheduler_enqueue(s, t); - pthread_cond_broadcast(&s->sleep_cond); - } else - break; + + /* if(k==to_check) { */ + /* message("LOOP2: task %5d type=%s-%s unlock=%d wait=%d t=%p", k, taskID_names[t->type], subtaskID_names[t->subtype], t->nr_unlock_tasks, t->wait, t); */ + /* fflush(stdout); */ + /* } */ + + for (j = 0; j < t->nr_unlock_tasks; j++) { + if(t->unlock_tasks[j] == store) { + //message("task %d type=%s-%s unlocks the pair unlock=%d wait=%d %p", k, taskID_names[t->type], subtaskID_names[t->subtype], t->nr_unlock_tasks, t->wait, t); + //message("Link index: %6li", t->nr_unlock_tasks == task_maxunlock + 1 ? t->unlock_tasks[task_maxunlock] - s->tasks : -1); + + check[count] = t; + ++count; + } + } + } + + /* Loop over the tasks and enqueue whoever is ready. */ + // tic = getticks(); + for (k = 0; k < s->nr_tasks; k++) { + t = &tasks[tid[k]]; + + /* if (((1 << t->type) & mask) && !t->skip) { */ + /* if (t->wait == 0) { */ + /* scheduler_enqueue(s, t); */ + /* pthread_cond_broadcast(&s->sleep_cond); */ + /* } else */ + /* break; */ + /* } */ + + if (atomic_dec(&t->wait) == 1 && + ((1 << t->type) & s->mask) && + !t->skip) { + + scheduler_enqueue(s, t); + pthread_cond_broadcast(&s->sleep_cond); + + } + + } scheduler_dump_queue(s); // message("Done enqueieing");fflush(stdout); @@ -996,13 +1043,44 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { // fflush(stdout); // } + /* Fail if this task has already been enqueued before. */ + if (t->rid >= 0) error("Task has already been enqueued."); + + + for(int k=0; k<num_checks; ++k) { + + if(t == check[k]) { + //message("task %5d type=%s-%s unlock=%d wait=%d %p", 0, taskID_names[t->type], subtaskID_names[t->subtype], t->nr_unlock_tasks, t->wait, t); + + + } + + } + /* Ignore skipped tasks and tasks not in the mask. */ - if (t->skip || ((1 << t->type) & ~(s->mask) && t->type != task_type_link) || - atomic_cas(&t->rid, -1, 0) != -1) + if (t->skip || ((1 << t->type) & ~(s->mask) && t->type != task_type_link)) { return; + } + + + for(int k=0; k<num_checks; ++k) { + if(t == check[k]) { + //message("not ignored !"); + + } + + } + + /* If this is an implicit task, just pretend it's done. */ if (t->implicit) { + + for(int k=0; k<num_checks; ++k) { + if(t == check[k]) { + //message("implicit"); + } + } for (int j = 0; j < t->nr_unlock_tasks; j++) { struct task *t2 = t->unlock_tasks[j]; if (atomic_dec(&t2->wait) == 1) scheduler_enqueue(s, t2); @@ -1096,6 +1174,18 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { struct task *scheduler_done(struct scheduler *s, struct task *t) { + + for(int k=0; k<num_checks; ++k) { + + if(t == check[k]) { + //message("task %5d type=%s-%s unlock=%d wait=%d %p", 0, taskID_names[t->type], subtaskID_names[t->subtype], t->nr_unlock_tasks, t->wait, t); + + + } + + } + + /* Release whatever locks this task held. */ if (!t->implicit) task_unlock(t); @@ -1104,6 +1194,22 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) { for (int k = 0; k < t->nr_unlock_tasks; k++) { struct task *t2 = t->unlock_tasks[k]; int res = atomic_dec(&t2->wait); + /* if (t->type == task_type_init) */ + /* message("Done with init ! Unlocking a %s task. %d dependencies left", */ + /* taskID_names[t2->type], res); */ + /* if (t->type == task_type_pair) */ + /* message("Done with pair ! Unlocking a %s task. %d dependencies left", */ + /* taskID_names[t2->type], res); */ + + for(int k=0; k<num_checks; ++k) { + + if(t2 == check[k]) { + //message("Unlocking the task %p", t2); + } + + } + + if (res < 1) { error("Negative wait!"); } else if (res == 1) { @@ -1327,3 +1433,28 @@ void scheduler_dump_queue(struct scheduler *s) { fclose(file); } } + +/** + * @brief Prints the list of tasks to a file + * + * @param s The #scheduler + * @param fileName Name of the file to write to + */ +void scheduler_print_tasks(struct scheduler *s, char *fileName) { + + const int nr_tasks = s->nr_tasks, *tid = s->tasks_ind; + struct task *t, *tasks = s->tasks; + + FILE *file = fopen(fileName, "w"); + + fprintf(file, "# Rank Name Subname unlocks waits\n"); + + for (int k = nr_tasks - 1; k >= 0; k--) { + t = &tasks[tid[k]]; + if (!((1 << t->type)) || t->skip) continue; + fprintf(file, "%d %s %s %d %d\n", k, taskID_names[t->type], + subtaskID_names[t->subtype], t->nr_unlock_tasks, t->wait); + } + + fclose(file); +} diff --git a/src/scheduler.h b/src/scheduler.h index 4925236ca4f5d74812c1296e18cb7693b454b886..e78ca7c8dea8997c89b4a8cd140ba5ac0b810055 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -104,5 +104,6 @@ struct task *scheduler_done(struct scheduler *s, struct task *t); struct task *scheduler_unlock(struct scheduler *s, struct task *t); void scheduler_addunlock(struct scheduler *s, struct task *ta, struct task *tb); void scheduler_dump_queue(struct scheduler *s); +void scheduler_print_tasks(struct scheduler *s, char *fileName); #endif /* SWIFT_SCHEDULER_H */ diff --git a/src/task.h b/src/task.h index 818a998bca07b50b406b3b844f016d08a47c0bf4..b8a5c7cce9455e3c68a3bb99ba57805e00b10de8 100644 --- a/src/task.h +++ b/src/task.h @@ -26,7 +26,7 @@ /* Some constants. */ #define task_maxwait 3 -#define task_maxunlock 15 +#define task_maxunlock 100 /* The different task types. */ enum task_types {