diff --git a/examples/main.c b/examples/main.c index 77cb1fbd494c1746d9c7c8be496e51eb5546ce9a..922303ddf114170f7cd66027a92f101c10194c99 100644 --- a/examples/main.c +++ b/examples/main.c @@ -77,6 +77,7 @@ int main(int argc, char *argv[]) { int nr_nodes = 1, myrank = 0; FILE *file_thread; int with_outputs = 1; + int verbose = 0; unsigned long long cpufreq = 0; #ifdef WITH_MPI @@ -150,7 +151,7 @@ int main(int argc, char *argv[]) { bzero(&s, sizeof(struct space)); /* Parse the options */ - while ((c = getopt(argc, argv, "a:c:d:e:f:h:m:oP:q:R:s:t:w:y:z:")) != -1) + while ((c = getopt(argc, argv, "a:c:d:e:f:h:m:oP:q:R:s:t:vw:y:z:")) != -1) switch (c) { case 'a': if (sscanf(optarg, "%lf", &scaling) != 1) @@ -226,8 +227,8 @@ int main(int argc, char *argv[]) { error("Error parsing number of queues."); break; case 'R': - /* Repartition type "n", "b", "v", "e" or "x". - * Note only none is available without METIS. */ +/* Repartition type "n", "b", "v", "e" or "x". + * Note only none is available without METIS. */ #ifdef WITH_MPI switch (optarg[0]) { case 'n': @@ -261,6 +262,9 @@ int main(int argc, char *argv[]) { if (sscanf(optarg, "%d", &nr_threads) != 1) error("Error parsing number of threads."); break; + case 'v': + verbose = 1; + break; case 'w': if (sscanf(optarg, "%d", &space_subsize) != 1) error("Error parsing sub size."); @@ -329,7 +333,7 @@ int main(int argc, char *argv[]) { clocks_set_cpufreq(cpufreq); cpufreq = clocks_get_cpufreq(); if (myrank == 0) - message("CPU frequency used for tick conversion: %llu Hz", cpufreq); + message("CPU frequency used for tick conversion: %llu Hz", cpufreq); /* Check we have sensible time step bounds */ if (dt_min > dt_max) @@ -341,8 +345,7 @@ int main(int argc, char *argv[]) { /* Read particles and space information from (GADGET) IC */ - if (myrank == 0) - clocks_gettime(&tic); + if (myrank == 0) clocks_gettime(&tic); #if defined(WITH_MPI) #if defined(HAVE_PARALLEL_HDF5) read_ic_parallel(ICfileName, dim, &parts, &N, &periodic, myrank, nr_nodes, @@ -386,10 +389,9 @@ int main(int argc, char *argv[]) { if (nr_queues < 0) nr_queues = nr_threads; /* Initialize the space with this data. */ - if (myrank == 0) - clocks_gettime(&tic); + if (myrank == 0) clocks_gettime(&tic); space_init(&s, dim, parts, N, periodic, h_max, myrank == 0); - if (myrank == 0) { + if (myrank == 0 && verbose) { clocks_gettime(&toc); message("space_init took %.3f %s.", clocks_diff(&tic, &toc), clocks_getunit()); @@ -423,13 +425,12 @@ int main(int argc, char *argv[]) { } /* Initialize the engine with this space. */ - if (myrank == 0) - clocks_gettime(&tic); + if (myrank == 0) clocks_gettime(&tic); if (myrank == 0) message("nr_nodes is %i.", nr_nodes); engine_init(&e, &s, dt_max, nr_threads, nr_queues, nr_nodes, myrank, ENGINE_POLICY | engine_policy_steal | engine_policy_hydro, 0, - time_end, dt_min, dt_max); - if (myrank == 0) { + time_end, dt_min, dt_max, (myrank == 0 && verbose)); + if (myrank == 0 && verbose) { clocks_gettime(&toc); message("engine_init took %.3f %s.", clocks_diff(&tic, &toc), clocks_getunit()); @@ -445,8 +446,7 @@ int main(int argc, char *argv[]) { if (with_outputs) { /* Write the state of the system as it is before starting time integration. */ - if (myrank == 0) - clocks_gettime(&tic); + if (myrank == 0) clocks_gettime(&tic); #if defined(WITH_MPI) #if defined(HAVE_PARALLEL_HDF5) write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, @@ -458,7 +458,7 @@ int main(int argc, char *argv[]) { #else write_output_single(&e, &us); #endif - if (myrank == 0) { + if (myrank == 0 && verbose) { clocks_gettime(&toc); message("writing particle properties took %.3f %s.", clocks_diff(&tic, &toc), clocks_getunit()); @@ -486,7 +486,8 @@ int main(int argc, char *argv[]) { if (myrank == 0) printf( "# Step Time time-step Number of updates CPU Wall-clock time " - "[%s]\n", clocks_getunit()); + "[%s]\n", + clocks_getunit()); /* Let loose a runner on the space. */ for (j = 0; !engine_is_done(&e); j++) { @@ -506,6 +507,7 @@ int main(int argc, char *argv[]) { if (with_outputs && j % 100 == 0) { + if (myrank == 0) clocks_gettime(&tic); #if defined(WITH_MPI) #if defined(HAVE_PARALLEL_HDF5) write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, @@ -517,6 +519,12 @@ int main(int argc, char *argv[]) { #else write_output_single(&e, &us); #endif + if (myrank == 0 && verbose) { + clocks_gettime(&toc); + message("writing particle properties took %.3f %s.", + clocks_diff(&tic, &toc), clocks_getunit()); + fflush(stdout); + } } /* Dump the task data using the given frequency. */ @@ -617,6 +625,8 @@ int main(int argc, char *argv[]) { #endif if (with_outputs) { + + if (myrank == 0) clocks_gettime(&tic); /* Write final output. */ #if defined(WITH_MPI) #if defined(HAVE_PARALLEL_HDF5) @@ -629,6 +639,12 @@ int main(int argc, char *argv[]) { #else write_output_single(&e, &us); #endif + if (myrank == 0 && verbose) { + clocks_gettime(&toc); + message("writing particle properties took %.3f %s.", + clocks_diff(&tic, &toc), clocks_getunit()); + fflush(stdout); + } } #ifdef WITH_MPI diff --git a/src/engine.c b/src/engine.c index 3e8570cde08e2faf05658ddd42d2fce7a427e4e0..44bda19967a15f59a81942d633a422422139c45e 100644 --- a/src/engine.c +++ b/src/engine.c @@ -149,6 +149,7 @@ void engine_redistribute(struct engine *e) { int *cdim = s->cdim; struct cell *cells = s->cells; int nr_cells = s->nr_cells; + ticks tic = getticks(); /* Start by sorting the particles according to their nodes and getting the counts. The counts array is indexed as @@ -274,13 +275,18 @@ void engine_redistribute(struct engine *e) { /* Be verbose about what just happened. */ for (int k = 0; k < nr_cells; k++) if (cells[k].nodeID == nodeID) my_cells += 1; - message("node %i now has %i parts in %i cells.", nodeID, nr_parts, my_cells); + if (e->verbose) + message("node %i now has %i parts in %i cells.", nodeID, nr_parts, + my_cells); /* Clean up other stuff. */ free(reqs); free(counts); free(dest); + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); #else error("SWIFT was not compiled with MPI support."); #endif @@ -296,6 +302,8 @@ void engine_repartition(struct engine *e) { #if defined(WITH_MPI) && defined(HAVE_METIS) + ticks tic = getticks(); + /* Clear the repartition flag. */ enum repartition_type reparttype = e->forcerepart; e->forcerepart = REPART_NONE; @@ -323,6 +331,9 @@ void engine_repartition(struct engine *e) { /* Tell the engine it should re-build whenever possible */ e->forcerebuild = 1; + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); #else error("SWIFT was not compiled with MPI and METIS support."); #endif @@ -472,6 +483,7 @@ void engine_exchange_cells(struct engine *e) { MPI_Request reqs_in[engine_maxproxies]; MPI_Request reqs_out[engine_maxproxies]; MPI_Status status; + ticks tic = getticks(); /* Run through the cells and get the size of the ones that will be sent off. */ @@ -564,6 +576,10 @@ void engine_exchange_cells(struct engine *e) { /* Free the pcell buffer. */ free(pcells); + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); + #else error("SWIFT was not compiled with MPI support."); #endif @@ -591,6 +607,7 @@ int engine_exchange_strays(struct engine *e, int offset, int *ind, int N) { MPI_Status status; struct proxy *p; struct space *s = e->s; + ticks tic = getticks(); /* Re-set the proxies. */ for (k = 0; k < e->nr_proxies; k++) e->proxies[k].nr_parts_out = 0; @@ -635,7 +652,7 @@ int engine_exchange_strays(struct engine *e, int offset, int *ind, int N) { enough space to accommodate them. */ int count_in = 0; for (k = 0; k < e->nr_proxies; k++) count_in += e->proxies[k].nr_parts_in; - message("sent out %i particles, got %i back.", N, count_in); + if (e->verbose) message("sent out %i particles, got %i back.", N, count_in); if (offset + count_in > s->size_parts) { s->size_parts = (offset + count_in) * 1.05; struct part *parts_new = NULL; @@ -704,6 +721,10 @@ int engine_exchange_strays(struct engine *e, int offset, int *ind, int N) { MPI_SUCCESS) error("MPI_Waitall on sends failed."); + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); + /* Return the number of harvested parts. */ return count; @@ -730,6 +751,7 @@ void engine_maketasks(struct engine *e) { int *cdim = s->cdim; struct task *t, *t2; struct cell *ci, *cj; + ticks tic = getticks(); /* Re-set the scheduler. */ scheduler_reset(sched, s->tot_cells * engine_maxtaskspercell); @@ -984,6 +1006,10 @@ void engine_maketasks(struct engine *e) { /* Set the tasks age. */ e->tasks_age = 0; + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } /** @@ -999,7 +1025,7 @@ int engine_marktasks(struct engine *e) { struct task *t, *tasks = s->tasks; float ti_end = e->ti_current; struct cell *ci, *cj; - // ticks tic = getticks(); + ticks tic = getticks(); /* Much less to do here if we're on a fixed time-step. */ if ((e->policy & engine_policy_fixdt) == engine_policy_fixdt) { @@ -1116,8 +1142,9 @@ int engine_marktasks(struct engine *e) { } } - // message( "took %.3f %s." , clocks_from_ticks(getticks() - tic), - // clocks_getunit()); + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); /* All is well... */ return 0; @@ -1163,38 +1190,33 @@ void engine_print(struct engine *e) { */ void engine_rebuild(struct engine *e) { + + ticks tic = getticks(); + /* Clear the forcerebuild flag, whatever it was. */ e->forcerebuild = 0; /* Re-build the space. */ - // tic = getticks(); - space_rebuild(e->s, 0.0, e->nodeID == 0); -// message( "space_rebuild took %.3f %s." , -// clocks_from_ticks(getticks() - tic), clocks_getunit()); + space_rebuild(e->s, 0.0, e->verbose); /* If in parallel, exchange the cell structure. */ #ifdef WITH_MPI - // tic = getticks(); engine_exchange_cells(e); -// message( "engine_exchange_cells took %.3f %s." , -// clocks_from_ticks(getticks() - tic), clocks_getunit()); #endif /* Re-build the tasks. */ - // tic = getticks(); engine_maketasks(e); - // message( "engine_maketasks took %.3f %s." , - // clocks_from_ticks(getticks() - tic), clocks_getunit()); /* Run through the tasks and mark as skip or not. */ - // tic = getticks(); if (engine_marktasks(e)) error("engine_marktasks failed after space_rebuild."); - // message( "engine_marktasks took %.3f %s." , - // clocks_from_ticks(getticks() - tic), clocks_getunit()); /* Print the status of the system */ engine_print(e); + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } /** @@ -1207,45 +1229,37 @@ void engine_prepare(struct engine *e) { int rebuild; - TIMER_TIC + TIMER_TIC; /* Run through the tasks and mark as skip or not. */ - // tic = getticks(); rebuild = (e->forcerebuild || engine_marktasks(e)); -// message( "space_marktasks took %.3f %s." , -// clocks_from_ticks(getticks() - tic), clocks_getunit()); /* Collect the values of rebuild from all nodes. */ #ifdef WITH_MPI - // tic = getticks(); int buff; if (MPI_Allreduce(&rebuild, &buff, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD) != MPI_SUCCESS) error("Failed to aggregate the rebuild flag across nodes."); rebuild = buff; -// message( "rebuild allreduce took %.3f %s." , -// clocks_from_ticks(getticks() - tic), clocks_getunit()); #endif e->tic_step = getticks(); /* Did this not go through? */ if (rebuild) { - // tic = getticks(); engine_rebuild(e); - // message( "engine_rebuild took %.3f %s." , - // clocks_from_ticks(getticks() - tic), clocks_getunit()); } /* Re-rank the tasks every now and then. */ if (e->tasks_age % engine_tasksreweight == 1) { - // tic = getticks(); scheduler_reweight(&e->sched); - // message( "scheduler_reweight took %.3f %s." , - // clocks_from_ticks(getticks() -tic), clocks_getunit()); } e->tasks_age += 1; TIMER_TOC(timer_prepare); + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } /** @@ -1653,6 +1667,7 @@ void engine_makeproxies(struct engine *e) { struct space *s = e->s; struct cell *cells = s->cells; struct proxy *proxies = e->proxies; + ticks tic = getticks(); /* Prepare the proxies and the proxy index. */ if (e->proxy_ind == NULL) @@ -1735,6 +1750,9 @@ void engine_makeproxies(struct engine *e) { } } + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); #else error("SWIFT was not compiled with MPI support."); #endif @@ -1817,24 +1835,55 @@ static bool hyperthreads_present(void) { void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, int nr_queues, int nr_nodes, int nodeID, int policy, - float timeBegin, float timeEnd, float dt_min, float dt_max) { + float timeBegin, float timeEnd, float dt_min, float dt_max, + int verbose) { + + /* Store the values. */ + e->s = s; + e->nr_threads = nr_threads; + e->policy = policy; + e->step = 0; + e->nullstep = 0; + e->nr_nodes = nr_nodes; + e->nodeID = nodeID; + e->proxy_ind = NULL; + e->nr_proxies = 0; + e->forcerebuild = 1; + e->forcerepart = REPART_NONE; + e->links = NULL; + e->nr_links = 0; + e->timeBegin = timeBegin; + e->timeEnd = timeEnd; + e->timeOld = timeBegin; + e->time = timeBegin; + e->ti_old = 0; + e->ti_current = 0; + e->timeStep = 0.; + e->dt_min = dt_min; + e->dt_max = dt_max; + e->file_stats = NULL; + e->verbose = verbose; + e->wallclock_time = 0.f; + engine_rank = nodeID; + + /* Make the space link back to the engine. */ + s->e = e; - int i, k; #if defined(HAVE_SETAFFINITY) - int nr_cores = sysconf(_SC_NPROCESSORS_ONLN); - int j, cpuid[nr_cores]; + const int nr_cores = sysconf(_SC_NPROCESSORS_ONLN); + int cpuid[nr_cores]; cpu_set_t cpuset; if ((policy & engine_policy_cputight) == engine_policy_cputight) { - for (k = 0; k < nr_cores; k++) cpuid[k] = k; + for (int k = 0; k < nr_cores; k++) cpuid[k] = k; } else { /* Get next highest power of 2. */ int maxint = 1; while (maxint < nr_cores) maxint *= 2; cpuid[0] = 0; - k = 1; - for (i = 1; i < maxint; i *= 2) - for (j = maxint / i / 2; j < maxint; j += maxint / i) + int k = 1; + for (int i = 1; i < maxint; i *= 2) + for (int j = maxint / i / 2; j < maxint; j += maxint / i) if (j < nr_cores && j != 0) cpuid[k++] = j; #if defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE) @@ -1842,22 +1891,24 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, if (numa_available() >= 0) { if (nodeID == 0) message("prefer NUMA-local CPUs"); - int home = numa_node_of_cpu(sched_getcpu()), half = nr_cores / 2; - bool done = false, swap_hyperthreads = hyperthreads_present(); + const int home = numa_node_of_cpu(sched_getcpu()); + const int half = nr_cores / 2; + const int swap_hyperthreads = hyperthreads_present(); + bool done = false; if (swap_hyperthreads && nodeID == 0) message("prefer physical cores to hyperthreads"); while (!done) { done = true; - for (i = 1; i < nr_cores; i++) { - int node_a = numa_node_of_cpu(cpuid[i - 1]); - int node_b = numa_node_of_cpu(cpuid[i]); + for (int i = 1; i < nr_cores; i++) { + const int node_a = numa_node_of_cpu(cpuid[i - 1]); + const int node_b = numa_node_of_cpu(cpuid[i]); /* Avoid using local hyperthreads over unused remote physical cores. * Assume two hyperthreads, and that cpuid >= half partitions them. */ - int thread_a = swap_hyperthreads && cpuid[i - 1] >= half; - int thread_b = swap_hyperthreads && cpuid[i] >= half; + const int thread_a = swap_hyperthreads && cpuid[i - 1] >= half; + const int thread_b = swap_hyperthreads && cpuid[i] >= half; bool swap = thread_a > thread_b; if (thread_a == thread_b) @@ -1881,42 +1932,12 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, #else printf("%s engine_init: cpu map is [ ", clocks_get_timeofday()); #endif - for (i = 0; i < nr_cores; i++) printf("%i ", cpuid[i]); + for (int i = 0; i < nr_cores; i++) printf("%i ", cpuid[i]); printf("].\n"); } } #endif - /* Store the values. */ - e->s = s; - e->nr_threads = nr_threads; - e->policy = policy; - e->step = 0; - e->nullstep = 0; - e->nr_nodes = nr_nodes; - e->nodeID = nodeID; - e->proxy_ind = NULL; - e->nr_proxies = 0; - e->forcerebuild = 1; - e->forcerepart = REPART_NONE; - e->links = NULL; - e->nr_links = 0; - e->timeBegin = timeBegin; - e->timeEnd = timeEnd; - e->timeOld = timeBegin; - e->time = timeBegin; - e->ti_old = 0; - e->ti_current = 0; - e->timeStep = 0.; - e->dt_min = dt_min; - e->dt_max = dt_max; - e->file_stats = NULL; - e->wallclock_time = 0.f; - engine_rank = nodeID; - - /* Make the space link back to the engine. */ - s->e = e; - /* Are we doing stuff in parallel? */ if (nr_nodes > 1) { #ifndef WITH_MPI @@ -2023,7 +2044,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, s->nr_queues = nr_queues; /* Create the sorting tasks. */ - for (i = 0; i < e->nr_threads; i++) + for (int i = 0; i < e->nr_threads; i++) scheduler_addtask(&e->sched, task_type_psort, task_subtype_none, i, 0, NULL, NULL, 0); @@ -2033,7 +2054,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, if ((e->runners = (struct runner *)malloc(sizeof(struct runner) * nr_threads)) == NULL) error("Failed to allocate threads array."); - for (k = 0; k < nr_threads; k++) { + for (int k = 0; k < nr_threads; k++) { e->runners[k].id = k; e->runners[k].e = e; e->barrier_running += 1; diff --git a/src/engine.h b/src/engine.h index cd189f6ac64b0809eec7f9db8cba898a6377cf27..faf48fc09197036ad968a565138237e8d3bcd459 100644 --- a/src/engine.h +++ b/src/engine.h @@ -158,6 +158,9 @@ struct engine { struct link *links; int nr_links, size_links; + /* Are we talkative ? */ + int verbose; + #ifdef WITH_MPI /* MPI data type for the particle transfers */ MPI_Datatype part_mpi_type; @@ -169,7 +172,8 @@ struct engine { void engine_barrier(struct engine *e, int tid); void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, int nr_queues, int nr_nodes, int nodeID, int policy, - float timeBegin, float timeEnd, float dt_min, float dt_max); + float timeBegin, float timeEnd, float dt_min, float dt_max, + int verbose); void engine_launch(struct engine *e, int nr_runners, unsigned int mask, unsigned int submask); void engine_prepare(struct engine *e); diff --git a/src/runner.c b/src/runner.c index 9207a4373119fb181e7fb8544a0608761d338480..7eedb6adc72755ba12faed5429edad43d3849451 100644 --- a/src/runner.c +++ b/src/runner.c @@ -1061,7 +1061,7 @@ void *runner_main(void *data) { space_do_parts_sort(); break; case task_type_split_cell: - space_split(e->s, t->ci); + space_do_split(e->s, t->ci); break; case task_type_rewait: scheduler_do_rewait((struct task *)t->ci, (struct task *)t->cj, diff --git a/src/space.c b/src/space.c index d51facd974f0091c5119560fe3aaa19ca8ddde94..cde551ef0a1cf6de969c2712fc8df302141c3d87 100644 --- a/src/space.c +++ b/src/space.c @@ -161,7 +161,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) { float h_max = s->cell_min / kernel_gamma / space_stretch, dmin; int i, j, k, cdim[3], nr_parts = s->nr_parts; struct cell *restrict c; - // ticks tic; + ticks tic = getticks(); /* Run through the parts and get the current h_max. */ // tic = getticks(); @@ -291,6 +291,10 @@ void space_regrid(struct space *s, double cell_max, int verbose) { } s->maxdepth = 0; } + + if (verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } /** @@ -309,7 +313,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { struct part *restrict p; int *ind; double ih[3], dim[3]; - // ticks tic; + ticks tic = getticks(); /* Be verbose about this. */ // message( "re)building space..." ); fflush(stdout); @@ -394,10 +398,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { #endif /* Sort the parts according to their cells. */ - // tic = getticks(); - space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1); - // message( "parts_sort took %.3f %s." , - // clocks_from_ticks(getticks() - tic), clocks_getunit()); + space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose); /* Re-link the gparts. */ for (k = 0; k < nr_parts; k++) @@ -437,10 +438,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* TODO: Here we should exchange the gparts as well! */ /* Sort the parts according to their cells. */ - // tic = getticks(); - gparts_sort(s->gparts, ind, nr_gparts, 0, s->nr_cells - 1); - // message( "gparts_sort took %.3f %s." , - // clocks_from_ticks(getticks() - tic), clocks_getunit()); + space_gparts_sort(s->gparts, ind, nr_gparts, 0, s->nr_cells - 1); /* Re-link the parts. */ for (k = 0; k < nr_gparts; k++) @@ -468,15 +466,32 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* At this point, we have the upper-level cells, old or new. Now make sure that the parts in each cell are ok. */ - // tic = getticks(); - // for (k = 0; k < s->nr_cells; k++) space_split(s, &cells[k]); - for (k = 0; k < s->nr_cells; k++) + space_split(s, cells, verbose); + + if (verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + +/** + * @brief Split particles between cells of a hierarchy + * + * @param s The #space. + * @param cells The cell hierarchy + * @param verbose Are we talkative ? + */ +void space_split(struct space *s, struct cell *cells, int verbose) { + + ticks tic = getticks(); + + for (int k = 0; k < s->nr_cells; k++) scheduler_addtask(&s->e->sched, task_type_split_cell, task_subtype_none, k, 0, &cells[k], NULL, 0); engine_launch(s->e, s->e->nr_threads, 1 << task_type_split_cell, 0); - // message( "space_split took %.3f %s." , - // clocks_from_ticks(getticks() - tic), clocks_getunit()); + if (verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } /** @@ -488,10 +503,15 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { * @param N The number of parts * @param min Lowest index. * @param max highest index. + * @param verbose Are we talkative ? */ -void space_parts_sort(struct space *s, int *ind, int N, int min, int max) { - // Populate the global parallel_sort structure with the input data. +void space_parts_sort(struct space *s, int *ind, int N, int min, int max, + int verbose) { + + ticks tic = getticks(); + + /*Populate the global parallel_sort structure with the input data */ space_sort_struct.parts = s->parts; space_sort_struct.xparts = s->xparts; space_sort_struct.ind = ind; @@ -502,7 +522,7 @@ void space_parts_sort(struct space *s, int *ind, int N, int min, int max) { for (int i = 0; i < space_sort_struct.stack_size; i++) space_sort_struct.stack[i].ready = 0; - // Add the first interval. + /* Add the first interval. */ space_sort_struct.stack[0].i = 0; space_sort_struct.stack[0].j = N - 1; space_sort_struct.stack[0].min = min; @@ -512,7 +532,7 @@ void space_parts_sort(struct space *s, int *ind, int N, int min, int max) { space_sort_struct.last = 1; space_sort_struct.waiting = 1; - // Launch the sorting tasks. + /* Launch the sorting tasks. */ engine_launch(s->e, s->e->nr_threads, (1 << task_type_psort), 0); /* Verify space_sort_struct. */ @@ -523,8 +543,12 @@ void space_parts_sort(struct space *s, int *ind, int N, int min, int max) { ind[i], min, max); message("Sorting succeeded."); */ - // Clean up. + /* Clean up. */ free(space_sort_struct.stack); + + if (verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } void space_do_parts_sort() { @@ -652,7 +676,8 @@ void space_do_parts_sort() { } /* main loop. */ } -void gparts_sort(struct gpart *gparts, int *ind, int N, int min, int max) { +void space_gparts_sort(struct gpart *gparts, int *ind, int N, int min, + int max) { struct qstack { volatile int i, j, min, max; @@ -974,7 +999,7 @@ void space_map_cells_pre(struct space *s, int full, * @param c The #cell under consideration. */ -void space_split(struct space *s, struct cell *c) { +void space_do_split(struct space *s, struct cell *c) { int k, count = c->count, gcount = c->gcount, maxdepth = 0; float h, h_max = 0.0f; @@ -1025,7 +1050,7 @@ void space_split(struct space *s, struct cell *c) { space_recycle(s, c->progeny[k]); c->progeny[k] = NULL; } else { - space_split(s, c->progeny[k]); + space_do_split(s, c->progeny[k]); h_max = fmaxf(h_max, c->progeny[k]->h_max); ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); diff --git a/src/space.h b/src/space.h index 91d3d9a4d34aefcdcc52e774e6feb492577275b6..8069721a71b9f47664290b6e34eef726efdb47c8 100644 --- a/src/space.h +++ b/src/space.h @@ -121,8 +121,9 @@ struct parallel_sort { extern struct parallel_sort space_sort_struct; /* function prototypes. */ -void space_parts_sort(struct space *s, int *ind, int N, int min, int max); -void gparts_sort(struct gpart *gparts, int *ind, int N, int min, int max); +void space_parts_sort(struct space *s, int *ind, int N, int min, int max, + int verbose); +void space_gparts_sort(struct gpart *gparts, int *ind, int N, int min, int max); struct cell *space_getcell(struct space *s); int space_getsid(struct space *s, struct cell **ci, struct cell **cj, double *shift); @@ -140,7 +141,8 @@ void space_map_cells_post(struct space *s, int full, void (*fun)(struct cell *c, void *data), void *data); void space_rebuild(struct space *s, double h_max, int verbose); void space_recycle(struct space *s, struct cell *c); -void space_split(struct space *s, struct cell *c); +void space_split(struct space *s, struct cell *cells, int verbose); +void space_do_split(struct space *s, struct cell *c); void space_do_parts_sort(); void space_link_cleanup(struct space *s); #endif /* SWIFT_SPACE_H */