diff --git a/.gitignore b/.gitignore index 123f413aea9986f353bfa3310b113b1219233fcb..4f81f60dd7d2fd85074759e4fcc4f3e0ee21b187 100644 --- a/.gitignore +++ b/.gitignore @@ -81,9 +81,12 @@ tests/testMatrixInversion tests/testCbrt theory/latex/swift.pdf -theory/SPH/kernel/kernels.pdf -theory/SPH/kernel/kernel_derivatives.pdf -theory/SPH/kernel/kernel_definitions.pdf +theory/SPH/Kernels/kernels.pdf +theory/SPH/Kernels/kernel_derivatives.pdf +theory/SPH/Kernels/kernel_definitions.pdf +theory/SPH/Flavours/sph_flavours.pdf +theory/SPH/EoS/eos.pdf +theory/SPH/*.pdf theory/paper_pasc/pasc_paper.pdf m4/libtool.m4 diff --git a/INSTALL.swift b/INSTALL.swift index a07d5b24c2d8c75778e2a24d90f77724459ab61f..8e1635b0715426512503fd9dcde32f59a7ad1b62 100644 --- a/INSTALL.swift +++ b/INSTALL.swift @@ -83,39 +83,65 @@ SWIFT depends on a number of third party libraries that should be available before you can build it. -HDF5: a HDF5 library (v. 1.8.x or higher) is required to read and write -particle data. One of the commands "h5cc" or "h5pcc" should be -available. If "h5pcc" is located them a parallel HDF5 built for the version -of MPI located should be provided. If the command is not available then it -can be located using the "--with-hfd5" configure option. The value should -be the full path to the "h5cc" or "h5pcc" commands. + - HDF5: a HDF5 library (v. 1.8.x or higher) is required to read and + write particle data. One of the commands "h5cc" or "h5pcc" + should be available. If "h5pcc" is located them a parallel + HDF5 built for the version of MPI located should be + provided. If the command is not available then it can be + located using the "--with-hfd5" configure option. The value + should be the full path to the "h5cc" or "h5pcc" commands. -MPI: an optional MPI library that fully supports MPI_THREAD_MULTIPLE. -Before running configure the "mpirun" command should be available in the -shell. If your command isn't called "mpirun" then define the "MPIRUN" -environment variable, either in the shell or when running configure. + - MPI: to run on more than one node an MPI library that fully + supports MPI_THREAD_MULTIPLE. Before running configure the + "mpirun" command should be available in the shell. If your + command isn't called "mpirun" then define the "MPIRUN" + environment variable, either in the shell or when running + configure. -The MPI compiler can be controlled using the MPICC variable, much like -the CC one. Use this when your MPI compiler has a none-standard name. + The MPI compiler can be controlled using the MPICC variable, + much like the CC one. Use this when your MPI compiler has a + none-standard name. -METIS: a build of the METIS library can be optionally used to optimize the -load between MPI nodes (requires an MPI library). This should be found in -the standard installation directories, or pointed at using the -"--with-metis" configuration option. In this case the top-level -installation directory of the METIS build should be given. Note to use -METIS you should at least supply "--with-metis". + - libtool: The build system relies on libtool. -libNUMA: a build of the NUMA library can be used to pin the threads to -the physical core of the machine SWIFT is running on. This is not always -necessary as the OS scheduler may do a good job at distributing the threads -among the different cores on each computing node. + Optional Dependencies + ===================== -DOXYGEN: the doxygen library is required to create the SWIFT API -documentation. + - METIS: a build of the METIS library can be optionally used to + optimize the load between MPI nodes (requires an MPI + library). This should be found in the standard installation + directories, or pointed at using the "--with-metis" + configuration option. In this case the top-level + installation directory of the METIS build should be + given. Note to use METIS you should at least supply + "--with-metis". + + + - libNUMA: a build of the NUMA library can be used to pin the threads + to the physical core of the machine SWIFT is running + on. This is not always necessary as the OS scheduler may + do a good job at distributing the threads among the + different cores on each computing node. + + + - TCMalloc: a build of the TCMalloc library (part of gperftools) can + be used to obtain faster allocations than the standard C + malloc function part of glibc. The option "-with-tcmalloc" + should be passed to the configuration script to use it. + + + - gperftools: a build of gperftools can be used to obtain good + profiling of the code. The option "-with-profiler" + needs to be passed to the configuration script to use + it. + + + - DOXYGEN: the doxygen library is required to create the SWIFT API + documentation. diff --git a/configure.ac b/configure.ac index 977a86a03d5e4c82139dfd56d3f1ee7afe3ea6e8..f6e2ea0db8e9829b719a0190eea7a1d891bfbbd6 100644 --- a/configure.ac +++ b/configure.ac @@ -355,7 +355,7 @@ fi AC_SUBST([TCMALLOC_LIBS]) AM_CONDITIONAL([HAVETCMALLOC],[test -n "$TCMALLOC_LIBS"]) -# Check for -lprofiler usually part of the gpreftools along with tcmalloc. +# Check for -lprofiler usually part of the gperftools along with tcmalloc. have_profiler="no" AC_ARG_WITH([profiler], [AS_HELP_STRING([--with-profiler], diff --git a/examples/EAGLE_12/eagle_12.yml b/examples/EAGLE_12/eagle_12.yml index 80714d87f4afa7d7e4d41ce7bf56faed856208ef..cd7db4f86f24f4d78db29a3dfe34bc536af6ed7d 100644 --- a/examples/EAGLE_12/eagle_12.yml +++ b/examples/EAGLE_12/eagle_12.yml @@ -27,7 +27,7 @@ Statistics: SPH: resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). delta_neighbours: 0.1 # The tolerance for the targetted number of neighbours. - max_smoothing_length: 0.1 # Maximal smoothing length allowed (in internal units). + max_smoothing_length: 1.0 # Maximal smoothing length allowed (in internal units). CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. # Parameters related to the initial conditions diff --git a/examples/ExternalPointMass/run.sh b/examples/ExternalPointMass/run.sh index 9f90ca395a5c8cf83e67928b3fdbd4d8529ac254..4ac513f09cb8ac8dcefc256a68478e215b8bc320 100755 --- a/examples/ExternalPointMass/run.sh +++ b/examples/ExternalPointMass/run.sh @@ -7,4 +7,4 @@ then python makeIC.py 10000 fi -../swift -g -t 2 externalPointMass.yml 2>&1 | tee output.log +../swift -g -t 1 externalPointMass.yml 2>&1 | tee output.log diff --git a/examples/IsothermalPotential/GravityOnly/README b/examples/IsothermalPotential/GravityOnly/README index 90fb1872aa2301cab133b8a20a8cd8de724d4553..1621aaa8ab90420dcf69b5b9caea394d619b62cf 100644 --- a/examples/IsothermalPotential/GravityOnly/README +++ b/examples/IsothermalPotential/GravityOnly/README @@ -1,6 +1,3 @@ -; -; this probelm generates a set of gravity particles in an isothermal -; potential and follows their orbits. Tests verify consdevation of -; energy and angular momentum -; -; +This example generates a set of particles in an isothermal potential +and follows their orbits. IDL scripts verify the conservation of +energy and angular momentum. diff --git a/examples/main.c b/examples/main.c index 775cadf85a600d0478ddae64d1947d124cae0512..06efe75b32f790b10d1125fb5010bc3d1b4f9e24 100644 --- a/examples/main.c +++ b/examples/main.c @@ -120,6 +120,10 @@ int main(int argc, char *argv[]) { error("MPI_Comm_size failed with error %i.", res); if ((res = MPI_Comm_rank(MPI_COMM_WORLD, &myrank)) != MPI_SUCCESS) error("Call to MPI_Comm_rank failed with error %i.", res); + + /* Make sure messages are stamped with the correct rank. */ + engine_rank = myrank; + if ((res = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN)) != MPI_SUCCESS) error("Call to MPI_Comm_set_errhandler failed with error %i.", res); @@ -131,6 +135,7 @@ int main(int argc, char *argv[]) { message("WARNING: you should use the non-MPI version of this program."); } fflush(stdout); + #endif /* Let's pin the main thread */ @@ -262,12 +267,21 @@ int main(int argc, char *argv[]) { message( "Executing a dry run. No i/o or time integration will be performed."); - /* Report CPU frequency. */ + /* Report CPU frequency.*/ cpufreq = clocks_get_cpufreq(); if (myrank == 0) { message("CPU frequency used for tick conversion: %llu Hz", cpufreq); } +/* Report host name(s). */ +#ifdef WITH_MPI + if (myrank == 0 || verbose > 1) { + message("Rank %d running on: %s", myrank, hostname()); + } +#else + message("Running on: %s", hostname()); +#endif + /* Do we choke on FP-exceptions ? */ if (with_fp_exceptions) { feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); @@ -563,8 +577,8 @@ int main(int argc, char *argv[]) { fprintf(file_thread, " %03i 0 0 0 0 %lli %lli 0 0 0 0 %lli\n", myrank, e.tic_step, e.toc_step, cpufreq); int count = 0; - for (int l = 0; l < e.sched.nr_tasks; l++) - if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit) { + for (int l = 0; l < e.sched.nr_tasks; l++) { + if (!e.sched.tasks[l].implicit && e.sched.tasks[l].toc != 0) { fprintf( file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %i\n", myrank, e.sched.tasks[l].rid, e.sched.tasks[l].type, @@ -579,11 +593,10 @@ int main(int argc, char *argv[]) { (e.sched.tasks[l].cj != NULL) ? e.sched.tasks[l].cj->gcount : 0, e.sched.tasks[l].flags); - fflush(stdout); - count++; } - message("rank %d counted %d tasks", myrank, count); - + fflush(stdout); + count++; + } fclose(file_thread); } @@ -599,8 +612,8 @@ int main(int argc, char *argv[]) { /* Add some information to help with the plots */ fprintf(file_thread, " %i %i %i %i %lli %lli %i %i %i %lli\n", -2, -1, -1, 1, e.tic_step, e.toc_step, 0, 0, 0, cpufreq); - for (int l = 0; l < e.sched.nr_tasks; l++) - if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit) + for (int l = 0; l < e.sched.nr_tasks; l++) { + if (!e.sched.tasks[l].implicit && e.sched.tasks[l].toc != 0) { fprintf( file_thread, " %i %i %i %i %lli %lli %i %i %i %i\n", e.sched.tasks[l].rid, e.sched.tasks[l].type, @@ -610,6 +623,8 @@ int main(int argc, char *argv[]) { (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->count, (e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->gcount, (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->gcount); + } + } fclose(file_thread); #endif } diff --git a/m4/ax_cc_maxopt.m4 b/m4/ax_cc_maxopt.m4 index d2a0937232e0e8bf9c3e7e79c20267bfa1d75880..d5b87c903c5af46b767d7b49e40963345c8128af 100644 --- a/m4/ax_cc_maxopt.m4 +++ b/m4/ax_cc_maxopt.m4 @@ -128,6 +128,8 @@ if test "$ac_test_CFLAGS" != "set"; then *3?6[[cf]]?:*:*:*|*4?6[[56]]?:*:*:*|*4?6[[ef]]?:*:*:*) icc_flags="-xCORE-AVX2 -xCORE-AVX-I -xAVX -SSE4.2 -xS -xT -xB -xK" ;; *000?f[[346]]?:*:*:*|?f[[346]]?:*:*:*|f[[346]]?:*:*:*) icc_flags="-xSSE3 -xP -xO -xN -xW -xK" ;; *00??f??:*:*:*|??f??:*:*:*|?f??:*:*:*|f??:*:*:*) icc_flags="-xSSE2 -xN -xW -xK" ;; + *5?6E?:*:*:*) icc_flags="-xCORE-AVX512" ;; + *5?67?:*:*:*) icc_flags="-xMIC-AVX512" ;; esac ;; esac ;; esac diff --git a/src/Makefile.am b/src/Makefile.am index f57246bb588914280b7812478ad4b23a3ecd1f9c..34e6e0ea2617474bb3c29482c1779a7af81de2e0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -44,7 +44,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h \ physical_constants.h physical_constants_cgs.h potential.h version.h \ hydro_properties.h riemann.h threadpool.h cooling.h cooling_struct.h sourceterms.h \ - sourceterms_struct.h cbrt.h + sourceterms_struct.h statistics.h cbrt.h # Common source files @@ -53,7 +53,8 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ units.c common_io.c single_io.c multipole.c version.c map.c \ kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \ physical_constants.c potential.c hydro_properties.c \ - runner_doiact_fft.c threadpool.c cooling.c sourceterms.c + runner_doiact_fft.c threadpool.c cooling.c sourceterms.c \ + statistics.c # Include files for distribution, not installation. nobase_noinst_HEADERS = align.h approx_math.h atomic.h cbrt.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ diff --git a/src/cbrt.h b/src/cbrt.h index 99ec97503d02c2cc5bd3cbd4ae191a0ab61a9f07..2686caa90e7a18dae571e7b8381adbaa8cd63e60 100644 --- a/src/cbrt.h +++ b/src/cbrt.h @@ -68,11 +68,9 @@ __attribute__((always_inline)) inline float icbrtf(float x_in) { exponent_new = -exponent_new / 3; const int exponent_rem = exponent + 3 * exponent_new; cast.as_uint = (exponent_new + 127) << 23; - float exponent_scale = cast.as_float; - exponent_scale *= - exponent_rem > 0 - ? (exponent_rem > 1 ? 5.61231024154687e-01f : 7.07106781186548e-01f) - : 8.90898718140339e-01f; + static const float scale[3] = {8.90898718140339e-01f, 7.07106781186548e-01f, + 5.61231024154687e-01f}; + const float exponent_scale = cast.as_float * scale[exponent_rem]; // Scale the result and set the correct sign. res = copysignf(res * exponent_scale, x_in); diff --git a/src/cell.c b/src/cell.c index 8728f32cb04f2f1de389386b632371a27d523bb9..a54906d667dd312fd35a7584047113a45183b46c 100644 --- a/src/cell.c +++ b/src/cell.c @@ -52,6 +52,7 @@ #include "gravity.h" #include "hydro.h" #include "hydro_properties.h" +#include "scheduler.h" #include "space.h" #include "timers.h" @@ -763,6 +764,9 @@ void cell_clean_links(struct cell *c, void *data) { c->force = NULL; c->nr_force = 0; + + c->grav = NULL; + c->nr_grav = 0; } /** @@ -823,6 +827,7 @@ void cell_check_multipole(struct cell *c, void *data) { error("Multipole CoM are different (%12.15e vs. %12.15e", ma.CoM[k], mb.CoM[k]); +#if const_gravity_multipole_order >= 2 if (fabsf(ma.I_xx - mb.I_xx) / fabsf(ma.I_xx + mb.I_xx) > 1e-5 && ma.I_xx > 1e-9) error("Multipole I_xx are different (%12.15e vs. %12.15e)", ma.I_xx, @@ -847,6 +852,7 @@ void cell_check_multipole(struct cell *c, void *data) { ma.I_yz > 1e-9) error("Multipole I_yz are different (%12.15e vs. %12.15e)", ma.I_yz, mb.I_yz); +#endif } } @@ -892,3 +898,137 @@ int cell_is_drift_needed(struct cell *c, int ti_current) { /* No neighbouring cell has active particles. Drift not necessary */ return 0; } + +/** + * @brief Un-skips all the tasks associated with a given cell and checks + * if the space needs to be rebuilt. + * + * @param c the #cell. + * @param s the #scheduler. + * + * @return 1 If the space needs rebuilding. 0 otherwise. + */ +int cell_unskip_tasks(struct cell *c, struct scheduler *s) { + + /* Un-skip the density tasks involved with this cell. */ + for (struct link *l = c->density; l != NULL; l = l->next) { + struct task *t = l->t; + const struct cell *ci = t->ci; + const struct cell *cj = t->cj; + scheduler_activate(s, t); + + /* Set the correct sorting flags */ + if (t->type == task_type_pair) { + if (!(ci->sorted & (1 << t->flags))) { + atomic_or(&ci->sorts->flags, (1 << t->flags)); + scheduler_activate(s, ci->sorts); + } + if (!(cj->sorted & (1 << t->flags))) { + atomic_or(&cj->sorts->flags, (1 << t->flags)); + scheduler_activate(s, cj->sorts); + } + } + + /* Check whether there was too much particle motion */ + if (t->type == task_type_pair || t->type == task_type_sub_pair) { + if (t->tight && + (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin || + ci->dx_max > space_maxreldx * ci->h_max || + cj->dx_max > space_maxreldx * cj->h_max)) + return 1; + +#ifdef WITH_MPI + /* Activate the send/recv flags. */ + if (ci->nodeID != engine_rank) { + + /* Activate the tasks to recv foreign cell ci's data. */ + scheduler_activate(s, ci->recv_xv); + scheduler_activate(s, ci->recv_rho); + scheduler_activate(s, ci->recv_ti); + + /* Look for the local cell cj's send tasks. */ + struct link *l = NULL; + for (l = cj->send_xv; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_xv task."); + scheduler_activate(s, l->t); + + for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_rho task."); + scheduler_activate(s, l->t); + + for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_ti task."); + scheduler_activate(s, l->t); + + } else if (cj->nodeID != engine_rank) { + + /* Activate the tasks to recv foreign cell cj's data. */ + scheduler_activate(s, cj->recv_xv); + scheduler_activate(s, cj->recv_rho); + scheduler_activate(s, cj->recv_ti); + /* Look for the local cell ci's send tasks. */ + struct link *l = NULL; + for (l = ci->send_xv; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_xv task."); + scheduler_activate(s, l->t); + + for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_rho task."); + scheduler_activate(s, l->t); + + for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_ti task."); + scheduler_activate(s, l->t); + } +#endif + } + } + + /* Unskip all the other task types. */ + for (struct link *l = c->gradient; l != NULL; l = l->next) + scheduler_activate(s, l->t); + for (struct link *l = c->force; l != NULL; l = l->next) + scheduler_activate(s, l->t); + for (struct link *l = c->grav; l != NULL; l = l->next) + scheduler_activate(s, l->t); + if (c->extra_ghost != NULL) scheduler_activate(s, c->extra_ghost); + if (c->ghost != NULL) scheduler_activate(s, c->ghost); + if (c->init != NULL) scheduler_activate(s, c->init); + if (c->kick != NULL) scheduler_activate(s, c->kick); + if (c->cooling != NULL) scheduler_activate(s, c->cooling); + if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms); + + return 0; +} + +/** + * @brief Set the super-cell pointers for all cells in a hierarchy. + * + * @param c The top-level #cell to play with. + * @param super Pointer to the deepest cell with tasks in this part of the tree. + */ +void cell_set_super(struct cell *c, struct cell *super) { + + /* Are we in a cell with some kind of self/pair task ? */ + if (super == NULL && c->nr_tasks > 0) super = c; + + /* Set the super-cell */ + c->super = super; + + /* Recurse */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) cell_set_super(c->progeny[k], super); +} diff --git a/src/cell.h b/src/cell.h index f87420c86c5765f42fe04e8dee95558edff90cf9..ae22de0cb268bb5240c480b2eed9435ec218adf8 100644 --- a/src/cell.h +++ b/src/cell.h @@ -38,6 +38,7 @@ /* Avoid cyclic inclusions */ struct space; +struct scheduler; /* Max tag size set to 2^29 to take into account some MPI implementations * that use 2^31 as the upper bound on MPI tags and the fact that @@ -130,14 +131,9 @@ struct cell { /* Parent cell. */ struct cell *parent; - /* Super cell, i.e. the highest-level supercell that has hydro interactions. - */ + /* Super cell, i.e. the highest-level supercell that has pair/self tasks */ struct cell *super; - /* Super cell, i.e. the highest-level supercell that has gravity interactions. - */ - struct cell *gsuper; - /* The task computing this cell's sorts. */ struct task *sorts; int sortsize; @@ -162,9 +158,6 @@ struct cell { /* Tasks for gravity tree. */ struct task *grav_up, *grav_down; - /* Task for external gravity */ - struct task *grav_external; - /* Task for cooling */ struct task *cooling; @@ -183,12 +176,6 @@ struct cell { /* ID of the previous owner, e.g. runner. */ int owner; - /* Momentum of particles in cell. */ - double mom[3], ang_mom[3]; - - /* Mass, potential, internal and kinetic energy of particles in this cell. */ - double mass, e_pot, e_int, e_kin, e_rad, entropy; - /* Number of particles updated in this cell. */ int updated, g_updated; @@ -240,5 +227,7 @@ int cell_are_neighbours(const struct cell *restrict ci, void cell_check_multipole(struct cell *c, void *data); void cell_clean(struct cell *c); int cell_is_drift_needed(struct cell *c, int ti_current); +int cell_unskip_tasks(struct cell *c, struct scheduler *s); +void cell_set_super(struct cell *c, struct cell *super); #endif /* SWIFT_CELL_H */ diff --git a/src/const.h b/src/const.h index ef26525b7865bac4c0164f7678398ca307fc1d67..11ee65bfdfabbf43962c721b8599928c25fef7cc 100644 --- a/src/const.h +++ b/src/const.h @@ -85,7 +85,7 @@ #define SLOPE_LIMITER_CELL_WIDE /* Self gravity stuff. */ -#define const_gravity_multipole_order 2 +#define const_gravity_multipole_order 1 #define const_gravity_a_smooth 1.25f #define const_gravity_r_cut 4.5f #define const_gravity_eta 0.025f diff --git a/src/engine.c b/src/engine.c index 8de8bfe7116367502142df7d4bb0776c8bd2be4b..6f360907a35d4d8c37e0f9ee33bbefa7b3a1978f 100644 --- a/src/engine.c +++ b/src/engine.c @@ -63,6 +63,7 @@ #include "runner.h" #include "serial_io.h" #include "single_io.h" +#include "statistics.h" #include "timers.h" #include "tools.h" #include "units.h" @@ -111,119 +112,57 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) { res->next = atomic_swap(l, res); } -/** - * @brief Generate the gravity hierarchical tasks for a hierarchy of cells - - * i.e. all the O(Npart) tasks. - * - * Tasks are only created here. The dependencies will be added later on. - * - * @param e The #engine. - * @param c The #cell. - * @param gsuper The gsuper #cell. - */ -void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c, - struct cell *gsuper) { - - struct scheduler *s = &e->sched; - const int is_with_external_gravity = - (e->policy & engine_policy_external_gravity) == - engine_policy_external_gravity; - const int is_fixdt = (e->policy & engine_policy_fixdt) == engine_policy_fixdt; - - /* Is this the super-cell? */ - if (gsuper == NULL && (c->grav != NULL || (!c->split && c->gcount > 0))) { - - /* This is the super cell, i.e. the first with gravity tasks attached. */ - gsuper = c; - - /* Local tasks only... */ - if (c->nodeID == e->nodeID) { - - /* Add the init task. */ - if (c->init == NULL) - c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0, - c, NULL, 0); - - /* Add the kick task that matches the policy. */ - if (is_fixdt) { - if (c->kick == NULL) - c->kick = scheduler_addtask(s, task_type_kick_fixdt, - task_subtype_none, 0, 0, c, NULL, 0); - } else { - if (c->kick == NULL) - c->kick = scheduler_addtask(s, task_type_kick, task_subtype_none, 0, - 0, c, NULL, 0); - } - - if (is_with_external_gravity) - c->grav_external = scheduler_addtask( - s, task_type_grav_external, task_subtype_none, 0, 0, c, NULL, 0); - } - } - - /* Set the super-cell. */ - c->gsuper = gsuper; - - /* Recurse. */ - if (c->split) - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) - engine_make_gravity_hierarchical_tasks(e, c->progeny[k], gsuper); -} - /** * @brief Generate the hydro hierarchical tasks for a hierarchy of cells - * i.e. all the O(Npart) tasks. * * Tasks are only created here. The dependencies will be added later on. * + * Note that there is no need to recurse below the super-cell. + * * @param e The #engine. * @param c The #cell. - * @param super The super #cell. */ -void engine_make_hydro_hierarchical_tasks(struct engine *e, struct cell *c, - struct cell *super) { +void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { struct scheduler *s = &e->sched; - const int is_fixdt = (e->policy & engine_policy_fixdt) == engine_policy_fixdt; - const int is_with_cooling = - (e->policy & engine_policy_cooling) == engine_policy_cooling; - const int is_with_sourceterms = - (e->policy & engine_policy_sourceterms) == engine_policy_sourceterms; - - /* Is this the super-cell? */ - if (super == NULL && (c->density != NULL || (c->count > 0 && !c->split))) { + const int is_fixdt = (e->policy & engine_policy_fixdt); + const int is_hydro = (e->policy & engine_policy_hydro); + const int is_with_cooling = (e->policy & engine_policy_cooling); + const int is_with_sourceterms = (e->policy & engine_policy_sourceterms); - /* This is the super cell, i.e. the first with density tasks attached. */ - super = c; + /* Are we in a super-cell ? */ + if (c->super == c) { /* Local tasks only... */ if (c->nodeID == e->nodeID) { /* Add the init task. */ - if (c->init == NULL) - c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0, - c, NULL, 0); + c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0, c, + NULL, 0); /* Add the kick task that matches the policy. */ if (is_fixdt) { - if (c->kick == NULL) - c->kick = scheduler_addtask(s, task_type_kick_fixdt, - task_subtype_none, 0, 0, c, NULL, 0); + c->kick = scheduler_addtask(s, task_type_kick_fixdt, task_subtype_none, + 0, 0, c, NULL, 0); } else { - if (c->kick == NULL) - c->kick = scheduler_addtask(s, task_type_kick, task_subtype_none, 0, - 0, c, NULL, 0); + c->kick = scheduler_addtask(s, task_type_kick, task_subtype_none, 0, 0, + c, NULL, 0); } /* Generate the ghost task. */ - c->ghost = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, 0, - c, NULL, 0); + if (is_hydro) + c->ghost = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, + 0, c, NULL, 0); + #ifdef EXTRA_HYDRO_LOOP /* Generate the extra ghost task. */ - c->extra_ghost = scheduler_addtask(s, task_type_extra_ghost, - task_subtype_none, 0, 0, c, NULL, 0); + if (is_hydro) + c->extra_ghost = scheduler_addtask(s, task_type_extra_ghost, + task_subtype_none, 0, 0, c, NULL, 0); #endif + + /* Cooling task */ if (is_with_cooling) c->cooling = scheduler_addtask(s, task_type_cooling, task_subtype_none, 0, 0, c, NULL, 0); @@ -232,16 +171,19 @@ void engine_make_hydro_hierarchical_tasks(struct engine *e, struct cell *c, c->sourceterms = scheduler_addtask(s, task_type_sourceterms, task_subtype_none, 0, 0, c, NULL, 0); } - } - /* Set the super-cell. */ - c->super = super; + } else { /* We are above the super-cell so need to go deeper */ - /* Recurse. */ - if (c->split) - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) - engine_make_hydro_hierarchical_tasks(e, c->progeny[k], super); +#ifdef SWIFT_DEBUG_CHECKS + if (c->super != NULL) error("Incorrectly set super pointer"); +#endif + + /* Recurse. */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_make_hierarchical_tasks(e, c->progeny[k]); + } } /** @@ -1289,6 +1231,30 @@ void engine_make_gravity_tasks(struct engine *e) { } } +void engine_make_external_gravity_tasks(struct engine *e) { + + struct space *s = e->s; + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + struct cell *cells = s->cells_top; + const int nr_cells = s->nr_cells; + + for (int cid = 0; cid < nr_cells; ++cid) { + + struct cell *ci = &cells[cid]; + + /* Skip cells without gravity particles */ + if (ci->gcount == 0) continue; + + /* Is that neighbour local ? */ + if (ci->nodeID != nodeID) continue; + + /* If the cell is local, build a self-interaction */ + scheduler_addtask(sched, task_type_self, task_subtype_external_grav, 0, 0, + ci, NULL, 0); + } +} + /** * @brief Constructs the top-level pair tasks for the first hydro loop over * neighbours @@ -1364,59 +1330,101 @@ void engine_make_hydroloop_tasks(struct engine *e) { /** * @brief Counts the tasks associated with one cell and constructs the links * - * For each hydrodynamic task, construct the links with the corresponding cell. - * Similarly, construct the dependencies for all the sorting tasks. + * For each hydrodynamic and gravity task, construct the links with + * the corresponding cell. Similarly, construct the dependencies for + * all the sorting tasks. * * @param e The #engine. */ void engine_count_and_link_tasks(struct engine *e) { - struct scheduler *sched = &e->sched; - - for (int ind = 0; ind < sched->nr_tasks; ind++) { + struct scheduler *const sched = &e->sched; + const int nr_tasks = sched->nr_tasks; - struct task *t = &sched->tasks[ind]; + for (int ind = 0; ind < nr_tasks; ind++) { - if (t->skip) continue; + struct task *const t = &sched->tasks[ind]; + struct cell *const ci = t->ci; + struct cell *const cj = t->cj; /* Link sort tasks together. */ - if (t->type == task_type_sort && t->ci->split) + if (t->type == task_type_sort && ci->split) for (int j = 0; j < 8; j++) - if (t->ci->progeny[j] != NULL && t->ci->progeny[j]->sorts != NULL) { - t->ci->progeny[j]->sorts->skip = 0; - scheduler_addunlock(sched, t->ci->progeny[j]->sorts, t); + if (ci->progeny[j] != NULL && ci->progeny[j]->sorts != NULL) { + scheduler_addunlock(sched, ci->progeny[j]->sorts, t); } - /* Link density tasks to cells. */ + /* Link self tasks to cells. */ if (t->type == task_type_self) { - atomic_inc(&t->ci->nr_tasks); + atomic_inc(&ci->nr_tasks); if (t->subtype == task_subtype_density) { - engine_addlink(e, &t->ci->density, t); - atomic_inc(&t->ci->nr_density); + engine_addlink(e, &ci->density, t); + atomic_inc(&ci->nr_density); } + if (t->subtype == task_subtype_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + } + if (t->subtype == task_subtype_external_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + } + + /* Link pair tasks to cells. */ } else if (t->type == task_type_pair) { - atomic_inc(&t->ci->nr_tasks); - atomic_inc(&t->cj->nr_tasks); + atomic_inc(&ci->nr_tasks); + atomic_inc(&cj->nr_tasks); if (t->subtype == task_subtype_density) { - engine_addlink(e, &t->ci->density, t); - atomic_inc(&t->ci->nr_density); - engine_addlink(e, &t->cj->density, t); - atomic_inc(&t->cj->nr_density); + engine_addlink(e, &ci->density, t); + atomic_inc(&ci->nr_density); + engine_addlink(e, &cj->density, t); + atomic_inc(&cj->nr_density); } + if (t->subtype == task_subtype_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + engine_addlink(e, &cj->grav, t); + atomic_inc(&cj->nr_grav); + } + + /* Link sub-self tasks to cells. */ } else if (t->type == task_type_sub_self) { - atomic_inc(&t->ci->nr_tasks); + atomic_inc(&ci->nr_tasks); if (t->subtype == task_subtype_density) { - engine_addlink(e, &t->ci->density, t); - atomic_inc(&t->ci->nr_density); + engine_addlink(e, &ci->density, t); + atomic_inc(&ci->nr_density); + } + if (t->subtype == task_subtype_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + } + if (t->subtype == task_subtype_external_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); } + + /* Link sub-pair tasks to cells. */ } else if (t->type == task_type_sub_pair) { - atomic_inc(&t->ci->nr_tasks); - atomic_inc(&t->cj->nr_tasks); + atomic_inc(&ci->nr_tasks); + atomic_inc(&cj->nr_tasks); if (t->subtype == task_subtype_density) { - engine_addlink(e, &t->ci->density, t); - atomic_inc(&t->ci->nr_density); - engine_addlink(e, &t->cj->density, t); - atomic_inc(&t->cj->nr_density); + engine_addlink(e, &ci->density, t); + atomic_inc(&ci->nr_density); + engine_addlink(e, &cj->density, t); + atomic_inc(&cj->nr_density); + } + if (t->subtype == task_subtype_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + engine_addlink(e, &cj->grav, t); + atomic_inc(&cj->nr_grav); + } + if (t->subtype == task_subtype_external_grav) { + error("Found a sub-pair/external-gravity task..."); + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + engine_addlink(e, &cj->grav, t); + atomic_inc(&cj->nr_grav); } } } @@ -1434,11 +1442,27 @@ static inline void engine_make_gravity_dependencies(struct scheduler *sched, struct cell *c) { /* init --> gravity --> kick */ - scheduler_addunlock(sched, c->gsuper->init, gravity); - scheduler_addunlock(sched, gravity, c->gsuper->kick); + scheduler_addunlock(sched, c->super->init, gravity); + scheduler_addunlock(sched, gravity, c->super->kick); /* grav_up --> gravity ( --> kick) */ - scheduler_addunlock(sched, c->gsuper->grav_up, gravity); + scheduler_addunlock(sched, c->super->grav_up, gravity); +} + +/** + * @brief Creates the dependency network for the external gravity tasks of a + * given cell. + * + * @param sched The #scheduler. + * @param gravity The gravity task to link. + * @param c The cell. + */ +static inline void engine_make_external_gravity_dependencies( + struct scheduler *sched, struct task *gravity, struct cell *c) { + + /* init --> external gravity --> kick */ + scheduler_addunlock(sched, c->super->init, gravity); + scheduler_addunlock(sched, gravity, c->super->kick); } /** @@ -1467,9 +1491,6 @@ void engine_link_gravity_tasks(struct engine *e) { /* Get a pointer to the task. */ struct task *t = &sched->tasks[k]; - /* Skip? */ - if (t->skip) continue; - /* Multipole construction */ if (t->type == task_type_grav_up) { scheduler_addunlock(sched, t, gather); @@ -1480,19 +1501,27 @@ void engine_link_gravity_tasks(struct engine *e) { /* Gather the multipoles --> mm interaction --> kick */ scheduler_addunlock(sched, gather, t); - scheduler_addunlock(sched, t, t->ci->gsuper->kick); + scheduler_addunlock(sched, t, t->ci->super->kick); /* init --> mm interaction */ - scheduler_addunlock(sched, t->ci->gsuper->init, t); + scheduler_addunlock(sched, t->ci->super->init, t); } - /* Self-interaction? */ + /* Self-interaction for self-gravity? */ if (t->type == task_type_self && t->subtype == task_subtype_grav) { engine_make_gravity_dependencies(sched, t, t->ci); } + /* Self-interaction for external gravity ? */ + else if (t->type == task_type_self && + t->subtype == task_subtype_external_grav) { + + engine_make_external_gravity_dependencies(sched, t, t->ci); + + } + /* Otherwise, pair interaction? */ else if (t->type == task_type_pair && t->subtype == task_subtype_grav) { @@ -1501,7 +1530,7 @@ void engine_link_gravity_tasks(struct engine *e) { engine_make_gravity_dependencies(sched, t, t->ci); } - if (t->cj->nodeID == nodeID && t->ci->gsuper != t->cj->gsuper) { + if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { engine_make_gravity_dependencies(sched, t, t->cj); } @@ -1516,6 +1545,15 @@ void engine_link_gravity_tasks(struct engine *e) { } } + /* Sub-self-interaction for external gravity ? */ + else if (t->type == task_type_sub_self && + t->subtype == task_subtype_external_grav) { + + if (t->ci->nodeID == nodeID) { + engine_make_external_gravity_dependencies(sched, t, t->ci); + } + } + /* Otherwise, sub-pair interaction? */ else if (t->type == task_type_sub_pair && t->subtype == task_subtype_grav) { @@ -1523,7 +1561,7 @@ void engine_link_gravity_tasks(struct engine *e) { engine_make_gravity_dependencies(sched, t, t->ci); } - if (t->cj->nodeID == nodeID && t->ci->gsuper != t->cj->gsuper) { + if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { engine_make_gravity_dependencies(sched, t, t->cj); } @@ -1600,9 +1638,6 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { for (int ind = 0; ind < nr_tasks; ind++) { struct task *t = &sched->tasks[ind]; - /* Skip? */ - if (t->skip) continue; - /* Self-interaction? */ if (t->type == task_type_self && t->subtype == task_subtype_density) { @@ -1790,12 +1825,6 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { } #endif } - - /* External gravity tasks should depend on init and unlock the kick */ - else if (t->type == task_type_grav_external) { - scheduler_addunlock(sched, t->ci->init, t); - scheduler_addunlock(sched, t, t->ci->kick); - } /* Cooling tasks should depend on kick and unlock sourceterms */ else if (t->type == task_type_cooling) { scheduler_addunlock(sched, t->ci->kick, t); @@ -1871,17 +1900,26 @@ void engine_maketasks(struct engine *e) { /* Add the gravity mm tasks. */ if (e->policy & engine_policy_self_gravity) engine_make_gravity_tasks(e); + /* Add the external gravity tasks. */ + if (e->policy & engine_policy_external_gravity) + engine_make_external_gravity_tasks(e); + + if (e->sched.nr_tasks == 0 && (s->nr_gparts > 0 || s->nr_parts > 0)) + error("We have particles but no hydro or gravity tasks were created."); + /* Split the tasks. */ scheduler_splittasks(sched); - /* Allocate the list of cell-task links. The maximum number of links - is the number of cells (s->tot_cells) times the number of neighbours (27) - times the number of interaction types (2, density and force). */ + /* Allocate the list of cell-task links. The maximum number of links is the + * number of cells (s->tot_cells) times the number of neighbours (26) times + * the number of interaction types, so 26 * 3 (density, force, grav) pairs + * and 4 (density, force, grav, ext_grav) self. + */ if (e->links != NULL) free(e->links); #ifdef EXTRA_HYDRO_LOOP - e->size_links = s->tot_cells * 27 * 3; + e->size_links = s->tot_cells * (26 * 4 + 4); #else - e->size_links = s->tot_cells * 27 * 2; + e->size_links = s->tot_cells * (26 * 3 + 4); #endif if ((e->links = malloc(sizeof(struct link) * e->size_links)) == NULL) error("Failed to allocate cell-task links."); @@ -1894,25 +1932,24 @@ void engine_maketasks(struct engine *e) { /* Count the number of tasks associated with each cell and store the density tasks in each cell, and make each sort depend on the sorts of its progeny. */ - if (e->policy & engine_policy_hydro) engine_count_and_link_tasks(e); + engine_count_and_link_tasks(e); - /* Append hierarchical tasks to each cells */ - if (e->policy & engine_policy_hydro) - for (int k = 0; k < nr_cells; k++) - engine_make_hydro_hierarchical_tasks(e, &cells[k], NULL); + /* Now that the self/pair tasks are at the right level, set the super + * pointers. */ + for (int k = 0; k < nr_cells; k++) cell_set_super(&cells[k], NULL); - if ((e->policy & engine_policy_self_gravity) || - (e->policy & engine_policy_external_gravity)) - for (int k = 0; k < nr_cells; k++) - engine_make_gravity_hierarchical_tasks(e, &cells[k], NULL); + /* Append hierarchical tasks to each cells */ + for (int k = 0; k < nr_cells; k++) + engine_make_hierarchical_tasks(e, &cells[k]); /* Run through the tasks and make force tasks for each density task. Each force task depends on the cell ghosts and unlocks the kick task of its super-cell. */ if (e->policy & engine_policy_hydro) engine_make_extra_hydroloop_tasks(e); - /* Add the dependencies for the self-gravity stuff */ - if (e->policy & engine_policy_self_gravity) engine_link_gravity_tasks(e); + /* Add the dependencies for the gravity stuff */ + if (e->policy & (engine_policy_self_gravity | engine_policy_external_gravity)) + engine_link_gravity_tasks(e); #ifdef WITH_MPI @@ -1957,7 +1994,7 @@ void engine_maketasks(struct engine *e) { } /** - * @brief Mark tasks to be skipped and set the sort flags accordingly. + * @brief Mark tasks to be un-skipped and set the sort flags accordingly. * Threadpool mapper function for fixdt version. * * @param map_data pointer to the tasks @@ -1968,11 +2005,15 @@ void engine_marktasks_fixdt_mapper(void *map_data, int num_elements, void *extra_data) { /* Unpack the arguments. */ struct task *tasks = (struct task *)map_data; - int *rebuild_space = (int *)extra_data; + size_t *rebuild_space = &((size_t *)extra_data)[0]; + struct scheduler *s = (struct scheduler *)(((size_t *)extra_data)[1]); for (int ind = 0; ind < num_elements; ind++) { struct task *t = &tasks[ind]; + /* All tasks are unskipped (we skip by default). */ + scheduler_activate(s, t); + /* Pair? */ if (t->type == task_type_pair || t->type == task_type_sub_pair) { @@ -1999,28 +2040,7 @@ void engine_marktasks_fixdt_mapper(void *map_data, int num_elements, } /** - * @brief Mark any sort tasks as initially skipped. - * Threadpool mapper function. - * - * @param map_data pointer to the tasks - * @param num_elements number of tasks - * @param extra_data unused - */ -void engine_marktasks_sorts_mapper(void *map_data, int num_elements, - void *extra_data) { - /* Unpack the arguments. */ - struct task *tasks = (struct task *)map_data; - for (int ind = 0; ind < num_elements; ind++) { - struct task *t = &tasks[ind]; - if (t->type == task_type_sort) { - t->flags = 0; - t->skip = 1; - } - } -} - -/** - * @brief Mark tasks to be skipped and set the sort flags accordingly. + * @brief Mark tasks to be un-skipped and set the sort flags accordingly. * Threadpool mapper function. * * @param map_data pointer to the tasks @@ -2031,18 +2051,20 @@ void engine_marktasks_mapper(void *map_data, int num_elements, void *extra_data) { /* Unpack the arguments. */ struct task *tasks = (struct task *)map_data; - const int ti_end = ((int *)extra_data)[0]; - int *rebuild_space = &((int *)extra_data)[1]; + const int ti_end = ((size_t *)extra_data)[0]; + size_t *rebuild_space = &((size_t *)extra_data)[1]; + struct scheduler *s = (struct scheduler *)(((size_t *)extra_data)[2]); for (int ind = 0; ind < num_elements; ind++) { struct task *t = &tasks[ind]; /* Single-cell task? */ if (t->type == task_type_self || t->type == task_type_ghost || - t->type == task_type_sub_self) { + t->type == task_type_extra_ghost || t->type == task_type_cooling || + t->type == task_type_sourceterms || t->type == task_type_sub_self) { /* Set this task's skip. */ - t->skip = (t->ci->ti_end_min > ti_end); + if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t); } /* Pair? */ @@ -2059,19 +2081,24 @@ void engine_marktasks_mapper(void *map_data, int num_elements, cj->dx_max > space_maxreldx * cj->h_max)) *rebuild_space = 1; - /* Set this task's skip. */ - if ((t->skip = (ci->ti_end_min > ti_end && cj->ti_end_min > ti_end)) == 1) + /* Set this task's skip, otherwise nothing to do. */ + if (ci->ti_end_min <= ti_end || cj->ti_end_min <= ti_end) + scheduler_activate(s, t); + else continue; + /* If this is not a density task, we don't have to do any of the below. */ + if (t->subtype != task_subtype_density) continue; + /* Set the sort flags. */ - if (t->type == task_type_pair && t->subtype != task_subtype_grav) { + if (t->type == task_type_pair) { if (!(ci->sorted & (1 << t->flags))) { atomic_or(&ci->sorts->flags, (1 << t->flags)); - ci->sorts->skip = 0; + scheduler_activate(s, ci->sorts); } if (!(cj->sorted & (1 << t->flags))) { atomic_or(&cj->sorts->flags, (1 << t->flags)); - cj->sorts->skip = 0; + scheduler_activate(s, cj->sorts); } } @@ -2081,9 +2108,9 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (ci->nodeID != engine_rank) { /* Activate the tasks to recv foreign cell ci's data. */ - ci->recv_xv->skip = 0; - ci->recv_rho->skip = 0; - ci->recv_ti->skip = 0; + scheduler_activate(s, ci->recv_xv); + scheduler_activate(s, ci->recv_rho); + scheduler_activate(s, ci->recv_ti); /* Look for the local cell cj's send tasks. */ struct link *l = NULL; @@ -2091,45 +2118,46 @@ void engine_marktasks_mapper(void *map_data, int num_elements, l = l->next) ; if (l == NULL) error("Missing link to send_xv task."); - l->t->skip = 0; + scheduler_activate(s, l->t); for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; l = l->next) ; if (l == NULL) error("Missing link to send_rho task."); - l->t->skip = 0; + scheduler_activate(s, l->t); for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID; l = l->next) ; if (l == NULL) error("Missing link to send_ti task."); - l->t->skip = 0; + scheduler_activate(s, l->t); } else if (cj->nodeID != engine_rank) { /* Activate the tasks to recv foreign cell cj's data. */ - cj->recv_xv->skip = 0; - cj->recv_rho->skip = 0; - cj->recv_ti->skip = 0; + scheduler_activate(s, cj->recv_xv); + scheduler_activate(s, cj->recv_rho); + scheduler_activate(s, cj->recv_ti); + /* Look for the local cell ci's send tasks. */ struct link *l = NULL; for (l = ci->send_xv; l != NULL && l->t->cj->nodeID != cj->nodeID; l = l->next) ; if (l == NULL) error("Missing link to send_xv task."); - l->t->skip = 0; + scheduler_activate(s, l->t); for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; l = l->next) ; if (l == NULL) error("Missing link to send_rho task."); - l->t->skip = 0; + scheduler_activate(s, l->t); for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID; l = l->next) ; if (l == NULL) error("Missing link to send_ti task."); - l->t->skip = 0; + scheduler_activate(s, l->t); } #endif @@ -2137,25 +2165,26 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Kick? */ else if (t->type == task_type_kick) { - t->skip = (t->ci->ti_end_min > ti_end); t->ci->updated = 0; t->ci->g_updated = 0; + if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t); } /* Init? */ else if (t->type == task_type_init) { - /* Set this task's skip. */ - t->skip = (t->ci->ti_end_min > ti_end); + if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t); } - /* None? */ - else if (t->type == task_type_none) - t->skip = 1; + /* Tasks with no cells should not be skipped? */ + else if (t->type == task_type_grav_gather_m || + t->type == task_type_grav_fft) { + scheduler_activate(s, t); + } } } /** - * @brief Mark tasks to be skipped and set the sort flags accordingly. + * @brief Mark tasks to be un-skipped and set the sort flags accordingly. * * @return 1 if the space has to be rebuilt, 0 otherwise. */ @@ -2169,31 +2198,16 @@ int engine_marktasks(struct engine *e) { if (e->policy & engine_policy_fixdt) { /* Run through the tasks and mark as skip or not. */ + size_t extra_data[2] = {rebuild_space, (size_t)&e->sched}; threadpool_map(&e->threadpool, engine_marktasks_fixdt_mapper, s->tasks, - s->nr_tasks, sizeof(struct task), 1000, &rebuild_space); + s->nr_tasks, sizeof(struct task), 1000, extra_data); return rebuild_space; /* Multiple-timestep case */ } else { /* Run through the tasks and mark as skip or not. */ - int extra_data[2] = {e->ti_current, rebuild_space}; - threadpool_map(&e->threadpool, engine_marktasks_sorts_mapper, s->tasks, - s->nr_tasks, sizeof(struct task), 10000, NULL); - -#ifdef WITH_MPI - if (e->policy & engine_policy_mpi) { - - /* Skip all sends and recvs, we will unmark if needed. */ - for (int k = 0; k < s->nr_tasks; k++) { - struct task *t = &s->tasks[k]; - if (t->type == task_type_send || t->type == task_type_recv) { - t->skip = 1; - } - } - } -#endif - + size_t extra_data[3] = {e->ti_current, rebuild_space, (size_t)&e->sched}; threadpool_map(&e->threadpool, engine_marktasks_mapper, s->tasks, s->nr_tasks, sizeof(struct task), 10000, extra_data); rebuild_space = extra_data[1]; @@ -2214,16 +2228,21 @@ int engine_marktasks(struct engine *e) { */ void engine_print_task_counts(struct engine *e) { - struct scheduler *sched = &e->sched; + const ticks tic = getticks(); + struct scheduler *const sched = &e->sched; + const int nr_tasks = sched->nr_tasks; + const struct task *const tasks = sched->tasks; /* Count and print the number of each task type. */ int counts[task_type_count + 1]; for (int k = 0; k <= task_type_count; k++) counts[k] = 0; - for (int k = 0; k < sched->nr_tasks; k++) - if (!sched->tasks[k].skip) - counts[(int)sched->tasks[k].type] += 1; - else + for (int k = 0; k < nr_tasks; k++) { + if (tasks[k].skip) counts[task_type_count] += 1; + else + counts[(int)tasks[k].type] += 1; + } + message("Total = %d", nr_tasks); #ifdef WITH_MPI printf("[%04i] %s engine_print_task_counts: task counts are [ %s=%i", e->nodeID, clocks_get_timesincestart(), taskID_names[0], counts[0]); @@ -2237,6 +2256,10 @@ void engine_print_task_counts(struct engine *e) { fflush(stdout); message("nr_parts = %zu.", e->s->nr_parts); message("nr_gparts = %zu.", e->s->nr_gparts); + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } /** @@ -2289,7 +2312,7 @@ void engine_prepare(struct engine *e, int nodrift) { TIMER_TIC; /* Run through the tasks and mark as skip or not. */ - int rebuild = (e->forcerebuild || engine_marktasks(e)); + int rebuild = e->forcerebuild; /* Collect the values of rebuild from all nodes. */ #ifdef WITH_MPI @@ -2399,6 +2422,10 @@ void engine_collect_kick(struct cell *c) { ti_end_min = min(ti_end_min, cp->ti_end_min); updated += cp->updated; g_updated += cp->g_updated; + + /* Collected, so clear for next time. */ + cp->updated = 0; + cp->g_updated = 0; } } } @@ -2434,6 +2461,10 @@ void engine_collect_timestep(struct engine *e) { ti_end_min = min(ti_end_min, c->ti_end_min); updates += c->updated; g_updates += c->g_updated; + + /* Collected, so clear for next time. */ + c->updated = 0; + c->g_updated = 0; } /* Aggregate the data from the different nodes. */ @@ -2476,76 +2507,28 @@ void engine_collect_timestep(struct engine *e) { void engine_print_stats(struct engine *e) { const ticks tic = getticks(); - const struct space *s = e->s; - double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, e_rad = 0.0; - double entropy = 0.0, mass = 0.0; - double mom[3] = {0.0, 0.0, 0.0}, ang_mom[3] = {0.0, 0.0, 0.0}; + struct statistics stats; + stats_init(&stats); - /* Collect the cell data. */ - for (int k = 0; k < s->nr_cells; k++) - if (s->cells_top[k].nodeID == e->nodeID) { - struct cell *c = &s->cells_top[k]; - mass += c->mass; - e_kin += c->e_kin; - e_int += c->e_int; - e_pot += c->e_pot; - e_rad += c->e_rad; - entropy += c->entropy; - mom[0] += c->mom[0]; - mom[1] += c->mom[1]; - mom[2] += c->mom[2]; - ang_mom[0] += c->ang_mom[0]; - ang_mom[1] += c->ang_mom[1]; - ang_mom[2] += c->ang_mom[2]; - } + /* Collect the stats on this node */ + stats_collect(e->s, &stats); /* Aggregate the data from the different nodes. */ #ifdef WITH_MPI - { - double in[12] = {0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.}; - double out[12]; - out[0] = e_kin; - out[1] = e_int; - out[2] = e_pot; - out[3] = e_rad; - out[4] = mom[0]; - out[5] = mom[1]; - out[6] = mom[2]; - out[7] = ang_mom[0]; - out[8] = ang_mom[1]; - out[9] = ang_mom[2]; - out[10] = mass; - out[11] = entropy; - if (MPI_Reduce(out, in, 11, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD) != - MPI_SUCCESS) - error("Failed to aggregate stats."); - e_kin = out[0]; - e_int = out[1]; - e_pot = out[2]; - e_rad = out[3]; - mom[0] = out[4]; - mom[1] = out[5]; - mom[2] = out[6]; - ang_mom[0] = out[7]; - ang_mom[1] = out[8]; - ang_mom[2] = out[9]; - mass = out[10]; - entropy = out[11]; - } -#endif + struct statistics global_stats; + stats_init(&global_stats); - const double e_tot = e_kin + e_int + e_pot; + if (MPI_Reduce(&stats, &global_stats, 1, statistics_mpi_type, + statistics_mpi_reduce_op, 0, MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to aggregate stats."); +#else + struct statistics global_stats = stats; +#endif /* Print info */ - if (e->nodeID == 0) { - fprintf(e->file_stats, - " %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e " - "%14e\n", - e->time, mass, e_tot, e_kin, e_int, e_pot, e_rad, entropy, mom[0], - mom[1], mom[2], ang_mom[0], ang_mom[1], ang_mom[2]); - fflush(e->file_stats); - } + if (e->nodeID == 0) + stats_print_to_file(e->file_stats, &global_stats, e->time); if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -2654,7 +2637,10 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) { /* Add the tasks corresponding to external gravity to the masks */ if (e->policy & engine_policy_external_gravity) { - mask |= 1 << task_type_grav_external; + mask |= 1 << task_type_self; + mask |= 1 << task_type_sub_self; + + submask |= 1 << task_subtype_external_grav; } /* Add MPI tasks if need be */ @@ -2753,13 +2739,7 @@ void engine_step(struct engine *e) { fflush(e->file_timesteps); } - /* Save some statistics */ - if (e->time - e->timeLastStatistics >= e->deltaTimeStatistics) { - engine_print_stats(e); - e->timeLastStatistics += e->deltaTimeStatistics; - } - - /* Drift only the necessary particles, that all means all particles + /* Drift only the necessary particles, that means all particles * if we are about to repartition. */ int repart = (e->forcerepart != REPART_NONE); e->drift_all = repart || e->drift_all; @@ -2823,7 +2803,11 @@ void engine_step(struct engine *e) { /* Add the tasks corresponding to external gravity to the masks */ if (e->policy & engine_policy_external_gravity) { - mask |= 1 << task_type_grav_external; + + mask |= 1 << task_type_self; + mask |= 1 << task_type_sub_self; + + submask |= 1 << task_subtype_external_grav; } /* Add the tasks corresponding to cooling to the masks */ @@ -2844,13 +2828,19 @@ void engine_step(struct engine *e) { submask |= 1 << task_subtype_tend; } - if (e->verbose) engine_print_task_counts(e); + // if (e->verbose) engine_print_task_counts(e); /* Send off the runners. */ TIMER_TIC; engine_launch(e, e->nr_threads, mask, submask); TIMER_TOC(timer_runners); + /* Save some statistics */ + if (e->time - e->timeLastStatistics >= e->deltaTimeStatistics) { + engine_print_stats(e); + e->timeLastStatistics += e->deltaTimeStatistics; + } + TIMER_TOC2(timer_step); clocks_gettime(&time2); @@ -2876,9 +2866,10 @@ void engine_drift(struct engine *e) { const ticks tic = getticks(); threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 1, e); + if (e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); + message("took %.3f %s (including task unskipping).", + clocks_from_ticks(getticks() - tic), clocks_getunit()); } /** @@ -3409,13 +3400,14 @@ void engine_init(struct engine *e, struct space *s, nr_nodes * nr_threads); e->file_timesteps = fopen(timestepsfileName, "w"); fprintf(e->file_timesteps, - "# Branch: %s\n# Revision: %s\n# Compiler: %s, Version: %s \n# " + "# Host: %s\n# Branch: %s\n# Revision: %s\n# Compiler: %s, " + "Version: %s \n# " "Number of threads: %d\n# Number of MPI ranks: %d\n# Hydrodynamic " "scheme: %s\n# Hydrodynamic kernel: %s\n# No. of neighbours: %.2f " "+/- %.2f\n# Eta: %f\n", - git_branch(), git_revision(), compiler_name(), compiler_version(), - e->nr_threads, e->nr_nodes, SPH_IMPLEMENTATION, kernel_name, - e->hydro_properties->target_neighbours, + hostname(), git_branch(), git_revision(), compiler_name(), + compiler_version(), e->nr_threads, e->nr_nodes, SPH_IMPLEMENTATION, + kernel_name, e->hydro_properties->target_neighbours, e->hydro_properties->delta_neighbours, e->hydro_properties->eta_neighbours); @@ -3505,6 +3497,7 @@ void engine_init(struct engine *e, struct space *s, /* Construct types for MPI communications */ #ifdef WITH_MPI part_create_mpi_types(); + stats_create_MPI_type(); #endif /* Initialize the threadpool. */ diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h index 0bdf56fe844309998db1fad4cf9581edb6b88305..c999e20d401570ac6518291df8cf315569fe78bd 100644 --- a/src/hydro/Gadget2/hydro.h +++ b/src/hydro/Gadget2/hydro.h @@ -411,8 +411,8 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( p->force.h_dt *= p->h * hydro_dimension_inv; - p->entropy_dt *= - 0.5f * hydro_gamma_minus_one * pow_minus_gamma_minus_one(p->rho); + p->entropy_dt = + 0.5f * gas_entropy_from_internal_energy(p->rho, p->entropy_dt); } /** diff --git a/src/hydro/Gizmo/hydro.h b/src/hydro/Gizmo/hydro.h index a383a2fdce9e64a5673d0136184368220dc08458..bd970795bdf070a7bd7915cc4f493218dbf319d1 100644 --- a/src/hydro/Gizmo/hydro.h +++ b/src/hydro/Gizmo/hydro.h @@ -524,6 +524,7 @@ __attribute__((always_inline)) INLINE static void hydro_set_internal_energy( /* conserved.energy is NOT the specific energy (u), but the total thermal energy (u*m) */ p->conserved.energy = u * p->conserved.mass; + p->primitives.P = hydro_gamma_minus_one * p->primitives.rho * u; } /** @@ -540,4 +541,5 @@ __attribute__((always_inline)) INLINE static void hydro_set_entropy( p->conserved.energy = gas_internal_energy_from_entropy(p->primitives.rho, S) * p->conserved.mass; + p->primitives.P = gas_pressure_from_entropy(p->primitives.rho, S); } diff --git a/src/hydro/PressureEntropy/hydro.h b/src/hydro/PressureEntropy/hydro.h index ede16fe5181a546921db7acf8ffcbc130c5e91c5..0c69728a9ce6317a8d7ddab945e7aab6e94ee247 100644 --- a/src/hydro/PressureEntropy/hydro.h +++ b/src/hydro/PressureEntropy/hydro.h @@ -338,11 +338,10 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( /* Compute "grad h" term (note we use rho here and not rho_bar !)*/ const float rho_inv = 1.f / p->rho; - const float entropy_minus_one_over_gamma = 1.f / p->entropy_one_over_gamma; const float rho_dh = 1.f / (1.f + hydro_dimension_inv * p->h * p->density.rho_dh * rho_inv); - const float pressure_dh = p->density.pressure_dh * rho_inv * p->h * - hydro_dimension_inv * entropy_minus_one_over_gamma; + const float pressure_dh = + p->density.pressure_dh * rho_inv * p->h * hydro_dimension_inv; const float grad_h_term = rho_dh * pressure_dh; @@ -442,8 +441,8 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( p->force.h_dt *= p->h * hydro_dimension_inv; - p->entropy_dt *= - 0.5f * hydro_gamma_minus_one * pow_minus_gamma_minus_one(p->rho_bar); + p->entropy_dt = + 0.5f * gas_entropy_from_internal_energy(p->rho_bar, p->entropy_dt); } /** diff --git a/src/hydro/PressureEntropy/hydro_iact.h b/src/hydro/PressureEntropy/hydro_iact.h index 18e22233f6f53e488119a58a988ba4c9faf3db36..ce1c38ca69954252dc804af9181b9060a14afcb9 100644 --- a/src/hydro/PressureEntropy/hydro_iact.h +++ b/src/hydro/PressureEntropy/hydro_iact.h @@ -255,8 +255,9 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /* Now, convolve with the kernel */ const float visc_term = 0.5f * visc * (wi_dr + wj_dr); - const float sph_term = (S_gamma_j / S_gamma_i - f_i) * P_over_rho2_i * wi_dr + - (S_gamma_i / S_gamma_j - f_j) * P_over_rho2_j * wj_dr; + const float sph_term = + (S_gamma_j / S_gamma_i - f_i / S_gamma_i) * P_over_rho2_i * wi_dr + + (S_gamma_i / S_gamma_j - f_j / S_gamma_j) * P_over_rho2_j * wj_dr; /* Eventually got the acceleration */ const float acc = (visc_term + sph_term) * r_inv; @@ -365,8 +366,9 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Now, convolve with the kernel */ const float visc_term = 0.5f * visc * (wi_dr + wj_dr); - const float sph_term = (S_gamma_j / S_gamma_i - f_i) * P_over_rho2_i * wi_dr + - (S_gamma_i / S_gamma_j - f_j) * P_over_rho2_j * wj_dr; + const float sph_term = + (S_gamma_j / S_gamma_i - f_i / S_gamma_i) * P_over_rho2_i * wi_dr + + (S_gamma_i / S_gamma_j - f_j / S_gamma_j) * P_over_rho2_j * wj_dr; /* Eventually got the acceleration */ const float acc = (visc_term + sph_term) * r_inv; diff --git a/src/minmax.h b/src/minmax.h index 9d92cd71d849dba615fdb05bc342014e0593d989..a53093663c79cf4280d136747663552e49c7f1b2 100644 --- a/src/minmax.h +++ b/src/minmax.h @@ -43,4 +43,18 @@ _a > _b ? _a : _b; \ }) +/** + * @brief Limits the value of x to be between a and b + * + * Only wraps once. If x > 2b, the returned value will be larger than b. + * Similarly for x < -b. + */ +#define box_wrap(x, a, b) \ + ({ \ + const __typeof__(x) _x = (x); \ + const __typeof__(a) _a = (a); \ + const __typeof__(b) _b = (b); \ + _x < _a ? (_x + _b) : ((_x > _b) ? (_x - _b) : _x); \ + }) + #endif /* SWIFT_MINMAX_H */ diff --git a/src/riemann.h b/src/riemann.h index d0ae57a640e13c2098708735d6c34de70ebea5b0..69e1a038fc37cb07e1094c8e6736f79b6afa4f35 100644 --- a/src/riemann.h +++ b/src/riemann.h @@ -27,18 +27,17 @@ #include "stdio.h" #include "stdlib.h" -/* Check that we use an ideal equation of state, since other equations of state - are not compatible with these Riemann solvers. */ -#ifndef EOS_IDEAL_GAS -#error Currently there are no Riemann solvers that can handle the requested \ - equation of state. Select an ideal gas equation of state if you want to \ - use this hydro scheme! -#endif - #if defined(RIEMANN_SOLVER_EXACT) #define RIEMANN_SOLVER_IMPLEMENTATION "Exact Riemann solver (Toro 2009)" +#if defined(EOS_IDEAL_GAS) #include "riemann/riemann_exact.h" +#elif defined(EOS_ISOTHERMAL_GAS) +#include "riemann/riemann_exact_isothermal.h" +#else +#error \ + "The Exact Riemann solver is incompatible with the selected equation of state!" +#endif #elif defined(RIEMANN_SOLVER_TRRS) diff --git a/src/riemann/riemann_exact_isothermal.h b/src/riemann/riemann_exact_isothermal.h new file mode 100644 index 0000000000000000000000000000000000000000..8e8a6a4dcce456b55f85148bed7342ad4e651c1b --- /dev/null +++ b/src/riemann/riemann_exact_isothermal.h @@ -0,0 +1,450 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Coypright (c) 2016 Bert Vandenbroucke (bert.vandenbroucke@ugent.be) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#ifndef SWIFT_RIEMANN_EXACT_ISOTHERMAL_H +#define SWIFT_RIEMANN_EXACT_ISOTHERMAL_H + +#include <float.h> +#include "adiabatic_index.h" +#include "minmax.h" +#include "riemann_vacuum.h" + +#define const_isothermal_soundspeed \ + sqrtf(hydro_gamma_minus_one* const_isothermal_internal_energy) + +/** + * @brief Relative difference between the middle state velocity and the left or + * right state velocity used in the middle state density iteration. + * + * @param rho Current estimate of the middle state density. + * @param W Left or right state vector. + * @return Density dependent part of the middle state velocity. + */ +__attribute__((always_inline)) INLINE static float riemann_fb(float rho, + float* W) { + if (rho < W[0]) { + return const_isothermal_soundspeed * logf(rho / W[0]); + } else { + return const_isothermal_soundspeed * + (sqrtf(rho / W[0]) - sqrtf(W[0] / rho)); + } +} + +/** + * @brief Derivative w.r.t. rho of the function riemann_fb. + * + * @param rho Current estimate of the middle state density. + * @param W Left or right state vector. + * @return Derivative of riemann_fb. + */ +__attribute__((always_inline)) INLINE static float riemann_fprimeb(float rho, + float* W) { + if (rho < W[0]) { + return const_isothermal_soundspeed * W[0] / rho; + } else { + return 0.5 * const_isothermal_soundspeed * + (sqrtf(rho / W[0]) + sqrtf(W[0] / rho)) / rho; + } +} + +/** + * @brief Difference between the left and right middle state velocity estimates. + * + * Since the middle state velocity takes on a constant value, we want to get + * this difference as close to zero as possible. + * + * @param rho Current estimate of the middle state density. + * @param WL Left state vector. + * @param WR Right state vector. + * @param vL Left state velocity along the interface normal. + * @param vR Right state velocity along the interface normal. + * @return Difference between the left and right middle state velocity + * estimates. + */ +__attribute__((always_inline)) INLINE static float riemann_f( + float rho, float* WL, float* WR, float vL, float vR) { + return riemann_fb(rho, WR) + riemann_fb(rho, WL) + vR - vL; +} + +/** + * @brief Derivative of riemann_f w.r.t. rho. + * + * @param rho Current estimate of the middle state density. + * @param WL Left state vector. + * @param WR Right state vector. + * @return Derivative of riemann_f. + */ +__attribute__((always_inline)) INLINE static float riemann_fprime(float rho, + float* WL, + float* WR) { + return riemann_fprimeb(rho, WL) + riemann_fprimeb(rho, WR); +} + +/** + * @brief Get a good first guess for the middle state density. + * + * @param WL The left state vector + * @param WR The right state vector + * @param vL The left velocity along the interface normal + * @param vR The right velocity along the interface normal + */ +__attribute__((always_inline)) INLINE static float riemann_guess_rho(float* WL, + float* WR, + float vL, + float vR) { + + /* Currently three possibilities and not really an algorithm to decide which + one to choose: */ + /* just the average */ + return 0.5f * (WL[0] + WR[0]); + + /* two rarefaction approximation */ + return sqrtf(WL[0] * WR[0] * expf((vL - vR) / const_isothermal_soundspeed)); + + /* linearized primitive variable approximation */ + return 0.25f * (WL[0] + WR[0]) * (vL - vR) / const_isothermal_soundspeed + + 0.5f * (WL[0] + WR[0]); +} + +/** + * @brief Find the zeropoint of riemann_f(rho) using Brent's method. + * + * @param lower_limit Lower limit for the method (riemann_f(lower_limit) < 0) + * @param upper_limit Upper limit for the method (riemann_f(upper_limit) > 0) + * @param lowf Value of riemann_f(lower_limit). + * @param upf Value of riemann_f(upper_limit). + * @param error_tol Tolerance used to decide if the solution is converged + * @param WL Left state vector + * @param WR Right state vector + * @param vL The left velocity along the interface normal + * @param vR The right velocity along the interface normal + */ +__attribute__((always_inline)) INLINE static float riemann_solve_brent( + float lower_limit, float upper_limit, float lowf, float upf, + float error_tol, float* WL, float* WR, float vL, float vR) { + + float a, b, c, d, s; + float fa, fb, fc, fs; + float tmp, tmp2; + int mflag; + int i; + + a = lower_limit; + b = upper_limit; + c = 0.0f; + d = FLT_MAX; + + fa = lowf; + fb = upf; + + fc = 0.0f; + s = 0.0f; + fs = 0.0f; + + /* if f(a) f(b) >= 0 then error-exit */ + if (fa * fb >= 0.0f) { + error( + "Brent's method called with equal sign function values!\n" + "f(%g) = %g, f(%g) = %g\n", + a, fa, b, fb); + /* return NaN */ + return 0.0f / 0.0f; + } + + /* if |f(a)| < |f(b)| then swap (a,b) */ + if (fabs(fa) < fabs(fb)) { + tmp = a; + a = b; + b = tmp; + tmp = fa; + fa = fb; + fb = tmp; + } + + c = a; + fc = fa; + mflag = 1; + i = 0; + + while (!(fb == 0.0f) && (fabs(a - b) > error_tol * 0.5f * (a + b))) { + if ((fa != fc) && (fb != fc)) /* Inverse quadratic interpolation */ + s = a * fb * fc / (fa - fb) / (fa - fc) + + b * fa * fc / (fb - fa) / (fb - fc) + + c * fa * fb / (fc - fa) / (fc - fb); + else + /* Secant Rule */ + s = b - fb * (b - a) / (fb - fa); + + tmp2 = 0.25f * (3.0f * a + b); + if (!(((s > tmp2) && (s < b)) || ((s < tmp2) && (s > b))) || + (mflag && (fabs(s - b) >= (0.5f * fabs(b - c)))) || + (!mflag && (fabs(s - b) >= (0.5f * fabs(c - d)))) || + (mflag && (fabs(b - c) < error_tol)) || + (!mflag && (fabs(c - d) < error_tol))) { + s = 0.5f * (a + b); + mflag = 1; + } else { + mflag = 0; + } + fs = riemann_f(s, WL, WR, vL, vR); + d = c; + c = b; + fc = fb; + if (fa * fs < 0.) { + b = s; + fb = fs; + } else { + a = s; + fa = fs; + } + + /* if |f(a)| < |f(b)| then swap (a,b) */ + if (fabs(fa) < fabs(fb)) { + tmp = a; + a = b; + b = tmp; + tmp = fa; + fa = fb; + fb = tmp; + } + i++; + } + return b; +} + +/** + * @brief Solve the Riemann problem between the given left and right state and + * along the given interface normal + * + * @param WL The left state vector + * @param WR The right state vector + * @param Whalf Empty state vector in which the result will be stored + * @param n_unit Normal vector of the interface + */ +__attribute__((always_inline)) INLINE static void riemann_solver_solve( + float* WL, float* WR, float* Whalf, float* n_unit) { + + /* velocity of the left and right state in a frame aligned with n_unit */ + float vL, vR, vhalf; + /* variables used for finding rhostar */ + float rho, rhoguess, frho, frhoguess; + /* variables used for sampling the solution */ + float u, S, SH, ST; + + int errorFlag = 0; + + /* sanity checks */ + if (WL[0] != WL[0]) { + printf("NaN WL!\n"); + errorFlag = 1; + } + if (WR[0] != WR[0]) { + printf("NaN WR!\n"); + errorFlag = 1; + } + if (WL[0] < 0.0f) { + printf("Negative WL!\n"); + errorFlag = 1; + } + if (WR[0] < 0.0f) { + printf("Negative WR!\n"); + errorFlag = 1; + } + if (errorFlag) { + printf("WL: %g %g %g %g %g\n", WL[0], WL[1], WL[2], WL[3], WL[4]); + printf("WR: %g %g %g %g %g\n", WR[0], WR[1], WR[2], WR[3], WR[4]); + error("Riemman solver input error!\n"); + } + + /* calculate velocities in interface frame */ + vL = WL[1] * n_unit[0] + WL[2] * n_unit[1] + WL[3] * n_unit[2]; + vR = WR[1] * n_unit[0] + WR[2] * n_unit[1] + WR[3] * n_unit[2]; + + /* VACUUM... */ + + rho = 0.; + /* obtain a first guess for p */ + rhoguess = riemann_guess_rho(WL, WR, vL, vR); + frho = riemann_f(rho, WL, WR, vL, vR); + frhoguess = riemann_f(rhoguess, WL, WR, vL, vR); + /* ok, rhostar is close to 0, better use Brent's method... */ + /* we use Newton-Raphson until we find a suitable interval */ + if (frho * frhoguess >= 0.0f) { + /* Newton-Raphson until convergence or until suitable interval is found + to use Brent's method */ + unsigned int counter = 0; + while (fabs(rho - rhoguess) > 1.e-6f * 0.5f * (rho + rhoguess) && + frhoguess < 0.0f) { + rho = rhoguess; + rhoguess = rhoguess - frhoguess / riemann_fprime(rhoguess, WL, WR); + frhoguess = riemann_f(rhoguess, WL, WR, vL, vR); + counter++; + if (counter > 1000) { + error("Stuck in Newton-Raphson!\n"); + } + } + } + /* As soon as there is a suitable interval: use Brent's method */ + if (1.e6 * fabs(rho - rhoguess) > 0.5f * (rho + rhoguess) && + frhoguess > 0.0f) { + rho = 0.0f; + frho = riemann_f(rho, WL, WR, vL, vR); + /* use Brent's method to find the zeropoint */ + rho = riemann_solve_brent(rho, rhoguess, frho, frhoguess, 1.e-6, WL, WR, vL, + vR); + } else { + rho = rhoguess; + } + + /* calculate the middle state velocity */ + u = 0.5f * (vL - riemann_fb(rho, WL) + vR + riemann_fb(rho, WR)); + + /* sample the solution */ + if (u > 0.0f) { + /* left state */ + Whalf[1] = WL[1]; + Whalf[2] = WL[2]; + Whalf[3] = WL[3]; + if (WL[0] < rho) { + /* left shock wave */ + S = vL - const_isothermal_soundspeed * sqrtf(rho / WL[0]); + if (S >= 0.) { + /* to the left of the shock */ + Whalf[0] = WL[0]; + vhalf = 0.0f; + } else { + /* to the right of the shock */ + Whalf[0] = rho; + vhalf = u - vL; + } + } else { + /* left rarefaction wave */ + SH = vL - const_isothermal_soundspeed; + ST = u - const_isothermal_soundspeed; + if (SH > 0.) { + /* to the left of the rarefaction */ + Whalf[0] = WL[0]; + vhalf = 0.0f; + } else if (ST > 0.0f) { + /* inside the rarefaction */ + Whalf[0] = WL[0] * expf(vL / const_isothermal_soundspeed - 1.0f); + vhalf = const_isothermal_soundspeed - vL; + } else { + /* to the right of the rarefaction */ + Whalf[0] = rho; + vhalf = u - vL; + } + } + } else { + /* right state */ + Whalf[1] = WR[1]; + Whalf[2] = WR[2]; + Whalf[3] = WR[3]; + if (WR[0] < rho) { + /* right shock wave */ + S = vR + const_isothermal_soundspeed * sqrtf(rho / WR[0]); + if (S > 0.0f) { + /* to the left of the shock wave: middle state */ + Whalf[0] = rho; + vhalf = u - vR; + } else { + /* to the right of the shock wave: right state */ + Whalf[0] = WR[0]; + vhalf = 0.0f; + } + } else { + /* right rarefaction wave */ + SH = vR + const_isothermal_soundspeed; + ST = u + const_isothermal_soundspeed; + if (ST > 0.0f) { + /* to the left of rarefaction: middle state */ + Whalf[0] = rho; + vhalf = u - vR; + } else if (SH > 0.0f) { + /* inside rarefaction */ + Whalf[0] = WR[0] * expf(-vR / const_isothermal_soundspeed - 1.0f); + vhalf = -const_isothermal_soundspeed - vR; + } else { + /* to the right of rarefaction: right state */ + Whalf[0] = WR[0]; + vhalf = 0.0f; + } + } + } + + /* add the velocity solution along the interface normal to the velocities */ + Whalf[1] += vhalf * n_unit[0]; + Whalf[2] += vhalf * n_unit[1]; + Whalf[3] += vhalf * n_unit[2]; + + /* the pressure is completely irrelevant in this case */ + Whalf[4] = + Whalf[0] * const_isothermal_soundspeed * const_isothermal_soundspeed; +} + +__attribute__((always_inline)) INLINE static void riemann_solve_for_flux( + float* Wi, float* Wj, float* n_unit, float* vij, float* totflux) { + + float Whalf[5]; + float flux[5][3]; + float vtot[3]; + float rhoe; + + riemann_solver_solve(Wi, Wj, Whalf, n_unit); + + flux[0][0] = Whalf[0] * Whalf[1]; + flux[0][1] = Whalf[0] * Whalf[2]; + flux[0][2] = Whalf[0] * Whalf[3]; + + vtot[0] = Whalf[1] + vij[0]; + vtot[1] = Whalf[2] + vij[1]; + vtot[2] = Whalf[3] + vij[2]; + flux[1][0] = Whalf[0] * vtot[0] * Whalf[1] + Whalf[4]; + flux[1][1] = Whalf[0] * vtot[0] * Whalf[2]; + flux[1][2] = Whalf[0] * vtot[0] * Whalf[3]; + flux[2][0] = Whalf[0] * vtot[1] * Whalf[1]; + flux[2][1] = Whalf[0] * vtot[1] * Whalf[2] + Whalf[4]; + flux[2][2] = Whalf[0] * vtot[1] * Whalf[3]; + flux[3][0] = Whalf[0] * vtot[2] * Whalf[1]; + flux[3][1] = Whalf[0] * vtot[2] * Whalf[2]; + flux[3][2] = Whalf[0] * vtot[2] * Whalf[3] + Whalf[4]; + + /* eqn. (15) */ + /* F_P = \rho e ( \vec{v} - \vec{v_{ij}} ) + P \vec{v} */ + /* \rho e = P / (\gamma-1) + 1/2 \rho \vec{v}^2 */ + rhoe = Whalf[4] / hydro_gamma_minus_one + + 0.5f * Whalf[0] * + (vtot[0] * vtot[0] + vtot[1] * vtot[1] + vtot[2] * vtot[2]); + flux[4][0] = rhoe * Whalf[1] + Whalf[4] * vtot[0]; + flux[4][1] = rhoe * Whalf[2] + Whalf[4] * vtot[1]; + flux[4][2] = rhoe * Whalf[3] + Whalf[4] * vtot[2]; + + totflux[0] = + flux[0][0] * n_unit[0] + flux[0][1] * n_unit[1] + flux[0][2] * n_unit[2]; + totflux[1] = + flux[1][0] * n_unit[0] + flux[1][1] * n_unit[1] + flux[1][2] * n_unit[2]; + totflux[2] = + flux[2][0] * n_unit[0] + flux[2][1] * n_unit[1] + flux[2][2] * n_unit[2]; + totflux[3] = + flux[3][0] * n_unit[0] + flux[3][1] * n_unit[1] + flux[3][2] * n_unit[2]; + totflux[4] = + flux[4][0] * n_unit[0] + flux[4][1] * n_unit[1] + flux[4][2] * n_unit[2]; +} + +#endif /* SWIFT_RIEMANN_EXACT_ISOTHERMAL_H */ diff --git a/src/riemann/riemann_hllc.h b/src/riemann/riemann_hllc.h index 1a4c3f5f8338f6504a8c5f1d9eab8451eb20246c..962fb8380ad9919d7b7ad96325a12f0bfc53d255 100644 --- a/src/riemann/riemann_hllc.h +++ b/src/riemann/riemann_hllc.h @@ -24,6 +24,11 @@ #include "minmax.h" #include "riemann_vacuum.h" +#ifndef EOS_IDEAL_GAS +#error \ + "The HLLC Riemann solver currently only supports and ideal gas equation of state. Either select this equation of state, or try using another Riemann solver!" +#endif + __attribute__((always_inline)) INLINE static void riemann_solve_for_flux( float *WL, float *WR, float *n, float *vij, float *totflux) { diff --git a/src/riemann/riemann_trrs.h b/src/riemann/riemann_trrs.h index b13a76b4c57af548497780e974e5c9ee3a721fac..be56b93583b693d7d492e0c8b1357cdbe57e88b5 100644 --- a/src/riemann/riemann_trrs.h +++ b/src/riemann/riemann_trrs.h @@ -23,6 +23,11 @@ #include "adiabatic_index.h" #include "riemann_vacuum.h" +#ifndef EOS_IDEAL_GAS +#error \ + "The TRRS Riemann solver currently only supports an ideal gas equation of state. Either select this equation of state, or try using another Riemann solver!" +#endif + /** * @brief Solve the Riemann problem using the Two Rarefaction Riemann Solver * diff --git a/src/runner.c b/src/runner.c index e036030c772847a5c0a0ecb81b6aea0b01258d75..f5efc99d492be837509e50bd2674ab6923404446 100644 --- a/src/runner.c +++ b/src/runner.c @@ -184,12 +184,12 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) { for (int i = 0; i < gcount; i++) { /* Get a direct pointer on the part. */ - struct gpart *restrict g = &gparts[i]; + struct gpart *restrict gp = &gparts[i]; /* Is this part within the time step? */ - if (g->ti_end <= ti_current) { + if (gp->ti_end <= ti_current) { - external_gravity_acceleration(time, potential, constants, g); + external_gravity_acceleration(time, potential, constants, gp); } } @@ -747,111 +747,99 @@ void runner_do_ghost(struct runner *r, struct cell *c) { } /** - * @brief Drift particles and g-particles in a cell forward in time + * @brief Drift particles and g-particles in a cell forward in time, + * unskipping any tasks associated with active cells. * * @param c The cell. * @param e The engine. + * @param drift whether to actually drift the particles, will not be + * necessary for non-local cells. */ -static void runner_do_drift(struct cell *c, struct engine *e) { +static void runner_do_drift(struct cell *c, struct engine *e, int drift) { + + const int ti_current = e->ti_current; + + /* Unskip any active tasks. */ + if (c->ti_end_min == e->ti_current) { + const int forcerebuild = cell_unskip_tasks(c, &e->sched); + if (forcerebuild) atomic_inc(&e->forcerebuild); + } + + /* Do we really need to drift? */ + if (drift) { + if (!e->drift_all && !cell_is_drift_needed(c, ti_current)) return; + } else { + + /* Not drifting, but may still need to recurse for task skipping. */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *cp = c->progeny[k]; + runner_do_drift(cp, e, 0); + } + } + } + return; + } const double timeBase = e->timeBase; const int ti_old = c->ti_old; - const int ti_current = e->ti_current; struct part *const parts = c->parts; struct xpart *const xparts = c->xparts; struct gpart *const gparts = c->gparts; - /* Do we need to drift ? */ - if (!e->drift_all && !cell_is_drift_needed(c, ti_current)) return; - - /* Check that we are actually going to move forward. */ - if (ti_current == ti_old) return; - /* Drift from the last time the cell was drifted to the current time */ const double dt = (ti_current - ti_old) * timeBase; - float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f; - double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, e_rad = 0.0; - double entropy = 0.0, mass = 0.0; - double mom[3] = {0.0, 0.0, 0.0}; - double ang_mom[3] = {0.0, 0.0, 0.0}; /* No children? */ if (!c->split) { - /* Loop over all the g-particles in the cell */ - const size_t nr_gparts = c->gcount; - for (size_t k = 0; k < nr_gparts; k++) { - - /* Get a handle on the gpart. */ - struct gpart *const gp = &gparts[k]; - - /* Drift... */ - drift_gpart(gp, dt, timeBase, ti_old, ti_current); - - /* Compute (square of) motion since last cell construction */ - const float dx2 = gp->x_diff[0] * gp->x_diff[0] + - gp->x_diff[1] * gp->x_diff[1] + - gp->x_diff[2] * gp->x_diff[2]; - dx2_max = (dx2_max > dx2) ? dx2_max : dx2; - } - - /* Loop over all the particles in the cell (more work for these !) */ - const size_t nr_parts = c->count; - for (size_t k = 0; k < nr_parts; k++) { - - /* Get a handle on the part. */ - struct part *const p = &parts[k]; - struct xpart *const xp = &xparts[k]; + /* Check that we are actually going to move forward. */ + if (ti_current >= ti_old) { - /* Drift... */ - drift_part(p, xp, dt, timeBase, ti_old, ti_current); + /* Loop over all the g-particles in the cell */ + const size_t nr_gparts = c->gcount; + for (size_t k = 0; k < nr_gparts; k++) { - /* Compute (square of) motion since last cell construction */ - const float dx2 = xp->x_diff[0] * xp->x_diff[0] + - xp->x_diff[1] * xp->x_diff[1] + - xp->x_diff[2] * xp->x_diff[2]; - dx2_max = (dx2_max > dx2) ? dx2_max : dx2; + /* Get a handle on the gpart. */ + struct gpart *const gp = &gparts[k]; - /* Maximal smoothing length */ - h_max = (h_max > p->h) ? h_max : p->h; + /* Drift... */ + drift_gpart(gp, dt, timeBase, ti_old, ti_current); - /* Now collect quantities for statistics */ - - const float half_dt = - (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase; - const double x[3] = {p->x[0], p->x[1], p->x[2]}; - const float v[3] = {xp->v_full[0] + p->a_hydro[0] * half_dt, - xp->v_full[1] + p->a_hydro[1] * half_dt, - xp->v_full[2] + p->a_hydro[2] * half_dt}; + /* Compute (square of) motion since last cell construction */ + const float dx2 = gp->x_diff[0] * gp->x_diff[0] + + gp->x_diff[1] * gp->x_diff[1] + + gp->x_diff[2] * gp->x_diff[2]; + dx2_max = (dx2_max > dx2) ? dx2_max : dx2; + } - const float m = hydro_get_mass(p); + /* Loop over all the particles in the cell */ + const size_t nr_parts = c->count; + for (size_t k = 0; k < nr_parts; k++) { - /* Collect mass */ - mass += m; + /* Get a handle on the part. */ + struct part *const p = &parts[k]; + struct xpart *const xp = &xparts[k]; - /* Collect momentum */ - mom[0] += m * v[0]; - mom[1] += m * v[1]; - mom[2] += m * v[2]; + /* Drift... */ + drift_part(p, xp, dt, timeBase, ti_old, ti_current); - /* Collect angular momentum */ - ang_mom[0] += m * (x[1] * v[2] - x[2] * v[1]); - ang_mom[1] += m * (x[2] * v[0] - x[0] * v[2]); - ang_mom[2] += m * (x[0] * v[1] - x[1] * v[0]); + /* Compute (square of) motion since last cell construction */ + const float dx2 = xp->x_diff[0] * xp->x_diff[0] + + xp->x_diff[1] * xp->x_diff[1] + + xp->x_diff[2] * xp->x_diff[2]; + dx2_max = (dx2_max > dx2) ? dx2_max : dx2; - /* Collect energies. */ - e_kin += 0.5 * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]); - e_pot += 0.; - e_int += m * hydro_get_internal_energy(p, half_dt); - e_rad += cooling_get_radiated_energy(xp); + /* Maximal smoothing length */ + h_max = (h_max > p->h) ? h_max : p->h; + } - /* Collect entropy */ - entropy += m * hydro_get_entropy(p, half_dt); - } + /* Now, get the maximal particle motion from its square */ + dx_max = sqrtf(dx2_max); - /* Now, get the maximal particle motion from its square */ - dx_max = sqrtf(dx2_max); + } /* Check that we are actually going to move forward. */ } /* Otherwise, aggregate data from children. */ @@ -863,40 +851,15 @@ static void runner_do_drift(struct cell *c, struct engine *e) { struct cell *cp = c->progeny[k]; /* Recurse. */ - runner_do_drift(cp, e); - + runner_do_drift(cp, e, drift); dx_max = max(dx_max, cp->dx_max); h_max = max(h_max, cp->h_max); - mass += cp->mass; - e_kin += cp->e_kin; - e_int += cp->e_int; - e_pot += cp->e_pot; - e_rad += cp->e_rad; - entropy += cp->entropy; - mom[0] += cp->mom[0]; - mom[1] += cp->mom[1]; - mom[2] += cp->mom[2]; - ang_mom[0] += cp->ang_mom[0]; - ang_mom[1] += cp->ang_mom[1]; - ang_mom[2] += cp->ang_mom[2]; } } /* Store the values */ c->h_max = h_max; c->dx_max = dx_max; - c->mass = mass; - c->e_kin = e_kin; - c->e_int = e_int; - c->e_pot = e_pot; - c->e_rad = e_rad; - c->entropy = entropy; - c->mom[0] = mom[0]; - c->mom[1] = mom[1]; - c->mom[2] = mom[2]; - c->ang_mom[0] = ang_mom[0]; - c->ang_mom[1] = ang_mom[1]; - c->ang_mom[2] = ang_mom[2]; /* Update the time of the last drift */ c->ti_old = ti_current; @@ -918,9 +881,11 @@ void runner_do_drift_mapper(void *map_data, int num_elements, for (int ind = 0; ind < num_elements; ind++) { struct cell *c = &cells[ind]; - - /* Only drift local particles. */ - if (c != NULL && c->nodeID == e->nodeID) runner_do_drift(c, e); +#ifdef WITH_MPI + if (c != NULL) runner_do_drift(c, e, (c->nodeID == e->nodeID)); +#else + if (c != NULL) runner_do_drift(c, e, 1); +#endif } } @@ -1268,6 +1233,35 @@ void *runner_main(void *data) { struct cell *cj = t->cj; t->rid = r->cpuid; +/* Check that we haven't scheduled an inactive task */ +#ifdef SWIFT_DEBUG_CHECKS + if (cj == NULL) { /* self */ + if (ci->ti_end_min > e->ti_current && t->type != task_type_sort) + error( + "Task (type='%s/%s') should have been skipped ti_current=%d " + "c->ti_end_min=%d", + taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current, + ci->ti_end_min); + + /* Special case for sorts */ + if (ci->ti_end_min > e->ti_current && t->type == task_type_sort && + t->flags == 0) + error( + "Task (type='%s/%s') should have been skipped ti_current=%d " + "c->ti_end_min=%d t->flags=%d", + taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current, + ci->ti_end_min, t->flags); + + } else { /* pair */ + if (ci->ti_end_min > e->ti_current && cj->ti_end_min > e->ti_current) + error( + "Task (type='%s/%s') should have been skipped ti_current=%d " + "ci->ti_end_min=%d cj->ti_end_min=%d", + taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current, + ci->ti_end_min, cj->ti_end_min); + } +#endif + /* Different types of tasks... */ switch (t->type) { case task_type_self: @@ -1280,9 +1274,12 @@ void *runner_main(void *data) { runner_doself2_force(r, ci); else if (t->subtype == task_subtype_grav) runner_doself_grav(r, ci, 1); + else if (t->subtype == task_subtype_external_grav) + runner_do_grav_external(r, ci, 1); else error("Unknown task subtype."); break; + case task_type_pair: if (t->subtype == task_subtype_density) runner_dopair1_density(r, ci, cj); @@ -1297,9 +1294,7 @@ void *runner_main(void *data) { else error("Unknown task subtype."); break; - case task_type_sort: - runner_do_sort(r, ci, t->flags, 1); - break; + case task_type_sub_self: if (t->subtype == task_subtype_density) runner_dosub_self1_density(r, ci, 1); @@ -1311,9 +1306,12 @@ void *runner_main(void *data) { runner_dosub_self2_force(r, ci, 1); else if (t->subtype == task_subtype_grav) runner_dosub_grav(r, ci, cj, 1); + else if (t->subtype == task_subtype_external_grav) + runner_do_grav_external(r, ci, 1); else error("Unknown task subtype."); break; + case task_type_sub_pair: if (t->subtype == task_subtype_density) runner_dosub_pair1_density(r, ci, cj, t->flags, 1); @@ -1328,6 +1326,10 @@ void *runner_main(void *data) { else error("Unknown task subtype."); break; + + case task_type_sort: + runner_do_sort(r, ci, t->flags, 1); + break; case task_type_init: runner_do_init(r, ci, 1); break; @@ -1371,9 +1373,6 @@ void *runner_main(void *data) { case task_type_grav_fft: runner_do_grav_fft(r); break; - case task_type_grav_external: - runner_do_grav_external(r, t->ci, 1); - break; case task_type_cooling: runner_do_cooling(r, t->ci, 1); break; diff --git a/src/scheduler.c b/src/scheduler.c index f98f8824c00186956e8abe5e5c2aa3aae73abf4e..44790fcd2fa5f67e6f325ba5849da19e35ab285a 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -50,6 +50,12 @@ #include "task.h" #include "timers.h" +/** + * @brief Re-set the list of active tasks. + */ + +void scheduler_clear_active(struct scheduler *s) { s->active_count = 0; } + /** * @brief Add an unlock_task to the given task. * @@ -697,7 +703,7 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type, t->wait = wait; t->ci = ci; t->cj = cj; - t->skip = 0; + t->skip = 1; /* Mark tasks as skip by default. */ t->tight = tight; t->implicit = 0; t->weight = 0; @@ -863,6 +869,7 @@ void scheduler_reset(struct scheduler *s, int size) { /* Free existing task lists if necessary. */ if (s->tasks != NULL) free(s->tasks); if (s->tasks_ind != NULL) free(s->tasks_ind); + if (s->tid_active != NULL) free(s->tid_active); /* Allocate the new lists. */ if (posix_memalign((void *)&s->tasks, task_align, @@ -871,6 +878,9 @@ void scheduler_reset(struct scheduler *s, int size) { if ((s->tasks_ind = (int *)malloc(sizeof(int) * size)) == NULL) error("Failed to allocate task lists."); + + if ((s->tid_active = (int *)malloc(sizeof(int) * size)) == NULL) + error("Failed to allocate aactive task lists."); } /* Reset the counters. */ @@ -882,6 +892,7 @@ void scheduler_reset(struct scheduler *s, int size) { s->submask = 0; s->nr_unlocks = 0; s->completed_unlock_writes = 0; + s->active_count = 0; /* Set the task pointers in the queues. */ for (int k = 0; k < s->nr_queues; k++) s->queues[k].tasks = s->tasks; @@ -891,11 +902,11 @@ void scheduler_reset(struct scheduler *s, int size) { * @brief Compute the task weights * * @param s The #scheduler. - * @param verbose Are we talkative ? + * @param verbose Are we talkative? */ + void scheduler_reweight(struct scheduler *s, int verbose) { - const ticks tic = getticks(); const int nr_tasks = s->nr_tasks; int *tid = s->tasks_ind; struct task *tasks = s->tasks; @@ -904,6 +915,7 @@ void scheduler_reweight(struct scheduler *s, int verbose) { 0.4025, 0.1897, 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.5788}; const float wscale = 0.001; + const ticks tic = getticks(); /* Run through the tasks backwards and set their weights. */ for (int k = nr_tasks - 1; k >= 0; k--) { @@ -1033,33 +1045,84 @@ void scheduler_enqueue_mapper(void *map_data, int num_elements, void scheduler_start(struct scheduler *s, unsigned int mask, unsigned int submask) { - // ticks tic = getticks(); - /* Store the masks */ s->mask = mask; s->submask = submask | (1 << task_subtype_none); - /* Clear all the waits and rids. */ + /* Clear all the waits, rids, and times. */ for (int k = 0; k < s->nr_tasks; k++) { s->tasks[k].wait = 1; s->tasks[k].rid = -1; + s->tasks[k].tic = 0; + s->tasks[k].toc = 0; + if (((1 << s->tasks[k].type) & mask) == 0 || + ((1 << s->tasks[k].subtype) & s->submask) == 0) + s->tasks[k].skip = 1; } /* Re-wait the tasks. */ threadpool_map(s->threadpool, scheduler_rewait_mapper, s->tasks, s->nr_tasks, sizeof(struct task), 1000, s); +/* Check we have not missed an active task */ +#ifdef SWIFT_DEBUG_CHECKS + + const int ti_current = s->space->e->ti_current; + for (int k = 0; k < s->nr_tasks; k++) { + + struct task *t = &s->tasks[k]; + struct cell *ci = t->ci; + struct cell *cj = t->cj; + + if (cj == NULL) { /* self */ + + if (ci->ti_end_min == ti_current && t->skip && t->type != task_type_sort) + error( + "Task (type='%s/%s') should not have been skipped ti_current=%d " + "c->ti_end_min=%d", + taskID_names[t->type], subtaskID_names[t->subtype], ti_current, + ci->ti_end_min); + + /* Special treatment for sort tasks */ + if (ci->ti_end_min == ti_current && t->skip && + t->type == task_type_sort && t->flags == 0) + error( + "Task (type='%s/%s') should not have been skipped ti_current=%d " + "c->ti_end_min=%d t->flags=%d", + taskID_names[t->type], subtaskID_names[t->subtype], ti_current, + ci->ti_end_min, t->flags); + + } else { /* pair */ + + if ((ci->ti_end_min == ti_current || cj->ti_end_min == ti_current) && + t->skip) + error( + "Task (type='%s/%s') should not have been skipped ti_current=%d " + "ci->ti_end_min=%d cj->ti_end_min=%d", + taskID_names[t->type], subtaskID_names[t->subtype], ti_current, + ci->ti_end_min, cj->ti_end_min); + } + } + +#endif + /* Loop over the tasks and enqueue whoever is ready. */ - threadpool_map(s->threadpool, scheduler_enqueue_mapper, s->tasks_ind, - s->nr_tasks, sizeof(int), 1000, s); + for (int k = 0; k < s->active_count; k++) { + struct task *t = &s->tasks[s->tid_active[k]]; + if (atomic_dec(&t->wait) == 1 && !t->skip && ((1 << t->type) & s->mask) && + ((1 << t->subtype) & s->submask)) { + scheduler_enqueue(s, t); + pthread_cond_signal(&s->sleep_cond); + } + } + + /* Clear the list of active tasks. */ + s->active_count = 0; /* To be safe, fire of one last sleep_cond in a safe way. */ pthread_mutex_lock(&s->sleep_mutex); pthread_cond_broadcast(&s->sleep_cond); pthread_mutex_unlock(&s->sleep_mutex); - - /* message("enqueueing tasks took %.3f %s." , - clocks_from_ticks( getticks() - tic ), clocks_getunit()); */ } /** @@ -1110,20 +1173,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_pair: case task_type_sub_pair: - if (t->subtype == task_subtype_grav) { - - qid = t->ci->gsuper->owner; - if (qid < 0 || - s->queues[qid].count > s->queues[t->cj->gsuper->owner].count) - qid = t->cj->gsuper->owner; - - } else { - - qid = t->ci->super->owner; - if (qid < 0 || - s->queues[qid].count > s->queues[t->cj->super->owner].count) - qid = t->cj->super->owner; - } + qid = t->ci->super->owner; + if (qid < 0 || + s->queues[qid].count > s->queues[t->cj->super->owner].count) + qid = t->cj->super->owner; break; case task_type_recv: #ifdef WITH_MPI @@ -1222,6 +1275,9 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) { pthread_mutex_unlock(&s->sleep_mutex); } + /* Mark the task as skip. */ + t->skip = 1; + /* Return the next best task. Note that we currently do not implement anything that does this, as getting it to respect priorities is too tricky and currently unnecessary. */ @@ -1437,6 +1493,7 @@ void scheduler_clean(struct scheduler *s) { free(s->tasks_ind); free(s->unlocks); free(s->unlock_ind); + free(s->tid_active); for (int i = 0; i < s->nr_queues; ++i) queue_clean(&s->queues[i]); free(s->queues); } diff --git a/src/scheduler.h b/src/scheduler.h index c4eb5e99447d623e5fb8e442efc1c254c00bfadd..22c83ae3c551d4dc7e601e23f7a5301241bd1fa4 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -33,6 +33,7 @@ /* Includes. */ #include "cell.h" +#include "inline.h" #include "lock.h" #include "queue.h" #include "task.h" @@ -83,6 +84,10 @@ struct scheduler { /* The task indices. */ int *tasks_ind; + /* List of initial tasks. */ + int *tid_active; + int active_count; + /* The task unlocks. */ struct task **volatile unlocks; int *volatile unlock_ind; @@ -105,7 +110,24 @@ struct scheduler { int nodeID; }; +/* Inlined functions (for speed). */ +/** + * @brief Add a task to the list of active tasks. + * + * @param s The #scheduler. + * @param t The task to be added. + */ + +__attribute__((always_inline)) INLINE static void scheduler_activate( + struct scheduler *s, struct task *t) { + if (atomic_cas(&t->skip, 1, 0)) { + int ind = atomic_inc(&s->active_count); + s->tid_active[ind] = t - s->tasks; + } +} + /* Function prototypes. */ +void scheduler_clear_active(struct scheduler *s); void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks, int nr_queues, unsigned int flags, int nodeID, struct threadpool *tp); diff --git a/src/space.c b/src/space.c index 1d2733ab2d4356c19cc0e65a0b9441edcf0acb80..86c8cc36eacd58c89181831b3c5472026281043c 100644 --- a/src/space.c +++ b/src/space.c @@ -112,6 +112,15 @@ struct parallel_sort { volatile unsigned int first, last, waiting; }; +/** + * @brief Information required to compute the particle cell indices. + */ +struct index_data { + struct space *s; + struct cell *cells; + int *ind; +}; + /** * @brief Get the shift-id of the given pair of cells, swapping them * if need be. @@ -326,7 +335,6 @@ void space_regrid(struct space *s, double cell_max, int verbose) { c->count = 0; c->gcount = 0; c->super = c; - c->gsuper = c; c->ti_old = ti_current; lock_init(&c->lock); } @@ -387,9 +395,11 @@ void space_regrid(struct space *s, double cell_max, int verbose) { s->cells_top[k].nr_density = 0; s->cells_top[k].nr_gradient = 0; s->cells_top[k].nr_force = 0; + s->cells_top[k].nr_grav = 0; s->cells_top[k].density = NULL; s->cells_top[k].gradient = NULL; s->cells_top[k].force = NULL; + s->cells_top[k].grav = NULL; s->cells_top[k].dx_max = 0.0f; s->cells_top[k].sorted = 0; s->cells_top[k].count = 0; @@ -398,8 +408,20 @@ void space_regrid(struct space *s, double cell_max, int verbose) { s->cells_top[k].extra_ghost = NULL; s->cells_top[k].ghost = NULL; s->cells_top[k].kick = NULL; + s->cells_top[k].cooling = NULL; + s->cells_top[k].sourceterms = NULL; s->cells_top[k].super = &s->cells_top[k]; - s->cells_top[k].gsuper = &s->cells_top[k]; +#if WITH_MPI + s->cells_top[k].recv_xv = NULL; + s->cells_top[k].recv_rho = NULL; + s->cells_top[k].recv_gradient = NULL; + s->cells_top[k].recv_ti = NULL; + + s->cells_top[k].send_xv = NULL; + s->cells_top[k].send_rho = NULL; + s->cells_top[k].send_gradient = NULL; + s->cells_top[k].send_ti = NULL; +#endif } s->maxdepth = 0; } @@ -432,49 +454,21 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { struct cell *restrict cells_top = s->cells_top; const int ti_current = (s->e != NULL) ? s->e->ti_current : 0; - const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]}; - const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; - const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; - /* Run through the particles and get their cell index. */ - // tic = getticks(); const size_t ind_size = s->size_parts; int *ind; if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL) error("Failed to allocate temporary particle indices."); - for (size_t k = 0; k < nr_parts; k++) { - struct part *restrict p = &s->parts[k]; - for (int j = 0; j < 3; j++) - if (p->x[j] < 0.0) - p->x[j] += dim[j]; - else if (p->x[j] >= dim[j]) - p->x[j] -= dim[j]; - ind[k] = - cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); - cells_top[ind[k]].count++; - } - // message( "getting particle indices took %.3f %s." , - // clocks_from_ticks(getticks() - tic), clocks_getunit()): + if (ind_size > 0) space_parts_get_cell_index(s, ind, cells_top, verbose); + for (size_t i = 0; i < s->nr_parts; ++i) cells_top[ind[i]].count++; /* Run through the gravity particles and get their cell index. */ - // tic = getticks(); const size_t gind_size = s->size_gparts; int *gind; if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL) error("Failed to allocate temporary g-particle indices."); - for (size_t k = 0; k < nr_gparts; k++) { - struct gpart *restrict gp = &s->gparts[k]; - for (int j = 0; j < 3; j++) - if (gp->x[j] < 0.0) - gp->x[j] += dim[j]; - else if (gp->x[j] >= dim[j]) - gp->x[j] -= dim[j]; - gind[k] = - cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]); - cells_top[gind[k]].gcount++; - } -// message( "getting g-particle indices took %.3f %s." , -// clocks_from_ticks(getticks() - tic), clocks_getunit()); + if (gind_size > 0) space_gparts_get_cell_index(s, gind, cells_top, verbose); + for (size_t i = 0; i < s->nr_gparts; ++i) cells_top[gind[i]].gcount++; #ifdef WITH_MPI @@ -578,6 +572,9 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { ind = ind_new; } + const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; + const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]}; + /* Assign each particle to its cell. */ for (size_t k = nr_parts; k < s->nr_parts; k++) { const struct part *const p = &s->parts[k]; @@ -605,9 +602,9 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { for (size_t k = 1; k < nr_parts; k++) { if (ind[k - 1] > ind[k]) { error("Sort failed!"); - } else if (ind[k] != cell_getid(cdim, s->parts[k].x[0] * ih[0], - s->parts[k].x[1] * ih[1], - s->parts[k].x[2] * ih[2])) { + } else if (ind[k] != cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0], + s->parts[k].x[1] * s->iwidth[1], + s->parts[k].x[2] * s->iwidth[2])) { error("Incorrect indices!"); } } @@ -752,6 +749,164 @@ void space_sanitize(struct space *s) { } } +/** + * @brief #threadpool mapper function to compute the particle cell indices. + * + * @param map_data Pointer towards the particles. + * @param nr_parts The number of particles to treat. + * @param extra_data Pointers to the space and index list + */ +void space_parts_get_cell_index_mapper(void *map_data, int nr_parts, + void *extra_data) { + + /* Unpack the data */ + struct part *restrict parts = (struct part *)map_data; + struct index_data *data = (struct index_data *)extra_data; + struct space *s = data->s; + int *const ind = data->ind + (ptrdiff_t)(parts - s->parts); + + /* Get some constants */ + const double dim_x = s->dim[0]; + const double dim_y = s->dim[1]; + const double dim_z = s->dim[2]; + const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; + const double ih_x = s->iwidth[0]; + const double ih_y = s->iwidth[1]; + const double ih_z = s->iwidth[2]; + + for (int k = 0; k < nr_parts; k++) { + + /* Get the particle */ + struct part *restrict p = &parts[k]; + + const double old_pos_x = p->x[0]; + const double old_pos_y = p->x[1]; + const double old_pos_z = p->x[2]; + + /* Put it back into the simulation volume */ + const double pos_x = box_wrap(old_pos_x, 0.0, dim_x); + const double pos_y = box_wrap(old_pos_y, 0.0, dim_y); + const double pos_z = box_wrap(old_pos_z, 0.0, dim_z); + + /* Get its cell index */ + const int index = + cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z); + ind[k] = index; + + /* Update the position */ + p->x[0] = pos_x; + p->x[1] = pos_y; + p->x[2] = pos_z; + } +} + +/** + * @brief #threadpool mapper function to compute the g-particle cell indices. + * + * @param map_data Pointer towards the g-particles. + * @param nr_gparts The number of g-particles to treat. + * @param extra_data Pointers to the space and index list + */ +void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts, + void *extra_data) { + + /* Unpack the data */ + struct gpart *restrict gparts = (struct gpart *)map_data; + struct index_data *data = (struct index_data *)extra_data; + struct space *s = data->s; + int *const ind = data->ind + (ptrdiff_t)(gparts - s->gparts); + + /* Get some constants */ + const double dim_x = s->dim[0]; + const double dim_y = s->dim[1]; + const double dim_z = s->dim[2]; + const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; + const double ih_x = s->iwidth[0]; + const double ih_y = s->iwidth[1]; + const double ih_z = s->iwidth[2]; + + for (int k = 0; k < nr_gparts; k++) { + + /* Get the particle */ + struct gpart *restrict gp = &gparts[k]; + + const double old_pos_x = gp->x[0]; + const double old_pos_y = gp->x[1]; + const double old_pos_z = gp->x[2]; + + /* Put it back into the simulation volume */ + const double pos_x = box_wrap(old_pos_x, 0.0, dim_x); + const double pos_y = box_wrap(old_pos_y, 0.0, dim_y); + const double pos_z = box_wrap(old_pos_z, 0.0, dim_z); + + /* Get its cell index */ + const int index = + cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z); + ind[k] = index; + + /* Update the position */ + gp->x[0] = pos_x; + gp->x[1] = pos_y; + gp->x[2] = pos_z; + } +} + +/** + * @brief Computes the cell index of all the particles and update the cell + * count. + * + * @param s The #space. + * @param ind The array of indices to fill. + * @param cells The array of #cell to update. + * @param verbose Are we talkative ? + */ +void space_parts_get_cell_index(struct space *s, int *ind, struct cell *cells, + int verbose) { + + const ticks tic = getticks(); + + /* Pack the extra information */ + struct index_data data; + data.s = s; + data.cells = cells; + data.ind = ind; + + threadpool_map(&s->e->threadpool, space_parts_get_cell_index_mapper, s->parts, + s->nr_parts, sizeof(struct part), 1000, &data); + + if (verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + +/** + * @brief Computes the cell index of all the g-particles and update the cell + * gcount. + * + * @param s The #space. + * @param gind The array of indices to fill. + * @param cells The array of #cell to update. + * @param verbose Are we talkative ? + */ +void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells, + int verbose) { + + const ticks tic = getticks(); + + /* Pack the extra information */ + struct index_data data; + data.s = s; + data.cells = cells; + data.ind = gind; + + threadpool_map(&s->e->threadpool, space_gparts_get_cell_index_mapper, + s->gparts, s->nr_gparts, sizeof(struct gpart), 1000, &data); + + if (verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + /** * @brief Sort the particles and condensed particles according to the given * indices. @@ -1337,7 +1492,6 @@ void space_split_mapper(void *map_data, int num_cells, void *extra_data) { temp->nodeID = c->nodeID; temp->parent = c; temp->super = NULL; - temp->gsuper = NULL; c->progeny[k] = temp; } diff --git a/src/space.h b/src/space.h index 85f32bd7580945de6d9713316b557c92667987c0..011dfb71a6c3ac2b51093ce83bc6b65ceecc2821 100644 --- a/src/space.h +++ b/src/space.h @@ -170,6 +170,10 @@ void space_recycle(struct space *s, struct cell *c); void space_split(struct space *s, struct cell *cells, int nr_cells, int verbose); void space_split_mapper(void *map_data, int num_elements, void *extra_data); +void space_parts_get_cell_index(struct space *s, int *ind, struct cell *cells, + int verbose); +void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells, + int verbose); void space_do_parts_sort(); void space_do_gparts_sort(); void space_init_parts(struct space *s); diff --git a/src/statistics.c b/src/statistics.c new file mode 100644 index 0000000000000000000000000000000000000000..0afca84f517ca201fc184de2abff40b9ce9d5711 --- /dev/null +++ b/src/statistics.c @@ -0,0 +1,316 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <string.h> + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + +/* This object's header. */ +#include "statistics.h" + +/* Local headers. */ +#include "cooling.h" +#include "engine.h" +#include "error.h" +#include "hydro.h" +#include "threadpool.h" + +/** + * @brief Information required to compute the statistics in the mapper + */ +struct index_data { + /*! The space we play with */ + const struct space *s; + + /*! The #statistics aggregator to fill */ + struct statistics *stats; +}; + +/** + * @brief Adds the content of one #statistics aggregator to another one. + * + * Performs a += b; + * + * @param a The #statistics structure to update. + * @param b The #statistics structure to add to a. + */ +void stats_add(struct statistics *a, const struct statistics *b) { + + /* Add everything */ + a->E_kin += b->E_kin; + a->E_int += b->E_int; + a->E_pot += b->E_pot; + a->E_rad += b->E_rad; + a->entropy += b->entropy; + a->mass += b->mass; + a->mom[0] += b->mom[0]; + a->mom[1] += b->mom[1]; + a->mom[2] += b->mom[2]; + a->ang_mom[0] += b->ang_mom[0]; + a->ang_mom[1] += b->ang_mom[1]; + a->ang_mom[2] += b->ang_mom[2]; +} + +/** + * @brief Initialises a statistics aggregator to a valid state. + * + * @param s The #statistics aggregator to initialise + */ +void stats_init(struct statistics *s) { + + /* Zero everything */ + bzero(s, sizeof(struct statistics)); + + /* Set the lock */ + lock_init(&s->lock); +} + +/** + * @brief The #threadpool mapper function used to collect statistics for #part. + * + * @param map_data Pointer to the particles. + * @param nr_parts The number of particles in this chunk + * @param extra_data The #statistics aggregator. + */ +void stats_collect_part_mapper(void *map_data, int nr_parts, void *extra_data) { + + /* Unpack the data */ + const struct index_data *data = (struct index_data *)extra_data; + const struct space *s = data->s; + const struct part *restrict parts = (struct part *)map_data; + const struct xpart *restrict xparts = + s->xparts + (ptrdiff_t)(parts - s->parts); + const int ti_current = s->e->ti_current; + const double timeBase = s->e->timeBase; + struct statistics *const global_stats = data->stats; + + /* Local accumulator */ + struct statistics stats; + stats_init(&stats); + + /* Loop over particles */ + for (int k = 0; k < nr_parts; k++) { + + /* Get the particle */ + const struct part *p = &parts[k]; + const struct xpart *xp = &xparts[k]; + + /* Get useful variables */ + const float dt = (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase; + const double x[3] = {p->x[0], p->x[1], p->x[2]}; + float a_tot[3] = {p->a_hydro[0], p->a_hydro[1], p->a_hydro[2]}; + if (p->gpart != NULL) { + a_tot[0] += p->gpart->a_grav[0]; + a_tot[1] += p->gpart->a_grav[1]; + a_tot[2] += p->gpart->a_grav[2]; + } + const float v[3] = {xp->v_full[0] + a_tot[0] * dt, + xp->v_full[1] + a_tot[1] * dt, + xp->v_full[2] + a_tot[2] * dt}; + + const float m = hydro_get_mass(p); + + /* Collect mass */ + stats.mass += m; + + /* Collect momentum */ + stats.mom[0] += m * v[0]; + stats.mom[1] += m * v[1]; + stats.mom[2] += m * v[2]; + + /* Collect angular momentum */ + stats.ang_mom[0] += m * (x[1] * v[2] - x[2] * v[1]); + stats.ang_mom[1] += m * (x[2] * v[0] - x[0] * v[2]); + stats.ang_mom[2] += m * (x[0] * v[1] - x[1] * v[0]); + + /* Collect energies. */ + stats.E_kin += 0.5f * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]); + stats.E_pot += 0.; + stats.E_int += m * hydro_get_internal_energy(p, dt); + stats.E_rad += cooling_get_radiated_energy(xp); + + /* Collect entropy */ + stats.entropy += m * hydro_get_entropy(p, dt); + } + + /* Now write back to memory */ + if (lock_lock(&global_stats->lock) == 0) stats_add(global_stats, &stats); + if (lock_unlock(&global_stats->lock) != 0) error("Failed to unlock stats."); +} + +/** + * @brief The #threadpool mapper function used to collect statistics for #gpart. + * + * @param map_data Pointer to the g-particles. + * @param nr_gparts The number of g-particles in this chunk + * @param extra_data The #statistics aggregator. + */ +void stats_collect_gpart_mapper(void *map_data, int nr_gparts, + void *extra_data) { + + /* Unpack the data */ + const struct index_data *data = (struct index_data *)extra_data; + const struct space *s = data->s; + const struct gpart *restrict gparts = (struct gpart *)map_data; + const int ti_current = s->e->ti_current; + const double timeBase = s->e->timeBase; + struct statistics *const global_stats = data->stats; + + /* Local accumulator */ + struct statistics stats; + stats_init(&stats); + + /* Loop over particles */ + for (int k = 0; k < nr_gparts; k++) { + + /* Get the particle */ + const struct gpart *gp = &gparts[k]; + + /* If the g-particle has a counterpart, ignore it */ + if (gp->id_or_neg_offset < 0) continue; + + /* Get useful variables */ + const float dt = (ti_current - (gp->ti_begin + gp->ti_end) / 2) * timeBase; + const double x[3] = {gp->x[0], gp->x[1], gp->x[2]}; + const float v[3] = {gp->v_full[0] + gp->a_grav[0] * dt, + gp->v_full[1] + gp->a_grav[1] * dt, + gp->v_full[2] + gp->a_grav[2] * dt}; + + const float m = gp->mass; + + /* Collect mass */ + stats.mass += m; + + /* Collect momentum */ + stats.mom[0] += m * v[0]; + stats.mom[1] += m * v[1]; + stats.mom[2] += m * v[2]; + + /* Collect angular momentum */ + stats.ang_mom[0] += m * (x[1] * v[2] - x[2] * v[1]); + stats.ang_mom[1] += m * (x[2] * v[0] - x[0] * v[2]); + stats.ang_mom[2] += m * (x[0] * v[1] - x[1] * v[0]); + + /* Collect energies. */ + stats.E_kin += 0.5f * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]); + stats.E_pot += 0.; + } + + /* Now write back to memory */ + if (lock_lock(&global_stats->lock) == 0) stats_add(global_stats, &stats); + if (lock_unlock(&global_stats->lock) != 0) error("Failed to unlock stats."); +} + +/** + * @brief Collect physical statistics over all particles in a #space. + * + * @param s The #space to collect from. + * @param stats The #statistics aggregator to fill. + */ +void stats_collect(const struct space *s, struct statistics *stats) { + + /* Prepare the data */ + struct index_data extra_data; + extra_data.s = s; + extra_data.stats = stats; + + /* Run parallel collection of statistics for parts */ + if (s->nr_parts > 0) + threadpool_map(&s->e->threadpool, stats_collect_part_mapper, s->parts, + s->nr_parts, sizeof(struct part), 10000, &extra_data); + + /* Run parallel collection of statistics for gparts */ + if (s->nr_gparts > 0) + threadpool_map(&s->e->threadpool, stats_collect_gpart_mapper, s->gparts, + s->nr_gparts, sizeof(struct gpart), 10000, &extra_data); +} + +/** + * @brief Prints the content of a #statistics aggregator to a file + * + * @param file File to write to. + * @param stats The #statistics object to write to the file + * @param time The current physical time. + */ +void stats_print_to_file(FILE *file, const struct statistics *stats, + double time) { + + const double E_tot = stats->E_kin + stats->E_int + stats->E_pot; + + fprintf(file, + " %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e " + "%14e\n", + time, stats->mass, E_tot, stats->E_kin, stats->E_int, stats->E_pot, + stats->E_rad, stats->entropy, stats->mom[0], stats->mom[1], + stats->mom[2], stats->ang_mom[0], stats->ang_mom[1], + stats->ang_mom[2]); + fflush(file); +} + +/* Extra stuff in MPI-land */ +#ifdef WITH_MPI + +/** + * @brief MPI datatype corresponding to the #statistics structure. + */ +MPI_Datatype statistics_mpi_type; + +/** + * @brief MPI operator used for the reduction of #statistics structure. + */ +MPI_Op statistics_mpi_reduce_op; + +/** + * @brief MPI reduce operator for #statistics structures. + */ +void stats_add_MPI(void *in, void *inout, int *len, MPI_Datatype *datatype) { + + for (int i = 0; i < *len; ++i) + stats_add(&((struct statistics *)inout)[0], + &((const struct statistics *)in)[i]); +} + +/** + * @brief Registers MPI #statistics type and reduction function. + */ +void stats_create_MPI_type() { + + /* This is not the recommended way of doing this. + One should define the structure field by field + But as long as we don't do serialization via MPI-IO + we don't really care. + Also we would have to modify this function everytime something + is added to the statistics structure. */ + if (MPI_Type_contiguous(sizeof(struct statistics) / sizeof(unsigned char), + MPI_BYTE, &statistics_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&statistics_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for statistics."); + } + + /* Create the reduction operation */ + MPI_Op_create(stats_add_MPI, 1, &statistics_mpi_reduce_op); +} +#endif diff --git a/src/statistics.h b/src/statistics.h new file mode 100644 index 0000000000000000000000000000000000000000..ece1f813bb8aff2b5402753bafc031696cfa562d --- /dev/null +++ b/src/statistics.h @@ -0,0 +1,76 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_STATISTICS_H +#define SWIFT_STATISTICS_H + +/* Config parameters. */ +#include "../config.h" + +/* Local headers. */ +#include "lock.h" +#include "space.h" + +/** + * @brief Quantities collected for physics statistics + */ +struct statistics { + + /*! Kinetic energy */ + double E_kin; + + /*! Internal energy */ + double E_int; + + /*! Potential energy */ + double E_pot; + + /*! Radiative energy */ + double E_rad; + + /*! Entropy */ + double entropy; + + /*! Mass */ + double mass; + + /*! Momentum */ + double mom[3]; + + /*! Angular momentum */ + double ang_mom[3]; + + /*! Lock for threaded access */ + swift_lock_type lock; +}; + +void stats_collect(const struct space* s, struct statistics* stats); +void stats_add(struct statistics* a, const struct statistics* b); +void stats_print_to_file(FILE* file, const struct statistics* stats, + double time); +void stats_init(struct statistics* s); + +#ifdef WITH_MPI +extern MPI_Datatype statistics_mpi_type; +extern MPI_Op statistics_mpi_reduce_op; + +void stats_add_MPI(void* in, void* out, int* len, MPI_Datatype* datatype); +void stats_create_MPI_type(); +#endif + +#endif /* SWIFT_STATISTICS_H */ diff --git a/src/task.c b/src/task.c index 00068f45769b4ca606cc729bd5e89c13ae729eef..54b9363b7ac3d5c372b591c00b0b03cc274f66b5 100644 --- a/src/task.c +++ b/src/task.c @@ -48,13 +48,13 @@ /* Task type names. */ const char *taskID_names[task_type_count] = { - "none", "sort", "self", "pair", "sub_self", - "sub_pair", "init", "ghost", "extra_ghost", "kick", - "kick_fixdt", "send", "recv", "grav_gather_m", "grav_fft", - "grav_mm", "grav_up", "grav_external", "cooling", "sourceterms"}; + "none", "sort", "self", "pair", "sub_self", + "sub_pair", "init", "ghost", "extra_ghost", "kick", + "kick_fixdt", "send", "recv", "grav_gather_m", "grav_fft", + "grav_mm", "grav_up", "cooling", "sourceterms"}; const char *subtaskID_names[task_subtype_count] = { - "none", "density", "gradient", "force", "grav", "tend"}; + "none", "density", "gradient", "force", "grav", "external_grav", "tend"}; /** * @brief Computes the overlap between the parts array of two given cells. @@ -135,6 +135,7 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( break; case task_subtype_grav: + case task_subtype_external_grav: return task_action_gpart; break; @@ -150,7 +151,14 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( case task_type_kick_fixdt: case task_type_send: case task_type_recv: - return task_action_all; + if (t->ci->count > 0 && t->ci->gcount > 0) + return task_action_all; + else if (t->ci->count > 0) + return task_action_part; + else if (t->ci->gcount > 0) + return task_action_gpart; + else + error("Task without particles"); break; case task_type_grav_gather_m: @@ -160,10 +168,6 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( return task_action_multipole; break; - case task_type_grav_external: - return task_action_gpart; - break; - default: error("Unknown task_action for task"); return task_action_none; @@ -393,3 +397,15 @@ void task_print_submask(unsigned int submask) { printf(" %s=%s", subtaskID_names[k], (submask & (1 << k)) ? "yes" : "no"); printf(" ]\n"); } + +/** + * @brief Print basic information about a task. + * + * @param t The #task. + */ +void task_print(const struct task *t) { + + message("Type:'%s' sub_type:'%s' wait=%d nr_unlocks=%d skip=%d", + taskID_names[t->type], subtaskID_names[t->subtype], t->wait, + t->nr_unlock_tasks, t->skip); +} diff --git a/src/task.h b/src/task.h index bc4df3dc2a4cfee3c382e9f2059cba84f29299f7..f840c0b4b8e807dce28f6f13479dbdf4995ab66d 100644 --- a/src/task.h +++ b/src/task.h @@ -53,7 +53,6 @@ enum task_types { task_type_grav_fft, task_type_grav_mm, task_type_grav_up, - task_type_grav_external, task_type_cooling, task_type_sourceterms, task_type_count @@ -68,6 +67,7 @@ enum task_subtypes { task_subtype_gradient, task_subtype_force, task_subtype_grav, + task_subtype_external_grav, task_subtype_tend, task_subtype_count } __attribute__((packed)); @@ -160,5 +160,6 @@ int task_lock(struct task *t); void task_print_mask(unsigned int mask); void task_print_submask(unsigned int submask); void task_do_rewait(struct task *t); +void task_print(const struct task *t); #endif /* SWIFT_TASK_H */ diff --git a/src/version.c b/src/version.c index 8bd94e5651dbc597fcd80bc585a47c6633ee3993..3d948e593f58a5f6df0ccbcfc536c086a6859aa3 100644 --- a/src/version.c +++ b/src/version.c @@ -21,6 +21,9 @@ /* Config parameters. */ #include "../config.h" +/* Needed for gethostname() */ +#include <unistd.h> + /* MPI headers. */ #ifdef WITH_MPI #include <mpi.h> @@ -47,6 +50,24 @@ /* Local headers. */ #include "version_string.h" +/** + * @brief Return the hostname + * + * Will return the name of the host. + * + * @result the hostname. + */ +const char *hostname(void) { + static char buf[256]; + static int initialised = 0; + if (!initialised) { + buf[255] = '\0'; + if (gethostname(buf, 255)) sprintf(buf, "%s", "Unknown host"); + initialised = 1; + } + return buf; +} + /** * @brief Return the source code git revision * diff --git a/src/version.h b/src/version.h index 0d568f312cf775cf932d580a49da7c19c9e14b21..07a98c5e3175efd82110efc0104336d657edd469 100644 --- a/src/version.h +++ b/src/version.h @@ -22,6 +22,7 @@ const char* package_description(void); const char* package_version(void); +const char* hostname(void); const char* git_revision(void); const char* git_branch(void); const char* compiler_name(void); diff --git a/theory/SPH/EoS/eos.tex b/theory/SPH/EoS/eos.tex new file mode 100644 index 0000000000000000000000000000000000000000..cd21d2f5d8c3eb276bae64d91970031dc1924e67 --- /dev/null +++ b/theory/SPH/EoS/eos.tex @@ -0,0 +1,64 @@ + +In \swift, all transformations between thermodynamical quantities are +computed using a pre-defined equation of state (EoS) for the gas. This +allows user to switch EoS without having to modify the equations of +SPH. The definition of the EoS takes the form of simple relations +between thermodynamic quantities. All of them must be specified even +when they default to constants. + +%####################################################################################################### +\subsection{Ideal Gas} +\label{sec:eos:ideal} + +This is the simplest and most common equation of state. It corresponds +to a gas obeying a pressure law $P = (\gamma-1) \rho u$, where $u$ is +the gas' internal energy, $\rho$ its density and $\gamma$ the +(user defined) adiabatic index, often assumed to be $5/3$ or $7/5$. For such a gas, +we have the following relations between pressure ($P$), internal energy +($u$), density ($\rho$), sound speed ($c$) and entropic function ($A$): + +\begin{align} + P &= P_{\rm eos}(\rho, u) \equiv (\gamma-1) \rho + u \label{eq:eos:ideal:P_from_u} \\ + c &= c_{\rm eos}(\rho, u) \equiv \sqrt{\gamma (\gamma-1) + u} \label{eq:eos:ideal:c_from_u} \\ + A &= A_{\rm eos}(\rho, u) \equiv(\gamma-1) u + \rho^{\gamma-1} \label{eq:eos:ideal:A_from_u} \\ + ~ \nonumber\\ + P &= P_{\rm eos}(\rho, A) \equiv A + \rho^{\gamma} \label{eq:eos:ideal:P_from_A} \\ + c &= c_{\rm eos}(\rho, A) \equiv \sqrt{\gamma \rho^{\gamma-1} + A} \label{eq:eos:ideal:c_from_A} \\ + u &= u_{\rm eos}(\rho, A) \equiv A \rho^{\gamma-1} / + (\gamma-1) \label{eq:eos:ideal:u_from_A} +\end{align} +Note that highly optimised functions to compute $x^\gamma$ and other +useful powers of $\gamma$ have been implemented in \swift for the most +commonly used values of $\gamma$. + + +%####################################################################################################### +\subsection{Isothermal Gas} +\label{sec:eos:isothermal} + +An isothermal equation of state can be useful for certain test cases +or to speed-up the generation of glass files. This EoS corresponds to +a gas with contant (user-specified) thermal energy $u_{\rm cst}$. For such a gas, +we have the following (trivial) relations between pressure ($P$), internal energy +($u$), density ($\rho$), sound speed ($c$) and entropic function ($A$): + +\begin{align} + P &= P_{\rm eos}(\rho, u) \equiv (\gamma-1) \rho + u_{\rm cst} \label{eq:eos:isothermal:P_from_u} \\ + c &= c_{\rm eos}(\rho, u) \equiv \sqrt{\gamma (\gamma-1) + u_{\rm cst}} = c_{\rm cst} \label{eq:eos:isothermal:c_from_u} \\ + A &= A_{\rm eos}(\rho, u) \equiv(\gamma-1) u_{\rm cst} + \rho^{\gamma-1} \label{eq:eos:isothermal:A_from_u} \\ + ~ \nonumber\\ + P &= P_{\rm eos}(\rho, A) \equiv (\gamma-1) \rho + u_{\rm cst} \label{eq:eos:isothermal:P_from_A} \\ + c &= c_{\rm eos}(\rho, A) \equiv \sqrt{\gamma (\gamma-1) + u_{\rm cst}} = c_{\rm cst} \label{eq:eos:isothermal:c_from_A} \\ + u &= u_{\rm eos}(\rho, A) \equiv u_{\rm + cst} \label{eq:eos:isothermal:u_from_A} +\end{align} diff --git a/theory/SPH/EoS/eos_standalone.tex b/theory/SPH/EoS/eos_standalone.tex new file mode 100644 index 0000000000000000000000000000000000000000..14447f7e7f7194edd5ef0996ddf27a75d7323427 --- /dev/null +++ b/theory/SPH/EoS/eos_standalone.tex @@ -0,0 +1,22 @@ +\documentclass[fleqn, usenatbib, useAMS,a4paper]{mnras} +\usepackage{graphicx} +\usepackage{amsmath,paralist,xcolor,xspace,amssymb} +\usepackage{times} + +\newcommand{\swift}{{\sc Swift}\xspace} + +%opening +\title{Equation of state in SWIFT} +\author{Matthieu Schaller} + +\begin{document} + +\maketitle + +\input{eos} + +\bibliographystyle{mnras} +\bibliography{./bibliography.bib} + + +\end{document} diff --git a/theory/SPH/EoS/run.sh b/theory/SPH/EoS/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..72ef573fb26885563e89561d172c15a5a0081564 --- /dev/null +++ b/theory/SPH/EoS/run.sh @@ -0,0 +1,6 @@ +#!/bin/bash +python kernels.py +pdflatex -jobname=eos eos_standalone.tex +bibtex eos.aux +pdflatex -jobname=eos eos_standalone.tex +pdflatex -jobname=eos eos_standalone.tex diff --git a/theory/SPH/Flavours/bibliography.bib b/theory/SPH/Flavours/bibliography.bib new file mode 100644 index 0000000000000000000000000000000000000000..2bc11dacca90fe03d05c2e847503105d80eb1317 --- /dev/null +++ b/theory/SPH/Flavours/bibliography.bib @@ -0,0 +1,100 @@ +@ARTICLE{Price2012, + author = {{Price}, D.~J.}, + title = "{Smoothed particle hydrodynamics and magnetohydrodynamics}", + journal = {Journal of Computational Physics}, +archivePrefix = "arXiv", + eprint = {1012.1885}, + primaryClass = "astro-ph.IM", + year = 2012, + month = feb, + volume = 231, + pages = {759-794}, + doi = {10.1016/j.jcp.2010.12.011}, + adsurl = {http://adsabs.harvard.edu/abs/2012JCoPh.231..759P}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + +@ARTICLE{Springel2005, + author = {{Springel}, V.}, + title = "{The cosmological simulation code GADGET-2}", + journal = {\mnras}, + eprint = {astro-ph/0505010}, + keywords = {methods: numerical, galaxies: interactions, dark matter}, + year = 2005, + month = dec, + volume = 364, + pages = {1105-1134}, + doi = {10.1111/j.1365-2966.2005.09655.x}, + adsurl = {http://adsabs.harvard.edu/abs/2005MNRAS.364.1105S}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + +@ARTICLE{Hopkins2013, + author = {{Hopkins}, P.~F.}, + title = "{A general class of Lagrangian smoothed particle hydrodynamics methods and implications for fluid mixing problems}", + journal = {\mnras}, +archivePrefix = "arXiv", + eprint = {1206.5006}, + primaryClass = "astro-ph.IM", + keywords = {hydrodynamics, instabilities, turbulence, methods: numerical, cosmology: theory}, + year = 2013, + month = feb, + volume = 428, + pages = {2840-2856}, + doi = {10.1093/mnras/sts210}, + adsurl = {http://adsabs.harvard.edu/abs/2013MNRAS.428.2840H}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + +@ARTICLE{Springel2002, + author = {{Springel}, V. and {Hernquist}, L.}, + title = "{Cosmological smoothed particle hydrodynamics simulations: the entropy equation}", + journal = {\mnras}, + eprint = {astro-ph/0111016}, + keywords = {methods: numerical, galaxies: evolution, galaxies: starburst, methods: numerical, galaxies: evolution, galaxies: starburst}, + year = 2002, + month = jul, + volume = 333, + pages = {649-664}, + doi = {10.1046/j.1365-8711.2002.05445.x}, + adsurl = {http://adsabs.harvard.edu/abs/2002MNRAS.333..649S}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + +@ARTICLE{Balsara1995, + author = {{Balsara}, D.~S.}, + title = "{von Neumann stability analysis of smooth particle hydrodynamics--suggestions for optimal algorithms}", + journal = {Journal of Computational Physics}, + year = 1995, + volume = 121, + pages = {357-372}, + doi = {10.1016/S0021-9991(95)90221-X}, + adsurl = {http://adsabs.harvard.edu/abs/1995JCoPh.121..357B}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + +@ARTICLE{Schaller2015, + author = {{Schaller}, M. and {Dalla Vecchia}, C. and {Schaye}, J. and + {Bower}, R.~G. and {Theuns}, T. and {Crain}, R.~A. and {Furlong}, M. and + {McCarthy}, I.~G.}, + title = "{The EAGLE simulations of galaxy formation: the importance of the hydrodynamics scheme}", + journal = {\mnras}, +archivePrefix = "arXiv", + eprint = {1509.05056}, + keywords = {methods: numerical, galaxies: clusters: intracluster medium, galaxies: formation, cosmology: theory}, + year = 2015, + month = dec, + volume = 454, + pages = {2277-2291}, + doi = {10.1093/mnras/stv2169}, + adsurl = {http://adsabs.harvard.edu/abs/2015MNRAS.454.2277S}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + diff --git a/theory/SPH/Flavours/run.sh b/theory/SPH/Flavours/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..6d0791823d93f7feb8f469747f81b24032612523 --- /dev/null +++ b/theory/SPH/Flavours/run.sh @@ -0,0 +1,5 @@ +#!/bin/bash +pdflatex -jobname=sph_flavours sph_flavours_standalone.tex +bibtex sph_flavours.aux +pdflatex -jobname=sph_flavours sph_flavours_standalone.tex +pdflatex -jobname=sph_flavours sph_flavours_standalone.tex diff --git a/theory/SPH/Flavours/sph_flavours.tex b/theory/SPH/Flavours/sph_flavours.tex new file mode 100644 index 0000000000000000000000000000000000000000..5fe1277373552d60607671299437a371e068169c --- /dev/null +++ b/theory/SPH/Flavours/sph_flavours.tex @@ -0,0 +1,382 @@ +For vector quantities, we define $\vec{a}_{ij} \equiv (\vec{a}_i - +\vec{a}_j)$. Barred quantities ($\bar a_{ij}$) correspond to the +average over two particles of a quantity: $\bar a_{ij} \equiv +\frac{1}{2}(a_i + a_j)$. To simplify notations, we also define the +vector quantity $\Wij \equiv \frac{1}{2}\left(W(\vec{x}_{ij}, h_i) + +\nabla_x W(\vec{x}_{ij},h_j)\right)$. + + +%####################################################################################################### +\subsection{\MinimalSPH} +\label{sec:sph:minimal} + +This is the simplest fully-conservative version of SPH using the +internal energy $u$ as a thermal variable that can be written +down. Its implementation in \swift should be understood as a text-book +example and template for more advanced implementations. A full +derivation and motivation for the equations can be found in the review +of \cite{Price2012}. Our implementation follows their equations (27), +(43), (44), (45), (101), (103) and (104) with $\beta=3$ and +$\alpha_u=0$. We summarize the equations here. + +\subsubsection{Density and other fluid properties (\nth{1} neighbour loop)} + +For a set of particles $i$ with positions $\vec{x}_i$ with velocities +$\vec{v}_i$, masses $m_i$, sthermal energy per unit mass $u_i$ and +smoothing length $h_i$, we compute the density for each particle: + +\begin{equation} + \rho_i \equiv \rho(\vec{x}_i) = \sum_j m_j W(\vec{x}_{ij}, h_i), + \label{eq:sph:minimal:rho} +\end{equation} +and the derivative of its density with respect to $h$: + +\begin{equation} + \label{eq:sph:minimal:rho_dh} + \rho_{\partial h_i} \equiv \dd{\rho}{h}(\vec{x}_i) = \sum_j m_j \dd{W}{h}(\vec{x}_{ij} + , h_i). +\end{equation} +The gradient terms (``h-terms'') can then be computed from the density +and its derivative: + +\begin{equation} + f_i \equiv \left(1 + \frac{h_i}{3\rho_i}\rho_{\partial h_i} + \right)^{-1}. + \label{eq:sph:minimal:f_i} +\end{equation} +Using the pre-defined equation of state, the pressure $P_i$ and the sound +speed $c_i$ at the location of particle $i$ can now be computed from +$\rho_i$ and $u_i$: + +\begin{align} + P_i &= P_{\rm eos}(\rho_i, u_i), \label{eq:sph:minimal:P}\\ + c_i &= c_{\rm eos}(\rho_i, u_i). \label{eq:sph:minimal:c} +\end{align} + +\subsubsection{Hydrodynamical accelerations (\nth{2} neighbour loop)} + +We can then proceed with the second loop over +neighbours. The signal velocity $v_{{\rm sig},ij}$ between two particles is given by + +\begin{align} + \mu_{ij} &= + \begin{cases} + \frac{\vec{v}_{ij} \cdot \vec{x}_{ij}}{|\vec{x}_{ij}|} & \rm{if}~ + \vec{v}_{ij} \cdot \vec{x}_{ij} < 0,\\ + 0 &\rm{otherwise}, \\ + \end{cases}\nonumber\\ + v_{{\rm sig},ij} &= c_i + c_j - 3\mu_{ij}. \label{eq:sph:minimal:v_sig} +\end{align} +We also use these two quantities for the calculation of the viscosity term: + +\begin{equation} +\nu_{ij} = -\frac{1}{2}\frac{\alpha \mu_{ij} v_{{\rm + sig},ij}}{\bar\rho_{ij}} + \label{eq:sph:minimal:nu_ij} +\end{equation} +The fluid accelerations are then given by + +\begin{align} + \frac{d\vec{v}_i}{dt} = -\sum_j m_j &\left[\frac{f_iP_i}{\rho_i^2} + \nabla_x W(\vec{x}_{ij}, h_i) \nonumber + +\frac{f_jP_j}{\rho_j^2}\nabla_x W(\vec{x}_{ij},h_j)\right. \\ + &+ \left. \bigg.\nu_{ij} \Wij \right], \label{eq:sph:minimal:dv_dt} +\end{align} +and the change in internal energy, + +\begin{align} + \frac{du_i}{dt} = \sum_j m_j &\left[\frac{f_iP_i}{\rho_i^2} \vec{v}_{ij} + \cdot \nabla_x W(\vec{x}_{ij}, h_i) \right. \label{eq:sph:minimal:du_dt}\\ + &+\left. \frac{1}{2}\nu_{ij}\vec{v}_{ij}\cdot\Big. \Wij\right], \nonumber +\end{align} +where in both cases the first line corresponds to the standard SPH +term and the second line to the viscuous accelerations. + +We also compute an estimator of the change in smoothing length to be +used in the prediction step. This is an estimate of the local +divergence of the velocity field compatible with the accelerations +computed above: + +\begin{equation} + \frac{dh_i}{dt} = -\frac{1}{3}h_i \sum_j \frac{m_j}{\rho_j} + \vec{v}_{ij}\cdot \nabla_x W(\vec{x}_{ij}, h_i). + \label{eq:sph:minimal:dh_dt} +\end{equation} +and update the signal velocity of the particles: + +\begin{equation} + v_{{\rm sig},i} = \max_j \left( v_{{\rm sig},ij} \right). + \label{eq:sph:minimal:v_sig_update} +\end{equation} +All the quantities required for time integration have now been obtained. + +\subsubsection{Time integration} + +For each particle, we compute a time-step given by the CFL condition: + +\begin{equation} + \Delta t = 2 C_{\rm CFL} \frac{H_i}{v_{{\rm sig},i}}, + \label{eq:sph:minimal:dt} +\end{equation} +where $C_{\rm CFL}$ is a free dimensionless parameter and $H_i = \gamma h_i$ is the +kernel support size. Particles can then be ``kicked'' forward in time: +\begin{align} + \vec{v}_i &\rightarrow \vec{v}_i + \frac{d\vec{v}_i}{dt} \Delta t \label{eq:sph:minimal:kick_v}\\ + u_i &\rightarrow u_i + \frac{du_i}{dt} \Delta t \label{eq:sph:minimal:kick_u}\\ + P_i &\rightarrow P_{\rm eos}\left(\rho_i, u_i\right) \label{eq:sph:minimal:kick_P}, \\ + c_i &\rightarrow c_{\rm eos}\left(\rho_i, u_i\right) \label{eq:sph:minimal:kick_c}, +\end{align} +where we used the pre-defined equation of state to compute the new +value of the pressure and sound-speed. + +\subsubsection{Particle properties prediction} + +Inactive particles need to have their quantities predicted forward in +time in the ``drift'' operation. We update them as follows: + +\begin{align} + \vec{x}_i &\rightarrow \vec{x}_i + \vec{v}_i \Delta t \label{eq:sph:minimal:drift_x} \\ + h_i &\rightarrow h_i \exp\left(\frac{1}{h_i} \frac{dh_i}{dt} + \Delta t\right), \label{eq:sph:minimal:drift_h}\\ + \rho_i &\rightarrow \rho_i \exp\left(-\frac{3}{h_i} \frac{dh_i}{dt} + \Delta t\right), \label{eq:sph:minimal:drift_rho} \\ + P_i &\rightarrow P_{\rm eos}\left(\rho_i, u_i + \frac{du_i}{dt} \Delta t\right), \label{eq:sph:minimal:drift_P}\\ + c_i &\rightarrow c_{\rm eos}\left(\rho_i, u_i + \frac{du_i}{dt} + \Delta t\right) \label{eq:sph:minimal:drift_c}, +\end{align} +where, as above, the last two updated quantities are obtained using +the pre-defined equation of state. Note that the thermal energy $u_i$ +itself is \emph{not} updated. + +%####################################################################################################### + +\subsection{Gadget-2 SPH} +\label{sec:sph:gadget2} + +This flavour of SPH is the one implemented in the \gadget-2 code +\citep{Springel2005}. The basic equations were derived by +\cite{Springel2002} and also includes a \cite{Balsara1995} switch for +the suppression of viscosity. The implementation here follows closely the +presentation of \cite{Springel2005}. Specifically, we use their equations (5), (7), +(8), (9), (10), (13), (14) and (17). We summarize them here for completeness. + +\subsubsection{Density and other fluid properties (\nth{1} neighbour loop)} + +For a set of particles $i$ with positions $\vec{x}_i$ with velocities +$\vec{v}_i$, masses $m_i$, entropic function per unit mass $A_i$ and +smoothing length $h_i$, we compute the density, derivative of the density with respect +to $h$ and the ``h-terms'' in a similar way to the minimal-SPH case +(Eq. \ref{eq:sph:minimal:rho}, \ref{eq:sph:minimal:rho_dh} and +\ref{eq:sph:minimal:f_i}). From these the pressure and sound-speed can +be computed using the pre-defined equation of state: + +\begin{align} + P_i &= P_{\rm eos}(\rho_i, A_i), \label{eq:sph:gadget2:P}\\ + c_i &= c_{\rm eos}(\rho_i, A_i). \label{eq:sph:gadget2:c} +\end{align} +We additionally compute the divergence and +curl of the velocity field using standard SPH expressions: + +\begin{align} + \nabla\cdot\vec{v}_i &\equiv\nabla\cdot \vec{v}(\vec{x}_i) = \frac{1}{\rho_i} \sum_j m_j + \vec{v}_{ij}\cdot\nabla_x W(\vec{x}_{ij}, h_i) \label{eq:sph:gadget2:div_v},\\ + \nabla\times\vec{v}_i &\equiv \nabla\times \vec{v}(\vec{x}_i) = \frac{1}{\rho_i} \sum_j m_j + \vec{v}_{ij}\times\nabla_x W(\vec{x}_{ij}, h_i) \label{eq:sph:gadget2:rot_v}. +\end{align} +These are used to construct the \cite{Balsara1995} switch $B_i$: + +\begin{equation} + B_i = \frac{|\nabla\cdot\vec{v}_i|}{|\nabla\cdot\vec{v}_i| + + |\nabla\times\vec{v}_i| + 10^{-4}c_i / h_i}, \label{eq:sph:gadget2:balsara} +\end{equation} +where the last term in the denominator is added to prevent numerical instabilities. + +\subsubsection{Hydrodynamical accelerations (\nth{2} neighbour loop)} + +The accelerations are computed in a similar fashion to the minimal-SPH +case with the exception of the viscosity term which is now modified to +include the switch. Instead of Eq. \ref{eq:sph:minimal:nu_ij}, we get: + +\begin{equation} +\nu_{ij} = -\frac{1}{2}\frac{\alpha \bar B_{ij} \mu_{ij} v_{{\rm sig},ij}}{\bar\rho_{ij}}, + \label{eq:sph:gadget2:nu_ij} +\end{equation} +whilst equations \ref{eq:sph:minimal:v_sig}, +\ref{eq:sph:minimal:dv_dt}, \ref{eq:sph:minimal:dh_dt} and +\ref{eq:sph:minimal:v_sig_update} remain unchanged. The only other +change is the equation of motion for the thermodynamic variable which +now has to be describing the evolution of the entropic function and +not the evolution of the thermal energy. Instead of +eq. \ref{eq:sph:minimal:du_dt}, we have + +\begin{equation} +\frac{dA_i}{dt} = \frac{1}{2} A_{\rm eos}\left(\rho_i, \sum_j +m_j \nu_{ij}\vec{v}_{ij}\cdot \Wij\right), +\end{equation} +where we made use of the pre-defined equation of state relating +density and internal energy to entropy. + +\subsubsection{Time integration} + +The time-step condition is identical to the \MinimalSPH case +(Eq. \ref{eq:sph:minimal:dt}). The same applies to the integration +forward in time (Eq. \ref{eq:sph:minimal:kick_v} to +\ref{eq:sph:minimal:kick_c}) with the exception of the change in +internal energy (Eq. \ref{eq:sph:minimal:kick_u}) which gets replaced +by an integration for the the entropy: + + +\begin{align} + \vec{v}_i &\rightarrow \vec{v}_i + \frac{d\vec{v}_i}{dt} \Delta t \label{eq:sph:gadget2:kick_v}\\ + A_i &\rightarrow A_i + \frac{dA_i}{dt} \Delta t \label{eq:sph:gadget2:kick_A}\\ + P_i &\rightarrow P_{\rm eos}\left(\rho_i, A_i\right) \label{eq:sph:gadget2:kick_P}, \\ + c_i &\rightarrow c_{\rm eos}\left(\rho_i, A_i\right) \label{eq:sph:gadget2:kick_c}, +\end{align} +where, once again, we made use of the equation of state relating +thermodynamical quantities. + +\subsubsection{Particle properties prediction} + +The prediction step is also identical to the \MinimalSPH case with the +entropic function replacing the thermal energy. + +\begin{align} + \vec{x}_i &\rightarrow \vec{x}_i + \vec{v}_i \Delta t \label{eq:sph:gadget2:drift_x} \\ + h_i &\rightarrow h_i \exp\left(\frac{1}{h_i} \frac{dh_i}{dt} + \Delta t\right), \label{eq:sph:gadget2:drift_h}\\ + \rho_i &\rightarrow \rho_i \exp\left(-\frac{3}{h_i} \frac{dh_i}{dt} + \Delta t\right), \label{eq:sph:gadget2:drift_rho} \\ + P_i &\rightarrow P_{\rm eos}\left(\rho_i, A_i + \frac{dA_i}{dt} \Delta t\right), \label{eq:sph:gadget2:drift_P}\\ + c_i &\rightarrow c_{\rm eos}\left(\rho_i, A_i + \frac{dA_i}{dt} + \Delta t\right) \label{eq:sph:gadget2:drift_c}, +\end{align} +where, as above, the last two updated quantities are obtained using +the pre-defined equation of state. Note that the entropic function $A_i$ +itself is \emph{not} updated. + + +%####################################################################################################### + +\subsection{Pressure-Entropy SPH} +\label{sec:sph:pe} + +This flavour of SPH follows the implementation described in section +2.2.3 of \cite{Hopkins2013}. We start with their equations (17), (19), +(21) and (22) but modify them for efficiency and generality +reasons. We also use the same \cite{Balsara1995} viscosity switch as +in the \GadgetSPH scheme (Sec. \ref{sec:sph:gadget2}). + +\subsubsection{Density and other fluid properties (\nth{1} neighbour loop)} + +For a set of particles $i$ with positions $\vec{x}_i$ with velocities +$\vec{v}_i$, masses $m_i$, entropic function per unit mass $A_i$ and +smoothing length $h_i$, we compute the density, derivative of the +density with respect to $h$, divergence and curl of velocity field in +a similar fashion to the \GadgetSPH scheme. From the basic particle +properties we construct an additional temporary quantity + +\begin{equation} + \tilde{A_i} \equiv A_i^{1/\gamma}, + \label{eq:sph:pe:A_tilde} +\end{equation} +which enters many equations. This allows us to construct the +entropy-weighted density $\bar\rho_i$: + +\begin{equation} + \bar\rho_i = \frac{1}{\tilde{A_i}} \sum_j m_j \tilde{A_j} W(\vec{x}_{ij}, h_i), + \label{eq:sph:pe:rho_bar} +\end{equation} +which can then be used to construct an entropy-weighted sound-speed +and pressure based on our assumed equation of state: + +\begin{align} + \bar c_i &= c_{\rm eos}(\bar\rho_i, A_i), \label{eq:sph:pe:c_bar}\\ + \bar P_i &= P_{\rm eos}(\bar\rho_i, A_i), \label{eq:sph:pe:P_bar} +\end{align} +and estimate the derivative of this later quantity with respect to the +smoothing length using: + +\begin{equation} +\bar P_{\partial h_i} \equiv \dd{\bar{P}}{h}(\vec{x}_i) = \sum_j m_j +\tilde{A_j} \dd{W}{h}(\vec{x}_{ij}), \label{eq:sph:pe:P_dh} +\end{equation} +The gradient terms (``h-terms'') are then obtained by combining $\bar +P_{\partial h_i}$ and $\rho_{\partial h_i}$ +(eq. \ref{eq:sph:minimal:rho_dh}): + +\begin{equation} + f_i \equiv \left(\frac{h_i}{3\rho_i}\bar P_{\partial + h_i}\right)\left(1 + \frac{h_i}{3\rho_i}\rho_{\partial + h_i}\right)^{-1}. +\end{equation} + +\subsubsection{Hydrodynamical accelerations (\nth{2} neighbour loop)} + +The accelerations are given by the following term: + +\begin{align} + \frac{d\vec{v}_i}{dt} = -\sum_j m_j &\left[\frac{\bar P_i}{\bar\rho_i^2} \left(\frac{\tilde A_j}{\tilde A_i} - \frac{f_i}{\tilde A_i}\right)\nabla_x W(\vec{x}_{ij}, h_i) \right. \nonumber \\ + &+\frac{P_j}{\rho_j^2} \left(\frac{\tilde A_i}{\tilde A_j} - \frac{f_j}{\tilde A_j}\right)\nabla_x W(\vec{x}_{ij},h_j) \\ + &+ \left. \bigg.\nu_{ij} \Wij \right], \label{eq:sph:pe:dv_dt} +\end{align} +where the viscosity term $\nu_{ij}$ has been computed as in +the \GadgetSPH case (Eq. \ref{eq:sph:gadget2:balsara} +and \ref{eq:sph:gadget2:nu_ij}). For completeness, the equation of +motion for the entropy is + +\begin{equation} +\frac{dA_i}{dt} = \frac{1}{2} A_{\rm eos}\left(\rho_i, \sum_j +m_j \nu_{ij}\vec{v}_{ij}\cdot \Wij\right). +\end{equation} + +\subsubsection{Time integration} + +The time-step condition is identical to the \MinimalSPH case +(Eq. \ref{eq:sph:minimal:dt}). The same applies to the integration +forward in time (Eq. \ref{eq:sph:minimal:kick_v} to +\ref{eq:sph:minimal:kick_c}) with the exception of the change in +internal energy (Eq. \ref{eq:sph:minimal:kick_u}) which gets replaced +by an integration for the the entropy: + +\begin{align} + \vec{v}_i &\rightarrow \vec{v}_i + \frac{d\vec{v}_i}{dt} \Delta t \label{eq:sph:pe:kick_v}\\ + A_i &\rightarrow A_i + \frac{dA_i}{dt} \Delta t \label{eq:sph:pe:kick_A}\\ + P_i &\rightarrow P_{\rm eos}\left(\rho_i, A_i\right) \label{eq:sph:pe:kick_P}, \\ + c_i &\rightarrow c_{\rm eos}\left(\rho_i, + A_i\right) \label{eq:sph:pe:kick_c}, \\ + \tilde A_i &= A_i^{1/\gamma} +\end{align} +where, once again, we made use of the equation of state relating +thermodynamical quantities. + + +\subsubsection{Particle properties prediction} + +The prediction step is also identical to the \MinimalSPH case with the +entropic function replacing the thermal energy. + +\begin{align} + \vec{x}_i &\rightarrow \vec{x}_i + \vec{v}_i \Delta t \label{eq:sph:pe:drift_x} \\ + h_i &\rightarrow h_i \exp\left(\frac{1}{h_i} \frac{dh_i}{dt} + \Delta t\right), \label{eq:sph:pe:drift_h}\\ + \rho_i &\rightarrow \rho_i \exp\left(-\frac{3}{h_i} \frac{dh_i}{dt} + \Delta t\right), \label{eq:sph:pe:drift_rho} \\ + \tilde A_i &\rightarrow \left(A_i + \frac{dA_i}{dt} + \Delta t\right)^{1/\gamma} \label{eq:sph:pe:drift_A_tilde}, \\ + P_i &\rightarrow P_{\rm eos}\left(\rho_i, A_i + \frac{dA_i}{dt} \Delta t\right), \label{eq:sph:pe:drift_P}\\ + c_i &\rightarrow c_{\rm eos}\left(\rho_i, A_i + \frac{dA_i}{dt} + \Delta t\right) \label{eq:sph:pe:drift_c}, +\end{align} +where, as above, the last two updated quantities are obtained using +the pre-defined equation of state. Note that the entropic function $A_i$ +itself is \emph{not} updated. + +\subsection{Pressure-Energy SPH} +\label{sec:sph:pu} + +Section 2.2.2 of \cite{Hopkins2013}.\\ \tbd +\subsection{Anarchy SPH} +Dalla Vecchia (\textit{in prep.}), also described in section 2.2.2 of +\cite{Schaller2015}.\\ +\label{sec:sph:anarchy} +\tbd diff --git a/theory/SPH/Flavours/sph_flavours_standalone.tex b/theory/SPH/Flavours/sph_flavours_standalone.tex new file mode 100644 index 0000000000000000000000000000000000000000..20c9f1451c2499d661bfbb1022bfd34c02fde4dd --- /dev/null +++ b/theory/SPH/Flavours/sph_flavours_standalone.tex @@ -0,0 +1,31 @@ +\documentclass[fleqn, usenatbib, useAMS]{mnras} +\usepackage{graphicx} +\usepackage{amsmath,paralist,xcolor,xspace,amssymb} +\usepackage{times} +\usepackage[super]{nth} + +\newcommand{\swift}{{\sc swift}\xspace} +\newcommand{\gadget}{{\sc gadget}\xspace} +\newcommand{\dd}[2]{\frac{\partial #1}{\partial #2}} +\renewcommand{\vec}[1]{{\mathbf{#1}}} +\newcommand{\Wij}{\overline{\nabla_xW_{ij}}} +\newcommand{\tbd}{\textcolor{red}{TO BE DONE}} + +\newcommand{\MinimalSPH}{\textsc{minimal-sph}\xspace} +\newcommand{\GadgetSPH}{\textsc{gadget-sph}\xspace} +\newcommand{\PESPH}{\textsc{pe-sph}\xspace} +%\setlength{\mathindent}{0pt} + +%opening +\title{SPH flavours in SWIFT} +\author{Matthieu Schaller} + +\begin{document} + +\maketitle +\input{sph_flavours} + +\bibliographystyle{mnras} +\bibliography{./bibliography} + +\end{document} diff --git a/theory/SPH/Kernels/bibliography.bib b/theory/SPH/Kernels/bibliography.bib new file mode 100644 index 0000000000000000000000000000000000000000..93d484b07c625be71e338292376a3514243af725 --- /dev/null +++ b/theory/SPH/Kernels/bibliography.bib @@ -0,0 +1,18 @@ +@ARTICLE{Dehnen2012, + author = {{Dehnen}, W. and {Aly}, H.}, + title = "{Improving convergence in smoothed particle hydrodynamics simulations without pairing instability}", + journal = {\mnras}, +archivePrefix = "arXiv", + eprint = {1204.2471}, + primaryClass = "astro-ph.IM", + keywords = {hydrodynamics, methods: numerical }, + year = 2012, + month = sep, + volume = 425, + pages = {1068-1082}, + doi = {10.1111/j.1365-2966.2012.21439.x}, + adsurl = {http://adsabs.harvard.edu/abs/2012MNRAS.425.1068D}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + diff --git a/theory/SPH/kernel/kernel_definitions.tex b/theory/SPH/Kernels/kernel_definitions.tex similarity index 83% rename from theory/SPH/kernel/kernel_definitions.tex rename to theory/SPH/Kernels/kernel_definitions.tex index 65ef5c82ad928398a69b4528c7914829f146ad6e..b90b0623719f9acf311600d3c9d9157fabbc1865 100644 --- a/theory/SPH/kernel/kernel_definitions.tex +++ b/theory/SPH/Kernels/kernel_definitions.tex @@ -1,24 +1,8 @@ -\documentclass[a4paper]{mnras} -\usepackage[utf8]{inputenc} -\usepackage{amsmath} -\usepackage{graphicx} -\usepackage{xspace} +We follow the definitions and notation of +\cite{Dehnen2012}. Motivation for all the material given here can be +found in their paper. -\newcommand{\swift}{{\sc Swift}\xspace} - - - -%opening -\title{SPH kernels in SWIFT} -\author{Matthieu Schaller} - -\begin{document} - -\maketitle - -In here we follow the definitions of Dehnen \& Aly 2012. - -\section{General Definitions} +\subsection{General Definitions} The desirable properties of an SPH kernels $W(\vec{x},h)$ are: \begin{enumerate} @@ -28,14 +12,12 @@ The desirable properties of an SPH kernels $W(\vec{x},h)$ are: \item $W(\vec{x},h)$ should have a finite support and be cheap to compute. \end{enumerate} - As a consequence, the smoothing kernels used in SPH can hence be written (in 3D) as \begin{equation} W(\vec{x},h) \equiv \frac{1}{H^3}f\left(\frac{|\vec{x}|}{H}\right), \end{equation} - where $H=\gamma h$ is defined below and $f(u)$ is a dimensionless function, usually a low-order polynomial, such that $f(u \geq 1) = 0$ and normalised such that @@ -43,7 +25,6 @@ and normalised such that \begin{equation} \int f(|\vec{u}|){\rm d}^3u = 1. \end{equation} - $H$ is the kernel's support radius and is used as the ``smoothing length'' in the Gadget code( {i.e.} $H=h$). This definition is, however, not very physical and makes comparison of kernels at a @@ -56,13 +37,11 @@ standard deviation \sigma^2 \equiv \frac{1}{3}\int \vec{u}^2 W(\vec{u},h) {\rm d}^3u. \label{eq:sph:sigma} \end{equation} - The smoothing length is then: \begin{equation} h\equiv2\sigma. \label{eq:sph:h} \end{equation} - Each kernel, {\it i.e.} defintion of $f(u)$, will have a different ratio $\gamma = H/h$. So for a \emph{fixed resolution} $h$, one will have different kernel support sizes, $H$, and a different number of @@ -73,18 +52,15 @@ separation: \begin{equation} h = \eta \langle x \rangle = \eta \left(\frac{m}{\rho}\right)^{1/3}, \end{equation} - where $\rho$ is the local density of the fluid and $m$ the SPH particle mass. - The (weighted) number of neighbours within the kernel support is a -useful quantity to use in implementations of SPH. It is defined as (in -3D) +useful quantity to use in implementations of SPH. It is defined (in +3D) as: \begin{equation} N_{\rm ngb} \equiv \frac{4}{3}\pi \left(\frac{H}{h}\eta\right)^3. \end{equation} - Once the fixed ratio $\gamma= H/h$ is known (via equations \ref{eq:sph:sigma} and \ref{eq:sph:h}) for a given kernel, the number of neighbours only depends on the resolution parameter $\eta$. For @@ -92,28 +68,27 @@ the usual cubic spline kernel (see below), setting the simulation resolution to $\eta=1.2348$ yields the commonly used value $N_{\rm ngb} = 48$. -\section{Kernels available in \swift} +\subsection{Kernels available in \swift} The \swift kernels are split into two categories, the B-splines ($M_{4,5,6}$) and the Wendland kernels ($C2$, $C4$ and $C6$). In all cases we impose $f(u>1) = 0$.\\ - The spline kernels are defined as: \begin{align} - f(u) &= C M_n(u), \\ + f(u) &= C M_n(u), \nonumber\\ M_n(u) &\equiv \frac{1}{2\pi} \int_{-\infty}^{\infty} \left(\frac{\sin\left(k/n\right)}{k/n}\right)^n\cos\left(ku\right){\rm - d}k, + d}k \nonumber, \end{align} - -whilst the Wendland kernels read +whilst the Wendland kernels are constructed from: \begin{align} - f(u) &= C \Psi_{i,j}(u), \\ - \Psi_{i,j}(u) &\equiv \mathcal{I}^k\left[\max\left(1-u,0\right)\right],\\ - \mathcal{I}[f](u) &\equiv \int_u^\infty f\left(k\right)k{\rm d}k. + f(u) &= C \Psi_{i,j}(u), \nonumber\\ + \Psi_{i,j}(u) &\equiv + \mathcal{I}^k\left[\max\left(1-u,0\right)\right], \nonumber\\ + \mathcal{I}[f](u) &\equiv \int_u^\infty f\left(k\right)k{\rm d}k. \nonumber \end{align} \subsubsection{Cubic spline ($M_4$) kernel} @@ -208,11 +183,13 @@ All kernels available in \swift are shown on Fig.~\ref{fig:sph:kernels}. \includegraphics[width=\columnwidth]{kernels.pdf} \caption{The kernel functions available in \swift for a mean inter-particle separation $\langle x\rangle=1.5$ and a resolution - $\eta=1.2348$. The corresponding kernel support radii $H$ (shown by - arrows) and number of neighours $N_{\rm ngb}$ are indicated on the - figure. A Gaussian kernel with the same smoothing length is shown - for comparison. Note that all these kernels have the \emph{same - resolution} despite having vastly different number of neighbours.} + $\eta=1.2348$ shown in linear (top) and log (bottom) units to + highlight their differences. The corresponding kernel support radii + $H$ (shown by arrows) and number of neighours $N_{\rm ngb}$ are + indicated on the figure. A Gaussian kernel with the same smoothing + length is shown for comparison. Note that all these kernels have + the \emph{same resolution} despite having vastly different number of + neighbours.} \label{fig:sph:kernels} \end{figure} @@ -226,16 +203,15 @@ All kernels available in \swift are shown on Fig.~\ref{fig:sph:kernels}. \end{figure} -\section{Kernel Derivatives} +\subsection{Kernel Derivatives} The derivatives of the kernel function have relatively simple expressions and are shown on Fig.~\ref{fig:sph:kernel_derivatives}: -\begin{eqnarray*} - \vec\nabla_x W(\vec{x},h) &=& \frac{1}{h^4}f'\left(\frac{|\vec{x}|}{h}\right) \frac{\vec{x}}{|\vec{x}|}, \\ - \frac{\partial W(\vec{x},h)}{\partial h} &=&- \frac{1}{h^4}\left[3f\left(\frac{|\vec{x}|}{h}\right) + +\begin{align} + \vec\nabla_x W(\vec{x},h) &= \frac{1}{h^4}f'\left(\frac{|\vec{x}|}{h}\right) \frac{\vec{x}}{|\vec{x}|}, \\ + \frac{\partial W(\vec{x},h)}{\partial h} &=- \frac{1}{h^4}\left[3f\left(\frac{|\vec{x}|}{h}\right) + \frac{|\vec{x}|}{h}f'\left(\frac{|\vec{x}|}{h}\right)\right]. -\end{eqnarray*} +\end{align} Note that for all the kernels listed above, $f'(0) = 0$. -\end{document} diff --git a/theory/SPH/Kernels/kernel_definitions_standalone.tex b/theory/SPH/Kernels/kernel_definitions_standalone.tex new file mode 100644 index 0000000000000000000000000000000000000000..cc9f27c91747d85c0516365e12f42f27a247e8b3 --- /dev/null +++ b/theory/SPH/Kernels/kernel_definitions_standalone.tex @@ -0,0 +1,22 @@ +\documentclass[fleqn, usenatbib, useAMS,a4paper]{mnras} +\usepackage{graphicx} +\usepackage{amsmath,paralist,xcolor,xspace,amssymb} +\usepackage{times} + +\newcommand{\swift}{{\sc Swift}\xspace} + +%opening +\title{SPH kernels in SWIFT} +\author{Matthieu Schaller} + +\begin{document} + +\maketitle + +\input{kernel_definitions} + +\bibliographystyle{mnras} +\bibliography{./bibliography.bib} + + +\end{document} diff --git a/theory/SPH/kernel/kernels.py b/theory/SPH/Kernels/kernels.py similarity index 52% rename from theory/SPH/kernel/kernels.py rename to theory/SPH/Kernels/kernels.py index 9f0b853c6e9fce7180361457373f745cc2b8e804..069bfd1ea25c8b99b894eadb46f93b9656ba9c7e 100644 --- a/theory/SPH/kernel/kernels.py +++ b/theory/SPH/Kernels/kernels.py @@ -26,31 +26,53 @@ from scipy.optimize import fsolve from matplotlib.font_manager import FontProperties import numpy -params = { - 'axes.labelsize': 10, - 'axes.titlesize': 8, - 'font.size': 10, - 'legend.fontsize': 9, - 'xtick.labelsize': 10, - 'ytick.labelsize': 10, - 'xtick.major.pad': 2.5, - 'ytick.major.pad': 2.5, - 'text.usetex': True, -'figure.figsize' : (4.15,4.15), -'figure.subplot.left' : 0.14, -'figure.subplot.right' : 0.99, -'figure.subplot.bottom' : 0.06, -'figure.subplot.top' : 0.99, +params = {'axes.labelsize': 9, +'axes.titlesize': 10, +'font.size': 12, +'legend.fontsize': 12, +'xtick.labelsize': 9, +'ytick.labelsize': 9, +'text.usetex': True, +'figure.figsize' : (3.15,3.15), +'figure.subplot.left' : 0.17, +'figure.subplot.right' : 0.99 , +'figure.subplot.bottom' : 0.08 , +'figure.subplot.top' : 0.99 , 'figure.subplot.wspace' : 0. , 'figure.subplot.hspace' : 0. , 'lines.markersize' : 6, -'lines.linewidth' : 1.5, +'lines.linewidth' : 3., 'text.latex.unicode': True } rcParams.update(params) rc('font',**{'family':'sans-serif','sans-serif':['Times']}) +# params = { +# 'axes.labelsize': 10, +# 'axes.titlesize': 8, +# 'font.size': 10, +# 'legend.fontsize': 9, +# 'xtick.labelsize': 10, +# 'ytick.labelsize': 10, +# 'xtick.major.pad': 2.5, +# 'ytick.major.pad': 2.5, +# 'text.usetex': True, +# 'figure.figsize' : (4.15,4.15), +# 'figure.subplot.left' : 0.14, +# 'figure.subplot.right' : 0.99, +# 'figure.subplot.bottom' : 0.06, +# 'figure.subplot.top' : 0.99, +# 'figure.subplot.wspace' : 0. , +# 'figure.subplot.hspace' : 0. , +# 'lines.markersize' : 6, +# 'lines.linewidth' : 1.5, +# 'text.latex.unicode': True +# } +# rcParams.update(params) +# rc('font',**{'family':'sans-serif','sans-serif':['Times']}) + + #Parameters eta = 1.2348422195325 # Resolution (Gives 48 neighbours for a cubic spline kernel) dx = 1.5#4 #2.7162 # Mean inter-particle separation @@ -131,17 +153,17 @@ def W_WendlandC6(r): return C_WendlandC6 * wendlandC6(r / H_WendlandC6) / H # PLOT STUFF figure() subplot(211) -xx = linspace(0., 5*h, 1000) +xx = linspace(0., 5*h, 100) maxY = 1.2*Gaussian(0, h) # Plot the kernels -plot(xx, Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm %14s\\quad H=\\infty}$"%("Gaussian~~~~~~")) -plot(xx, W_cubic_spline(xx), 'b-', label="${\\rm %14s\\quad H=%4.3f}$"%("Cubic~spline~~", H_cubic)) -plot(xx, W_quartic_spline(xx), 'c-', label="${\\rm %14s\\quad H=%4.3f}$"%("Quartic~spline", H_quartic)) -plot(xx, W_quintic_spline(xx), 'g-', label="${\\rm %14s\\quad H=%4.3f}$"%("Quintic~spline", H_quintic)) -plot(xx, W_WendlandC2(xx), 'r-', label="${\\rm %14s\\quad H=%4.3f}$"%("Wendland~C2~", H_WendlandC2)) -plot(xx, W_WendlandC4(xx), 'm-', label="${\\rm %14s\\quad H=%4.3f}$"%("Wendland~C4~", H_WendlandC4)) -plot(xx, W_WendlandC6(xx), 'y-', label="${\\rm %14s\\quad H=%4.3f}$"%("Wendland~C6~", H_WendlandC6)) +plot(xx, Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm %14s~ H=\\infty}$"%("Gaussian~~~~~~")) +plot(xx, W_cubic_spline(xx), 'b-', label="${\\rm %14s~ H=%4.3f}$"%("Cubic~spline~~", H_cubic), lw=1.5) +plot(xx, W_quartic_spline(xx), 'c-', label="${\\rm %14s~ H=%4.3f}$"%("Quartic~spline", H_quartic), lw=1.5) +plot(xx, W_quintic_spline(xx), 'g-', label="${\\rm %14s~ H=%4.3f}$"%("Quintic~spline", H_quintic), lw=1.5) +plot(xx, W_WendlandC2(xx), 'r-', label="${\\rm %14s~ H=%4.3f}$"%("Wendland~C2~", H_WendlandC2), lw=1.5) +plot(xx, W_WendlandC4(xx), 'm-', label="${\\rm %14s~ H=%4.3f}$"%("Wendland~C4~", H_WendlandC4), lw=1.5) +plot(xx, W_WendlandC6(xx), 'y-', label="${\\rm %14s~ H=%4.3f}$"%("Wendland~C6~", H_WendlandC6), lw=1.5) # Indicate the position of H arrow(H_cubic, 0.12*maxY , 0., -0.12*maxY*0.9, fc='b', ec='b', length_includes_head=True, head_width=0.03, head_length=0.12*maxY*0.3) @@ -152,33 +174,33 @@ arrow(H_WendlandC4, 0.12*maxY , 0., -0.12*maxY*0.9, fc='m', ec='m', length_inclu arrow(H_WendlandC6, 0.12*maxY , 0., -0.12*maxY*0.9, fc='y', ec='y', length_includes_head=True, head_width=0.03, head_length=0.12*maxY*0.3) # Show h -plot([h, h], [0., maxY], 'k:', linewidth=0.5) -text(h, maxY*0.35, "$h\\equiv\\eta\\langle x\\rangle = %.4f$"%h, rotation=90, backgroundcolor='w', ha='center', va='bottom') +plot([h, h], [0., 0.63*maxY], 'k:', linewidth=0.5) +text(h, maxY*0.65, "$h\\equiv\\eta\\langle x\\rangle$", rotation=90, ha='center', va='bottom', fontsize=9) # Show sigma -plot([h/2, h/2], [0., maxY], 'k:', linewidth=0.5) -text(h/2, maxY*0.05, "$\\sigma\\equiv h/2$", rotation=90, backgroundcolor='w', ha='center', va='bottom') +plot([h/2, h/2], [0., 0.63*maxY], 'k:', linewidth=0.5) +text(h/2, maxY*0.65, "$\\sigma\\equiv h/2$", rotation=90, ha='center', va='bottom', fontsize=9) # Show <x> -plot([dx, dx], [0., maxY], 'k:', linewidth=0.5) -text(dx, maxY*0.35, "$\\langle x\\rangle = %.1f$"%dx, rotation=90, backgroundcolor='w', ha='center', va='bottom') +plot([dx, dx], [0., 0.63*maxY], 'k:', linewidth=0.5) +text(dx, maxY*0.65, "$\\langle x\\rangle = %.1f$"%dx, rotation=90, ha='center', va='bottom', fontsize=9) xlim(0., 2.5*h) ylim(0., maxY) gca().xaxis.set_ticklabels([]) ylabel("$W(r,h)$", labelpad=1.5) -legend(loc="upper right", handlelength=1.2, handletextpad=0.2) +legend(loc="upper right", frameon=False, handletextpad=0.1, handlelength=1.2, fontsize=8) # Same but now in log space subplot(212, yscale="log") -plot(xx, Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$") -plot(xx, W_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$") -plot(xx, W_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$") -plot(xx, W_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$") -plot(xx, W_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$") -plot(xx, W_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$") -plot(xx, W_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$") +plot(xx, Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$", lw=1.5) +plot(xx, W_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$", lw=1.5) +plot(xx, W_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$", lw=1.5) +plot(xx, W_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$", lw=1.5) +plot(xx, W_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$", lw=1.5) +plot(xx, W_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$", lw=1.5) +plot(xx, W_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$", lw=1.5) # Show h plot([h, h], [0., 1.], 'k:', linewidth=0.5) @@ -191,21 +213,22 @@ plot([dx, dx], [0., 1.], 'k:', linewidth=0.5) # Show plot properties -text(h/5., 1e-3, "$\\langle x \\rangle = %3.1f$"%(dx), va="top", backgroundcolor='w') -text(h/5.+0.06, 3e-4, "$\\eta = %5.4f$"%(eta), va="top", backgroundcolor='w') +text(h/5., 2e-3, "$\\langle x \\rangle = %3.1f$"%(dx), va="top", backgroundcolor='w', fontsize=9) +text(h/5.+0.06, 4e-4, "$\\eta = %5.4f$"%(eta), va="top", backgroundcolor='w', fontsize=9) +text(h/5.+0.06, 8e-5, "$h = \\eta\\langle x \\rangle = %5.4f$"%(h), va="top", backgroundcolor='w', fontsize=9) # Show number of neighbours -text(1.9*h, 2e-1/2.9**0, "$N_{\\rm ngb}=\\infty$", fontsize=10) -text(1.9*h, 2e-1/2.9**1, "$N_{\\rm ngb}=%3.1f$"%(N_H_cubic), color='b', fontsize=9) -text(1.9*h, 2e-1/2.9**2, "$N_{\\rm ngb}=%3.1f$"%(N_H_quartic), color='c', fontsize=9) -text(1.9*h, 2e-1/2.9**3, "$N_{\\rm ngb}=%3.1f$"%(N_H_quintic), color='g', fontsize=9) -text(1.9*h, 2e-1/2.9**4, "$N_{\\rm ngb}=%3.1f$"%(N_H_WendlandC2), color='r', fontsize=9) -text(1.9*h, 2e-1/2.9**5, "$N_{\\rm ngb}=%3.1f$"%(N_H_WendlandC4), color='m', fontsize=9) -text(1.9*h, 2e-1/2.9**6, "$N_{\\rm ngb}=%3.0f$"%(N_H_WendlandC6), color='y', fontsize=9) +text(1.75*h, 2e-1/2.9**0, "$N_{\\rm ngb}=\\infty$", fontsize=10) +text(1.75*h, 2e-1/2.9**1, "$N_{\\rm ngb}=%3.1f$"%(N_H_cubic), color='b', fontsize=9) +text(1.75*h, 2e-1/2.9**2, "$N_{\\rm ngb}=%3.1f$"%(N_H_quartic), color='c', fontsize=9) +text(1.75*h, 2e-1/2.9**3, "$N_{\\rm ngb}=%3.1f$"%(N_H_quintic), color='g', fontsize=9) +text(1.75*h, 2e-1/2.9**4, "$N_{\\rm ngb}=%3.1f$"%(N_H_WendlandC2), color='r', fontsize=9) +text(1.75*h, 2e-1/2.9**5, "$N_{\\rm ngb}=%3.1f$"%(N_H_WendlandC4), color='m', fontsize=9) +text(1.75*h, 2e-1/2.9**6, "$N_{\\rm ngb}=%3.0f$"%(N_H_WendlandC6), color='y', fontsize=9) xlim(0., 2.5*h) ylim(1e-5, 0.7) -xlabel("$r$", labelpad=0) +xlabel("$r$", labelpad=-1) ylabel("$W(r,h)$", labelpad=0.5) savefig("kernels.pdf") @@ -238,57 +261,67 @@ def d_wendlandC2(r): return where(r > 1., 0., 20.*r**4 - 60.*r**3 + 60.*r**2 def d_wendlandC4(r): return where(r > 1., 0., 93.3333*r**7 - 448.*r**6 + 840.*r**5 - 746.667*r**4 + 280.*r**3 - 18.6667*r) def d_wendlandC6(r): return where(r > 1., 0., 352.*r**10 - 2310.*r**9 + 6336.*r**8 - 9240.*r**7 + 7392.*r**6 - 2772.*r**5 + 264.*r**3 - 22.*r) def d_Gaussian(r,h): return (-8.*sqrt(2.)/(PI**(3./2.) * h**5)) * r * exp(- 2.*r**2 / (h**2)) +def dh_Gaussian(r,h): return -(3*Gaussian(r,h) + (r/h) * d_Gaussian(r,h)) # Get the second derivative of the reduced kernel definitions for 3D kernels -def d2_cubic_spline(r): return where(r > 1., 0., where(r < 0.5, - 18.*r - 6., - -6.*r + 6.) ) - -def d2_quartic_spline(r): return where(r > 1., 0., where(r < 0.2, - 72.*r**2 - 4.8, - where(r < 0.6, - -48.*r**2 + 48.*r - (48./5.), - 12.*r**2 - 24.*r + 12.))) - -def d2_quintic_spline(r): return where(r > 1., 0., where(r < 1./3., - -200.*r**3 + 120.*r**2 - (40./9.), - where(r < 2./3., - 100.*r**3 - 180.*r**2 + 100.*r - (140./9.), - -20.*r**3 + 60.*r**2 - 60.*r + 20))) -def d2_wendlandC2(r): return where(r > 1., 0., 80.*r**3 - 180.*r**2 + 120.*r - 20.) -def d2_wendlandC4(r): return where(r > 1., 0., 653.3333*r**6 - 2688.*r**5 + 4200.*r**4 - 2986.667*r**3 + 840.*r**2 - 18.6667) -def d2_wendlandC6(r): return where(r > 1., 0., 3520.*r**9 - 20790.*r**8 + 50688.*r**7 - 64680.*r**6 + 44352.*r**5 - 13860.*r**4 + 792.*r**2 - 22) -def d2_Gaussian(r,h): return (32*sqrt(2)/(PI**(3./2.)*h**7)) * r**2 * exp(-2.*r**2 / (h**2)) - 8.*sqrt(2.)/(PI**(3./2.) * h**5) * exp(- 2.*r**2 / (h**2)) +# def d2_cubic_spline(r): return where(r > 1., 0., where(r < 0.5, +# 18.*r - 6., +# -6.*r + 6.) ) + +# def d2_quartic_spline(r): return where(r > 1., 0., where(r < 0.2, +# 72.*r**2 - 4.8, +# where(r < 0.6, +# -48.*r**2 + 48.*r - (48./5.), +# 12.*r**2 - 24.*r + 12.))) + +# def d2_quintic_spline(r): return where(r > 1., 0., where(r < 1./3., +# -200.*r**3 + 120.*r**2 - (40./9.), +# where(r < 2./3., +# 100.*r**3 - 180.*r**2 + 100.*r - (140./9.), +# -20.*r**3 + 60.*r**2 - 60.*r + 20))) +# def d2_wendlandC2(r): return where(r > 1., 0., 80.*r**3 - 180.*r**2 + 120.*r - 20.) +# def d2_wendlandC4(r): return where(r > 1., 0., 653.3333*r**6 - 2688.*r**5 + 4200.*r**4 - 2986.667*r**3 + 840.*r**2 - 18.6667) +# def d2_wendlandC6(r): return where(r > 1., 0., 3520.*r**9 - 20790.*r**8 + 50688.*r**7 - 64680.*r**6 + 44352.*r**5 - 13860.*r**4 + 792.*r**2 - 22) +# def d2_Gaussian(r,h): return (32*sqrt(2)/(PI**(3./2.)*h**7)) * r**2 * exp(-2.*r**2 / (h**2)) - 8.*sqrt(2.)/(PI**(3./2.) * h**5) * exp(- 2.*r**2 / (h**2)) + +# Derivative of kernel definitions (3D) +def dWdx_cubic_spline(r): return C_cubic * d_cubic_spline(r / H_cubic) / H_cubic**4 +def dWdx_quartic_spline(r): return C_quartic * d_quartic_spline(r / H_quartic) / H_quartic**4 +def dWdx_quintic_spline(r): return C_quintic * d_quintic_spline(r / H_quintic) / H_quintic**4 +def dWdx_WendlandC2(r): return C_WendlandC2 * d_wendlandC2(r / H_WendlandC2) / H_WendlandC2**4 +def dWdx_WendlandC4(r): return C_WendlandC4 * d_wendlandC4(r / H_WendlandC4) / H_WendlandC4**4 +def dWdx_WendlandC6(r): return C_WendlandC6 * d_wendlandC6(r / H_WendlandC6) / H_WendlandC6**4 # Derivative of kernel definitions (3D) -def dW_cubic_spline(r): return C_cubic * d_cubic_spline(r / H_cubic) / H_cubic**4 -def dW_quartic_spline(r): return C_quartic * d_quartic_spline(r / H_quartic) / H_quartic**4 -def dW_quintic_spline(r): return C_quintic * d_quintic_spline(r / H_quintic) / H_quintic**4 -def dW_WendlandC2(r): return C_WendlandC2 * d_wendlandC2(r / H_WendlandC2) / H_WendlandC2**4 -def dW_WendlandC4(r): return C_WendlandC4 * d_wendlandC4(r / H_WendlandC4) / H_WendlandC4**4 -def dW_WendlandC6(r): return C_WendlandC6 * d_wendlandC6(r / H_WendlandC6) / H_WendlandC6**4 +def dWdh_cubic_spline(r): return (3. * cubic_spline(r / H_cubic) + (r / H_cubic) * d_cubic_spline(r / H_cubic)) * (-C_cubic / H_cubic**4) +def dWdh_quartic_spline(r): return (3. * quartic_spline(r / H_quartic) + (r / H_quartic) * d_quartic_spline(r / H_quartic)) * (-C_quartic / H_quartic**4) +def dWdh_quintic_spline(r): return (3. * quintic_spline(r / H_quintic) + (r / H_quintic) * d_quintic_spline(r / H_quintic)) * (-C_quintic / H_quintic**4) +def dWdh_WendlandC2(r): return (3. * wendlandC2(r / H_WendlandC2) + (r / H_WendlandC2) * d_wendlandC2(r / H_WendlandC2)) * (-C_WendlandC2 / H_WendlandC2**4) +def dWdh_WendlandC4(r): return (3. * wendlandC4(r / H_WendlandC4) + (r / H_WendlandC4) * d_wendlandC4(r / H_WendlandC4)) * (-C_WendlandC4 / H_WendlandC4**4) +def dWdh_WendlandC6(r): return (3. * wendlandC6(r / H_WendlandC6) + (r / H_WendlandC6) * d_wendlandC6(r / H_WendlandC6)) * (-C_WendlandC6 / H_WendlandC6**4) + # Second derivative of kernel definitions (3D) -def d2W_cubic_spline(r): return C_cubic * d2_cubic_spline(r / H_cubic) / H_cubic**5 -def d2W_quartic_spline(r): return C_quartic * d2_quartic_spline(r / H_quartic) / H_quartic**5 -def d2W_quintic_spline(r): return C_quintic * d2_quintic_spline(r / H_quintic) / H_quintic**5 -def d2W_WendlandC2(r): return C_WendlandC2 * d2_wendlandC2(r / H_WendlandC2) / H_WendlandC2**5 -def d2W_WendlandC4(r): return C_WendlandC4 * d2_wendlandC4(r / H_WendlandC4) / H_WendlandC4**5 -def d2W_WendlandC6(r): return C_WendlandC6 * d2_wendlandC6(r / H_WendlandC6) / H_WendlandC6**5 +#def d2W_cubic_spline(r): return C_cubic * d2_cubic_spline(r / H_cubic) / H_cubic**5 +#def d2W_quartic_spline(r): return C_quartic * d2_quartic_spline(r / H_quartic) / H_quartic**5 +#def d2W_quintic_spline(r): return C_quintic * d2_quintic_spline(r / H_quintic) / H_quintic**5 +#def d2W_WendlandC2(r): return C_WendlandC2 * d2_wendlandC2(r / H_WendlandC2) / H_WendlandC2**5 +#def d2W_WendlandC4(r): return C_WendlandC4 * d2_wendlandC4(r / H_WendlandC4) / H_WendlandC4**5 +#def d2W_WendlandC6(r): return C_WendlandC6 * d2_wendlandC6(r / H_WendlandC6) / H_WendlandC6**5 figure() subplot(211) plot([0, 2.5*h], [0., 0.], 'k--', linewidth=0.7) -plot(xx, d_Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$") -plot(xx, dW_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$") -plot(xx, dW_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$") -plot(xx, dW_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$") -plot(xx, dW_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$") -plot(xx, dW_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$") -plot(xx, dW_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$") +plot(xx, d_Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$", lw=1.5) +plot(xx, dWdx_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$", lw=1.5) +plot(xx, dWdx_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$", lw=1.5) +plot(xx, dWdx_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$", lw=1.5) +plot(xx, dWdx_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$", lw=1.5) +plot(xx, dWdx_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$", lw=1.5) +plot(xx, dWdx_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$", lw=1.5) maxY = d_Gaussian(h/2, h) @@ -304,37 +337,35 @@ plot([dx, dx], [2*maxY, 0.1], 'k:', linewidth=0.5) xlim(0., 2.5*h) gca().xaxis.set_ticklabels([]) -ylim(1.1*maxY, -0.1*maxY) +ylim(1.3*maxY, -0.1*maxY) xlabel("$r$", labelpad=0) ylabel("$\\partial W(r,h)/\\partial r$", labelpad=0.5) -legend(loc="lower right") +legend(loc="lower right", frameon=False, handletextpad=0.1, handlelength=1.2, fontsize=8) subplot(212) - -maxY = d2_Gaussian(h,h) -plot([h/2, h/2], [-4*maxY, 1.4*maxY], 'k:', linewidth=0.5) -plot([h, h], [-4*maxY, 1.4*maxY], 'k:', linewidth=0.5) -plot([dx, dx], [-4*maxY, 1.4*maxY], 'k:', linewidth=0.5) -text(h/2, -3.*maxY, "$\\sigma\\equiv h/2$", rotation=90, backgroundcolor='w', ha='center', va='bottom') -text(h, -3.*maxY, "$h\\equiv\\eta\\langle x\\rangle = %.4f$"%h, rotation=90, backgroundcolor='w', ha='center', va='bottom') -text(dx, -3.*maxY, "$\\langle x\\rangle = %.1f$"%dx, rotation=90, backgroundcolor='w', ha='center', va='bottom') +plot([h/2, h/2], [0.77*maxY, -0.15*maxY], 'k:', linewidth=0.5) +plot([h, h], [0.77*maxY, -0.15*maxY], 'k:', linewidth=0.5) +plot([dx, dx], [0.77*maxY, -0.15*maxY], 'k:', linewidth=0.5) +text(h/2, 1.25*maxY, "$\\sigma\\equiv h/2$", rotation=90, ha='center', va='bottom', fontsize=9) +text(h, 1.25*maxY, "$h\\equiv\\eta\\langle x\\rangle$", rotation=90, ha='center', va='bottom', fontsize=9) +text(dx, 1.25*maxY, "$\\langle x\\rangle = %.1f$"%dx, rotation=90, ha='center', va='bottom', fontsize=9) plot([0, 2.5*h], [0., 0.], 'k--', linewidth=0.7) -plot(xx, d2_Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$") -plot(xx, d2W_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$") -plot(xx, d2W_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$") -plot(xx, d2W_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$") -plot(xx, d2W_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$") -plot(xx, d2W_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$") -plot(xx, d2W_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$") +#plot(xx, dh_Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$") +plot(xx, dWdh_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$", lw=1.5) +plot(xx, dWdh_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$", lw=1.5) +plot(xx, dWdh_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$", lw=1.5) +plot(xx, dWdh_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$", lw=1.5) +plot(xx, dWdh_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$", lw=1.5) +plot(xx, dWdh_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$", lw=1.5) xlim(0., 2.5*h) -ylim(-3.2*maxY, 1.3*maxY) -xlabel("$r$", labelpad=0) -ylabel("$\\partial^2 W(r,h)/\\partial r^2$", labelpad=0.5) +ylim(1.3*maxY, -0.15*maxY) +xlabel("$r$", labelpad=-1) +ylabel("$\\partial W(r,h)/\\partial h$", labelpad=0.5) savefig("kernel_derivatives.pdf") diff --git a/theory/SPH/Kernels/run.sh b/theory/SPH/Kernels/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..5f2c8569deebad81203ea8cc27c348a0d1607c0a --- /dev/null +++ b/theory/SPH/Kernels/run.sh @@ -0,0 +1,6 @@ +#!/bin/bash +python kernels.py +pdflatex -jobname=kernel_definitions kernel_definitions_standalone.tex +bibtex kernel_definitions.aux +pdflatex -jobname=kernel_definitions kernel_definitions_standalone.tex +pdflatex -jobname=kernel_definitions kernel_definitions_standalone.tex diff --git a/theory/SPH/bibliography.bib b/theory/SPH/bibliography.bib new file mode 100644 index 0000000000000000000000000000000000000000..4bcb0e1939a257d54c4c0aa99495d7568838b4f8 --- /dev/null +++ b/theory/SPH/bibliography.bib @@ -0,0 +1,116 @@ +@ARTICLE{Price2012, + author = {{Price}, D.~J.}, + title = "{Smoothed particle hydrodynamics and magnetohydrodynamics}", + journal = {Journal of Computational Physics}, +archivePrefix = "arXiv", + eprint = {1012.1885}, + primaryClass = "astro-ph.IM", + year = 2012, + month = feb, + volume = 231, + pages = {759-794}, + doi = {10.1016/j.jcp.2010.12.011}, + adsurl = {http://adsabs.harvard.edu/abs/2012JCoPh.231..759P}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + +@ARTICLE{Springel2005, + author = {{Springel}, V.}, + title = "{The cosmological simulation code GADGET-2}", + journal = {\mnras}, + eprint = {astro-ph/0505010}, + keywords = {methods: numerical, galaxies: interactions, dark matter}, + year = 2005, + month = dec, + volume = 364, + pages = {1105-1134}, + doi = {10.1111/j.1365-2966.2005.09655.x}, + adsurl = {http://adsabs.harvard.edu/abs/2005MNRAS.364.1105S}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + +@ARTICLE{Hopkins2013, + author = {{Hopkins}, P.~F.}, + title = "{A general class of Lagrangian smoothed particle hydrodynamics methods and implications for fluid mixing problems}", + journal = {\mnras}, +archivePrefix = "arXiv", + eprint = {1206.5006}, + primaryClass = "astro-ph.IM", + keywords = {hydrodynamics, instabilities, turbulence, methods: numerical, cosmology: theory}, + year = 2013, + month = feb, + volume = 428, + pages = {2840-2856}, + doi = {10.1093/mnras/sts210}, + adsurl = {http://adsabs.harvard.edu/abs/2013MNRAS.428.2840H}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + +@ARTICLE{Springel2002, + author = {{Springel}, V. and {Hernquist}, L.}, + title = "{Cosmological smoothed particle hydrodynamics simulations: the entropy equation}", + journal = {\mnras}, + eprint = {astro-ph/0111016}, + keywords = {methods: numerical, galaxies: evolution, galaxies: starburst, methods: numerical, galaxies: evolution, galaxies: starburst}, + year = 2002, + month = jul, + volume = 333, + pages = {649-664}, + doi = {10.1046/j.1365-8711.2002.05445.x}, + adsurl = {http://adsabs.harvard.edu/abs/2002MNRAS.333..649S}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + +@ARTICLE{Balsara1995, + author = {{Balsara}, D.~S.}, + title = "{von Neumann stability analysis of smooth particle hydrodynamics--suggestions for optimal algorithms}", + journal = {Journal of Computational Physics}, + year = 1995, + volume = 121, + pages = {357-372}, + doi = {10.1016/S0021-9991(95)90221-X}, + adsurl = {http://adsabs.harvard.edu/abs/1995JCoPh.121..357B}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + +@ARTICLE{Schaller2015, + author = {{Schaller}, M. and {Dalla Vecchia}, C. and {Schaye}, J. and + {Bower}, R.~G. and {Theuns}, T. and {Crain}, R.~A. and {Furlong}, M. and + {McCarthy}, I.~G.}, + title = "{The EAGLE simulations of galaxy formation: the importance of the hydrodynamics scheme}", + journal = {\mnras}, +archivePrefix = "arXiv", + eprint = {1509.05056}, + keywords = {methods: numerical, galaxies: clusters: intracluster medium, galaxies: formation, cosmology: theory}, + year = 2015, + month = dec, + volume = 454, + pages = {2277-2291}, + doi = {10.1093/mnras/stv2169}, + adsurl = {http://adsabs.harvard.edu/abs/2015MNRAS.454.2277S}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + + +@ARTICLE{Dehnen2012, + author = {{Dehnen}, W. and {Aly}, H.}, + title = "{Improving convergence in smoothed particle hydrodynamics simulations without pairing instability}", + journal = {\mnras}, +archivePrefix = "arXiv", + eprint = {1204.2471}, + primaryClass = "astro-ph.IM", + keywords = {hydrodynamics, methods: numerical }, + year = 2012, + month = sep, + volume = 425, + pages = {1068-1082}, + doi = {10.1111/j.1365-2966.2012.21439.x}, + adsurl = {http://adsabs.harvard.edu/abs/2012MNRAS.425.1068D}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} diff --git a/theory/SPH/kernel/run.sh b/theory/SPH/kernel/run.sh deleted file mode 100755 index 61b469025b705f298b367963700e99fb13f13bd4..0000000000000000000000000000000000000000 --- a/theory/SPH/kernel/run.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -python kernels.py -pdflatex kernel_definitions.tex -pdflatex kernel_definitions.tex diff --git a/theory/SPH/run.sh b/theory/SPH/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..651aadad79c2471f58221e2b4fcad7a09ab12256 --- /dev/null +++ b/theory/SPH/run.sh @@ -0,0 +1,10 @@ +#!/bin/bash +cd Kernels +python kernels.py +cp kernels.pdf .. +cp kernel_derivatives.pdf .. +cd .. +pdflatex swift_sph.tex +bibtex swift_sph.aux +pdflatex swift_sph.tex +pdflatex swift_sph.tex diff --git a/theory/SPH/swift_sph.tex b/theory/SPH/swift_sph.tex new file mode 100644 index 0000000000000000000000000000000000000000..693df8cbc7cc9d03e98bdc80725572016ccf7bd0 --- /dev/null +++ b/theory/SPH/swift_sph.tex @@ -0,0 +1,38 @@ +\documentclass[fleqn, usenatbib, useAMS]{mnras} +\usepackage{graphicx} +\usepackage{amsmath,paralist,xcolor,xspace,amssymb} +\usepackage{times} +\usepackage[super]{nth} + +\newcommand{\swift}{{\sc swift}\xspace} +\newcommand{\gadget}{{\sc gadget}\xspace} +\newcommand{\dd}[2]{\frac{\partial #1}{\partial #2}} +\renewcommand{\vec}[1]{{\mathbf{#1}}} +\newcommand{\Wij}{\overline{\nabla_xW_{ij}}} +\newcommand{\tbd}{\textcolor{red}{TO BE DONE}} + +\newcommand{\MinimalSPH}{\textsc{minimal-sph}\xspace} +\newcommand{\GadgetSPH}{\textsc{gadget-sph}\xspace} +\newcommand{\PESPH}{\textsc{pe-sph}\xspace} +%\setlength{\mathindent}{0pt} + +%opening +\title{SPH implementation in \swift} +\author{Matthieu Schaller} + +\begin{document} + +\maketitle +\section{Kernel functions} +\input{Kernels/kernel_definitions} + +\section{Equation of state} +\input{EoS/eos} + +\section{SPH flavours} +\input{Flavours/sph_flavours} + +\bibliographystyle{mnras} +\bibliography{./bibliography} + +\end{document}