diff --git a/README b/README index b0a21c610f7ab4c380e523b51c6f74b81f98a2f2..cd2a397a18e872e7914b24fd58cc588ec1d6c8c0 100644 --- a/README +++ b/README @@ -26,6 +26,8 @@ Valid options are: -f {int} Overwrite the CPU frequency (Hz) to be used for time measurements -g Run with an external gravitational potential -G Run with self-gravity + -n {int} Execute a fixed number of time steps. When unset use the time_end + parameter to stop. -s Run with SPH -t {int} The number of threads to use on each MPI rank. Defaults to 1 if not specified. -v [12] Increase the level of verbosity 1: MPI-rank 0 writes diff --git a/configure.ac b/configure.ac index 497107121753f212dd8b07f5a8e8eed7acdf82b5..23fbf5c6e4c4211379986a4a9c951892d044713c 100644 --- a/configure.ac +++ b/configure.ac @@ -72,9 +72,6 @@ if test "$enable_ipo" = "yes"; then fi fi -# Add libtool support. -LT_INIT - # Check for MPI. Need to do this before characterising the compiler (C99 mode), # as this changes the compiler. # We should consider using AX_PROG_CC_MPI to replace AC_PROG_CC when compiling @@ -151,6 +148,9 @@ AM_CONDITIONAL([HAVEMPI],[test $enable_mpi = "yes"]) # Indicate that MPIRUN can be modified by an environement variable AC_ARG_VAR(MPIRUN, Path to the mpirun command if non-standard) +# Add libtool support (now that CC is defined). +LT_INIT + # Need C99 and inline support. AC_PROG_CC_C99 AC_C_INLINE @@ -179,7 +179,7 @@ if test "$enable_opt" = "yes" ; then ac_test_CFLAGS="yes" CFLAGS="$old_CFLAGS $CFLAGS" - # Check SSE & AVX support (some overlap with AX_CC_MAXOPT). + # Check SSE & AVX support (some overlap with AX_CC_MAXOPT). # Don't use the SIMD_FLAGS result with Intel compilers. The -x<code> # value from AX_CC_MAXOPT should be sufficient. AX_EXT @@ -287,12 +287,75 @@ AC_SUBST([METIS_LIBS]) AC_SUBST([METIS_INCS]) AM_CONDITIONAL([HAVEMETIS],[test -n "$METIS_LIBS"]) -# # Check for zlib. -# AC_CHECK_LIB([z],[gzopen],[ -# AC_DEFINE([HAVE_LIBZ],[1],[Set to 1 if zlib is installed.]) -# LDFLAGS="$LDFLAGS -lz" -# ],[]) +# Check for tcmalloc a fast malloc that is part of the gperftools. +have_tcmalloc="no" +AC_ARG_WITH([tcmalloc], + [AS_HELP_STRING([--with-tcmalloc], + [use tcmalloc library or specify the directory with lib @<:@yes/no@:>@] + )], + [with_tcmalloc="$withval"], + [with_tcmalloc="no"] +) +if test "x$with_tcmalloc" != "xno"; then + if test "x$with_tcmalloc" != "xyes" && test "x$with_tcmalloc" != "x"; then + tclibs="-L$with_tcmalloc -ltcmalloc" + else + tclibs="-ltcmalloc" + fi + AC_CHECK_LIB([tcmalloc],[tc_cfree],[have_tcmalloc="yes"],[have_tcmalloc="no"], + $tclibs) + + # Could just have the minimal version. + if test "$have_tcmalloc" = "no"; then + if test "x$with_tcmalloc" != "xyes" && test "x$with_tcmalloc" != "x"; then + tclibs="-L$with_tcmalloc -ltcmalloc_minimal" + else + tclibs="-ltcmalloc_minimal" + fi + AC_CHECK_LIB([tcmalloc],[tc_cfree],[have_tcmalloc="yes"],[have_tcmalloc="no"], + $tclibs) + fi + if test "$have_tcmalloc" = "yes"; then + TCMALLOC_LIBS="$tclibs" + + # These are recommended for GCC. + if test "$ax_cv_c_compiler_vendor" = "gnu"; then + CFLAGS="$CFLAGS -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free" + fi + else + TCMALLOC_LIBS="" + fi +fi +AC_SUBST([TCMALLOC_LIBS]) +AM_CONDITIONAL([HAVETCMALLOC],[test -n "$TCMALLOC_LIBS"]) + +# Check for -lprofiler usually part of the gpreftools along with tcmalloc. +have_profiler="no" +AC_ARG_WITH([profiler], + [AS_HELP_STRING([--with-profiler], + [use cpu profiler library or specify the directory with lib @<:@yes/no@:>@] + )], + [with_profiler="$withval"], + [with_profiler="yes"] +) +if test "x$with_profiler" != "xno"; then + if test "x$with_profiler" != "xyes" && test "x$with_profiler" != "x"; then + proflibs="-L$with_profiler -lprofiler" + else + proflibs="-lprofiler" + fi + AC_CHECK_LIB([profiler],[ProfilerFlush],[have_profiler="yes"],[have_profiler="no"], + $proflibs) + + if test "$have_profiler" = "yes"; then + PROFILER_LIBS="$proflibs" + else + PROFILER_LIBS="" + fi +fi +AC_SUBST([PROFILER_LIBS]) +AM_CONDITIONAL([HAVEPROFILER],[test -n "$PROFILER_LIBS"]) # Check for HDF5. This is required. AX_LIB_HDF5 @@ -410,6 +473,8 @@ AC_MSG_RESULT([ - parallel : $have_parallel_hdf5 Metis enabled : $have_metis libNUMA enabled : $have_numa + Using tcmalloc : $have_tcmalloc + CPU profiler : $have_profiler ]) # Generate output. diff --git a/examples/Makefile.am b/examples/Makefile.am index 735817c24cc52786e4c562e46e3619fb4a9a2e34..22911e903e8bbf8265d712185f41de2bf4f53553 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -21,13 +21,17 @@ MYFLAGS = -DTIMER # Add the source directory and debug to CFLAGS AM_CFLAGS = -I../src $(HDF5_CPPFLAGS) -AM_LDFLAGS = +AM_LDFLAGS = $(HDF5_LDFLAGS) -MPI_THREAD_LIBS = @MPI_THREAD_LIBS@ +# Extra libraries. +EXTRA_LIBS = $(HDF5_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) + +# MPI libraries. MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS) MPI_FLAGS = -DWITH_MPI $(METIS_INCS) -# Set-up the library + +# Programs. bin_PROGRAMS = swift swift_fixdt # Build MPI versions as well? @@ -45,20 +49,20 @@ endif # Sources for swift swift_SOURCES = main.c swift_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) -DENGINE_POLICY="engine_policy_keep $(ENGINE_POLICY_SETAFFINITY)" -swift_LDADD = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) +swift_LDADD = ../src/.libs/libswiftsim.a $(EXTRA_LIBS) swift_fixdt_SOURCES = main.c swift_fixdt_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) -DENGINE_POLICY="engine_policy_fixdt | engine_policy_keep $(ENGINE_POLICY_SETAFFINITY)" -swift_fixdt_LDADD = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) +swift_fixdt_LDADD = ../src/.libs/libswiftsim.a $(EXTRA_LIBS) # Sources for swift_mpi, do we need an affinity policy for MPI? swift_mpi_SOURCES = main.c swift_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_keep $(ENGINE_POLICY_SETAFFINITY)" -swift_mpi_LDADD = ../src/.libs/libswiftsim_mpi.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(MPI_LIBS) +swift_mpi_LDADD = ../src/.libs/libswiftsim_mpi.a $(MPI_LIBS) $(EXTRA_LIBS) swift_fixdt_mpi_SOURCES = main.c swift_fixdt_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_fixdt | engine_policy_keep $(ENGINE_POLICY_SETAFFINITY)" -swift_fixdt_mpi_LDADD = ../src/.libs/libswiftsim_mpi.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(MPI_LIBS) +swift_fixdt_mpi_LDADD = ../src/.libs/libswiftsim_mpi.a $(MPI_LIBS) $(EXTRA_LIBS) # Scripts to generate ICs EXTRA_DIST = UniformBox/makeIC.py UniformBox/run.sh UniformBox/uniformBox.yml \ diff --git a/examples/main.c b/examples/main.c index fa0ac2567ac253568903ce8c47b2c9c38fee3704..016f948bb1b031ba756a344d020d4b26c8a42bed 100644 --- a/examples/main.c +++ b/examples/main.c @@ -74,6 +74,8 @@ void print_help_message() { printf(" %2s %8s %s\n", "-g", "", "Run with an external gravitational potential"); printf(" %2s %8s %s\n", "-G", "", "Run with self-gravity"); + printf(" %2s %8s %s\n", "-n", "{int}", + "Execute a fixed number of time steps"); printf(" %2s %8s %s\n", "-s", "", "Run with SPH"); printf(" %2s %8s %s\n", "-t", "{int}", "The number of threads to use on each MPI rank. Defaults to 1 if not " @@ -138,6 +140,7 @@ int main(int argc, char *argv[]) { int with_aff = 0; int dry_run = 0; int dump_tasks = 0; + int nsteps = -2; int with_cosmology = 0; int with_external_gravity = 0; int with_self_gravity = 0; @@ -150,7 +153,7 @@ int main(int argc, char *argv[]) { /* Parse the parameters */ int c; - while ((c = getopt(argc, argv, "acdef:gGhst:v:y:")) != -1) switch (c) { + while ((c = getopt(argc, argv, "acdef:gGhn:st:v:y:")) != -1) switch (c) { case 'a': with_aff = 1; break; @@ -179,6 +182,13 @@ int main(int argc, char *argv[]) { case 'h': if (myrank == 0) print_help_message(); return 0; + case 'n': + if (sscanf(optarg, "%d", &nsteps) != 1) { + if (myrank == 0) printf("Error parsing fixed number of steps.\n"); + if (myrank == 0) print_help_message(); + return 1; + } + break; case 's': with_hydro = 1; break; @@ -322,18 +332,21 @@ int main(int argc, char *argv[]) { size_t Ngas = 0, Ngpart = 0; double dim[3] = {0., 0., 0.}; int periodic = 0; + int flag_entropy_ICs = 0; if (myrank == 0) clocks_gettime(&tic); #if defined(WITH_MPI) #if defined(HAVE_PARALLEL_HDF5) read_ic_parallel(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic, - myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, dry_run); + &flag_entropy_ICs, myrank, nr_nodes, MPI_COMM_WORLD, + MPI_INFO_NULL, dry_run); #else read_ic_serial(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic, - myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, dry_run); + &flag_entropy_ICs, myrank, nr_nodes, MPI_COMM_WORLD, + MPI_INFO_NULL, dry_run); #endif #else read_ic_single(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic, - dry_run); + &flag_entropy_ICs, dry_run); #endif if (myrank == 0) { clocks_gettime(&toc); @@ -354,7 +367,7 @@ int main(int argc, char *argv[]) { free(parts); parts = NULL; for (size_t k = 0; k < Ngpart; ++k) - if (gparts[k].id > 0) error("Linking problem"); + if (gparts[k].id_or_neg_offset < 0) error("Linking problem"); Ngas = 0; } @@ -466,7 +479,7 @@ int main(int argc, char *argv[]) { #endif /* Initialise the particles */ - engine_init_particles(&e); + engine_init_particles(&e, flag_entropy_ICs); /* Legend */ if (myrank == 0) @@ -474,7 +487,7 @@ int main(int argc, char *argv[]) { "Updates", "g-Updates", "Wall-clock time", clocks_getunit()); /* Main simulation loop */ - for (int j = 0; !engine_is_done(&e); j++) { + for (int j = 0; !engine_is_done(&e) && e.step != nsteps; j++) { /* Repartition the space amongst the nodes? */ #ifdef WITH_MPI diff --git a/src/Makefile.am b/src/Makefile.am index 21b5ef25ca21202caaa9107bfbf09e62aa66011d..bc263d6fb0b2a8378e26a3627cf26204c7b09bb4 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -24,6 +24,13 @@ AM_LDFLAGS = $(HDF5_LDFLAGS) -version-info 0:0:0 # The git command, if available. GIT_CMD = @GIT_CMD@ +# Additional dependencies for shared libraries. +EXTRA_LIBS = $(HDF5_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) + +# MPI libraries. +MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS) +MPI_FLAGS = -DWITH_MPI $(METIS_INCS) + # Build the libswiftsim library lib_LTLIBRARIES = libswiftsim.la # Build a MPI-enabled version too? @@ -65,11 +72,13 @@ nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel_h # Sources and flags for regular library libswiftsim_la_SOURCES = $(AM_SOURCES) +libswiftsim_la_CFLAGS = $(AM_CFLAGS) +libswiftsim_la_LDFLAGS = $(AM_LDFLAGS) $(EXTRA_LIBS) # Sources and flags for MPI library libswiftsim_mpi_la_SOURCES = $(AM_SOURCES) -libswiftsim_mpi_la_CFLAGS = $(AM_CFLAGS) -DWITH_MPI $(METIS_INCS) -libswiftsim_mpi_la_LDFLAGS = $(AM_LDFLAGS) -DWITH_MPI $(METIS_LIBS) +libswiftsim_mpi_la_CFLAGS = $(AM_CFLAGS) $(MPI_FLAGS) +libswiftsim_mpi_la_LDFLAGS = $(AM_LDFLAGS) $(MPI_LIBS) $(EXTRA_LIBS) libswiftsim_mpi_la_SHORTNAME = mpi diff --git a/src/cell.c b/src/cell.c index e152cd6136a7d2a04eb6a657fb9ad85f7d1b8c1b..cd83ddab570a3c848e0502629534743e8f383a4f 100644 --- a/src/cell.c +++ b/src/cell.c @@ -128,6 +128,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { } /* Return the total number of unpacked cells. */ + c->pcell_size = count; return count; } @@ -213,6 +214,39 @@ int cell_pack(struct cell *c, struct pcell *pc) { pc->progeny[k] = -1; /* Return the number of packed cells used. */ + c->pcell_size = count; + return count; +} + +int cell_pack_ti_ends(struct cell *c, int *ti_ends) { + + /* Pack this cell's data. */ + ti_ends[0] = c->ti_end_min; + + /* Fill in the progeny, depth-first recursion. */ + int count = 1; + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) { + count += cell_pack_ti_ends(c->progeny[k], &ti_ends[count]); + } + + /* Return the number of packed values. */ + return count; +} + +int cell_unpack_ti_ends(struct cell *c, int *ti_ends) { + + /* Unpack this cell's data. */ + c->ti_end_min = ti_ends[0]; + + /* Fill in the progeny, depth-first recursion. */ + int count = 1; + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) { + count += cell_unpack_ti_ends(c->progeny[k], &ti_ends[count]); + } + + /* Return the number of packed values. */ return count; } @@ -376,9 +410,11 @@ void cell_gunlocktree(struct cell *c) { * @brief Sort the parts into eight bins along the given pivots. * * @param c The #cell array to be sorted. + * @param parts_offset Offset of the cell parts array relative to the + * space's parts array, i.e. c->parts - s->parts. */ -void cell_split(struct cell *c) { +void cell_split(struct cell *c, ptrdiff_t parts_offset) { int i, j; const int count = c->count, gcount = c->gcount; @@ -496,8 +532,7 @@ void cell_split(struct cell *c) { } /* Re-link the gparts. */ - for (int k = 0; k < count; k++) - if (parts[k].gpart != NULL) parts[k].gpart->part = &parts[k]; + part_relink_gparts(parts, count, parts_offset); #ifdef SWIFT_DEBUG_CHECKS /* Verify that _all_ the parts have been assigned to a cell. */ @@ -592,8 +627,7 @@ void cell_split(struct cell *c) { } /* Re-link the parts. */ - for (int k = 0; k < gcount; k++) - if (gparts[k].id > 0) gparts[k].part->gpart = &gparts[k]; + part_relink_parts(gparts, gcount, parts - parts_offset); } /** diff --git a/src/cell.h b/src/cell.h index ad39354c6ced334c8c6dfa8420dc6c7f649009a3..b5cc49d05c1cee6e0a884cdb708abbaff4281829 100644 --- a/src/cell.h +++ b/src/cell.h @@ -24,6 +24,9 @@ #define SWIFT_CELL_H /* Includes. */ +#include <stddef.h> + +/* Local includes. */ #include "lock.h" #include "multipole.h" #include "part.h" @@ -122,7 +125,10 @@ struct cell { struct task *ghost, *init, *drift, *kick; /* Task receiving data. */ - struct task *recv_xv, *recv_rho; + struct task *recv_xv, *recv_rho, *recv_ti; + + /* Task send data. */ + struct link *send_xv, *send_rho, *send_ti; /* Tasks for gravity tree. */ struct task *grav_up, *grav_down; @@ -175,13 +181,15 @@ struct cell { ((int)(k) + (cdim)[2] * ((int)(j) + (cdim)[1] * (int)(i))) /* Function prototypes. */ -void cell_split(struct cell *c); +void cell_split(struct cell *c, ptrdiff_t parts_offset); int cell_locktree(struct cell *c); void cell_unlocktree(struct cell *c); int cell_glocktree(struct cell *c); void cell_gunlocktree(struct cell *c); int cell_pack(struct cell *c, struct pcell *pc); int cell_unpack(struct pcell *pc, struct cell *c, struct space *s); +int cell_pack_ti_ends(struct cell *c, int *ti_ends); +int cell_unpack_ti_ends(struct cell *c, int *ti_ends); int cell_getsize(struct cell *c); int cell_link_parts(struct cell *c, struct part *parts); int cell_link_gparts(struct cell *c, struct gpart *gparts); diff --git a/src/common_io.c b/src/common_io.c index 03d5618cd0cafe5ff6067cf72107baacffb5d85a..971fe6b01c682c489d3444ec1d47d6a902250bb8 100644 --- a/src/common_io.c +++ b/src/common_io.c @@ -516,12 +516,10 @@ void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm) { /* Let's give all these gparts a negative id */ for (size_t i = 0; i < Ndm; ++i) { - /* 0 or negative ids are not allowed */ - if (gparts[i].id <= 0) - error("0 or negative ID for DM particle %zd: ID=%lld", i, gparts[i].id); - - gparts[i].id = -gparts[i].id; + if (gparts[i].id_or_neg_offset <= 0) + error("0 or negative ID for DM particle %zd: ID=%lld", i, + gparts[i].id_or_neg_offset); } } @@ -555,7 +553,7 @@ void duplicate_hydro_gparts(struct part* const parts, gparts[i + Ndm].mass = parts[i].mass; /* Link the particles */ - gparts[i + Ndm].part = &parts[i]; + gparts[i + Ndm].id_or_neg_offset = -i; parts[i].gpart = &gparts[i + Ndm]; } } @@ -580,9 +578,8 @@ void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot, * gparts[i].part); */ /* And collect the DM ones */ - if (gparts[i].id < 0LL) { - memcpy(&dmparts[count], &gparts[i], sizeof(struct gpart)); - dmparts[count].id = -dmparts[count].id; + if (gparts[i].id_or_neg_offset > 0) { + dmparts[count] = gparts[i]; count++; } } diff --git a/src/debug.c b/src/debug.c index 77238ab135e3f27b8c2c43b0ec18e7745493b138..e7b60c52f92fdb64fdefc8ac5c5e353f1ab57e6e 100644 --- a/src/debug.c +++ b/src/debug.c @@ -59,8 +59,8 @@ * * (Should be used for debugging only as it runs in O(N).) */ -void printParticle(struct part *parts, struct xpart *xparts, long long int id, - size_t N) { +void printParticle(const struct part *parts, struct xpart *xparts, + long long int id, size_t N) { int found = 0; @@ -82,24 +82,27 @@ void printParticle(struct part *parts, struct xpart *xparts, long long int id, * the standard output. * * @param gparts The array of g-particles. + * @param parts The array of particles. * @param id The id too look for. * @param N The size of the array of g-particles. * * (Should be used for debugging only as it runs in O(N).) */ -void printgParticle(struct gpart *gparts, long long int id, size_t N) { +void printgParticle(const struct gpart *gparts, const struct part *parts, + long long int id, size_t N) { int found = 0; /* Look for the particle. */ for (size_t i = 0; i < N; i++) - if (gparts[i].id == -id) { - printf("## gParticle[%zd] (DM) :\n id=%lld ", i, -gparts[i].id); + if (gparts[i].id_or_neg_offset == id) { + printf("## gParticle[%zd] (DM) :\n id=%lld", i, id); gravity_debug_particle(&gparts[i]); found = 1; break; - } else if (gparts[i].id > 0 && gparts[i].part->id == id) { - printf("## gParticle[%zd] (hydro) :\n id=%lld ", i, gparts[i].id); + } else if (gparts[i].id_or_neg_offset < 0 && + parts[-gparts[i].id_or_neg_offset].id == id) { + printf("## gParticle[%zd] (hydro) :\n id=%lld", i, id); gravity_debug_particle(&gparts[i]); found = 1; break; @@ -114,9 +117,9 @@ void printgParticle(struct gpart *gparts, long long int id, size_t N) { * @param p The particle to print * @param xp The extended data ot the particle to print */ -void printParticle_single(struct part *p, struct xpart *xp) { +void printParticle_single(const struct part *p, const struct xpart *xp) { - printf("## Particle: id=%lld ", p->id); + printf("## Particle: id=%lld", p->id); hydro_debug_particle(p, xp); printf("\n"); } @@ -128,7 +131,7 @@ void printParticle_single(struct part *p, struct xpart *xp) { */ void printgParticle_single(struct gpart *gp) { - printf("## g-Particle: id=%lld ", gp->id); + printf("## g-Particle: id=%lld ", gp->id_or_neg_offset); gravity_debug_particle(gp); printf("\n"); } diff --git a/src/debug.h b/src/debug.h index 13be15adb867e8bafe95e2900f68caaa36192510..367241201977d9b79a8c2913dbae5d08f1148529 100644 --- a/src/debug.h +++ b/src/debug.h @@ -19,14 +19,15 @@ #ifndef SWIFT_DEBUG_H #define SWIFT_DEBUG_H -struct part; -struct gpart; -struct xpart; +/* Includes. */ +#include "cell.h" +#include "part.h" -void printParticle(struct part *parts, struct xpart *xparts, long long int id, - size_t N); -void printgParticle(struct gpart *parts, long long int id, size_t N); -void printParticle_single(struct part *p, struct xpart *xp); +void printParticle(const struct part *parts, struct xpart *xparts, + long long int id, size_t N); +void printgParticle(const struct gpart *gparts, const struct part *parts, + long long int id, size_t N); +void printParticle_single(const struct part *p, const struct xpart *xp); void printgParticle_single(struct gpart *gp); #ifdef HAVE_METIS diff --git a/src/engine.c b/src/engine.c index 7524f021c89ca2d2ac91147292c13b16ed207cf2..07332635f66f4acf4f0c4f99146b82cf0d84c5b2 100644 --- a/src/engine.c +++ b/src/engine.c @@ -128,10 +128,12 @@ void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c, engine_policy_external_gravity; const int is_fixdt = (e->policy & engine_policy_fixdt) == engine_policy_fixdt; - /* Is this the super-cell? */ - if (super == NULL && (c->grav != NULL || (c->gcount > 0 && !c->split))) { + /* Am I the super-cell? */ + /* TODO(pedro): Add a condition for gravity tasks as well. */ + if (super == NULL && + (c->density != NULL || (!c->split && (c->count > 0 || c->gcount > 0)))) { - /* This is the super cell, i.e. the first with gravity tasks attached. */ + /* This is the super cell, i.e. the first with density tasks attached. */ super = c; /* Local tasks only... */ @@ -331,11 +333,11 @@ void engine_redistribute(struct engine *e) { } #ifdef SWIFT_DEBUG_CHECKS - if (s->parts[k].gpart->id < 0) + if (s->parts[k].gpart->id_or_neg_offset >= 0) error("Trying to link a partnerless gpart !"); #endif - s->parts[k].gpart->id = count_this_dest; + s->parts[k].gpart->id_or_neg_offset = -count_this_dest; count_this_dest++; } } @@ -519,13 +521,14 @@ void engine_redistribute(struct engine *e) { for (size_t k = offset_gparts; k < offset_gparts + count_gparts; ++k) { /* Does this gpart have a partner ? */ - if (gparts_new[k].id >= 0) { + if (gparts_new[k].id_or_neg_offset <= 0) { - const size_t partner_index = offset_parts + gparts_new[k].id; + const ptrdiff_t partner_index = + offset_parts - gparts_new[k].id_or_neg_offset; /* Re-link */ - gparts_new[k].part = &parts_new[partner_index]; - gparts_new[k].part->gpart = &gparts_new[k]; + gparts_new[k].id_or_neg_offset = -partner_index; + parts_new[partner_index].gpart = &gparts_new[k]; } } @@ -546,22 +549,22 @@ void engine_redistribute(struct engine *e) { /* Verify that the links are correct */ for (size_t k = 0; k < nr_gparts; ++k) { - if (gparts_new[k].id > 0) { + if (gparts_new[k].id_or_neg_offset <= 0) { + + struct part *part = &parts_new[-gparts_new[k].id_or_neg_offset]; - if (gparts_new[k].part->gpart != &gparts_new[k]) - error("Linking problem !"); + if (part->gpart != &gparts_new[k]) error("Linking problem !"); - if (gparts_new[k].x[0] != gparts_new[k].part->x[0] || - gparts_new[k].x[1] != gparts_new[k].part->x[1] || - gparts_new[k].x[2] != gparts_new[k].part->x[2]) + if (gparts_new[k].x[0] != part->x[0] || + gparts_new[k].x[1] != part->x[1] || gparts_new[k].x[2] != part->x[2]) error("Linked particles are not at the same position !"); } } for (size_t k = 0; k < nr_parts; ++k) { - if (parts_new[k].gpart != NULL) { - - if (parts_new[k].gpart->part != &parts_new[k]) error("Linking problem !"); + if (parts_new[k].gpart != NULL && + parts_new[k].gpart->id_or_neg_offset != -k) { + error("Linking problem !"); } } #endif @@ -673,44 +676,63 @@ void engine_addtasks_grav(struct engine *e, struct cell *c, struct task *up, * * @param e The #engine. * @param ci The sending #cell. - * @param cj The receiving #cell + * @param cj Dummy cell containing the nodeID of the receiving node. + * @param t_xv The send_xv #task, if it has already been created. + * @param t_rho The send_rho #task, if it has already been created. + * @param t_ti The send_ti #task, if required and has already been created. */ -void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj) { +void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, + struct task *t_xv, struct task *t_rho, + struct task *t_ti) { #ifdef WITH_MPI struct link *l = NULL; struct scheduler *s = &e->sched; + const int nodeID = cj->nodeID; /* Check if any of the density tasks are for the target node. */ for (l = ci->density; l != NULL; l = l->next) - if (l->t->ci->nodeID == cj->nodeID || - (l->t->cj != NULL && l->t->cj->nodeID == cj->nodeID)) + if (l->t->ci->nodeID == nodeID || + (l->t->cj != NULL && l->t->cj->nodeID == nodeID)) break; /* If so, attach send tasks. */ if (l != NULL) { - /* Create the tasks. */ - struct task *t_xv = scheduler_addtask(s, task_type_send, task_subtype_none, - 2 * ci->tag, 0, ci, cj, 0); - struct task *t_rho = scheduler_addtask(s, task_type_send, task_subtype_none, - 2 * ci->tag + 1, 0, ci, cj, 0); + /* Create the tasks and their dependencies? */ + if (t_xv == NULL) { + t_xv = scheduler_addtask(s, task_type_send, task_subtype_none, + 3 * ci->tag, 0, ci, cj, 0); + t_rho = scheduler_addtask(s, task_type_send, task_subtype_none, + 3 * ci->tag + 1, 0, ci, cj, 0); + if (!(e->policy & engine_policy_fixdt)) + t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend, + 3 * ci->tag + 2, 0, ci, cj, 0); - /* The send_rho task depends on the cell's ghost task. */ - scheduler_addunlock(s, ci->super->ghost, t_rho); + /* The send_rho task depends on the cell's ghost task. */ + scheduler_addunlock(s, ci->super->ghost, t_rho); - /* The send_rho task should unlock the super-cell's kick task. */ - scheduler_addunlock(s, t_rho, ci->super->kick); + /* The send_rho task should unlock the super-cell's kick task. */ + scheduler_addunlock(s, t_rho, ci->super->kick); - /* The send_xv task should unlock the super-cell's ghost task. */ - scheduler_addunlock(s, t_xv, ci->super->ghost); + /* The send_xv task should unlock the super-cell's ghost task. */ + scheduler_addunlock(s, t_xv, ci->super->ghost); + /* The super-cell's kick task should unlock the send_ti task. */ + if (t_ti != NULL) scheduler_addunlock(s, ci->super->kick, t_ti); + } + + /* Add them to the local cell. */ + ci->send_xv = engine_addlink(e, ci->send_xv, t_xv); + ci->send_rho = engine_addlink(e, ci->send_rho, t_rho); + if (t_ti != NULL) ci->send_ti = engine_addlink(e, ci->send_ti, t_ti); } /* Recurse? */ - else if (ci->split) + if (ci->split) for (int k = 0; k < 8; k++) - if (ci->progeny[k] != NULL) engine_addtasks_send(e, ci->progeny[k], cj); + if (ci->progeny[k] != NULL) + engine_addtasks_send(e, ci->progeny[k], cj, t_xv, t_rho, t_ti); #else error("SWIFT was not compiled with MPI support."); @@ -721,41 +743,51 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj) { * @brief Add recv tasks to a hierarchy of cells. * * @param e The #engine. - * @param c The #cell. + * @param c The foreign #cell. * @param t_xv The recv_xv #task, if it has already been created. * @param t_rho The recv_rho #task, if it has already been created. + * @param t_ti The recv_ti #task, if required and has already been created. */ - void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv, - struct task *t_rho) { + struct task *t_rho, struct task *t_ti) { #ifdef WITH_MPI struct scheduler *s = &e->sched; - /* Do we need to construct a recv task? */ - if (t_xv == NULL && c->nr_density > 0) { + /* Do we need to construct a recv task? + Note that since c is a foreign cell, all its density tasks will involve + only the current rank, and thus we don't have to check them.*/ + if (t_xv == NULL && c->density != NULL) { /* Create the tasks. */ - t_xv = c->recv_xv = scheduler_addtask(s, task_type_recv, task_subtype_none, - 2 * c->tag, 0, c, NULL, 0); - t_rho = c->recv_rho = scheduler_addtask( - s, task_type_recv, task_subtype_none, 2 * c->tag + 1, 0, c, NULL, 0); - } + t_xv = scheduler_addtask(s, task_type_recv, task_subtype_none, 3 * c->tag, + 0, c, NULL, 0); + t_rho = scheduler_addtask(s, task_type_recv, task_subtype_none, + 3 * c->tag + 1, 0, c, NULL, 0); + if (!(e->policy & engine_policy_fixdt)) + t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend, + 3 * c->tag + 2, 0, c, NULL, 0); + } + c->recv_xv = t_xv; + c->recv_rho = t_rho; + c->recv_ti = t_ti; /* Add dependencies. */ for (struct link *l = c->density; l != NULL; l = l->next) { scheduler_addunlock(s, t_xv, l->t); scheduler_addunlock(s, l->t, t_rho); } - for (struct link *l = c->force; l != NULL; l = l->next) + for (struct link *l = c->force; l != NULL; l = l->next) { scheduler_addunlock(s, t_rho, l->t); + if (t_ti != NULL) scheduler_addunlock(s, l->t, t_ti); + } if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts); /* Recurse? */ if (c->split) for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) - engine_addtasks_recv(e, c->progeny[k], t_xv, t_rho); + engine_addtasks_recv(e, c->progeny[k], t_xv, t_rho, t_ti); #else error("SWIFT was not compiled with MPI support."); @@ -781,7 +813,8 @@ void engine_exchange_cells(struct engine *e) { MPI_Status status; ticks tic = getticks(); - /* Run through the cells and get the size of the ones that will be sent */ + /* Run through the cells and get the size of the ones that will be sent off. + */ int count_out = 0; for (int k = 0; k < nr_cells; k++) { offset[k] = count_out; @@ -946,7 +979,8 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, /* Re-link the associated gpart with the buffer offset of the part. */ if (s->parts[offset_parts + k].gpart != NULL) { - s->parts[offset_parts + k].gpart->id = e->proxies[pid].nr_parts_out; + s->parts[offset_parts + k].gpart->id_or_neg_offset = + -e->proxies[pid].nr_parts_out; } /* Load the part and xpart into the proxy. */ @@ -962,7 +996,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, error( "Do not have a proxy for the requested nodeID %i for part with " "id=%lli, x=[%e,%e,%e].", - node_id, s->gparts[offset_parts + k].id, + node_id, s->gparts[offset_parts + k].id_or_neg_offset, s->gparts[offset_gparts + k].x[0], s->gparts[offset_parts + k].x[1], s->gparts[offset_gparts + k].x[2]); proxy_gparts_load(&e->proxies[pid], &s->gparts[offset_gparts + k], 1); @@ -1022,7 +1056,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, s->xparts = xparts_new; for (size_t k = 0; k < offset_parts; k++) { if (s->parts[k].gpart != NULL) { - s->parts[k].gpart->part = &s->parts[k]; + s->parts[k].gpart->id_or_neg_offset = -k; } } } @@ -1037,8 +1071,8 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, free(s->gparts); s->gparts = gparts_new; for (size_t k = 0; k < offset_gparts; k++) { - if (s->gparts[k].id > 0) { - s->gparts[k].part->gpart = &s->gparts[k]; + if (s->gparts[k].id_or_neg_offset < 0) { + s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; } } } @@ -1111,9 +1145,10 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, /* Re-link the gparts. */ for (int k = 0; k < p->nr_gparts_in; k++) { struct gpart *gp = &s->gparts[offset_gparts + count_gparts + k]; - if (gp->id >= 0) { - struct part *p = &s->parts[offset_parts + count_parts + gp->id]; - gp->part = p; + if (gp->id_or_neg_offset <= 0) { + struct part *p = + &s->parts[offset_gparts + count_parts - gp->id_or_neg_offset]; + gp->id_or_neg_offset = s->parts - p; p->gpart = gp; } } @@ -1563,12 +1598,13 @@ void engine_maketasks(struct engine *e) { /* Loop through the proxy's incoming cells and add the recv tasks. */ for (int k = 0; k < p->nr_cells_in; k++) - engine_addtasks_recv(e, p->cells_in[k], NULL, NULL); + engine_addtasks_recv(e, p->cells_in[k], NULL, NULL, NULL); /* Loop through the proxy's outgoing cells and add the send tasks. */ for (int k = 0; k < p->nr_cells_out; k++) - engine_addtasks_send(e, p->cells_out[k], p->cells_in[0]); + engine_addtasks_send(e, p->cells_out[k], p->cells_in[0], NULL, NULL, + NULL); } } @@ -1643,10 +1679,9 @@ int engine_marktasks(struct engine *e) { for (int k = 0; k < nr_tasks; k++) { /* Get a handle on the kth task. */ - struct task *t = &tasks[ind[k]]; + struct task *t = &tasks[k]; - /* Sort-task? Note that due to the task ranking, the sorts - will all come before the pairs. */ + /* Sort-task? */ if (t->type == task_type_sort) { /* Re-set the flags. */ @@ -1655,6 +1690,24 @@ int engine_marktasks(struct engine *e) { } + /* Send/recv-task? */ + else if (t->type == task_type_send || t->type == task_type_recv) { + t->skip = 1; + } + } + + /* Run through the tasks and mark as skip or not. */ + for (int k = 0; k < nr_tasks; k++) { + + /* Get a handle on the kth task. */ + struct task *t = &tasks[k]; + + /* Skip sorts, sends, and recvs. */ + if (t->type == task_type_sort || t->type == task_type_send || + t->type == task_type_recv) { + continue; + } + /* Single-cell task? */ else if (t->type == task_type_self || t->type == task_type_ghost || t->type == task_type_sub_self) { @@ -1670,9 +1723,6 @@ int engine_marktasks(struct engine *e) { const struct cell *ci = t->ci; const struct cell *cj = t->cj; - /* Set this task's skip. */ - t->skip = (ci->ti_end_min > ti_end && cj->ti_end_min > ti_end); - /* Too much particle movement? */ if (t->tight && (fmaxf(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin || @@ -1680,8 +1730,13 @@ int engine_marktasks(struct engine *e) { cj->dx_max > space_maxreldx * cj->h_max)) return 1; + /* Set this task's skip. */ + if ((t->skip = (ci->ti_end_min > ti_end && cj->ti_end_min > ti_end)) == + 1) + continue; + /* Set the sort flags. */ - if (!t->skip && t->type == task_type_pair) { + if (t->type == task_type_pair) { if (!(ci->sorted & (1 << t->flags))) { ci->sorts->flags |= (1 << t->flags); ci->sorts->skip = 0; @@ -1692,6 +1747,68 @@ int engine_marktasks(struct engine *e) { } } + /* Activate the send/recv flags. */ + if (ci->nodeID != e->nodeID) { + + /* Activate the tasks to recv foreign cell ci's data. */ + ci->recv_xv->skip = 0; + ci->recv_rho->skip = 0; + ci->recv_ti->skip = 0; + + /* Look for the local cell cj's send tasks. */ + struct link *l = NULL; + for (l = cj->send_xv; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) { + abort(); + error("Missing link to send_xv task."); + } + l->t->skip = 0; + + for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_rho task."); + l->t->skip = 0; + + for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_ti task."); + l->t->skip = 0; + + } else if (cj->nodeID != e->nodeID) { + + /* Activate the tasks to recv foreign cell cj's data. */ + cj->recv_xv->skip = 0; + cj->recv_rho->skip = 0; + cj->recv_ti->skip = 0; + + /* Look for the local cell ci's send tasks. */ + struct link *l = NULL; + for (l = ci->send_xv; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) { + abort(); + error("Missing link to send_xv task."); + } + l->t->skip = 0; + + for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_rho task."); + l->t->skip = 0; + + for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_ti task."); + l->t->skip = 0; + } + } /* Kick? */ @@ -1762,7 +1879,6 @@ void engine_print_task_counts(struct engine *e) { * * @param e The #engine. */ - void engine_rebuild(struct engine *e) { const ticks tic = getticks(); @@ -2140,8 +2256,10 @@ void engine_launch(struct engine *e, int nr_runners, unsigned int mask, *forward in time. * * @param e The #engine + * @param flag_entropy_ICs Did the 'Internal Energy' of the particles actually + *contain entropy ? */ -void engine_init_particles(struct engine *e) { +void engine_init_particles(struct engine *e, int flag_entropy_ICs) { struct space *s = e->s; @@ -2198,9 +2316,9 @@ void engine_init_particles(struct engine *e) { /* Add MPI tasks if need be */ if (e->policy & engine_policy_mpi) { - mask |= 1 << task_type_send; mask |= 1 << task_type_recv; + submask |= 1 << task_subtype_tend; } /* Now, launch the calculation */ @@ -2209,7 +2327,7 @@ void engine_init_particles(struct engine *e) { TIMER_TOC(timer_runners); /* Apply some conversions (e.g. internal energy -> entropy) */ - space_map_cells_pre(s, 0, cell_convert_hydro, NULL); + if (!flag_entropy_ICs) space_map_cells_pre(s, 0, cell_convert_hydro, NULL); clocks_gettime(&time2); @@ -2328,9 +2446,9 @@ void engine_step(struct engine *e) { /* Add MPI tasks if need be */ if (e->policy & engine_policy_mpi) { - mask |= 1 << task_type_send; mask |= 1 << task_type_recv; + submask |= 1 << task_subtype_tend; } /* Send off the runners. */ @@ -2494,8 +2612,7 @@ void engine_split(struct engine *e, struct partition *initial_partition) { s->xparts = xparts_new; /* Re-link the gparts. */ - for (size_t k = 0; k < s->nr_parts; k++) - if (s->parts[k].gpart != NULL) s->parts[k].gpart->part = &s->parts[k]; + part_relink_gparts(s->parts, s->nr_parts, 0); /* Re-allocate the local gparts. */ if (e->verbose) @@ -2511,31 +2628,28 @@ void engine_split(struct engine *e, struct partition *initial_partition) { s->gparts = gparts_new; /* Re-link the parts. */ - for (size_t k = 0; k < s->nr_gparts; k++) - if (s->gparts[k].id > 0) s->gparts[k].part->gpart = &s->gparts[k]; + part_relink_parts(s->gparts, s->nr_gparts, s->parts); #ifdef SWIFT_DEBUG_CHECKS - /* Verify that the links are correct */ for (size_t k = 0; k < s->nr_gparts; ++k) { - if (s->gparts[k].id > 0) { + if (s->gparts[k].id_or_neg_offset <= 0) { + + struct part *part = &s->parts[-s->gparts[k].id_or_neg_offset]; - if (s->gparts[k].part->gpart != &s->gparts[k]) error("Linking problem !"); + if (part->gpart != &s->gparts[k]) error("Linking problem !"); - if (s->gparts[k].x[0] != s->gparts[k].part->x[0] || - s->gparts[k].x[1] != s->gparts[k].part->x[1] || - s->gparts[k].x[2] != s->gparts[k].part->x[2]) + if (s->gparts[k].x[0] != part->x[0] || s->gparts[k].x[1] != part->x[1] || + s->gparts[k].x[2] != part->x[2]) error("Linked particles are not at the same position !"); } } for (size_t k = 0; k < s->nr_parts; ++k) { - if (s->parts[k].gpart != NULL) { - if (s->parts[k].gpart->part != &s->parts[k]) error("Linking problem !"); - } + if (s->parts[k].gpart != NULL && s->parts[k].gpart->id_or_neg_offset != -k) + error("Linking problem !"); } - #endif #else diff --git a/src/engine.h b/src/engine.h index d1bfecc568355a9cf5aa57590ccba1df81b05b8a..3dfe22845a66ba02fcc270e13647efd4390c7256 100644 --- a/src/engine.h +++ b/src/engine.h @@ -214,7 +214,7 @@ void engine_launch(struct engine *e, int nr_runners, unsigned int mask, unsigned int submask); void engine_prepare(struct engine *e); void engine_print(struct engine *e); -void engine_init_particles(struct engine *e); +void engine_init_particles(struct engine *e, int flag_entropy_ICs); void engine_step(struct engine *e); void engine_maketasks(struct engine *e); void engine_split(struct engine *e, struct partition *initial_partition); diff --git a/src/gravity/Default/gravity_debug.h b/src/gravity/Default/gravity_debug.h index 21d775d703c8bd7000ad03386fe818124642a8f1..7cf375a1fdf7bccc4131dc415ab2d4acbbf2d3bc 100644 --- a/src/gravity/Default/gravity_debug.h +++ b/src/gravity/Default/gravity_debug.h @@ -18,7 +18,7 @@ ******************************************************************************/ __attribute__((always_inline)) INLINE static void gravity_debug_particle( - struct gpart* p) { + const struct gpart* p) { printf( "x=[%.3e,%.3e,%.3e], " "v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " diff --git a/src/gravity/Default/gravity_io.h b/src/gravity/Default/gravity_io.h index 129c4b39828ca73d2d80d79edbdaa8ec4d5a9e01..74f364dd97361f0513755bedec83fe7cb277c36b 100644 --- a/src/gravity/Default/gravity_io.h +++ b/src/gravity/Default/gravity_io.h @@ -39,8 +39,8 @@ __attribute__((always_inline)) INLINE static void darkmatter_read_particles( COMPULSORY); readArray(h_grp, "Velocities", FLOAT, N, 3, gparts, N_total, offset, v_full, COMPULSORY); - readArray(h_grp, "ParticleIDs", ULONGLONG, N, 1, gparts, N_total, offset, id, - COMPULSORY); + readArray(h_grp, "ParticleIDs", ULONGLONG, N, 1, gparts, N_total, offset, + id_or_neg_offset, COMPULSORY); } /** @@ -75,6 +75,6 @@ __attribute__((always_inline)) INLINE static void darkmatter_write_particles( Ndm, 3, gparts, Ndm_total, mpi_rank, offset, v_full, us, UNIT_CONV_SPEED); writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", - ULONGLONG, Ndm, 1, gparts, Ndm_total, mpi_rank, offset, id, us, - UNIT_CONV_NO_UNITS); + ULONGLONG, Ndm, 1, gparts, Ndm_total, mpi_rank, offset, + id_or_neg_offset, us, UNIT_CONV_NO_UNITS); } diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h index a54fc1d18e7e84a1ca7a7e7913117ed207549271..b6c9e62559207cb25323c8a108e4bffc87ea0fcf 100644 --- a/src/gravity/Default/gravity_part.h +++ b/src/gravity/Default/gravity_part.h @@ -47,14 +47,8 @@ struct gpart { /* float tx; */ /* float tv; */ - /* Anonymous union for id/part. */ - union { - - /* Particle ID. */ - long long id; - - /* Pointer to corresponding SPH part. */ - struct part* part; - }; + /* Particle ID. If negative, it is the negative offset of the #part with + which this gpart is linked. */ + long long id_or_neg_offset; } __attribute__((aligned(gpart_align))); diff --git a/src/hydro/Default/hydro_debug.h b/src/hydro/Default/hydro_debug.h index 79ee392d46ca75a2c097bf045b2d82c9f3dc96c0..9b8136e8349398e4e9e804459a5de23d21043564 100644 --- a/src/hydro/Default/hydro_debug.h +++ b/src/hydro/Default/hydro_debug.h @@ -18,7 +18,7 @@ ******************************************************************************/ __attribute__((always_inline)) INLINE static void hydro_debug_particle( - struct part* p, struct xpart* xp) { + const struct part* p, const struct xpart* xp) { printf( "x=[%.3e,%.3e,%.3e], " "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " diff --git a/src/hydro/Default/hydro_io.h b/src/hydro/Default/hydro_io.h index de71963b05149fa0b2df6222424478a6ad9b1f44..fb1f7af922b2272779c7251b15e304880a2af520 100644 --- a/src/hydro/Default/hydro_io.h +++ b/src/hydro/Default/hydro_io.h @@ -122,3 +122,10 @@ void writeSPHflavour(hid_t h_grpsph) { writeAttribute_f(h_grpsph, "Maximal Delta u change over dt", const_max_u_change); } + +/** + * @brief Are we writing entropy in the internal energy field ? + * + * @return 1 if entropy is in 'internal energy', 0 otherwise. + */ +int writeEntropyFlag() { return 0; } diff --git a/src/hydro/Gadget2/hydro_debug.h b/src/hydro/Gadget2/hydro_debug.h index 31e89d438bc96c0f0f2ba56249664d28036cb607..b67e79182ccaaee7c0421c57a91ec9fa2adae65c 100644 --- a/src/hydro/Gadget2/hydro_debug.h +++ b/src/hydro/Gadget2/hydro_debug.h @@ -18,7 +18,7 @@ ******************************************************************************/ __attribute__((always_inline)) INLINE static void hydro_debug_particle( - struct part* p, struct xpart* xp) { + const struct part* p, const struct xpart* xp) { printf( "x=[%.3e,%.3e,%.3e], " "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " diff --git a/src/hydro/Gadget2/hydro_io.h b/src/hydro/Gadget2/hydro_io.h index b977f25386fab0787c925635297a48fa85a8df24..f7ee04cbbad07ebb06d00ee39a614b9fed5c4dfd 100644 --- a/src/hydro/Gadget2/hydro_io.h +++ b/src/hydro/Gadget2/hydro_io.h @@ -83,8 +83,8 @@ __attribute__((always_inline)) INLINE static void hydro_write_particles( writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "SmoothingLength", FLOAT, N, 1, parts, N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Entropy", FLOAT, N, - 1, parts, N_total, mpi_rank, offset, entropy, us, + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "InternalEnergy", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, entropy, us, UNIT_CONV_ENTROPY_PER_UNIT_MASS); writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", ULONGLONG, N, 1, parts, N_total, mpi_rank, offset, id, us, @@ -113,3 +113,10 @@ void writeSPHflavour(hid_t h_grpsph) { writeAttribute_f(h_grpsph, "Viscosity alpha", const_viscosity_alpha); writeAttribute_f(h_grpsph, "Viscosity beta", 3.f); } + +/** + * @brief Are we writing entropy in the internal energy field ? + * + * @return 1 if entropy is in 'internal energy', 0 otherwise. + */ +int writeEntropyFlag() { return 1; } diff --git a/src/hydro/Minimal/hydro_debug.h b/src/hydro/Minimal/hydro_debug.h index 127ba75e99418b6a5dc197a44ccdc77de3cdef15..1e0e3f1f111149a3babeef431893dff2cbe6bb3c 100644 --- a/src/hydro/Minimal/hydro_debug.h +++ b/src/hydro/Minimal/hydro_debug.h @@ -18,7 +18,7 @@ ******************************************************************************/ __attribute__((always_inline)) INLINE static void hydro_debug_particle( - struct part* p, struct xpart* xp) { + const struct part* p, const struct xpart* xp) { printf( "x=[%.3e,%.3e,%.3e], " "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e], " diff --git a/src/hydro/Minimal/hydro_io.h b/src/hydro/Minimal/hydro_io.h index 1bbfe1065358017e0b27e2131ff717848221fe9c..bed6b8f8221dc3a6e94c65d101c68aa9b3bdea91 100644 --- a/src/hydro/Minimal/hydro_io.h +++ b/src/hydro/Minimal/hydro_io.h @@ -115,3 +115,10 @@ void writeSPHflavour(hid_t h_grpsph) { writeAttribute_f(h_grpsph, "Maximal Delta u change over dt", const_max_u_change); } + +/** + * @brief Are we writing entropy in the internal energy field ? + * + * @return 1 if entropy is in 'internal energy', 0 otherwise. + */ +int writeEntropyFlag() { return 0; } diff --git a/src/parallel_io.c b/src/parallel_io.c index 0430fce658fdb10c41dcf23b09880747811a3950..1411b85b9b4144830aa6a9f37a487b3148a2db21 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -367,8 +367,8 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, */ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, struct gpart** gparts, size_t* Ngas, size_t* Ngparts, - int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, - MPI_Info info, int dry_run) { + int* periodic, int* flag_entropy, int mpi_rank, + int mpi_size, MPI_Comm comm, MPI_Info info, int dry_run) { hid_t h_file = 0, h_grp = 0; /* GADGET has only cubic boxes (in cosmological mode) */ double boxSize[3] = {0.0, -1.0, -1.0}; @@ -404,6 +404,7 @@ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, if (h_grp < 0) error("Error while opening file header\n"); /* Read the relevant information and print status */ + readAttribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy); readAttribute(h_grp, "BoxSize", DOUBLE, boxSize); readAttribute(h_grp, "NumPart_Total", UINT, numParticles); readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord); @@ -626,6 +627,7 @@ void write_output_parallel(struct engine* e, const char* baseName, double MassTable[6] = {0., 0., 0., 0., 0., 0.}; writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, NUM_PARTICLE_TYPES); unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0}; + flagEntropy[0] = writeEntropyFlag(); writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1); diff --git a/src/parallel_io.h b/src/parallel_io.h index 26757cb679e475d6acd2ce3c408135dfe5e49081..709479fab029e862caddb36c4216b85077e96cec 100644 --- a/src/parallel_io.h +++ b/src/parallel_io.h @@ -36,8 +36,8 @@ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, struct gpart** gparts, size_t* Ngas, size_t* Ngparts, - int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, - MPI_Info info, int dry_run); + int* periodic, int* flag_entropy, int mpi_rank, + int mpi_size, MPI_Comm comm, MPI_Info info, int dry_run); void write_output_parallel(struct engine* e, const char* baseName, struct UnitSystem* us, int mpi_rank, int mpi_size, diff --git a/src/part.c b/src/part.c index b89abdde40fe8c7a57d1e9ac9e18fece83ba1f21..b00eaccaae0e86f7c4e8019a307f0bf455687b7c 100644 --- a/src/part.c +++ b/src/part.c @@ -29,6 +29,36 @@ #include "error.h" #include "part.h" +/** + * @brief Re-link the #gpart%s associated with the list of #part%s. + * + * @param parts The list of #part. + * @param N The number of particles to re-link; + * @param offset The offset of #part%s relative to the global parts list. + */ +void part_relink_gparts(struct part *parts, size_t N, ptrdiff_t offset) { + for (size_t k = 0; k < N; k++) { + if (parts[k].gpart) { + parts[k].gpart->id_or_neg_offset = -(k + offset); + } + } +} + +/** + * @brief Re-link the #gpart%s associated with the list of #part%s. + * + * @param gparts The list of #gpart. + * @param N The number of particles to re-link; + * @param parts The global part array in which to find the #gpart offsets. + */ +void part_relink_parts(struct gpart *gparts, size_t N, struct part *parts) { + for (size_t k = 0; k < N; k++) { + if (gparts[k].id_or_neg_offset <= 0) { + parts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; + } + } +} + #ifdef WITH_MPI /* MPI data type for the particle transfers */ MPI_Datatype part_mpi_type; diff --git a/src/part.h b/src/part.h index e99be6e51a9bd74dd9eec8f590e80989e83ec2e1..efca7b6b5bef49f20df1e2c45b30f65ecbbf4960 100644 --- a/src/part.h +++ b/src/part.h @@ -22,6 +22,9 @@ /* Config parameters. */ #include "../config.h" +/* Standard headers. */ +#include <stddef.h> + /* MPI headers. */ #ifdef WITH_MPI #include <mpi.h> @@ -49,6 +52,8 @@ /* Import the right gravity particle definition */ #include "./gravity/Default/gravity_part.h" +void part_relink_gparts(struct part *parts, size_t N, ptrdiff_t offset); +void part_relink_parts(struct gpart *gparts, size_t N, struct part *parts); #ifdef WITH_MPI /* MPI data type for the particle transfers */ extern MPI_Datatype part_mpi_type; diff --git a/src/runner.c b/src/runner.c index 0db16a7a20de1384e8e989d40849f55e78237b2f..a0b8c51cfabf830afd685a210bd78d3f98af62f0 100644 --- a/src/runner.c +++ b/src/runner.c @@ -760,7 +760,7 @@ void runner_do_kick_fixdt(struct runner *r, struct cell *c, int timer) { struct gpart *const gp = &gparts[k]; /* If the g-particle has no counterpart */ - if (gp->id < 0) { + if (gp->id_or_neg_offset > 0) { /* First, finish the force calculation */ gravity_end_force(gp); @@ -872,7 +872,7 @@ void runner_do_kick(struct runner *r, struct cell *c, int timer) { struct gpart *const gp = &gparts[k]; /* If the g-particle has no counterpart and needs to be kicked */ - if (gp->id < 0) { + if (gp->id_or_neg_offset > 0) { if (gp->ti_end <= ti_current) { @@ -980,19 +980,36 @@ void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) { int ti_end_max = 0; float h_max = 0.f; - /* Collect everything... */ - for (size_t k = 0; k < nr_parts; k++) { - const int ti_end = parts[k].ti_end; - // if(ti_end < ti_current) error("Received invalid particle !"); - ti_end_min = min(ti_end_min, ti_end); - ti_end_max = max(ti_end_max, ti_end); - h_max = fmaxf(h_max, parts[k].h); + /* If this cell is a leaf, collect the particle data. */ + if (!c->split) { + + /* Collect everything... */ + for (size_t k = 0; k < nr_parts; k++) { + const int ti_end = parts[k].ti_end; + // if(ti_end < ti_current) error("Received invalid particle !"); + ti_end_min = min(ti_end_min, ti_end); + ti_end_max = max(ti_end_max, ti_end); + h_max = fmaxf(h_max, parts[k].h); + } + for (size_t k = 0; k < nr_gparts; k++) { + const int ti_end = gparts[k].ti_end; + // if(ti_end < ti_current) error("Received invalid particle !"); + ti_end_min = min(ti_end_min, ti_end); + ti_end_max = max(ti_end_max, ti_end); + } + } - for (size_t k = 0; k < nr_gparts; k++) { - const int ti_end = gparts[k].ti_end; - // if(ti_end < ti_current) error("Received invalid particle !"); - ti_end_min = min(ti_end_min, ti_end); - ti_end_max = max(ti_end_max, ti_end); + + /* Otherwise, recurse and collect. */ + else { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + runner_do_recv_cell(r, c->progeny[k], 0); + ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); + ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); + h_max = fmaxf(h_max, c->progeny[k]->h_max); + } + } } /* ... and store. */ @@ -1098,9 +1115,17 @@ void *runner_main(void *data) { runner_do_kick_fixdt(r, ci, 1); break; case task_type_send: + if (t->subtype == task_subtype_tend) { + free(t->buff); + } break; case task_type_recv: - runner_do_recv_cell(r, ci, 1); + if (t->subtype == task_subtype_tend) { + cell_unpack_ti_ends(ci, t->buff); + free(t->buff); + } else { + runner_do_recv_cell(r, ci, 1); + } break; case task_type_grav_external: runner_do_grav_external(r, t->ci, 1); diff --git a/src/scheduler.c b/src/scheduler.c index 39dab82a41197faa2f1d203b730774d7a2b9ea02..d760545ea74ddb288887a800c692136682f70d12 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -565,9 +565,6 @@ struct task *scheduler_addtask(struct scheduler *s, int type, int subtype, t->rid = -1; t->last_rid = -1; - /* Init the lock. */ - lock_init(&t->lock); - /* Add an index for it. */ // lock_lock( &s->lock ); s->tasks_ind[atomic_inc(&s->nr_tasks)] = ind; @@ -811,8 +808,6 @@ void scheduler_reweight(struct scheduler *s) { default: break; } - if (t->type == task_type_send) t->weight = INT_MAX / 8; - if (t->type == task_type_recv) t->weight *= 1.41; } // message( "weighting tasks took %.3f %s." , // clocks_from_ticks( getticks() - tic ), clocks_getunit()); @@ -978,8 +973,14 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_recv: #ifdef WITH_MPI - err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type, - t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + if (t->subtype == task_subtype_tend) { + t->buff = malloc(sizeof(int) * t->ci->pcell_size); + err = MPI_Irecv(t->buff, t->ci->pcell_size, MPI_INT, t->ci->nodeID, + t->flags, MPI_COMM_WORLD, &t->req); + } else { + err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type, + t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + } if (err != MPI_SUCCESS) { mpi_error(err, "Failed to emit irecv for particle data."); } @@ -993,8 +994,15 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_send: #ifdef WITH_MPI - err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type, - t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + if (t->subtype == task_subtype_tend) { + t->buff = malloc(sizeof(int) * t->ci->pcell_size); + cell_pack_ti_ends(t->ci, t->buff); + err = MPI_Isend(t->buff, t->ci->pcell_size, MPI_INT, t->cj->nodeID, + t->flags, MPI_COMM_WORLD, &t->req); + } else { + err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type, + t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + } if (err != MPI_SUCCESS) { mpi_error(err, "Failed to emit isend for particle data."); } diff --git a/src/serial_io.c b/src/serial_io.c index 9a6848dbc55bfd7f049f7f2b74a8743f2106b0d0..fac3c1b006375eb43a4e799f96a4979f26238668 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -424,8 +424,8 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, */ void read_ic_serial(char* fileName, double dim[3], struct part** parts, struct gpart** gparts, size_t* Ngas, size_t* Ngparts, - int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, - MPI_Info info, int dry_run) { + int* periodic, int* flag_entropy, int mpi_rank, + int mpi_size, MPI_Comm comm, MPI_Info info, int dry_run) { hid_t h_file = 0, h_grp = 0; /* GADGET has only cubic boxes (in cosmological mode) */ double boxSize[3] = {0.0, -1.0, -1.0}; @@ -462,6 +462,7 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, if (h_grp < 0) error("Error while opening file header\n"); /* Read the relevant information and print status */ + readAttribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy); readAttribute(h_grp, "BoxSize", DOUBLE, boxSize); readAttribute(h_grp, "NumPart_Total", UINT, numParticles); readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord); @@ -488,6 +489,7 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, } /* Now need to broadcast that information to all ranks. */ + MPI_Bcast(flag_entropy, 1, MPI_INT, 0, comm); MPI_Bcast(periodic, 1, MPI_INT, 0, comm); MPI_Bcast(&N_total, NUM_PARTICLE_TYPES, MPI_LONG_LONG, 0, comm); MPI_Bcast(dim, 3, MPI_DOUBLE, 0, comm); @@ -700,6 +702,7 @@ void write_output_serial(struct engine* e, const char* baseName, double MassTable[6] = {0., 0., 0., 0., 0., 0.}; writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, NUM_PARTICLE_TYPES); unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0}; + flagEntropy[0] = writeEntropyFlag(); writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1); diff --git a/src/serial_io.h b/src/serial_io.h index 6b64624772214494a43316d3a8aa3910c7238dc8..fa88d5cb87edfba2c101b6096b8a57b04b7178cd 100644 --- a/src/serial_io.h +++ b/src/serial_io.h @@ -36,8 +36,8 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, struct gpart** gparts, size_t* Ngas, size_t* Ngparts, - int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, - MPI_Info info, int dry_run); + int* periodic, int* flag_entropy, int mpi_rank, + int mpi_size, MPI_Comm comm, MPI_Info info, int dry_run); void write_output_serial(struct engine* e, const char* baseName, struct UnitSystem* us, int mpi_rank, int mpi_size, diff --git a/src/single_io.c b/src/single_io.c index d7c39cb729a9c52cd0413ee14410b3a2157196c0..329ec7253247a6bec992e9cb740d322fa3048a01 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -322,6 +322,8 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, * @param Ngas (output) number of Gas particles read. * @param Ngparts (output) The number of #gpart read. * @param periodic (output) 1 if the volume is periodic, 0 if not. + * @param flag_entropy 1 if the ICs contained Entropy in the InternalEnergy + * field * @param dry_run If 1, don't read the particle. Only allocates the arrays. * * Opens the HDF5 file fileName and reads the particles contained @@ -334,7 +336,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, */ void read_ic_single(char* fileName, double dim[3], struct part** parts, struct gpart** gparts, size_t* Ngas, size_t* Ngparts, - int* periodic, int dry_run) { + int* periodic, int* flag_entropy, int dry_run) { hid_t h_file = 0, h_grp = 0; /* GADGET has only cubic boxes (in cosmological mode) */ double boxSize[3] = {0.0, -1.0, -1.0}; @@ -368,6 +370,7 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, if (h_grp < 0) error("Error while opening file header\n"); /* Read the relevant information and print status */ + readAttribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy); readAttribute(h_grp, "BoxSize", DOUBLE, boxSize); readAttribute(h_grp, "NumPart_Total", UINT, numParticles); readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord); @@ -549,6 +552,7 @@ void write_output_single(struct engine* e, const char* baseName, double MassTable[NUM_PARTICLE_TYPES] = {0}; writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, NUM_PARTICLE_TYPES); unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0}; + flagEntropy[0] = writeEntropyFlag(); writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1); diff --git a/src/single_io.h b/src/single_io.h index d2c87655e1c91b92e8ccf2aa50d2e0bf98f13482..4936aa3eb35a5c76a5a8f242c9cc79bbc86161a0 100644 --- a/src/single_io.h +++ b/src/single_io.h @@ -31,7 +31,7 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, struct gpart** gparts, size_t* Ngas, size_t* Ndm, - int* periodic, int dry_run); + int* periodic, int* flag_entropy, int dry_run); void write_output_single(struct engine* e, const char* baseName, struct UnitSystem* us); diff --git a/src/space.c b/src/space.c index c4173a58895bb36ca7f81a69128574abd9700f2f..0b42a3643f5740881da85e0533c02f7d8a0ee958 100644 --- a/src/space.c +++ b/src/space.c @@ -442,10 +442,10 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { s->parts[k] = s->parts[nr_parts]; s->parts[nr_parts] = tp; if (s->parts[k].gpart != NULL) { - s->parts[k].gpart->part = &s->parts[k]; + s->parts[k].gpart->id_or_neg_offset = -k; } if (s->parts[nr_parts].gpart != NULL) { - s->parts[nr_parts].gpart->part = &s->parts[nr_parts]; + s->parts[nr_parts].gpart->id_or_neg_offset = -nr_parts; } const struct xpart txp = s->xparts[k]; s->xparts[k] = s->xparts[nr_parts]; @@ -481,11 +481,12 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { const struct gpart tp = s->gparts[k]; s->gparts[k] = s->gparts[nr_gparts]; s->gparts[nr_gparts] = tp; - if (s->gparts[k].id > 0) { - s->gparts[k].part->gpart = &s->gparts[k]; + if (s->gparts[k].id_or_neg_offset <= 0) { + s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; } - if (s->gparts[nr_gparts].id > 0) { - s->gparts[nr_gparts].part->gpart = &s->gparts[nr_gparts]; + if (s->gparts[nr_gparts].id_or_neg_offset <= 0) { + s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = + &s->gparts[nr_gparts]; } const int t = gind[k]; gind[k] = gind[nr_gparts]; @@ -551,8 +552,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose); /* Re-link the gparts. */ - for (size_t k = 0; k < nr_parts; k++) - if (s->parts[k].gpart != NULL) s->parts[k].gpart->part = &s->parts[k]; + part_relink_gparts(s->parts, nr_parts, 0); #ifdef SWIFT_DEBUG_CHECKS /* Verify space_sort_struct. */ @@ -600,8 +600,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose); /* Re-link the parts. */ - for (int k = 0; k < nr_gparts; k++) - if (s->gparts[k].id > 0) s->gparts[k].part->gpart = &s->gparts[k]; + part_relink_parts(s->gparts, nr_gparts, s->parts); /* We no longer need the indices as of here. */ free(gind); @@ -610,21 +609,22 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* Verify that the links are correct */ for (size_t k = 0; k < nr_gparts; ++k) { - if (s->gparts[k].id > 0) { + if (s->gparts[k].id_or_neg_offset < 0) { - if (s->gparts[k].part->gpart != &s->gparts[k]) error("Linking problem !"); + const struct part *part = &s->parts[-s->gparts[k].id_or_neg_offset]; - if (s->gparts[k].x[0] != s->gparts[k].part->x[0] || - s->gparts[k].x[1] != s->gparts[k].part->x[1] || - s->gparts[k].x[2] != s->gparts[k].part->x[2]) + if (part->gpart != &s->gparts[k]) error("Linking problem !"); + + if (s->gparts[k].x[0] != part->x[0] || s->gparts[k].x[1] != part->x[1] || + s->gparts[k].x[2] != part->x[2]) error("Linked particles are not at the same position !"); } } for (size_t k = 0; k < nr_parts; ++k) { - if (s->parts[k].gpart != NULL) { - - if (s->parts[k].gpart->part != &s->parts[k]) error("Linking problem !"); + if (s->parts[k].gpart != NULL && + s->parts[k].gpart->id_or_neg_offset != -k) { + error("Linking problem !"); } } #endif @@ -1268,7 +1268,7 @@ void space_do_split(struct space *s, struct cell *c) { } /* Split the cell data. */ - cell_split(c); + cell_split(c, c->parts - s->parts); /* Remove any progeny with zero parts. */ for (int k = 0; k < 8; k++) diff --git a/src/task.c b/src/task.c index 77c5a944c8ca23e870624770c1e0cde4bf6195be..f3c6a8f711df1a0506567216d1d4021c88c10f58 100644 --- a/src/task.c +++ b/src/task.c @@ -54,7 +54,7 @@ const char *taskID_names[task_type_count] = { "split_cell", "rewait"}; const char *subtaskID_names[task_type_count] = {"none", "density", "force", - "grav"}; + "grav", "t_end"}; /** * @brief Computes the overlap between the parts array of two given cells. @@ -213,77 +213,6 @@ int task_lock(struct task *t) { return 1; } -/** - * @brief Remove all unlocks to tasks that are of the given type. - * - * @param t The #task. - * @param type The task type ID to remove. - */ - -void task_cleanunlock(struct task *t, int type) { - - int k; - - lock_lock(&t->lock); - - for (k = 0; k < t->nr_unlock_tasks; k++) - if (t->unlock_tasks[k]->type == type) { - t->nr_unlock_tasks -= 1; - t->unlock_tasks[k] = t->unlock_tasks[t->nr_unlock_tasks]; - } - - lock_unlock_blind(&t->lock); -} - -/** - * @brief Remove an unlock_task from the given task. - * - * @param ta The unlocking #task. - * @param tb The #task that will be unlocked. - */ - -void task_rmunlock(struct task *ta, struct task *tb) { - - int k; - - lock_lock(&ta->lock); - - for (k = 0; k < ta->nr_unlock_tasks; k++) - if (ta->unlock_tasks[k] == tb) { - ta->nr_unlock_tasks -= 1; - ta->unlock_tasks[k] = ta->unlock_tasks[ta->nr_unlock_tasks]; - lock_unlock_blind(&ta->lock); - return; - } - error("Task not found."); -} - -/** - * @brief Remove an unlock_task from the given task. - * - * @param ta The unlocking #task. - * @param tb The #task that will be unlocked. - * - * Differs from #task_rmunlock in that it will not fail if - * the task @c tb is not in the unlocks of @c ta. - */ - -void task_rmunlock_blind(struct task *ta, struct task *tb) { - - int k; - - lock_lock(&ta->lock); - - for (k = 0; k < ta->nr_unlock_tasks; k++) - if (ta->unlock_tasks[k] == tb) { - ta->nr_unlock_tasks -= 1; - ta->unlock_tasks[k] = ta->unlock_tasks[ta->nr_unlock_tasks]; - break; - } - - lock_unlock_blind(&ta->lock); -} - /** * @brief Prints the list of tasks contained in a given mask * diff --git a/src/task.h b/src/task.h index 51a44b3127694a60da772ca4e728073b15ac1147..8dea503e42cdba9a188112cfa668c464c69f9959 100644 --- a/src/task.h +++ b/src/task.h @@ -66,6 +66,7 @@ enum task_subtypes { task_subtype_density, task_subtype_force, task_subtype_grav, + task_subtype_tend, task_subtype_count }; @@ -79,10 +80,10 @@ struct task { char skip, tight, implicit; int flags, wait, rank, weight; - swift_lock_type lock; - struct cell *ci, *cj; + void *buff; + #ifdef WITH_MPI MPI_Request req; #endif @@ -95,9 +96,6 @@ struct task { }; /* Function prototypes. */ -void task_rmunlock(struct task *ta, struct task *tb); -void task_rmunlock_blind(struct task *ta, struct task *tb); -void task_cleanunlock(struct task *t, int type); void task_unlock(struct task *t); float task_overlap(const struct task *ta, const struct task *tb); int task_lock(struct task *t); diff --git a/src/tools.c b/src/tools.c index 1a2b794f688047183827e5c2ed6ba80ba1339080..710efd7e4dd54784ed735492b4e1af0b466ef682 100644 --- a/src/tools.c +++ b/src/tools.c @@ -181,16 +181,19 @@ void pairs_single_density(double *dim, long long int pid, } void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { + float r2, hi, hj, hig2, hjg2, dx[3]; struct part *pi, *pj; /* Implements a double-for loop and checks every interaction */ for (int i = 0; i < ci->count; ++i) { + pi = &ci->parts[i]; hi = pi->h; hig2 = hi * hi * kernel_gamma2; for (int j = 0; j < cj->count; ++j) { + pj = &cj->parts[j]; /* Pairwise distance */ @@ -202,6 +205,7 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hig2) { + /* Interact */ runner_iact_nonsym_density(r2, dx, hi, pj->h, pi, pj); } @@ -210,11 +214,13 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { /* Reverse double-for loop and checks every interaction */ for (int j = 0; j < cj->count; ++j) { + pj = &cj->parts[j]; hj = pj->h; hjg2 = hj * hj * kernel_gamma2; for (int i = 0; i < ci->count; ++i) { + pi = &ci->parts[i]; /* Pairwise distance */ @@ -226,6 +232,7 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hjg2) { + /* Interact */ runner_iact_nonsym_density(r2, dx, hj, pi->h, pj, pi); } @@ -239,11 +246,13 @@ void self_all_density(struct runner *r, struct cell *ci) { /* Implements a double-for loop and checks every interaction */ for (int i = 0; i < ci->count; ++i) { + pi = &ci->parts[i]; hi = pi->h; hig2 = hi * hi * kernel_gamma2; for (int j = i + 1; j < ci->count; ++j) { + pj = &ci->parts[j]; hj = pj->h; hjg2 = hj * hj * kernel_gamma2; @@ -259,12 +268,14 @@ void self_all_density(struct runner *r, struct cell *ci) { /* Hit or miss? */ if (r2 < hig2) { + /* Interact */ runner_iact_nonsym_density(r2, dxi, hi, hj, pi, pj); } /* Hit or miss? */ if (r2 < hjg2) { + dxi[0] = -dxi[0]; dxi[1] = -dxi[1]; dxi[2] = -dxi[2]; @@ -277,7 +288,9 @@ void self_all_density(struct runner *r, struct cell *ci) { } void pairs_single_grav(double *dim, long long int pid, - struct gpart *restrict parts, int N, int periodic) { + struct gpart *restrict gparts, const struct part *parts, + int N, int periodic) { + int i, k; // int mj, mk; // double maxratio = 1.0; @@ -288,18 +301,20 @@ void pairs_single_grav(double *dim, long long int pid, /* Find "our" part. */ for (k = 0; k < N; k++) - if ((parts[k].id > 0 && parts[k].part->id == pid) || parts[k].id == -pid) + if ((gparts[k].id_or_neg_offset < 0 && + parts[-gparts[k].id_or_neg_offset].id == pid) || + gparts[k].id_or_neg_offset == pid) break; if (k == N) error("Part not found."); - pi = parts[k]; + pi = gparts[k]; pi.a_grav[0] = 0.0f; pi.a_grav[1] = 0.0f; pi.a_grav[2] = 0.0f; /* Loop over all particle pairs. */ for (k = 0; k < N; k++) { - if (parts[k].id == pi.id) continue; - pj = parts[k]; + if (gparts[k].id_or_neg_offset == pi.id_or_neg_offset) continue; + pj = gparts[k]; for (i = 0; i < 3; i++) { dx[i] = pi.x[i] - pj.x[i]; if (periodic) { @@ -326,7 +341,8 @@ void pairs_single_grav(double *dim, long long int pid, /* Dump the result. */ message( "acceleration on gpart %lli is a=[ %e %e %e ], |a|=[ %.2e %.2e %.2e ].\n", - pi.part->id, a[0], a[1], a[2], aabs[0], aabs[1], aabs[2]); + parts[-pi.id_or_neg_offset].id, a[0], a[1], a[2], aabs[0], aabs[1], + aabs[2]); } /** @@ -335,6 +351,7 @@ void pairs_single_grav(double *dim, long long int pid, * @param N number of intervals in [0,1]. */ void density_dump(int N) { + int k; float r2[4] = {0.0f, 0.0f, 0.0f, 0.0f}, hi[4], hj[4]; struct part /**pi[4], *pj[4],*/ Pi[4], Pj[4]; diff --git a/src/tools.h b/src/tools.h index 2a4462f274d8e9acd1f2d8e79996ad92c630d404..97f036994bc0d1e2b4a95d806cfdbd253664a260 100644 --- a/src/tools.h +++ b/src/tools.h @@ -33,7 +33,8 @@ void factor(int value, int *f1, int *f2); void density_dump(int N); void pairs_single_grav(double *dim, long long int pid, - struct gpart *restrict parts, int N, int periodic); + struct gpart *restrict gparts, const struct part *parts, + int N, int periodic); void pairs_single_density(double *dim, long long int pid, struct part *restrict parts, int N, int periodic);