diff --git a/.gitignore b/.gitignore
index 9a56843112c8214fc4dbce8efdf3fc23aa7e5919..9d0c3e8218a7672d5986f764f713ae299aaee44b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,6 +28,8 @@
 examples/*.xmf
 examples/used_parameters.yml
 examples/energy.txt
 examples/*/*.xmf
+examples/*/*.dat
+examples/*/*.hdf5
 examples/*/used_parameters.yml
 examples/*/energy.txt
diff --git a/README b/README
index b0a21c610f7ab4c380e523b51c6f74b81f98a2f2..cd2a397a18e872e7914b24fd58cc588ec1d6c8c0 100644
--- a/README
+++ b/README
@@ -26,6 +26,8 @@ Valid options are:
  -f {int}  Overwrite the CPU frequency (Hz) to be used for time measurements
  -g        Run with an external gravitational potential
  -G        Run with self-gravity
+ -n {int}  Execute a fixed number of time steps. When unset, use the time_end
+           parameter to stop.
  -s        Run with SPH
  -t {int}  The number of threads to use on each MPI rank. Defaults to 1 if not specified.
  -v [12]   Increase the level of verbosity 1: MPI-rank 0 writes
diff --git a/configure.ac b/configure.ac
index 497107121753f212dd8b07f5a8e8eed7acdf82b5..23fbf5c6e4c4211379986a4a9c951892d044713c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -72,9 +72,6 @@ if test "$enable_ipo" = "yes"; then
    fi
 fi
 
-# Add libtool support.
-LT_INIT
-
 # Check for MPI. Need to do this before characterising the compiler (C99 mode),
 # as this changes the compiler.
 # We should consider using AX_PROG_CC_MPI to replace AC_PROG_CC when compiling
@@ -151,6 +148,9 @@ AM_CONDITIONAL([HAVEMPI],[test $enable_mpi = "yes"])
 # Indicate that MPIRUN can be modified by an environment variable
 AC_ARG_VAR(MPIRUN, Path to the mpirun command if non-standard)
 
+# Add libtool support (now that CC is defined).
+LT_INIT
+
 # Need C99 and inline support.
 AC_PROG_CC_C99
 AC_C_INLINE
@@ -179,7 +179,7 @@ if test "$enable_opt" = "yes" ; then
    ac_test_CFLAGS="yes"
    CFLAGS="$old_CFLAGS $CFLAGS"
 
-   # Check SSE & AVX support (some overlap with AX_CC_MAXOPT). 
+   # Check SSE & AVX support (some overlap with AX_CC_MAXOPT).
    # Don't use the SIMD_FLAGS result with Intel compilers. The -x<code>
    # value from AX_CC_MAXOPT should be sufficient.
    AX_EXT
@@ -287,12 +287,75 @@ AC_SUBST([METIS_LIBS])
 AC_SUBST([METIS_INCS])
 AM_CONDITIONAL([HAVEMETIS],[test -n "$METIS_LIBS"])
 
-# # Check for zlib.
-# AC_CHECK_LIB([z],[gzopen],[
-#    AC_DEFINE([HAVE_LIBZ],[1],[Set to 1 if zlib is installed.])
-#    LDFLAGS="$LDFLAGS -lz"
-# ],[])
+# Check for tcmalloc, a fast malloc that is part of gperftools.
+have_tcmalloc="no"
+AC_ARG_WITH([tcmalloc],
+   [AS_HELP_STRING([--with-tcmalloc],
+      [use tcmalloc library or specify the directory with lib @<:@yes/no@:>@]
+   )],
+   [with_tcmalloc="$withval"],
+   [with_tcmalloc="no"]
+)
+if test "x$with_tcmalloc" != "xno"; then
+   if test "x$with_tcmalloc" != "xyes" && test "x$with_tcmalloc" != "x"; then
+      tclibs="-L$with_tcmalloc -ltcmalloc"
+   else
+      tclibs="-ltcmalloc"
+   fi
+   AC_CHECK_LIB([tcmalloc],[tc_cfree],[have_tcmalloc="yes"],[have_tcmalloc="no"],
+                $tclibs)
+
+   # Could just have the minimal version.
+   if test "$have_tcmalloc" = "no"; then
+      if test "x$with_tcmalloc" != "xyes" && test "x$with_tcmalloc" != "x"; then
+         tclibs="-L$with_tcmalloc -ltcmalloc_minimal"
+      else
+         tclibs="-ltcmalloc_minimal"
+      fi
+      AC_CHECK_LIB([tcmalloc],[tc_cfree],[have_tcmalloc="yes"],[have_tcmalloc="no"],
+                   $tclibs)
+   fi
+   if test "$have_tcmalloc" = "yes"; then
+      TCMALLOC_LIBS="$tclibs"
+
+      # These are recommended for GCC.
+ if test "$ax_cv_c_compiler_vendor" = "gnu"; then + CFLAGS="$CFLAGS -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free" + fi + else + TCMALLOC_LIBS="" + fi +fi +AC_SUBST([TCMALLOC_LIBS]) +AM_CONDITIONAL([HAVETCMALLOC],[test -n "$TCMALLOC_LIBS"]) + +# Check for -lprofiler usually part of the gpreftools along with tcmalloc. +have_profiler="no" +AC_ARG_WITH([profiler], + [AS_HELP_STRING([--with-profiler], + [use cpu profiler library or specify the directory with lib @<:@yes/no@:>@] + )], + [with_profiler="$withval"], + [with_profiler="yes"] +) +if test "x$with_profiler" != "xno"; then + if test "x$with_profiler" != "xyes" && test "x$with_profiler" != "x"; then + proflibs="-L$with_profiler -lprofiler" + else + proflibs="-lprofiler" + fi + AC_CHECK_LIB([profiler],[ProfilerFlush],[have_profiler="yes"],[have_profiler="no"], + $proflibs) + + if test "$have_profiler" = "yes"; then + PROFILER_LIBS="$proflibs" + else + PROFILER_LIBS="" + fi +fi +AC_SUBST([PROFILER_LIBS]) +AM_CONDITIONAL([HAVEPROFILER],[test -n "$PROFILER_LIBS"]) # Check for HDF5. This is required. AX_LIB_HDF5 @@ -410,6 +473,8 @@ AC_MSG_RESULT([ - parallel : $have_parallel_hdf5 Metis enabled : $have_metis libNUMA enabled : $have_numa + Using tcmalloc : $have_tcmalloc + CPU profiler : $have_profiler ]) # Generate output. diff --git a/examples/BigCosmoVolume/makeIC.py b/examples/BigCosmoVolume/makeIC.py index 3020feaf753f817f039d2fd09c4fa4f7fb69b896..411ac54b41fadc4209b314b5b9976e5ac95d8000 100644 --- a/examples/BigCosmoVolume/makeIC.py +++ b/examples/BigCosmoVolume/makeIC.py @@ -47,7 +47,7 @@ if downsample > 1. or downsample <= 0.: # Download the tile if (not os.path.isfile("tile.hdf5")): print "Downloading initial tile..." - urllib.urlretrieve ("http://icc.dur.ac.uk/~jlvc76/Files/SWIFT/tile.hdf5", "tile.hdf5") + urllib.urlretrieve ("http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/tile.hdf5", "tile.hdf5") print "Done." else: print "Tile already exists. No need to download..." diff --git a/examples/CosmoVolume/getIC.sh b/examples/CosmoVolume/getIC.sh index 5b951fc247006f0378bcee2b74a9619b7b1acc7d..fdaddd1d1f09a941244f520fb368aaed8f2316b4 100755 --- a/examples/CosmoVolume/getIC.sh +++ b/examples/CosmoVolume/getIC.sh @@ -1,2 +1,2 @@ #!/bin/bash -wget http://icc.dur.ac.uk/~jlvc76/Files/SWIFT/cosmoVolume.hdf5 +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/cosmoVolume.hdf5 diff --git a/examples/EAGLE_12/README b/examples/EAGLE_12/README new file mode 100644 index 0000000000000000000000000000000000000000..7bdbfd929dac87950c6ac4b672591a75877e3b7b --- /dev/null +++ b/examples/EAGLE_12/README @@ -0,0 +1,13 @@ +ICs extracted from the EAGLE suite of simulations. + +WARNING: The ICs are 460MB in size. They contain ~6.5M DM particles, +~6M gas particles and ~250k star particles. + +The particle distribution here is the snapshot 27 (z=0.1) of the 12Mpc +Ref-model. h- and a- factors from the original Gadget code have been +corrected for. Variables not used in a pure hydro & gravity code have +been removed. Everything is ready to be run without cosmological +integration. + +The particle load of the main EAGLE simulation can be reproduced by +running these ICs on 8 cores. diff --git a/examples/EAGLE_12/eagle_12.yml b/examples/EAGLE_12/eagle_12.yml new file mode 100644 index 0000000000000000000000000000000000000000..778d04fdd02004fd10ac468321568069e39dd217 --- /dev/null +++ b/examples/EAGLE_12/eagle_12.yml @@ -0,0 +1,46 @@ +# Define the system of units to use internally. 
diff --git a/examples/EAGLE_12/eagle_12.yml b/examples/EAGLE_12/eagle_12.yml
new file mode 100644
index 0000000000000000000000000000000000000000..778d04fdd02004fd10ac468321568069e39dd217
--- /dev/null
+++ b/examples/EAGLE_12/eagle_12.yml
@@ -0,0 +1,46 @@
+# Define the system of units to use internally.
+InternalUnitSystem:
+  UnitMass_in_cgs:     1 # Grams
+  UnitLength_in_cgs:   1 # Centimeters
+  UnitVelocity_in_cgs: 1 # Centimeters per second
+  UnitCurrent_in_cgs:  1 # Amperes
+  UnitTemp_in_cgs:     1 # Kelvin
+
+# Parameters for the task scheduling
+Scheduler:
+  cell_sub_size:   6000 # Value used for the original scaling tests
+  cell_split_size: 300  # Value used for the original scaling tests
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.   # The starting time of the simulation (in internal units).
+  time_end:   1.   # The end time of the simulation (in internal units).
+  dt_min:     1e-7 # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1e-2 # The maximal time-step size of the simulation (in internal units).
+
+# Parameters governing the snapshots
+Snapshots:
+  basename:   eagle # Common part of the name of output files
+  time_first: 0.    # Time of the first output (in internal units)
+  delta_time: 0.05  # Time difference between consecutive outputs (in internal units)
+  UnitMass_in_cgs:     1 # Grams
+  UnitLength_in_cgs:   1 # Centimeters
+  UnitVelocity_in_cgs: 1 # Centimeters per second
+  UnitCurrent_in_cgs:  1 # Amperes
+  UnitTemp_in_cgs:     1 # Kelvin
+
+# Parameters governing the conserved quantities statistics
+Statistics:
+  delta_time: 1e-2 # Time between statistics output
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:       1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:     0.1    # The tolerance for the targeted number of neighbours.
+  max_smoothing_length: 0.1    # Maximal smoothing length allowed (in internal units).
+  CFL_condition:        0.1    # Courant-Friedrichs-Lewy condition for time integration.
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name: ./EAGLE_ICs_12.hdf5 # The file to read
+
diff --git a/examples/EAGLE_12/getIC.sh b/examples/EAGLE_12/getIC.sh
new file mode 100755
index 0000000000000000000000000000000000000000..1983a1c19fbfd67d2a13d7a59847423d217f0e4e
--- /dev/null
+++ b/examples/EAGLE_12/getIC.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/EAGLE_ICs_12.hdf5
diff --git a/examples/EAGLE_12/run.sh b/examples/EAGLE_12/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..58c92adc0a1afadf0d15f81613d749f6e736f211
--- /dev/null
+++ b/examples/EAGLE_12/run.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# Generate the initial conditions if they are not present.
+if [ ! -e EAGLE_ICs_12.hdf5 ]
+then
+    echo "Fetching initial conditions for the EAGLE 12Mpc example..."
+    ./getIC.sh
+fi
+
+../swift -s -t 16 eagle_12.yml
+
diff --git a/examples/EAGLE_25/README b/examples/EAGLE_25/README
new file mode 100644
index 0000000000000000000000000000000000000000..0af58d16d9ca101fb2f641b1493fdc8268184ff4
--- /dev/null
+++ b/examples/EAGLE_25/README
@@ -0,0 +1,13 @@
+ICs extracted from the EAGLE suite of simulations.
+
+WARNING: The ICs are 3.6GB in size. They contain ~52M DM particles,
+~50M gas particles and ~2M star particles.
+
+The particle distribution here is the snapshot 27 (z=0.1) of the 25Mpc
+Ref-model. h- and a- factors from the original Gadget code have been
+corrected for. Variables not used in a pure hydro & gravity code have
+been removed.
+Everything is ready to be run without cosmological integration.
+
+The particle load of the main EAGLE simulation can be reproduced by
+running these ICs on 64 cores.
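[Annotation: the SPH blocks in these parameter files tie resolution_eta to a neighbour count ("1.2348 == 48Ngbs with the cubic spline kernel"). A small self-contained check of that relation, assuming the standard SPH definition N_ngb = (4/3) pi (gamma * eta)^3 with the 3D cubic-spline cut-off gamma ~ 1.825742 — values taken from common SPH conventions, not from this patch:]

```c
#include <math.h>
#include <stdio.h>

/* Neighbour number implied by a resolution eta:
 * N_ngb = (4/3) * pi * (gamma * eta)^3,
 * where gamma is the kernel cut-off in units of the smoothing length. */
static double eta_to_neighbours(double eta, double kernel_gamma) {
  const double x = kernel_gamma * eta;
  return 4.0 * M_PI / 3.0 * x * x * x;
}

int main(void) {
  /* The cubic-spline gamma ~= 1.825742 reproduces the 48 neighbours
   * quoted in the comments of these parameter files. */
  printf("N_ngb = %.1f\n", eta_to_neighbours(1.2348, 1.825742));
  return 0;
}
```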
diff --git a/examples/EAGLE_25/eagle_25.yml b/examples/EAGLE_25/eagle_25.yml
new file mode 100644
index 0000000000000000000000000000000000000000..731d87ea360ced596c4190880c62fdfb1cd605f8
--- /dev/null
+++ b/examples/EAGLE_25/eagle_25.yml
@@ -0,0 +1,46 @@
+# Define the system of units to use internally.
+InternalUnitSystem:
+  UnitMass_in_cgs:     1 # Grams
+  UnitLength_in_cgs:   1 # Centimeters
+  UnitVelocity_in_cgs: 1 # Centimeters per second
+  UnitCurrent_in_cgs:  1 # Amperes
+  UnitTemp_in_cgs:     1 # Kelvin
+
+# Parameters for the task scheduling
+Scheduler:
+  cell_sub_size:   6000 # Value used for the original scaling tests
+  cell_split_size: 300  # Value used for the original scaling tests
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.   # The starting time of the simulation (in internal units).
+  time_end:   1.   # The end time of the simulation (in internal units).
+  dt_min:     1e-7 # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1e-2 # The maximal time-step size of the simulation (in internal units).
+
+# Parameters governing the snapshots
+Snapshots:
+  basename:   eagle # Common part of the name of output files
+  time_first: 0.    # Time of the first output (in internal units)
+  delta_time: 0.05  # Time difference between consecutive outputs (in internal units)
+  UnitMass_in_cgs:     1 # Grams
+  UnitLength_in_cgs:   1 # Centimeters
+  UnitVelocity_in_cgs: 1 # Centimeters per second
+  UnitCurrent_in_cgs:  1 # Amperes
+  UnitTemp_in_cgs:     1 # Kelvin
+
+# Parameters governing the conserved quantities statistics
+Statistics:
+  delta_time: 1e-2 # Time between statistics output
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:       1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:     0.1    # The tolerance for the targeted number of neighbours.
+  max_smoothing_length: 0.1    # Maximal smoothing length allowed (in internal units).
+  CFL_condition:        0.1    # Courant-Friedrichs-Lewy condition for time integration.
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name: ./EAGLE_ICs_25.hdf5 # The file to read
+
diff --git a/examples/EAGLE_25/getIC.sh b/examples/EAGLE_25/getIC.sh
new file mode 100755
index 0000000000000000000000000000000000000000..4577db3a351f5b9ce16962897c664cd12108b01c
--- /dev/null
+++ b/examples/EAGLE_25/getIC.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/EAGLE_ICs_25.hdf5
diff --git a/examples/EAGLE_25/run.sh b/examples/EAGLE_25/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..f232cf98f772ab9900bd22b635090e3c103749b5
--- /dev/null
+++ b/examples/EAGLE_25/run.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# Generate the initial conditions if they are not present.
+if [ ! -e EAGLE_ICs_25.hdf5 ]
+then
+    echo "Fetching initial conditions for the EAGLE 25Mpc example..."
+    ./getIC.sh
+fi
+
+../swift -s -t 16 eagle_25.yml
+
diff --git a/examples/EAGLE_50/README b/examples/EAGLE_50/README
new file mode 100644
index 0000000000000000000000000000000000000000..50a8ec6b8eddbbcf34a9b36b76ab34ad7d1f5401
--- /dev/null
+++ b/examples/EAGLE_50/README
@@ -0,0 +1,13 @@
+ICs extracted from the EAGLE suite of simulations.
+
+WARNING: The ICs are 28GB in size. They contain ~425M DM particles,
+~405M gas particles and ~20M star particles.
+
+The particle distribution here is the snapshot 27 (z=0.1) of the 50Mpc
+Ref-model. h- and a- factors from the original Gadget code have been
+corrected for. Variables not used in a pure hydro & gravity code have
+been removed.
+Everything is ready to be run without cosmological integration.
+
+The particle load of the main EAGLE simulation can be reproduced by
+running these ICs on 512 cores.
diff --git a/examples/EAGLE_50/eagle_50.yml b/examples/EAGLE_50/eagle_50.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1b1f4e4dfa0559f4fcf18d99c0fc649a3b8f5307
--- /dev/null
+++ b/examples/EAGLE_50/eagle_50.yml
@@ -0,0 +1,46 @@
+# Define the system of units to use internally.
+InternalUnitSystem:
+  UnitMass_in_cgs:     1 # Grams
+  UnitLength_in_cgs:   1 # Centimeters
+  UnitVelocity_in_cgs: 1 # Centimeters per second
+  UnitCurrent_in_cgs:  1 # Amperes
+  UnitTemp_in_cgs:     1 # Kelvin
+
+# Parameters for the task scheduling
+Scheduler:
+  cell_sub_size:   6000 # Value used for the original scaling tests
+  cell_split_size: 300  # Value used for the original scaling tests
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.   # The starting time of the simulation (in internal units).
+  time_end:   1.   # The end time of the simulation (in internal units).
+  dt_min:     1e-7 # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1e-2 # The maximal time-step size of the simulation (in internal units).
+
+# Parameters governing the snapshots
+Snapshots:
+  basename:   eagle # Common part of the name of output files
+  time_first: 0.    # Time of the first output (in internal units)
+  delta_time: 0.05  # Time difference between consecutive outputs (in internal units)
+  UnitMass_in_cgs:     1 # Grams
+  UnitLength_in_cgs:   1 # Centimeters
+  UnitVelocity_in_cgs: 1 # Centimeters per second
+  UnitCurrent_in_cgs:  1 # Amperes
+  UnitTemp_in_cgs:     1 # Kelvin
+
+# Parameters governing the conserved quantities statistics
+Statistics:
+  delta_time: 1e-2 # Time between statistics output
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:       1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:     0.1    # The tolerance for the targeted number of neighbours.
+  max_smoothing_length: 0.1    # Maximal smoothing length allowed (in internal units).
+  CFL_condition:        0.1    # Courant-Friedrichs-Lewy condition for time integration.
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name: ./EAGLE_ICs_50.hdf5 # The file to read
+
diff --git a/examples/EAGLE_50/getIC.sh b/examples/EAGLE_50/getIC.sh
new file mode 100755
index 0000000000000000000000000000000000000000..f898a02fac4f66f1d186d61a8d48d7b1f81a2af4
--- /dev/null
+++ b/examples/EAGLE_50/getIC.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/EAGLE_ICs_50.hdf5
diff --git a/examples/EAGLE_50/run.sh b/examples/EAGLE_50/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..87d522be6bd95fa503d437af861a68ce8bf85b4c
--- /dev/null
+++ b/examples/EAGLE_50/run.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# Generate the initial conditions if they are not present.
+if [ ! -e EAGLE_ICs_50.hdf5 ]
+then
+    echo "Fetching initial conditions for the EAGLE 50Mpc example..."
+    ./getIC.sh
+fi
+
+../swift -s -t 16 eagle_50.yml
+
diff --git a/examples/Makefile.am b/examples/Makefile.am
index ecc5d4acf806fcc82a97488c5a0412357409ed25..9ad4d06f8764f8f57f68d0e501f4a726941c40e6 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -21,13 +21,17 @@ MYFLAGS = -DTIMER
 # Add the source directory and debug to CFLAGS
 AM_CFLAGS = -I../src $(HDF5_CPPFLAGS)
 
-AM_LDFLAGS =
+AM_LDFLAGS = $(HDF5_LDFLAGS)
 
-MPI_THREAD_LIBS = @MPI_THREAD_LIBS@
+# Extra libraries.
+EXTRA_LIBS = $(HDF5_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS)
+
+# MPI libraries.
 MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS)
 MPI_FLAGS = -DWITH_MPI $(METIS_INCS)
 
-# Set-up the library
+
+# Programs.
 bin_PROGRAMS = swift swift_fixdt
 
 # Build MPI versions as well?
@@ -45,20 +49,20 @@ ENGINE_POLICY_SETAFFINITY=
 # Sources for swift
 swift_SOURCES = main.c
 swift_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) -DENGINE_POLICY="engine_policy_keep $(ENGINE_POLICY_SETAFFINITY)"
-swift_LDADD = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS)
+swift_LDADD = ../src/.libs/libswiftsim.a $(EXTRA_LIBS)
 
 swift_fixdt_SOURCES = main.c
 swift_fixdt_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) -DENGINE_POLICY="engine_policy_fixdt | engine_policy_keep $(ENGINE_POLICY_SETAFFINITY)"
-swift_fixdt_LDADD = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS)
+swift_fixdt_LDADD = ../src/.libs/libswiftsim.a $(EXTRA_LIBS)
 
 # Sources for swift_mpi, do we need an affinity policy for MPI?
 swift_mpi_SOURCES = main.c
 swift_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_keep $(ENGINE_POLICY_SETAFFINITY)"
-swift_mpi_LDADD = ../src/.libs/libswiftsim_mpi.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(MPI_LIBS)
+swift_mpi_LDADD = ../src/.libs/libswiftsim_mpi.a $(MPI_LIBS) $(EXTRA_LIBS)
 
 swift_fixdt_mpi_SOURCES = main.c
 swift_fixdt_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_fixdt | engine_policy_keep $(ENGINE_POLICY_SETAFFINITY)"
-swift_fixdt_mpi_LDADD = ../src/.libs/libswiftsim_mpi.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(MPI_LIBS)
+swift_fixdt_mpi_LDADD = ../src/.libs/libswiftsim_mpi.a $(MPI_LIBS) $(EXTRA_LIBS)
 
 # Scripts to generate ICs
 EXTRA_DIST = UniformBox/makeIC.py UniformBox/run.sh UniformBox/uniformBox.yml \
diff --git a/examples/MultiTypes/multiTypes.yml b/examples/MultiTypes/multiTypes.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a1ff19f98e9fccab82d1749a45adf026b6675907
--- /dev/null
+++ b/examples/MultiTypes/multiTypes.yml
@@ -0,0 +1,47 @@
+# Define the system of units to use internally.
+InternalUnitSystem:
+  UnitMass_in_cgs:     1 # Grams
+  UnitLength_in_cgs:   1 # Centimeters
+  UnitVelocity_in_cgs: 1 # Centimeters per second
+  UnitCurrent_in_cgs:  1 # Amperes
+  UnitTemp_in_cgs:     1 # Kelvin
+
+# Parameters governing the time integration
+TimeIntegration:
+  time_begin: 0.   # The starting time of the simulation (in internal units).
+  time_end:   1.   # The end time of the simulation (in internal units).
+  dt_min:     1e-6 # The minimal time-step size of the simulation (in internal units).
+  dt_max:     1e-2 # The maximal time-step size of the simulation (in internal units).
+
+# Parameters governing the snapshots
+Snapshots:
+  basename:   multiTypes # Common part of the name of output files
+  time_first: 0.   # Time of the first output (in internal units)
+  delta_time: 0.01 # Time difference between consecutive outputs (in internal units)
+  UnitMass_in_cgs:     1 # Grams
+  UnitLength_in_cgs:   1 # Centimeters
+  UnitVelocity_in_cgs: 1 # Centimeters per second
+  UnitCurrent_in_cgs:  1 # Amperes
+  UnitTemp_in_cgs:     1 # Kelvin
+
+# Parameters governing the conserved quantities statistics
+Statistics:
+  delta_time: 1e-2 # Time between statistics output
+
+# Parameters for the hydrodynamics scheme
+SPH:
+  resolution_eta:       1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
+  delta_neighbours:     0.1    # The tolerance for the targeted number of neighbours.
+  max_smoothing_length: 0.1    # Maximal smoothing length allowed (in internal units).
+  CFL_condition:        0.1    # Courant-Friedrichs-Lewy condition for time integration.
+
+# Parameters related to the initial conditions
+InitialConditions:
+  file_name: ./multiTypes.hdf5 # The file to read
+
+# External potential parameters
+PointMass:
+  position_x: 50.  # location of external point mass in internal units
+  position_y: 50.
+  position_z: 50.
+  mass:       1e10 # mass of external point mass in internal units
diff --git a/examples/MultiTypes/run.sh b/examples/MultiTypes/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..9782a27fe18a7fac94cc2a12353f81abb9efb06b
--- /dev/null
+++ b/examples/MultiTypes/run.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Generate the initial conditions if they are not present.
+if [ ! -e multiTypes.hdf5 ]
+then
+    echo "Generating initial conditions for the multitype box example..."
+    python makeIC.py 50 60
+fi
+
+../swift -s -g -t 16 multiTypes.yml
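[Annotation: the PointMass block in multiTypes.yml feeds the external-potential task switched on by the -g flag used in run.sh. For reference, a point-mass potential applies the acceleration a = -G M (x - x0) / |x - x0|^3; a self-contained sketch, where the struct and function names are illustrative rather than SWIFT's actual implementation, and G is whatever the internal unit system makes it:]

```c
#include <math.h>

/* Mirrors the PointMass YAML block above (position_x/y/z and mass). */
struct point_mass {
  double x[3]; /* position of the external point mass */
  double mass; /* its mass, in internal units */
};

/* Acceleration of a particle at position x due to the point mass:
 * a = -G * M * (x - x0) / |x - x0|^3. */
static void external_grav_accel(const struct point_mass *pm, const double G,
                                const double x[3], double a[3]) {
  const double dx = x[0] - pm->x[0];
  const double dy = x[1] - pm->x[1];
  const double dz = x[2] - pm->x[2];
  const double rinv = 1.0 / sqrt(dx * dx + dy * dy + dz * dz);
  const double f = -G * pm->mass * rinv * rinv * rinv;
  a[0] = f * dx;
  a[1] = f * dy;
  a[2] = f * dz;
}
```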
diff --git a/examples/main.c b/examples/main.c
index fa0ac2567ac253568903ce8c47b2c9c38fee3704..016f948bb1b031ba756a344d020d4b26c8a42bed 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -74,6 +74,8 @@ void print_help_message() {
   printf(" %2s %8s %s\n", "-g", "",
          "Run with an external gravitational potential");
   printf(" %2s %8s %s\n", "-G", "", "Run with self-gravity");
+  printf(" %2s %8s %s\n", "-n", "{int}",
+         "Execute a fixed number of time steps");
   printf(" %2s %8s %s\n", "-s", "", "Run with SPH");
   printf(" %2s %8s %s\n", "-t", "{int}",
          "The number of threads to use on each MPI rank. Defaults to 1 if not "
@@ -138,6 +140,7 @@ int main(int argc, char *argv[]) {
   int with_aff = 0;
   int dry_run = 0;
   int dump_tasks = 0;
+  int nsteps = -2;
   int with_cosmology = 0;
   int with_external_gravity = 0;
   int with_self_gravity = 0;
@@ -150,7 +153,7 @@ int main(int argc, char *argv[]) {
 
   /* Parse the parameters */
   int c;
-  while ((c = getopt(argc, argv, "acdef:gGhst:v:y:")) != -1) switch (c) {
+  while ((c = getopt(argc, argv, "acdef:gGhn:st:v:y:")) != -1) switch (c) {
       case 'a':
         with_aff = 1;
         break;
@@ -179,6 +182,13 @@ int main(int argc, char *argv[]) {
       case 'h':
         if (myrank == 0) print_help_message();
         return 0;
+      case 'n':
+        if (sscanf(optarg, "%d", &nsteps) != 1) {
+          if (myrank == 0) printf("Error parsing fixed number of steps.\n");
+          if (myrank == 0) print_help_message();
+          return 1;
+        }
+        break;
       case 's':
         with_hydro = 1;
         break;
@@ -322,18 +332,21 @@ int main(int argc, char *argv[]) {
   size_t Ngas = 0, Ngpart = 0;
   double dim[3] = {0., 0., 0.};
   int periodic = 0;
+  int flag_entropy_ICs = 0;
   if (myrank == 0) clocks_gettime(&tic);
 #if defined(WITH_MPI)
 #if defined(HAVE_PARALLEL_HDF5)
   read_ic_parallel(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic,
-                   myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, dry_run);
+                   &flag_entropy_ICs, myrank, nr_nodes, MPI_COMM_WORLD,
+                   MPI_INFO_NULL, dry_run);
 #else
   read_ic_serial(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic,
-                 myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, dry_run);
+                 &flag_entropy_ICs, myrank, nr_nodes, MPI_COMM_WORLD,
+                 MPI_INFO_NULL, dry_run);
 #endif
 #else
   read_ic_single(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic,
-                 dry_run);
+                 &flag_entropy_ICs, dry_run);
 #endif
   if (myrank == 0) {
     clocks_gettime(&toc);
@@ -354,7 +367,7 @@ int main(int argc, char *argv[]) {
     free(parts);
     parts = NULL;
     for (size_t k = 0; k < Ngpart; ++k)
-      if (gparts[k].id > 0) error("Linking problem");
+      if (gparts[k].id_or_neg_offset < 0) error("Linking problem");
     Ngas = 0;
   }
 
@@ -466,7 +479,7 @@ int main(int argc, char *argv[]) {
 #endif
 
   /* Initialise the particles */
-  engine_init_particles(&e);
+  engine_init_particles(&e, flag_entropy_ICs);
 
   /* Legend */
   if (myrank == 0)
@@ -474,7 +487,7 @@ int main(int argc, char *argv[]) {
            "Updates", "g-Updates", "Wall-clock time", clocks_getunit());
 
   /* Main simulation loop */
-  for (int j = 0; !engine_is_done(&e); j++) {
+  for (int j = 0; !engine_is_done(&e) && e.step != nsteps; j++) {
 
 /* Repartition the space amongst the nodes? */
 #ifdef WITH_MPI
diff --git a/examples/mkplots.m b/examples/mkplots.m
deleted file mode 100644
index 341eaf395a65db2975b161a8a98e1916b9252741..0000000000000000000000000000000000000000
--- a/examples/mkplots.m
+++ /dev/null
@@ -1,102 +0,0 @@
-
-%% Scaling and efficiency plots for the first test
-datadir = 'Opteron8380';
-test = importdata( [ datadir '/snap_C90.totals' ]);
-ncores = size( test , 1 );
-col = 11;
-
-clf
-subplot('position',[ 0.05 , 0.1 , 0.4 , 0.8 ]);
-plot( 1:ncores , test(1,col) ./ test(:,col) , '-k' , 'LineWidth' , 2 ); hold on;
-xlabel('nr. cores');
-plot( [1,ncores] , [1,ncores] , ':k' , 'LineWidth' , 1.4 );
-hold off;
-title('Speedup GadgetSMP');
-axis([ 1 , ncores , 0 , ncores ]);
-
-subplot('position',[ 0.52 0.1 , 0.4 , 0.8 ]);
-plot( 1:ncores , test(1,col) ./ test(:,col) ./ (1:ncores)' , '-k' , 'LineWidth' , 2 ); hold on;
-plot( [1,ncores] , [1,1] , ':k' , 'LineWidth' , 1.4 );
-% text(4*ncores/5,1,sprintf('%.2f',min(test(:,10))),'BackgroundColor',[1,1,1],'FontSize',12);
-xlabel('nr. cores');
-hold off;
-title('Efficiency GadgetSMP');
-axis([ 1 , ncores , 0 , 1.2 ]);
-
-% Print this plot
-set( gcf , 'PaperSize' , 2.3*[ 8.5 4.5 ] );
-set( gcf , 'PaperPosition' , 2.3*[ 0.25 0.25 8 4 ] );
-print -depsc2 test.eps
-!epstopdf test.eps
-
-
-%% Scaling and efficiency plots for the second test
-test2 = importdata( 'test2.totals');
-ncores = size( test2 , 1 );
-
-clf
-subplot('position',[ 0.05 , 0.1 , 0.4 , 0.8 ]);
-plot( 1:ncores , test2(1,10) ./ test2(:,10) , '-k' , 'LineWidth' , 2 ); hold on;
-xlabel('nr. cores');
-plot( [1,ncores] , [1,ncores] , ':k' , 'LineWidth' , 1.4 );
-hold off;
-title('Speedup');
-axis([ 1 , ncores , 0 , ncores ]);
-
-subplot('position',[ 0.52 0.1 , 0.4 , 0.8 ]);
-plot( 1:ncores , test2(1,10) ./ test2(:,10) ./ (1:ncores)' , '-k' , 'LineWidth' , 2 ); hold on;
-plot( [1,ncores] , [1,1] , ':k' , 'LineWidth' , 1.4 );
-text(4*ncores/5,1,sprintf('%.2f',min(test2(:,10))),'BackgroundColor',[1,1,1],'FontSize',12);
-xlabel('nr. cores');
-hold off;
-title('Efficiency');
-axis([ 1 , ncores , 0 , 1.2 ]);
-
-% Print this plot
-set( gcf , 'PaperSize' , 2.3*[ 8.5 4.5 ] );
-set( gcf , 'PaperPosition' , 2.3*[ 0.25 0.25 8 4 ] );
-print -depsc2 test2.eps
-!epstopdf test2.eps
-
-
-%% Components of the first test
-test = importdata( 'test_nosort.totals');
-ncores = size( test , 1 );
-cols = [ 1 , 2 , 3 , 5 ];
-
-clf
-subplot('position',[ 0.1 , 0.1 , 0.8 , 0.8 ]);
-plot( 1:ncores , test(:,cols) , 'LineWidth' , 2 ); hold on;
-legend( 'sort' , 'self' , 'pair' , 'get task' , 'Location' , 'NorthWest' );
-xlabel('nr. cores');
-hold off;
-title('ms per task type');
-axis([ 1 , ncores , 0 , max(max(test(:,cols)))*1.1 ]);
-
-% Print this plot
-set( gcf , 'PaperSize' , 2.3*[ 4.5 4 ] );
-set( gcf , 'PaperPosition' , 2.3*[ 0.25 0.25 4 4 ] );
-print -depsc2 parts.eps
-!epstopdf parts.eps
-
-
-%% Components of the second test
-test2 = importdata( 'test2.totals');
-ncores = size( test2 , 1 );
-cols = [ 1 , 2 , 3 , 5 ];
-
-clf
-subplot('position',[ 0.1 , 0.1 , 0.8 , 0.8 ]);
-plot( 1:ncores , test2(:,cols) , 'LineWidth' , 2 ); hold on;
-legend( 'sort' , 'self' , 'pair' , 'get task' , 'Location' , 'NorthWest' );
-xlabel('nr. cores');
-hold off;
-title('ms per task type');
-axis([ 1 , ncores , 0 , max(max(test2(:,cols)))*1.1 ]);
-
-% Print this plot
-set( gcf , 'PaperSize' , 2.3*[ 4.5 4 ] );
-set( gcf , 'PaperPosition' , 2.3*[ 0.25 0.25 4 4 ] );
-print -depsc2 parts2.eps
-!epstopdf parts2.eps
-
diff --git a/src/Makefile.am b/src/Makefile.am
index cbf12389c2e7e725f1673a1fa7510c490ea81354..cca2c15d1c247e5898a53f7b0d55e5bfaf7e4dc5 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -24,6 +24,13 @@ AM_LDFLAGS = $(HDF5_LDFLAGS) -version-info 0:0:0
 # The git command, if available.
 GIT_CMD = @GIT_CMD@
 
+# Additional dependencies for shared libraries.
+EXTRA_LIBS = $(HDF5_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS)
+
+# MPI libraries.
+MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS)
+MPI_FLAGS = -DWITH_MPI $(METIS_INCS)
+
 # Build the libswiftsim library
 lib_LTLIBRARIES = libswiftsim.la
 # Build a MPI-enabled version too?
@@ -66,11 +73,13 @@ nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel_h
 
 # Sources and flags for regular library
 libswiftsim_la_SOURCES = $(AM_SOURCES)
+libswiftsim_la_CFLAGS = $(AM_CFLAGS)
+libswiftsim_la_LDFLAGS = $(AM_LDFLAGS) $(EXTRA_LIBS)
 
 # Sources and flags for MPI library
 libswiftsim_mpi_la_SOURCES = $(AM_SOURCES)
-libswiftsim_mpi_la_CFLAGS = $(AM_CFLAGS) -DWITH_MPI $(METIS_INCS)
-libswiftsim_mpi_la_LDFLAGS = $(AM_LDFLAGS) -DWITH_MPI $(METIS_LIBS)
+libswiftsim_mpi_la_CFLAGS = $(AM_CFLAGS) $(MPI_FLAGS)
+libswiftsim_mpi_la_LDFLAGS = $(AM_LDFLAGS) $(MPI_LIBS) $(EXTRA_LIBS)
 libswiftsim_mpi_la_SHORTNAME = mpi
 
diff --git a/src/cell.c b/src/cell.c
index e152cd6136a7d2a04eb6a657fb9ad85f7d1b8c1b..cd83ddab570a3c848e0502629534743e8f383a4f 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -128,6 +128,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
   }
 
   /* Return the total number of unpacked cells. */
+  c->pcell_size = count;
   return count;
 }
 
@@ -213,6 +214,39 @@ int cell_pack(struct cell *c, struct pcell *pc) {
       pc->progeny[k] = -1;
 
   /* Return the number of packed cells used. */
+  c->pcell_size = count;
+  return count;
+}
+
+int cell_pack_ti_ends(struct cell *c, int *ti_ends) {
+
+  /* Pack this cell's data. */
+  ti_ends[0] = c->ti_end_min;
+
+  /* Fill in the progeny, depth-first recursion. */
+  int count = 1;
+  for (int k = 0; k < 8; k++)
+    if (c->progeny[k] != NULL) {
+      count += cell_pack_ti_ends(c->progeny[k], &ti_ends[count]);
+    }
+
+  /* Return the number of packed values. */
+  return count;
+}
+
+int cell_unpack_ti_ends(struct cell *c, int *ti_ends) {
+
+  /* Unpack this cell's data. */
+  c->ti_end_min = ti_ends[0];
+
+  /* Fill in the progeny, depth-first recursion. */
+  int count = 1;
+  for (int k = 0; k < 8; k++)
+    if (c->progeny[k] != NULL) {
+      count += cell_unpack_ti_ends(c->progeny[k], &ti_ends[count]);
+    }
+
+  /* Return the number of packed values. */
   return count;
 }
 
@@ -376,9 +410,11 @@ void cell_gunlocktree(struct cell *c) {
  * @brief Sort the parts into eight bins along the given pivots.
  *
  * @param c The #cell array to be sorted.
+ * @param parts_offset Offset of the cell parts array relative to the
+ *        space's parts array, i.e. c->parts - s->parts.
  */
-void cell_split(struct cell *c) {
+void cell_split(struct cell *c, ptrdiff_t parts_offset) {
 
   int i, j;
   const int count = c->count, gcount = c->gcount;
@@ -496,8 +532,7 @@ void cell_split(struct cell *c) {
   }
 
   /* Re-link the gparts. */
-  for (int k = 0; k < count; k++)
-    if (parts[k].gpart != NULL) parts[k].gpart->part = &parts[k];
+  part_relink_gparts(parts, count, parts_offset);
 
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify that _all_ the parts have been assigned to a cell. */
@@ -592,8 +627,7 @@ void cell_split(struct cell *c) {
   }
 
   /* Re-link the parts. */
-  for (int k = 0; k < gcount; k++)
-    if (gparts[k].id > 0) gparts[k].part->gpart = &gparts[k];
+  part_relink_parts(gparts, gcount, parts - parts_offset);
 }
 
 /**
diff --git a/src/cell.h b/src/cell.h
index 32cbb4256c65b8259416b5861383d9efced80490..f0d1381794cbd5e55bc2ed9ed4638eb46be159e4 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -24,6 +24,9 @@
 #define SWIFT_CELL_H
 
 /* Includes. */
+#include <stddef.h>
+
+/* Local includes. */
 #include "lock.h"
 #include "multipole.h"
 #include "part.h"
@@ -122,7 +125,10 @@ struct cell {
   struct task *ghost, *init, *kick;
 
   /* Task receiving data. */
-  struct task *recv_xv, *recv_rho;
+  struct task *recv_xv, *recv_rho, *recv_ti;
+
+  /* Tasks for sending data. */
+  struct link *send_xv, *send_rho, *send_ti;
 
   /* Tasks for gravity tree. */
   struct task *grav_up, *grav_down;
 
@@ -175,13 +181,15 @@ struct cell {
       ((int)(k) + (cdim)[2] * ((int)(j) + (cdim)[1] * (int)(i)))
 
 /* Function prototypes. */
-void cell_split(struct cell *c);
+void cell_split(struct cell *c, ptrdiff_t parts_offset);
 int cell_locktree(struct cell *c);
 void cell_unlocktree(struct cell *c);
 int cell_glocktree(struct cell *c);
 void cell_gunlocktree(struct cell *c);
 int cell_pack(struct cell *c, struct pcell *pc);
 int cell_unpack(struct pcell *pc, struct cell *c, struct space *s);
+int cell_pack_ti_ends(struct cell *c, int *ti_ends);
+int cell_unpack_ti_ends(struct cell *c, int *ti_ends);
 int cell_getsize(struct cell *c);
 int cell_link_parts(struct cell *c, struct part *parts);
 int cell_link_gparts(struct cell *c, struct gpart *gparts);
diff --git a/src/common_io.c b/src/common_io.c
index 03d5618cd0cafe5ff6067cf72107baacffb5d85a..971fe6b01c682c489d3444ec1d47d6a902250bb8 100644
--- a/src/common_io.c
+++ b/src/common_io.c
@@ -516,12 +516,10 @@ void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm) {
   /* Let's give all these gparts a negative id */
   for (size_t i = 0; i < Ndm; ++i) {
 
-    /* 0 or negative ids are not allowed */
-    if (gparts[i].id <= 0)
-      error("0 or negative ID for DM particle %zd: ID=%lld", i, gparts[i].id);
-
-    gparts[i].id = -gparts[i].id;
+    if (gparts[i].id_or_neg_offset <= 0)
+      error("0 or negative ID for DM particle %zd: ID=%lld", i,
+            gparts[i].id_or_neg_offset);
   }
 }
 
@@ -555,7 +553,7 @@ void duplicate_hydro_gparts(struct part* const parts,
     gparts[i + Ndm].mass = parts[i].mass;
 
     /* Link the particles */
-    gparts[i + Ndm].part = &parts[i];
+    gparts[i + Ndm].id_or_neg_offset = -i;
     parts[i].gpart = &gparts[i + Ndm];
   }
 }
@@ -580,9 +578,8 @@ void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot,
      *        gparts[i].part); */
 
     /* And collect the DM ones */
-    if (gparts[i].id < 0LL) {
-      memcpy(&dmparts[count], &gparts[i], sizeof(struct gpart));
-      dmparts[count].id = -dmparts[count].id;
+    if (gparts[i].id_or_neg_offset > 0) {
+      dmparts[count] = gparts[i];
       count++;
     }
   }
diff --git a/src/debug.c b/src/debug.c
index 77238ab135e3f27b8c2c43b0ec18e7745493b138..e7b60c52f92fdb64fdefc8ac5c5e353f1ab57e6e 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -59,8 +59,8 @@
  *
  * (Should be used for debugging only as it runs in O(N).)
  */
-void printParticle(struct part *parts, struct xpart *xparts, long long int id,
-                   size_t N) {
+void printParticle(const struct part *parts, struct xpart *xparts,
+                   long long int id, size_t N) {
 
   int found = 0;
 
@@ -82,24 +82,27 @@ void printParticle(struct part *parts, struct xpart *xparts, long long int id,
  * the standard output.
  *
  * @param gparts The array of g-particles.
+ * @param parts The array of particles.
  * @param id The id to look for.
  * @param N The size of the array of g-particles.
  *
 * (Should be used for debugging only as it runs in O(N).)
 */
-void printgParticle(struct gpart *gparts, long long int id, size_t N) {
+void printgParticle(const struct gpart *gparts, const struct part *parts,
+                    long long int id, size_t N) {
 
   int found = 0;
 
   /* Look for the particle. */
   for (size_t i = 0; i < N; i++)
-    if (gparts[i].id == -id) {
-      printf("## gParticle[%zd] (DM) :\n id=%lld ", i, -gparts[i].id);
+    if (gparts[i].id_or_neg_offset == id) {
+      printf("## gParticle[%zd] (DM) :\n id=%lld", i, id);
       gravity_debug_particle(&gparts[i]);
       found = 1;
       break;
-    } else if (gparts[i].id > 0 && gparts[i].part->id == id) {
-      printf("## gParticle[%zd] (hydro) :\n id=%lld ", i, gparts[i].id);
+    } else if (gparts[i].id_or_neg_offset < 0 &&
+               parts[-gparts[i].id_or_neg_offset].id == id) {
+      printf("## gParticle[%zd] (hydro) :\n id=%lld", i, id);
       gravity_debug_particle(&gparts[i]);
       found = 1;
       break;
@@ -114,9 +117,9 @@ void printgParticle(struct gpart *gparts, long long int id, size_t N) {
  * @param p The particle to print
  * @param xp The extended data of the particle to print
  */
-void printParticle_single(struct part *p, struct xpart *xp) {
+void printParticle_single(const struct part *p, const struct xpart *xp) {
 
-  printf("## Particle: id=%lld ", p->id);
+  printf("## Particle: id=%lld", p->id);
   hydro_debug_particle(p, xp);
   printf("\n");
 }
@@ -128,7 +131,7 @@
  */
 void printgParticle_single(struct gpart *gp) {
 
-  printf("## g-Particle: id=%lld ", gp->id);
+  printf("## g-Particle: id=%lld ", gp->id_or_neg_offset);
   gravity_debug_particle(gp);
   printf("\n");
 }
diff --git a/src/debug.h b/src/debug.h
index 13be15adb867e8bafe95e2900f68caaa36192510..367241201977d9b79a8c2913dbae5d08f1148529 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -19,14 +19,15 @@
 #ifndef SWIFT_DEBUG_H
 #define SWIFT_DEBUG_H
 
-struct part;
-struct gpart;
-struct xpart;
+/* Includes. */
+#include "cell.h"
+#include "part.h"
 
-void printParticle(struct part *parts, struct xpart *xparts, long long int id,
-                   size_t N);
-void printgParticle(struct gpart *parts, long long int id, size_t N);
-void printParticle_single(struct part *p, struct xpart *xp);
+void printParticle(const struct part *parts, struct xpart *xparts,
+                   long long int id, size_t N);
+void printgParticle(const struct gpart *gparts, const struct part *parts,
+                    long long int id, size_t N);
+void printParticle_single(const struct part *p, const struct xpart *xp);
 void printgParticle_single(struct gpart *gp);
 
 #ifdef HAVE_METIS
diff --git a/src/engine.c b/src/engine.c
index e50d893315ddbee9389552b49ffee43c003b38b1..5aea7c8b08bc505e5336b025c081a32a0eb982de 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -113,8 +113,8 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) {
 }
 
 /**
- * @brief Generate the hierarchical tasks for a hierarchy of cells - i.e. all
- * the O(Npart) tasks.
+ * @brief Generate the gravity hierarchical tasks for a hierarchy of cells -
+ * i.e. all the O(Npart) tasks.
  *
  * Tasks are only created here. The dependencies will be added later on.
  *
@@ -122,8 +122,8 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) {
  * @param c The #cell.
  * @param super The super #cell.
  */
-void engine_make_hierarchical_tasks(struct engine *e, struct cell *c,
-                                    struct cell *super) {
+void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c,
+                                            struct cell *super) {
 
   struct scheduler *s = &e->sched;
   const int is_with_external_gravity =
@@ -132,41 +132,101 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c,
   const int is_fixdt = (e->policy & engine_policy_fixdt) == engine_policy_fixdt;
 
   /* Am I the super-cell? */
-  if (super == NULL && (c->count > 0 || c->gcount > 0)) {
+  /* TODO(pedro): Add a condition for gravity tasks as well. */
+  if (super == NULL &&
+      (c->density != NULL || (!c->split && (c->count > 0 || c->gcount > 0)))) {
 
-    /* Remember me. */
+    /* This is the super cell, i.e. the first with density tasks attached. */
     super = c;
 
     /* Local tasks only... */
     if (c->nodeID == e->nodeID) {
 
       /* Add the init task. */
-      c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0, c,
-                                  NULL, 0);
+      if (c->init == NULL)
+        c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0,
+                                    c, NULL, 0);
+
+      /* Add the drift task. */
+      c->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0, 0,
+                                   c, NULL, 0);
 
       /* Add the kick task that matches the policy. */
       if (is_fixdt) {
-        c->kick = scheduler_addtask(s, task_type_kick_fixdt, task_subtype_none,
-                                    0, 0, c, NULL, 0);
+        if (c->kick == NULL)
+          c->kick = scheduler_addtask(s, task_type_kick_fixdt,
+                                      task_subtype_none, 0, 0, c, NULL, 0);
       } else {
-        c->kick = scheduler_addtask(s, task_type_kick, task_subtype_none, 0, 0,
-                                    c, NULL, 0);
+        if (c->kick == NULL)
+          c->kick = scheduler_addtask(s, task_type_kick, task_subtype_none, 0,
+                                      0, c, NULL, 0);
       }
 
-      if (c->count > 0) {
+      if (is_with_external_gravity)
+        c->grav_external = scheduler_addtask(
+            s, task_type_grav_external, task_subtype_none, 0, 0, c, NULL, 0);
+    }
+  }
 
-        /* Generate the ghost task. */
-        c->ghost = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0,
-                                     0, c, NULL, 0);
-      }
+  /* Set the super-cell. */
+  c->super = super;
+
+  /* Recurse. */
+  if (c->split)
+    for (int k = 0; k < 8; k++)
+      if (c->progeny[k] != NULL)
+        engine_make_gravity_hierarchical_tasks(e, c->progeny[k], super);
+}
+
+/**
+ * @brief Generate the hydro hierarchical tasks for a hierarchy of cells -
+ * i.e. all the O(Npart) tasks.
+ *
+ * Tasks are only created here. The dependencies will be added later on.
+ *
+ * @param e The #engine.
+ * @param c The #cell.
+ * @param super The super #cell.
+ */
+void engine_make_hydro_hierarchical_tasks(struct engine *e, struct cell *c,
+                                          struct cell *super) {
+
+  struct scheduler *s = &e->sched;
+  const int is_fixdt = (e->policy & engine_policy_fixdt) == engine_policy_fixdt;
+
+  /* Is this the super-cell? */
+  if (super == NULL && (c->density != NULL || (c->count > 0 && !c->split))) {
+
+    /* This is the super cell, i.e. the first with density tasks attached. */
+    super = c;
+
+    /* Local tasks only... */
     if (c->nodeID == e->nodeID) {
 
-      if (c->gcount > 0) {
+      /* Add the init task. */
+      if (c->init == NULL)
+        c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0,
+                                    c, NULL, 0);
 
-        /* Add the external gravity tasks */
-        if (is_with_external_gravity)
-          c->grav_external = scheduler_addtask(
-              s, task_type_grav_external, task_subtype_none, 0, 0, c, NULL, 0);
+      /* Add the drift task. */
+      if (c->drift == NULL)
+        c->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0,
+                                     0, c, NULL, 0);
+
+      /* Add the kick task that matches the policy. */
+      if (is_fixdt) {
+        if (c->kick == NULL)
+          c->kick = scheduler_addtask(s, task_type_kick_fixdt,
+                                      task_subtype_none, 0, 0, c, NULL, 0);
+      } else {
+        if (c->kick == NULL)
+          c->kick = scheduler_addtask(s, task_type_kick, task_subtype_none, 0,
+                                      0, c, NULL, 0);
       }
+
+      /* Generate the ghost task. */
+      c->ghost = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, 0,
+                                   c, NULL, 0);
     }
   }
 
@@ -177,7 +237,7 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c,
   if (c->split)
     for (int k = 0; k < 8; k++)
      if (c->progeny[k] != NULL)
-        engine_make_hierarchical_tasks(e, c->progeny[k], super);
+        engine_make_hydro_hierarchical_tasks(e, c->progeny[k], super);
 }
 
 /**
@@ -275,11 +335,11 @@ void engine_redistribute(struct engine *e) {
       }
 
 #ifdef SWIFT_DEBUG_CHECKS
-      if (s->parts[k].gpart->id < 0)
+      if (s->parts[k].gpart->id_or_neg_offset >= 0)
         error("Trying to link a partnerless gpart !");
 #endif
 
-      s->parts[k].gpart->id = count_this_dest;
+      s->parts[k].gpart->id_or_neg_offset = -count_this_dest;
       count_this_dest++;
     }
   }
@@ -463,13 +523,14 @@ void engine_redistribute(struct engine *e) {
   for (size_t k = offset_gparts; k < offset_gparts + count_gparts; ++k) {
 
     /* Does this gpart have a partner? */
-    if (gparts_new[k].id >= 0) {
+    if (gparts_new[k].id_or_neg_offset <= 0) {
 
-      const size_t partner_index = offset_parts + gparts_new[k].id;
+      const ptrdiff_t partner_index =
+          offset_parts - gparts_new[k].id_or_neg_offset;
 
       /* Re-link */
-      gparts_new[k].part = &parts_new[partner_index];
-      gparts_new[k].part->gpart = &gparts_new[k];
+      gparts_new[k].id_or_neg_offset = -partner_index;
+      parts_new[partner_index].gpart = &gparts_new[k];
     }
   }
 
@@ -490,22 +551,22 @@ void engine_redistribute(struct engine *e) {
   /* Verify that the links are correct */
   for (size_t k = 0; k < nr_gparts; ++k) {
 
-    if (gparts_new[k].id > 0) {
+    if (gparts_new[k].id_or_neg_offset <= 0) {
 
-      if (gparts_new[k].part->gpart != &gparts_new[k])
-        error("Linking problem !");
+      struct part *part = &parts_new[-gparts_new[k].id_or_neg_offset];
 
-      if (gparts_new[k].x[0] != gparts_new[k].part->x[0] ||
-          gparts_new[k].x[1] != gparts_new[k].part->x[1] ||
-          gparts_new[k].x[2] != gparts_new[k].part->x[2])
+      if (part->gpart != &gparts_new[k]) error("Linking problem !");
+
+      if (gparts_new[k].x[0] != part->x[0] ||
+          gparts_new[k].x[1] != part->x[1] || gparts_new[k].x[2] != part->x[2])
         error("Linked particles are not at the same position !");
     }
   }
   for (size_t k = 0; k < nr_parts; ++k) {
 
-    if (parts_new[k].gpart != NULL) {
-
-      if (parts_new[k].gpart->part != &parts_new[k]) error("Linking problem !");
+    if (parts_new[k].gpart != NULL &&
+        parts_new[k].gpart->id_or_neg_offset != -k) {
+      error("Linking problem !");
     }
   }
 #endif
@@ -617,47 +678,63 @@ void engine_addtasks_grav(struct engine *e, struct cell *c, struct task *up,
  *
 * @param e The #engine.
 * @param ci The sending #cell.
- * @param cj The receiving #cell
+ * @param cj Dummy cell containing the nodeID of the receiving node.
+ * @param t_xv The send_xv #task, if it has already been created.
+ * @param t_rho The send_rho #task, if it has already been created.
+ * @param t_ti The send_ti #task, if required and has already been created.
 */
-void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj) {
+void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
+                          struct task *t_xv, struct task *t_rho,
+                          struct task *t_ti) {
 
 #ifdef WITH_MPI
   struct link *l = NULL;
   struct scheduler *s = &e->sched;
+  const int nodeID = cj->nodeID;
 
   /* Check if any of the density tasks are for the target node. */
   for (l = ci->density; l != NULL; l = l->next)
-    if (l->t->ci->nodeID == cj->nodeID ||
-        (l->t->cj != NULL && l->t->cj->nodeID == cj->nodeID))
+    if (l->t->ci->nodeID == nodeID ||
+        (l->t->cj != NULL && l->t->cj->nodeID == nodeID))
      break;
 
  /* If so, attach send tasks. */
  if (l != NULL) {
 
-    /* Create the tasks. */
-    struct task *t_xv = scheduler_addtask(s, task_type_send, task_subtype_none,
-                                          2 * ci->tag, 0, ci, cj, 0);
-    struct task *t_rho = scheduler_addtask(s, task_type_send, task_subtype_none,
-                                           2 * ci->tag + 1, 0, ci, cj, 0);
+    /* Create the tasks and their dependencies? */
+    if (t_xv == NULL) {
+      t_xv = scheduler_addtask(s, task_type_send, task_subtype_none,
+                               3 * ci->tag, 0, ci, cj, 0);
+      t_rho = scheduler_addtask(s, task_type_send, task_subtype_none,
+                                3 * ci->tag + 1, 0, ci, cj, 0);
+      if (!(e->policy & engine_policy_fixdt))
+        t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend,
+                                 3 * ci->tag + 2, 0, ci, cj, 0);
 
-    /* The first send should depend on the engine's send_root. */
-    scheduler_addunlock(s, e->send_root, t_xv);
+      /* The send_rho task depends on the cell's ghost task. */
+      scheduler_addunlock(s, ci->super->ghost, t_rho);
 
-    /* The send_rho task depends on the cell's ghost task. */
-    scheduler_addunlock(s, ci->super->ghost, t_rho);
+      /* The send_rho task should unlock the super-cell's kick task. */
+      scheduler_addunlock(s, t_rho, ci->super->kick);
 
-    /* The send_rho task should unlock the super-cell's kick task. */
-    scheduler_addunlock(s, t_rho, ci->super->kick);
+      /* The send_xv task should unlock the super-cell's ghost task. */
+      scheduler_addunlock(s, t_xv, ci->super->ghost);
 
-    /* The send_xv task should unlock the super-cell's ghost task. */
-    scheduler_addunlock(s, t_xv, ci->super->ghost);
+      /* The super-cell's kick task should unlock the send_ti task. */
+      if (t_ti != NULL) scheduler_addunlock(s, ci->super->kick, t_ti);
+    }
 
+    /* Add them to the local cell. */
+    ci->send_xv = engine_addlink(e, ci->send_xv, t_xv);
+    ci->send_rho = engine_addlink(e, ci->send_rho, t_rho);
+    if (t_ti != NULL) ci->send_ti = engine_addlink(e, ci->send_ti, t_ti);
  }
 
   /* Recurse? */
-  else if (ci->split)
+  if (ci->split)
     for (int k = 0; k < 8; k++)
-      if (ci->progeny[k] != NULL) engine_addtasks_send(e, ci->progeny[k], cj);
+      if (ci->progeny[k] != NULL)
+        engine_addtasks_send(e, ci->progeny[k], cj, t_xv, t_rho, t_ti);
 
 #else
   error("SWIFT was not compiled with MPI support.");
@@ -668,44 +745,52 @@
  * @brief Add recv tasks to a hierarchy of cells.
 *
 * @param e The #engine.
- * @param c The #cell.
+ * @param c The foreign #cell.
 * @param t_xv The recv_xv #task, if it has already been created.
 * @param t_rho The recv_rho #task, if it has already been created.
+ * @param t_ti The recv_ti #task, if required and has already been created.
 */
 void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
-                          struct task *t_rho) {
+                          struct task *t_rho, struct task *t_ti) {
 
 #ifdef WITH_MPI
   struct scheduler *s = &e->sched;
 
-  /* Do we need to construct a recv task? */
-  if (t_xv == NULL && c->nr_density > 0) {
+  /* Do we need to construct a recv task?
+     Note that since c is a foreign cell, all its density tasks will involve
+     only the current rank, and thus we don't have to check them. */
+  if (t_xv == NULL && c->density != NULL) {
 
     /* Create the tasks. */
-    t_xv = c->recv_xv = scheduler_addtask(s, task_type_recv, task_subtype_none,
-                                          2 * c->tag, 0, c, NULL, 0);
-    t_rho = c->recv_rho = scheduler_addtask(
-        s, task_type_recv, task_subtype_none, 2 * c->tag + 1, 0, c, NULL, 0);
-
-    /* The first recv should depend on the engine's recv_root. */
-    scheduler_addunlock(s, e->recv_root, t_xv);
-  }
+    t_xv = scheduler_addtask(s, task_type_recv, task_subtype_none, 3 * c->tag,
+                             0, c, NULL, 0);
+    t_rho = scheduler_addtask(s, task_type_recv, task_subtype_none,
+                              3 * c->tag + 1, 0, c, NULL, 0);
+    if (!(e->policy & engine_policy_fixdt))
+      t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend,
+                               3 * c->tag + 2, 0, c, NULL, 0);
+  }
+  c->recv_xv = t_xv;
+  c->recv_rho = t_rho;
+  c->recv_ti = t_ti;
 
   /* Add dependencies. */
   for (struct link *l = c->density; l != NULL; l = l->next) {
     scheduler_addunlock(s, t_xv, l->t);
     scheduler_addunlock(s, l->t, t_rho);
   }
-  for (struct link *l = c->force; l != NULL; l = l->next)
+  for (struct link *l = c->force; l != NULL; l = l->next) {
     scheduler_addunlock(s, t_rho, l->t);
+    if (t_ti != NULL) scheduler_addunlock(s, l->t, t_ti);
+  }
   if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts);
 
   /* Recurse? */
   if (c->split)
     for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL)
-        engine_addtasks_recv(e, c->progeny[k], t_xv, t_rho);
+        engine_addtasks_recv(e, c->progeny[k], t_xv, t_rho, t_ti);
 
 #else
   error("SWIFT was not compiled with MPI support.");
@@ -731,7 +816,8 @@ void engine_exchange_cells(struct engine *e) {
   MPI_Status status;
   ticks tic = getticks();
 
-  /* Run through the cells and get the size of the ones that will be sent */
+  /* Run through the cells and get the size of the ones that will be sent off.
+   */
   int count_out = 0;
   for (int k = 0; k < nr_cells; k++) {
     offset[k] = count_out;
@@ -896,7 +982,8 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
 
     /* Re-link the associated gpart with the buffer offset of the part. */
     if (s->parts[offset_parts + k].gpart != NULL) {
-      s->parts[offset_parts + k].gpart->id = e->proxies[pid].nr_parts_out;
+      s->parts[offset_parts + k].gpart->id_or_neg_offset =
+          -e->proxies[pid].nr_parts_out;
     }
 
     /* Load the part and xpart into the proxy. */
@@ -912,7 +999,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
      error(
           "Do not have a proxy for the requested nodeID %i for part with "
           "id=%lli, x=[%e,%e,%e].",
-          node_id, s->gparts[offset_parts + k].id,
+          node_id, s->gparts[offset_parts + k].id_or_neg_offset,
          s->gparts[offset_gparts + k].x[0], s->gparts[offset_parts + k].x[1],
          s->gparts[offset_gparts + k].x[2]);
    proxy_gparts_load(&e->proxies[pid], &s->gparts[offset_gparts + k], 1);
@@ -972,7 +1059,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
    s->xparts = xparts_new;
    for (size_t k = 0; k < offset_parts; k++) {
      if (s->parts[k].gpart != NULL) {
-        s->parts[k].gpart->part = &s->parts[k];
+        s->parts[k].gpart->id_or_neg_offset = -k;
      }
    }
  }
@@ -987,8 +1074,8 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
    free(s->gparts);
    s->gparts = gparts_new;
    for (size_t k = 0; k < offset_gparts; k++) {
-      if (s->gparts[k].id > 0) {
-        s->gparts[k].part->gpart = &s->gparts[k];
+      if (s->gparts[k].id_or_neg_offset < 0) {
+        s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
      }
    }
  }
@@ -1061,9 +1148,10 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
 
    /* Re-link the gparts. */
    for (int k = 0; k < p->nr_gparts_in; k++) {
      struct gpart *gp = &s->gparts[offset_gparts + count_gparts + k];
-      if (gp->id >= 0) {
-        struct part *p = &s->parts[offset_parts + count_parts + gp->id];
-        gp->part = p;
+      if (gp->id_or_neg_offset <= 0) {
+        struct part *p =
+            &s->parts[offset_gparts + count_parts - gp->id_or_neg_offset];
+        gp->id_or_neg_offset = s->parts - p;
        p->gpart = gp;
      }
    }
@@ -1680,12 +1768,13 @@ void engine_maketasks(struct engine *e) {
 
      /* Loop through the proxy's incoming cells and add the recv tasks. */
      for (int k = 0; k < p->nr_cells_in; k++)
-        engine_addtasks_recv(e, p->cells_in[k], NULL, NULL);
+        engine_addtasks_recv(e, p->cells_in[k], NULL, NULL, NULL);
 
      /* Loop through the proxy's outgoing cells and add the send tasks. */
      for (int k = 0; k < p->nr_cells_out; k++)
-        engine_addtasks_send(e, p->cells_out[k], p->cells_in[0]);
+        engine_addtasks_send(e, p->cells_out[k], p->cells_in[0], NULL, NULL,
+                             NULL);
    }
  }
 
@@ -1721,7 +1810,6 @@
 *
 * @return 1 if the space has to be rebuilt, 0 otherwise.
 */
-
 void engine_marktasks_fixdt_mapper(void *map_data, int num_elements,
                                    void *extra_data) {
  /* Unpack the arguments. */
@@ -1815,7 +1903,6 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
          cj->sorts->skip = 0;
        }
      }
-
    }
 
    /* Kick? */
@@ -2194,6 +2281,61 @@ void engine_collect_timestep(struct engine *e) {
  e->g_updates = g_updates;
 }
 
+/**
+ * @brief Mapping function to collect the data from the drift.
+ *
+ * @param c A super-cell.
+ */
+void engine_collect_drift(struct cell *c) {
+
+  /* Skip super-cells (Their values are already set) */
+  if (c->drift != NULL) return;
+
+  /* Counters for the different quantities. */
+  double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, mass = 0.0;
+  double mom[3] = {0.0, 0.0, 0.0}, ang_mom[3] = {0.0, 0.0, 0.0};
+
+  /* Only do something if the cell is non-empty */
+  if (c->count != 0 || c->gcount != 0) {
+
+    /* If this cell is not split, I'm in trouble. */
+    if (!c->split) error("Cell has no super-cell.");
+
+    /* Collect the values from the progeny. */
+    for (int k = 0; k < 8; k++) {
+      struct cell *cp = c->progeny[k];
+      if (cp != NULL) {
+
+        /* Recurse */
+        engine_collect_drift(cp);
+
+        /* And update */
+        mass += cp->mass;
+        e_kin += cp->e_kin;
+        e_int += cp->e_int;
+        e_pot += cp->e_pot;
+        mom[0] += cp->mom[0];
+        mom[1] += cp->mom[1];
+        mom[2] += cp->mom[2];
+        ang_mom[0] += cp->ang_mom[0];
+        ang_mom[1] += cp->ang_mom[1];
+        ang_mom[2] += cp->ang_mom[2];
+      }
+    }
+  }
+
+  /* Store the collected values in the cell. */
+  c->mass = mass;
+  c->e_kin = e_kin;
+  c->e_int = e_int;
+  c->e_pot = e_pot;
+  c->mom[0] = mom[0];
+  c->mom[1] = mom[1];
+  c->mom[2] = mom[2];
+  c->ang_mom[0] = ang_mom[0];
+  c->ang_mom[1] = ang_mom[1];
+  c->ang_mom[2] = ang_mom[2];
+}
+
 /**
 * @brief Print the conserved quantities statistics to a log file
 *
@@ -2210,6 +2352,11 @@ void engine_print_stats(struct engine *e) {
  for (int k = 0; k < s->nr_cells; k++)
    if (s->cells[k].nodeID == e->nodeID) {
      struct cell *c = &s->cells[k];
+
+      /* Make the top-cells recurse */
+      engine_collect_drift(c);
+
+      /* And aggregate */
      mass += c->mass;
      e_kin += c->e_kin;
      e_int += c->e_int;
@@ -2307,8 +2454,10 @@
 *forward in time.
 *
 * @param e The #engine
+ * @param flag_entropy_ICs Did the 'Internal Energy' of the particles actually
+ *contain entropy?
 */
-void engine_init_particles(struct engine *e) {
+void engine_init_particles(struct engine *e, int flag_entropy_ICs) {
 
  struct space *s = e->s;
 
@@ -2368,6 +2517,7 @@ void engine_init_particles(struct engine *e) {
 
    mask |= 1 << task_type_send;
    mask |= 1 << task_type_recv;
+    submask |= 1 << task_subtype_tend;
  }
 
  /* Now, launch the calculation */
@@ -2376,7 +2526,7 @@
  TIMER_TOC(timer_runners);
 
  /* Apply some conversions (e.g. internal energy -> entropy) */
-  space_map_cells_pre(s, 0, cell_convert_hydro, NULL);
+  if (!flag_entropy_ICs) space_map_cells_pre(s, 0, cell_convert_hydro, NULL);
 
  clocks_gettime(&time2);
 
@@ -2500,6 +2650,7 @@ void engine_step(struct engine *e) {
 
    mask |= 1 << task_type_send;
    mask |= 1 << task_type_recv;
+    submask |= 1 << task_subtype_tend;
  }
 
  /* Send off the runners. */
@@ -2663,8 +2814,7 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
  s->xparts = xparts_new;
 
  /* Re-link the gparts. */
-  for (size_t k = 0; k < s->nr_parts; k++)
-    if (s->parts[k].gpart != NULL) s->parts[k].gpart->part = &s->parts[k];
+  part_relink_gparts(s->parts, s->nr_parts, 0);
 
  /* Re-allocate the local gparts. */
  if (e->verbose)
@@ -2680,29 +2830,28 @@ void engine_split(struct engine *e, struct partition *initial_partition) {
  s->gparts = gparts_new;
 
  /* Re-link the parts. */
-  for (size_t k = 0; k < s->nr_gparts; k++)
-    if (s->gparts[k].id > 0) s->gparts[k].part->gpart = &s->gparts[k];
+  part_relink_parts(s->gparts, s->nr_gparts, s->parts);
 
 #ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the links are correct */
  for (size_t k = 0; k < s->nr_gparts; ++k) {
 
-    if (s->gparts[k].id > 0) {
+    if (s->gparts[k].id_or_neg_offset <= 0) {
+
+      struct part *part = &s->parts[-s->gparts[k].id_or_neg_offset];
 
-      if (s->gparts[k].part->gpart != &s->gparts[k]) error("Linking problem !");
+      if (part->gpart != &s->gparts[k]) error("Linking problem !");
 
-      if (s->gparts[k].x[0] != s->gparts[k].part->x[0] ||
-          s->gparts[k].x[1] != s->gparts[k].part->x[1] ||
-          s->gparts[k].x[2] != s->gparts[k].part->x[2])
+      if (s->gparts[k].x[0] != part->x[0] || s->gparts[k].x[1] != part->x[1] ||
+          s->gparts[k].x[2] != part->x[2])
        error("Linked particles are not at the same position !");
    }
  }
  for (size_t k = 0; k < s->nr_parts; ++k) {
 
-    if (s->parts[k].gpart != NULL) {
-      if (s->parts[k].gpart->part != &s->parts[k]) error("Linking problem !");
-    }
+    if (s->parts[k].gpart != NULL && s->parts[k].gpart->id_or_neg_offset != -k)
+      error("Linking problem !");
  }
 #endif
@@ -3097,9 +3246,6 @@ void engine_init(struct engine *e, struct space *s,
  part_create_mpi_types();
 #endif
 
-  /* Initialize the threadpool. */
-  threadpool_init(&e->threadpool, e->nr_threads);
-
  /* First of all, init the barrier and lock it. */
  if (pthread_mutex_init(&e->barrier_mutex, NULL) != 0)
    error("Failed to initialize barrier mutex.");
@@ -3114,7 +3260,16 @@ void engine_init(struct engine *e, struct space *s,
  /* Init the scheduler with enough tasks for the initial sorting tasks. */
  const int nr_tasks = 2 * s->tot_cells + 2 * e->nr_threads;
  scheduler_init(&e->sched, e->s, nr_tasks, nr_queues, scheduler_flag_steal,
-                 e->nodeID, &e->threadpool);
+                 e->nodeID);
+
+  /* Create the sorting tasks. */
*/ + for (int i = 0; i < e->nr_threads; i++) { + scheduler_addtask(&e->sched, task_type_part_sort, task_subtype_none, i, 0, + NULL, NULL, 0); + + scheduler_addtask(&e->sched, task_type_gpart_sort, task_subtype_none, i, 0, + NULL, NULL, 0); + } scheduler_ranktasks(&e->sched); diff --git a/src/engine.h b/src/engine.h index dc73b32523649cb54fd66ec4308b0aa4d8cadcaf..7333bf3ac5597510c5380204012dfbeb1a3545f4 100644 --- a/src/engine.h +++ b/src/engine.h @@ -220,7 +220,7 @@ void engine_launch(struct engine *e, int nr_runners, unsigned int mask, unsigned int submask); void engine_prepare(struct engine *e); void engine_print(struct engine *e); -void engine_init_particles(struct engine *e); +void engine_init_particles(struct engine *e, int flag_entropy_ICs); void engine_step(struct engine *e); void engine_maketasks(struct engine *e); void engine_split(struct engine *e, struct partition *initial_partition); diff --git a/src/gravity/Default/gravity_debug.h b/src/gravity/Default/gravity_debug.h index 21d775d703c8bd7000ad03386fe818124642a8f1..7cf375a1fdf7bccc4131dc415ab2d4acbbf2d3bc 100644 --- a/src/gravity/Default/gravity_debug.h +++ b/src/gravity/Default/gravity_debug.h @@ -18,7 +18,7 @@ ******************************************************************************/ __attribute__((always_inline)) INLINE static void gravity_debug_particle( - struct gpart* p) { + const struct gpart* p) { printf( "x=[%.3e,%.3e,%.3e], " "v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " diff --git a/src/gravity/Default/gravity_io.h b/src/gravity/Default/gravity_io.h index 129c4b39828ca73d2d80d79edbdaa8ec4d5a9e01..74f364dd97361f0513755bedec83fe7cb277c36b 100644 --- a/src/gravity/Default/gravity_io.h +++ b/src/gravity/Default/gravity_io.h @@ -39,8 +39,8 @@ __attribute__((always_inline)) INLINE static void darkmatter_read_particles( COMPULSORY); readArray(h_grp, "Velocities", FLOAT, N, 3, gparts, N_total, offset, v_full, COMPULSORY); - readArray(h_grp, "ParticleIDs", ULONGLONG, N, 1, gparts, N_total, offset, id, - COMPULSORY); + readArray(h_grp, "ParticleIDs", ULONGLONG, N, 1, gparts, N_total, offset, + id_or_neg_offset, COMPULSORY); } /** @@ -75,6 +75,6 @@ __attribute__((always_inline)) INLINE static void darkmatter_write_particles( Ndm, 3, gparts, Ndm_total, mpi_rank, offset, v_full, us, UNIT_CONV_SPEED); writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", - ULONGLONG, Ndm, 1, gparts, Ndm_total, mpi_rank, offset, id, us, - UNIT_CONV_NO_UNITS); + ULONGLONG, Ndm, 1, gparts, Ndm_total, mpi_rank, offset, + id_or_neg_offset, us, UNIT_CONV_NO_UNITS); } diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h index a54fc1d18e7e84a1ca7a7e7913117ed207549271..b6c9e62559207cb25323c8a108e4bffc87ea0fcf 100644 --- a/src/gravity/Default/gravity_part.h +++ b/src/gravity/Default/gravity_part.h @@ -47,14 +47,8 @@ struct gpart { /* float tx; */ /* float tv; */ - /* Anonymous union for id/part. */ - union { - - /* Particle ID. */ - long long id; - - /* Pointer to corresponding SPH part. */ - struct part* part; - }; + /* Particle ID. If negative, it is the negative offset of the #part with + which this gpart is linked. 
*/ + long long id_or_neg_offset; } __attribute__((aligned(gpart_align))); diff --git a/src/hydro/Default/hydro_debug.h b/src/hydro/Default/hydro_debug.h index 79ee392d46ca75a2c097bf045b2d82c9f3dc96c0..9b8136e8349398e4e9e804459a5de23d21043564 100644 --- a/src/hydro/Default/hydro_debug.h +++ b/src/hydro/Default/hydro_debug.h @@ -18,7 +18,7 @@ ******************************************************************************/ __attribute__((always_inline)) INLINE static void hydro_debug_particle( - struct part* p, struct xpart* xp) { + const struct part* p, const struct xpart* xp) { printf( "x=[%.3e,%.3e,%.3e], " "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " diff --git a/src/hydro/Default/hydro_io.h b/src/hydro/Default/hydro_io.h index de71963b05149fa0b2df6222424478a6ad9b1f44..fb1f7af922b2272779c7251b15e304880a2af520 100644 --- a/src/hydro/Default/hydro_io.h +++ b/src/hydro/Default/hydro_io.h @@ -122,3 +122,10 @@ void writeSPHflavour(hid_t h_grpsph) { writeAttribute_f(h_grpsph, "Maximal Delta u change over dt", const_max_u_change); } + +/** + * @brief Are we writing entropy in the internal energy field ? + * + * @return 1 if entropy is in 'internal energy', 0 otherwise. + */ +int writeEntropyFlag() { return 0; } diff --git a/src/hydro/Gadget2/hydro_debug.h b/src/hydro/Gadget2/hydro_debug.h index 31e89d438bc96c0f0f2ba56249664d28036cb607..b67e79182ccaaee7c0421c57a91ec9fa2adae65c 100644 --- a/src/hydro/Gadget2/hydro_debug.h +++ b/src/hydro/Gadget2/hydro_debug.h @@ -18,7 +18,7 @@ ******************************************************************************/ __attribute__((always_inline)) INLINE static void hydro_debug_particle( - struct part* p, struct xpart* xp) { + const struct part* p, const struct xpart* xp) { printf( "x=[%.3e,%.3e,%.3e], " "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " diff --git a/src/hydro/Gadget2/hydro_io.h b/src/hydro/Gadget2/hydro_io.h index b977f25386fab0787c925635297a48fa85a8df24..f7ee04cbbad07ebb06d00ee39a614b9fed5c4dfd 100644 --- a/src/hydro/Gadget2/hydro_io.h +++ b/src/hydro/Gadget2/hydro_io.h @@ -83,8 +83,8 @@ __attribute__((always_inline)) INLINE static void hydro_write_particles( writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "SmoothingLength", FLOAT, N, 1, parts, N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Entropy", FLOAT, N, - 1, parts, N_total, mpi_rank, offset, entropy, us, + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "InternalEnergy", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, entropy, us, UNIT_CONV_ENTROPY_PER_UNIT_MASS); writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", ULONGLONG, N, 1, parts, N_total, mpi_rank, offset, id, us, @@ -113,3 +113,10 @@ void writeSPHflavour(hid_t h_grpsph) { writeAttribute_f(h_grpsph, "Viscosity alpha", const_viscosity_alpha); writeAttribute_f(h_grpsph, "Viscosity beta", 3.f); } + +/** + * @brief Are we writing entropy in the internal energy field ? + * + * @return 1 if entropy is in 'internal energy', 0 otherwise. 
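Each hydro flavour now answers writeEntropyFlag() once, and the snapshot writers use it to fill slot 0 (gas) of the Flag_Entropy_ICs header attribute. A sketch of that write-side logic, with write_attribute() as an illustrative stand-in for the HDF5 writeAttribute() calls in the *_io.c files:

#define NUM_PARTICLE_TYPES 6

extern int writeEntropyFlag(void);  /* 1 for the Gadget2 flavour, 0 otherwise */

/* Illustrative stand-in for writeAttribute(h_grp, "Flag_Entropy_ICs", ...). */
extern void write_attribute(const char *name, const unsigned int *buf, int n);

void write_entropy_header(void) {
  unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0};
  flagEntropy[0] = writeEntropyFlag();  /* only gas carries the flag */
  write_attribute("Flag_Entropy_ICs", flagEntropy, NUM_PARTICLE_TYPES);
}
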
+ */ +int writeEntropyFlag() { return 1; } diff --git a/src/hydro/Minimal/hydro_debug.h b/src/hydro/Minimal/hydro_debug.h index 127ba75e99418b6a5dc197a44ccdc77de3cdef15..1e0e3f1f111149a3babeef431893dff2cbe6bb3c 100644 --- a/src/hydro/Minimal/hydro_debug.h +++ b/src/hydro/Minimal/hydro_debug.h @@ -18,7 +18,7 @@ ******************************************************************************/ __attribute__((always_inline)) INLINE static void hydro_debug_particle( - struct part* p, struct xpart* xp) { + const struct part* p, const struct xpart* xp) { printf( "x=[%.3e,%.3e,%.3e], " "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e], " diff --git a/src/hydro/Minimal/hydro_io.h b/src/hydro/Minimal/hydro_io.h index 1bbfe1065358017e0b27e2131ff717848221fe9c..bed6b8f8221dc3a6e94c65d101c68aa9b3bdea91 100644 --- a/src/hydro/Minimal/hydro_io.h +++ b/src/hydro/Minimal/hydro_io.h @@ -115,3 +115,10 @@ void writeSPHflavour(hid_t h_grpsph) { writeAttribute_f(h_grpsph, "Maximal Delta u change over dt", const_max_u_change); } + +/** + * @brief Are we writing entropy in the internal energy field ? + * + * @return 1 if entropy is in 'internal energy', 0 otherwise. + */ +int writeEntropyFlag() { return 0; } diff --git a/src/parallel_io.c b/src/parallel_io.c index c5cac1cb5efc6e533e599867e39cdd7c7b2c87fa..1411b85b9b4144830aa6a9f37a487b3148a2db21 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -367,8 +367,8 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, */ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, struct gpart** gparts, size_t* Ngas, size_t* Ngparts, - int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, - MPI_Info info, int dry_run) { + int* periodic, int* flag_entropy, int mpi_rank, + int mpi_size, MPI_Comm comm, MPI_Info info, int dry_run) { hid_t h_file = 0, h_grp = 0; /* GADGET has only cubic boxes (in cosmological mode) */ double boxSize[3] = {0.0, -1.0, -1.0}; @@ -404,6 +404,7 @@ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, if (h_grp < 0) error("Error while opening file header\n"); /* Read the relevant information and print status */ + readAttribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy); readAttribute(h_grp, "BoxSize", DOUBLE, boxSize); readAttribute(h_grp, "NumPart_Total", UINT, numParticles); readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord); @@ -486,7 +487,7 @@ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, break; default: - error("Particle Type %d not yet supported. Aborting", ptype); + message("Particle Type %d not yet supported. 
Particles ignored", ptype); } /* Close particle group */ @@ -626,6 +627,7 @@ void write_output_parallel(struct engine* e, const char* baseName, double MassTable[6] = {0., 0., 0., 0., 0., 0.}; writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, NUM_PARTICLE_TYPES); unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0}; + flagEntropy[0] = writeEntropyFlag(); writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1); diff --git a/src/parallel_io.h b/src/parallel_io.h index 26757cb679e475d6acd2ce3c408135dfe5e49081..709479fab029e862caddb36c4216b85077e96cec 100644 --- a/src/parallel_io.h +++ b/src/parallel_io.h @@ -36,8 +36,8 @@ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, struct gpart** gparts, size_t* Ngas, size_t* Ngparts, - int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, - MPI_Info info, int dry_run); + int* periodic, int* flag_entropy, int mpi_rank, + int mpi_size, MPI_Comm comm, MPI_Info info, int dry_run); void write_output_parallel(struct engine* e, const char* baseName, struct UnitSystem* us, int mpi_rank, int mpi_size, diff --git a/src/part.c b/src/part.c index b89abdde40fe8c7a57d1e9ac9e18fece83ba1f21..b00eaccaae0e86f7c4e8019a307f0bf455687b7c 100644 --- a/src/part.c +++ b/src/part.c @@ -29,6 +29,36 @@ #include "error.h" #include "part.h" +/** + * @brief Re-link the #gpart%s associated with the list of #part%s. + * + * @param parts The list of #part. + * @param N The number of particles to re-link; + * @param offset The offset of #part%s relative to the global parts list. + */ +void part_relink_gparts(struct part *parts, size_t N, ptrdiff_t offset) { + for (size_t k = 0; k < N; k++) { + if (parts[k].gpart) { + parts[k].gpart->id_or_neg_offset = -(k + offset); + } + } +} + +/** + * @brief Re-link the #gpart%s associated with the list of #part%s. + * + * @param gparts The list of #gpart. + * @param N The number of particles to re-link; + * @param parts The global part array in which to find the #gpart offsets. + */ +void part_relink_parts(struct gpart *gparts, size_t N, struct part *parts) { + for (size_t k = 0; k < N; k++) { + if (gparts[k].id_or_neg_offset <= 0) { + parts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; + } + } +} + #ifdef WITH_MPI /* MPI data type for the particle transfers */ MPI_Datatype part_mpi_type; diff --git a/src/part.h b/src/part.h index e99be6e51a9bd74dd9eec8f590e80989e83ec2e1..efca7b6b5bef49f20df1e2c45b30f65ecbbf4960 100644 --- a/src/part.h +++ b/src/part.h @@ -22,6 +22,9 @@ /* Config parameters. */ #include "../config.h" +/* Standard headers. */ +#include <stddef.h> + /* MPI headers. */ #ifdef WITH_MPI #include <mpi.h> @@ -49,6 +52,8 @@ /* Import the right gravity particle definition */ #include "./gravity/Default/gravity_part.h" +void part_relink_gparts(struct part *parts, size_t N, ptrdiff_t offset); +void part_relink_parts(struct gpart *gparts, size_t N, struct part *parts); #ifdef WITH_MPI /* MPI data type for the particle transfers */ extern MPI_Datatype part_mpi_type; diff --git a/src/partition.c b/src/partition.c index 12da7e38f65c7b258dcd3946b2a8e560004f874e..610c21de789c271b5cf7e17d9536c9ae427e6b23 100644 --- a/src/partition.c +++ b/src/partition.c @@ -457,9 +457,15 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, t->type != task_type_kick && t->type != task_type_init) continue; - /* Get the task weight. */ + /* Get the task weight. 
The measured interval can be slightly negative on multi-socket + * machines when the runners are not pinned to cores; don't treat this as + * an error, just report it and ignore these tasks. */ int w = (t->toc - t->tic) * wscale; - if (w < 0) error("Bad task weight (%d).", w); + if (w < 0) { + message("Task toc before tic: -%.3f %s (try using processor affinity).", + clocks_from_ticks(t->tic - t->toc), clocks_getunit()); + w = 0; + } /* Do we need to re-scale? */ wtot += w; diff --git a/src/runner.c b/src/runner.c index 39dada40d7d9d96cefcef028bb3e70b5954db9e2..761201d7ee875fdb5426ba7c65f7dc3f195c8f56 100644 --- a/src/runner.c +++ b/src/runner.c @@ -765,7 +765,7 @@ void runner_do_kick_fixdt(struct runner *r, struct cell *c, int timer) { struct gpart *const gp = &gparts[k]; /* If the g-particle has no counterpart */ - if (gp->id < 0) { + if (gp->id_or_neg_offset > 0) { /* First, finish the force calculation */ gravity_end_force(gp); @@ -877,7 +877,7 @@ void runner_do_kick(struct runner *r, struct cell *c, int timer) { struct gpart *const gp = &gparts[k]; /* If the g-particle has no counterpart and needs to be kicked */ - if (gp->id < 0) { + if (gp->id_or_neg_offset > 0) { if (gp->ti_end <= ti_current) { @@ -985,19 +985,36 @@ void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) { int ti_end_max = 0; float h_max = 0.f; - /* Collect everything... */ - for (size_t k = 0; k < nr_parts; k++) { - const int ti_end = parts[k].ti_end; - // if(ti_end < ti_current) error("Received invalid particle !"); - ti_end_min = min(ti_end_min, ti_end); - ti_end_max = max(ti_end_max, ti_end); - h_max = fmaxf(h_max, parts[k].h); + /* If this cell is a leaf, collect the particle data. */ + if (!c->split) { + + /* Collect everything... */ + for (size_t k = 0; k < nr_parts; k++) { + const int ti_end = parts[k].ti_end; + // if(ti_end < ti_current) error("Received invalid particle !"); + ti_end_min = min(ti_end_min, ti_end); + ti_end_max = max(ti_end_max, ti_end); + h_max = fmaxf(h_max, parts[k].h); + } + for (size_t k = 0; k < nr_gparts; k++) { + const int ti_end = gparts[k].ti_end; + // if(ti_end < ti_current) error("Received invalid particle !"); + ti_end_min = min(ti_end_min, ti_end); + ti_end_max = max(ti_end_max, ti_end); + } } - for (size_t k = 0; k < nr_gparts; k++) { - const int ti_end = gparts[k].ti_end; - // if(ti_end < ti_current) error("Received invalid particle !"); - ti_end_min = min(ti_end_min, ti_end); - ti_end_max = max(ti_end_max, ti_end); + + + /* Otherwise, recurse and collect. */ + else { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + runner_do_recv_cell(r, c->progeny[k], 0); + ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); + ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); + h_max = fmaxf(h_max, c->progeny[k]->h_max); + } + } } /* ... and store. 
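The clamp above replaces a hard error. A standalone sketch of the same defensive weighting, with the ticks type and the report reduced to their essentials (SWIFT's clocks_from_ticks()/clocks_getunit() are left out):

#include <stdio.h>

typedef long long ticks;

/* On machines where runners migrate between sockets, per-core tick
   counters need not be synchronised, so toc can precede tic. */
static int task_weight(ticks tic, ticks toc, float wscale) {
  int w = (int)((toc - tic) * wscale);
  if (w < 0) {
    fprintf(stderr, "task toc before tic; try pinning runners to cores\n");
    w = 0;  /* report and ignore rather than abort the repartition */
  }
  return w;
}
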
*/ @@ -1100,9 +1117,17 @@ void *runner_main(void *data) { runner_do_kick_fixdt(r, ci, 1); break; case task_type_send: + if (t->subtype == task_subtype_tend) { + free(t->buff); + } break; case task_type_recv: - runner_do_recv_cell(r, ci, 1); + if (t->subtype == task_subtype_tend) { + cell_unpack_ti_ends(ci, t->buff); + free(t->buff); + } else { + runner_do_recv_cell(r, ci, 1); + } break; case task_type_grav_external: runner_do_grav_external(r, t->ci, 1); diff --git a/src/scheduler.c b/src/scheduler.c index c4ad2b97f24d988996ecec7be6f37eaa5212a175..e3bb0716031427e566de7b50019b7e16cbddc5c1 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -681,9 +681,6 @@ struct task *scheduler_addtask(struct scheduler *s, int type, int subtype, t->rid = -1; t->last_rid = -1; - /* Init the lock. */ - lock_init(&t->lock); - /* Add an index for it. */ // lock_lock( &s->lock ); s->tasks_ind[atomic_inc(&s->nr_tasks)] = ind; @@ -959,8 +956,6 @@ void scheduler_reweight(struct scheduler *s) { default: break; } - if (t->type == task_type_send) t->weight = INT_MAX / 8; - if (t->type == task_type_recv) t->weight *= 1.41; } // message( "weighting tasks took %.3f %s." , // clocks_from_ticks( getticks() - tic ), clocks_getunit()); @@ -1115,8 +1110,14 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_recv: #ifdef WITH_MPI - err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type, - t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + if (t->subtype == task_subtype_tend) { + t->buff = malloc(sizeof(int) * t->ci->pcell_size); + err = MPI_Irecv(t->buff, t->ci->pcell_size, MPI_INT, t->ci->nodeID, + t->flags, MPI_COMM_WORLD, &t->req); + } else { + err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type, + t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + } if (err != MPI_SUCCESS) { mpi_error(err, "Failed to emit irecv for particle data."); } @@ -1130,8 +1131,15 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_send: #ifdef WITH_MPI - err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type, - t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + if (t->subtype == task_subtype_tend) { + t->buff = malloc(sizeof(int) * t->ci->pcell_size); + cell_pack_ti_ends(t->ci, t->buff); + err = MPI_Isend(t->buff, t->ci->pcell_size, MPI_INT, t->cj->nodeID, + t->flags, MPI_COMM_WORLD, &t->req); + } else { + err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type, + t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + } if (err != MPI_SUCCESS) { mpi_error(err, "Failed to emit isend for particle data."); } diff --git a/src/serial_io.c b/src/serial_io.c index 7e78276dc83430655b4ea4de2fb7425e71e07966..fac3c1b006375eb43a4e799f96a4979f26238668 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -424,8 +424,8 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, */ void read_ic_serial(char* fileName, double dim[3], struct part** parts, struct gpart** gparts, size_t* Ngas, size_t* Ngparts, - int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, - MPI_Info info, int dry_run) { + int* periodic, int* flag_entropy, int mpi_rank, + int mpi_size, MPI_Comm comm, MPI_Info info, int dry_run) { hid_t h_file = 0, h_grp = 0; /* GADGET has only cubic boxes (in cosmological mode) */ double boxSize[3] = {0.0, -1.0, -1.0}; @@ -462,6 +462,7 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, if (h_grp < 0) error("Error while opening file header\n"); /* Read the relevant information and print status */ + readAttribute(h_grp, 
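The new task_subtype_tend path ships a plain buffer of ints (one ti_end per pcell) instead of full particle data. A minimal sketch of that non-blocking exchange; the pack loop stands in for cell_pack_ti_ends(), and both buffers are handed back to the caller to be freed once the request completes, as runner_main() does above:

#include <mpi.h>
#include <stdlib.h>

static void send_tend(const int *ti_ends, int n, int dest, int tag,
                      MPI_Request *req, int **buff_out) {
  int *buff = malloc(sizeof(int) * n);
  for (int k = 0; k < n; k++) buff[k] = ti_ends[k]; /* pack */
  MPI_Isend(buff, n, MPI_INT, dest, tag, MPI_COMM_WORLD, req);
  *buff_out = buff;  /* freed by the runner once the send completes */
}

static void recv_tend(int n, int src, int tag, MPI_Request *req,
                      int **buff_out) {
  int *buff = malloc(sizeof(int) * n);
  MPI_Irecv(buff, n, MPI_INT, src, tag, MPI_COMM_WORLD, req);
  *buff_out = buff;  /* unpacked, then freed, by the runner */
}
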
"Flag_Entropy_ICs", INT, flag_entropy); readAttribute(h_grp, "BoxSize", DOUBLE, boxSize); readAttribute(h_grp, "NumPart_Total", UINT, numParticles); readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord); @@ -488,6 +489,7 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, } /* Now need to broadcast that information to all ranks. */ + MPI_Bcast(flag_entropy, 1, MPI_INT, 0, comm); MPI_Bcast(periodic, 1, MPI_INT, 0, comm); MPI_Bcast(&N_total, NUM_PARTICLE_TYPES, MPI_LONG_LONG, 0, comm); MPI_Bcast(dim, 3, MPI_DOUBLE, 0, comm); @@ -562,7 +564,8 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, break; default: - error("Particle Type %d not yet supported. Aborting", ptype); + message("Particle Type %d not yet supported. Particles ignored", + ptype); } /* Close particle group */ @@ -699,6 +702,7 @@ void write_output_serial(struct engine* e, const char* baseName, double MassTable[6] = {0., 0., 0., 0., 0., 0.}; writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, NUM_PARTICLE_TYPES); unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0}; + flagEntropy[0] = writeEntropyFlag(); writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1); diff --git a/src/serial_io.h b/src/serial_io.h index 6b64624772214494a43316d3a8aa3910c7238dc8..fa88d5cb87edfba2c101b6096b8a57b04b7178cd 100644 --- a/src/serial_io.h +++ b/src/serial_io.h @@ -36,8 +36,8 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, struct gpart** gparts, size_t* Ngas, size_t* Ngparts, - int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, - MPI_Info info, int dry_run); + int* periodic, int* flag_entropy, int mpi_rank, + int mpi_size, MPI_Comm comm, MPI_Info info, int dry_run); void write_output_serial(struct engine* e, const char* baseName, struct UnitSystem* us, int mpi_rank, int mpi_size, diff --git a/src/single_io.c b/src/single_io.c index 3f65aae0b5d495670f2b4862e466ec849f997d63..329ec7253247a6bec992e9cb740d322fa3048a01 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -322,6 +322,8 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, * @param Ngas (output) number of Gas particles read. * @param Ngparts (output) The number of #gpart read. * @param periodic (output) 1 if the volume is periodic, 0 if not. + * @param flag_entropy 1 if the ICs contained Entropy in the InternalEnergy + * field * @param dry_run If 1, don't read the particle. Only allocates the arrays. 
* * Opens the HDF5 file fileName and reads the particles contained @@ -334,7 +336,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, */ void read_ic_single(char* fileName, double dim[3], struct part** parts, struct gpart** gparts, size_t* Ngas, size_t* Ngparts, - int* periodic, int dry_run) { + int* periodic, int* flag_entropy, int dry_run) { hid_t h_file = 0, h_grp = 0; /* GADGET has only cubic boxes (in cosmological mode) */ double boxSize[3] = {0.0, -1.0, -1.0}; @@ -368,6 +370,7 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, if (h_grp < 0) error("Error while opening file header\n"); /* Read the relevant information and print status */ + readAttribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy); readAttribute(h_grp, "BoxSize", DOUBLE, boxSize); readAttribute(h_grp, "NumPart_Total", UINT, numParticles); readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord); @@ -436,7 +439,7 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, break; default: - error("Particle Type %d not yet supported. Aborting", ptype); + message("Particle Type %d not yet supported. Particles ignored", ptype); } /* Close particle group */ @@ -549,6 +552,7 @@ void write_output_single(struct engine* e, const char* baseName, double MassTable[NUM_PARTICLE_TYPES] = {0}; writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, NUM_PARTICLE_TYPES); unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0}; + flagEntropy[0] = writeEntropyFlag(); writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1); diff --git a/src/single_io.h b/src/single_io.h index d2c87655e1c91b92e8ccf2aa50d2e0bf98f13482..4936aa3eb35a5c76a5a8f242c9cc79bbc86161a0 100644 --- a/src/single_io.h +++ b/src/single_io.h @@ -31,7 +31,7 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, struct gpart** gparts, size_t* Ngas, size_t* Ndm, - int* periodic, int dry_run); + int* periodic, int* flag_entropy, int dry_run); void write_output_single(struct engine* e, const char* baseName, struct UnitSystem* us); diff --git a/src/space.c b/src/space.c index 8ca707ae0b84cf5d4dd956c7992770ea57d0723c..046b0209b79244e6d4663cd517e8517af1a816da 100644 --- a/src/space.c +++ b/src/space.c @@ -349,6 +349,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) { s->cells[k].gcount = 0; s->cells[k].init = NULL; s->cells[k].ghost = NULL; + s->cells[k].drift = NULL; s->cells[k].kick = NULL; s->cells[k].super = &s->cells[k]; } @@ -439,10 +440,10 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { s->parts[k] = s->parts[nr_parts]; s->parts[nr_parts] = tp; if (s->parts[k].gpart != NULL) { - s->parts[k].gpart->part = &s->parts[k]; + s->parts[k].gpart->id_or_neg_offset = -k; } if (s->parts[nr_parts].gpart != NULL) { - s->parts[nr_parts].gpart->part = &s->parts[nr_parts]; + s->parts[nr_parts].gpart->id_or_neg_offset = -nr_parts; } const struct xpart txp = s->xparts[k]; s->xparts[k] = s->xparts[nr_parts]; @@ -478,11 +479,12 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { const struct gpart tp = s->gparts[k]; s->gparts[k] = s->gparts[nr_gparts]; s->gparts[nr_gparts] = tp; - if (s->gparts[k].id > 0) { - s->gparts[k].part->gpart = &s->gparts[k]; + if (s->gparts[k].id_or_neg_offset <= 0) { + s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; } - if (s->gparts[nr_gparts].id > 0) { - s->gparts[nr_gparts].part->gpart = &s->gparts[nr_gparts]; 
+ if (s->gparts[nr_gparts].id_or_neg_offset <= 0) { + s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = + &s->gparts[nr_gparts]; } const int t = gind[k]; gind[k] = gind[nr_gparts]; @@ -548,8 +550,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose); /* Re-link the gparts. */ - for (size_t k = 0; k < nr_parts; k++) - if (s->parts[k].gpart != NULL) s->parts[k].gpart->part = &s->parts[k]; + part_relink_gparts(s->parts, nr_parts, 0); #ifdef SWIFT_DEBUG_CHECKS /* Verify space_sort_struct. */ @@ -597,8 +598,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose); /* Re-link the parts. */ - for (int k = 0; k < nr_gparts; k++) - if (s->gparts[k].id > 0) s->gparts[k].part->gpart = &s->gparts[k]; + part_relink_parts(s->gparts, nr_gparts, s->parts); /* We no longer need the indices as of here. */ free(gind); @@ -607,21 +607,22 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* Verify that the links are correct */ for (size_t k = 0; k < nr_gparts; ++k) { - if (s->gparts[k].id > 0) { + if (s->gparts[k].id_or_neg_offset < 0) { - if (s->gparts[k].part->gpart != &s->gparts[k]) error("Linking problem !"); + const struct part *part = &s->parts[-s->gparts[k].id_or_neg_offset]; - if (s->gparts[k].x[0] != s->gparts[k].part->x[0] || - s->gparts[k].x[1] != s->gparts[k].part->x[1] || - s->gparts[k].x[2] != s->gparts[k].part->x[2]) + if (part->gpart != &s->gparts[k]) error("Linking problem !"); + + if (s->gparts[k].x[0] != part->x[0] || s->gparts[k].x[1] != part->x[1] || + s->gparts[k].x[2] != part->x[2]) error("Linked particles are not at the same position !"); } } for (size_t k = 0; k < nr_parts; ++k) { - if (s->parts[k].gpart != NULL) { - - if (s->parts[k].gpart->part != &s->parts[k]) error("Linking problem !"); + if (s->parts[k].gpart != NULL && + s->parts[k].gpart->id_or_neg_offset != -k) { + error("Linking problem !"); } } #endif diff --git a/src/task.c b/src/task.c index 77c3a98fb15b1601bc78fb7cda4ab53b00d58c4b..fb101e3025b332402dc6a9c0ba11d947cf12a7ab 100644 --- a/src/task.c +++ b/src/task.c @@ -53,7 +53,7 @@ const char *taskID_names[task_type_count] = { "grav_down", "grav_external", "comm_root"}; const char *subtaskID_names[task_type_count] = {"none", "density", "force", - "grav"}; + "grav", "t_end"}; /** * @brief Computes the overlap between the parts array of two given cells. @@ -208,77 +208,6 @@ int task_lock(struct task *t) { return 1; } -/** - * @brief Remove all unlocks to tasks that are of the given type. - * - * @param t The #task. - * @param type The task type ID to remove. - */ - -void task_cleanunlock(struct task *t, int type) { - - int k; - - lock_lock(&t->lock); - - for (k = 0; k < t->nr_unlock_tasks; k++) - if (t->unlock_tasks[k]->type == type) { - t->nr_unlock_tasks -= 1; - t->unlock_tasks[k] = t->unlock_tasks[t->nr_unlock_tasks]; - } - - lock_unlock_blind(&t->lock); -} - -/** - * @brief Remove an unlock_task from the given task. - * - * @param ta The unlocking #task. - * @param tb The #task that will be unlocked. 
- */ - -void task_rmunlock(struct task *ta, struct task *tb) { - - int k; - - lock_lock(&ta->lock); - - for (k = 0; k < ta->nr_unlock_tasks; k++) - if (ta->unlock_tasks[k] == tb) { - ta->nr_unlock_tasks -= 1; - ta->unlock_tasks[k] = ta->unlock_tasks[ta->nr_unlock_tasks]; - lock_unlock_blind(&ta->lock); - return; - } - error("Task not found."); -} - -/** - * @brief Remove an unlock_task from the given task. - * - * @param ta The unlocking #task. - * @param tb The #task that will be unlocked. - * - * Differs from #task_rmunlock in that it will not fail if - * the task @c tb is not in the unlocks of @c ta. - */ - -void task_rmunlock_blind(struct task *ta, struct task *tb) { - - int k; - - lock_lock(&ta->lock); - - for (k = 0; k < ta->nr_unlock_tasks; k++) - if (ta->unlock_tasks[k] == tb) { - ta->nr_unlock_tasks -= 1; - ta->unlock_tasks[k] = ta->unlock_tasks[ta->nr_unlock_tasks]; - break; - } - - lock_unlock_blind(&ta->lock); -} - /** * @brief Prints the list of tasks contained in a given mask * diff --git a/src/task.h b/src/task.h index 4c6e396c7fd6d7d27bcc85d56b15845c424d39d6..01eb859e7740c3588bf1d7691f0f031ecfebe1cb 100644 --- a/src/task.h +++ b/src/task.h @@ -62,6 +62,7 @@ enum task_subtypes { task_subtype_density, task_subtype_force, task_subtype_grav, + task_subtype_tend, task_subtype_count }; @@ -75,10 +76,10 @@ struct task { char skip, tight, implicit; int flags, wait, rank, weight; - swift_lock_type lock; - struct cell *ci, *cj; + void *buff; + #ifdef WITH_MPI MPI_Request req; #endif @@ -91,9 +92,6 @@ struct task { }; /* Function prototypes. */ -void task_rmunlock(struct task *ta, struct task *tb); -void task_rmunlock_blind(struct task *ta, struct task *tb); -void task_cleanunlock(struct task *t, int type); void task_unlock(struct task *t); float task_overlap(const struct task *ta, const struct task *tb); int task_lock(struct task *t); diff --git a/src/tools.c b/src/tools.c index 1a2b794f688047183827e5c2ed6ba80ba1339080..710efd7e4dd54784ed735492b4e1af0b466ef682 100644 --- a/src/tools.c +++ b/src/tools.c @@ -181,16 +181,19 @@ void pairs_single_density(double *dim, long long int pid, } void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { + float r2, hi, hj, hig2, hjg2, dx[3]; struct part *pi, *pj; /* Implements a double-for loop and checks every interaction */ for (int i = 0; i < ci->count; ++i) { + pi = &ci->parts[i]; hi = pi->h; hig2 = hi * hi * kernel_gamma2; for (int j = 0; j < cj->count; ++j) { + pj = &cj->parts[j]; /* Pairwise distance */ @@ -202,6 +205,7 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hig2) { + /* Interact */ runner_iact_nonsym_density(r2, dx, hi, pj->h, pi, pj); } @@ -210,11 +214,13 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { /* Reverse double-for loop and checks every interaction */ for (int j = 0; j < cj->count; ++j) { + pj = &cj->parts[j]; hj = pj->h; hjg2 = hj * hj * kernel_gamma2; for (int i = 0; i < ci->count; ++i) { + pi = &ci->parts[i]; /* Pairwise distance */ @@ -226,6 +232,7 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? 
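With the union gone, looking up a gpart's ID needs the parts array at hand, which is why pairs_single_grav() grew a const struct part *parts argument. The lookup in isolation, with toy types standing in for SWIFT's:

struct id_gpart { long long id_or_neg_offset; };
struct id_part  { long long id; };

/* A positive value is the ID itself; a non-positive one is minus the
   offset of the hydro particle that carries the real ID. */
long long toy_gpart_id(const struct id_gpart *gp,
                       const struct id_part *parts) {
  return (gp->id_or_neg_offset > 0) ? gp->id_or_neg_offset
                                    : parts[-gp->id_or_neg_offset].id;
}
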
*/ if (r2 < hjg2) { + /* Interact */ runner_iact_nonsym_density(r2, dx, hj, pi->h, pj, pi); } @@ -239,11 +246,13 @@ void self_all_density(struct runner *r, struct cell *ci) { /* Implements a double-for loop and checks every interaction */ for (int i = 0; i < ci->count; ++i) { + pi = &ci->parts[i]; hi = pi->h; hig2 = hi * hi * kernel_gamma2; for (int j = i + 1; j < ci->count; ++j) { + pj = &ci->parts[j]; hj = pj->h; hjg2 = hj * hj * kernel_gamma2; @@ -259,12 +268,14 @@ void self_all_density(struct runner *r, struct cell *ci) { /* Hit or miss? */ if (r2 < hig2) { + /* Interact */ runner_iact_nonsym_density(r2, dxi, hi, hj, pi, pj); } /* Hit or miss? */ if (r2 < hjg2) { + dxi[0] = -dxi[0]; dxi[1] = -dxi[1]; dxi[2] = -dxi[2]; @@ -277,7 +288,9 @@ void self_all_density(struct runner *r, struct cell *ci) { } void pairs_single_grav(double *dim, long long int pid, - struct gpart *restrict parts, int N, int periodic) { + struct gpart *restrict gparts, const struct part *parts, + int N, int periodic) { + int i, k; // int mj, mk; // double maxratio = 1.0; @@ -288,18 +301,20 @@ void pairs_single_grav(double *dim, long long int pid, /* Find "our" part. */ for (k = 0; k < N; k++) - if ((parts[k].id > 0 && parts[k].part->id == pid) || parts[k].id == -pid) + if ((gparts[k].id_or_neg_offset < 0 && + parts[-gparts[k].id_or_neg_offset].id == pid) || + gparts[k].id_or_neg_offset == pid) break; if (k == N) error("Part not found."); - pi = parts[k]; + pi = gparts[k]; pi.a_grav[0] = 0.0f; pi.a_grav[1] = 0.0f; pi.a_grav[2] = 0.0f; /* Loop over all particle pairs. */ for (k = 0; k < N; k++) { - if (parts[k].id == pi.id) continue; - pj = parts[k]; + if (gparts[k].id_or_neg_offset == pi.id_or_neg_offset) continue; + pj = gparts[k]; for (i = 0; i < 3; i++) { dx[i] = pi.x[i] - pj.x[i]; if (periodic) { @@ -326,7 +341,8 @@ void pairs_single_grav(double *dim, long long int pid, /* Dump the result. */ message( "acceleration on gpart %lli is a=[ %e %e %e ], |a|=[ %.2e %.2e %.2e ].\n", - pi.part->id, a[0], a[1], a[2], aabs[0], aabs[1], aabs[2]); + parts[-pi.id_or_neg_offset].id, a[0], a[1], a[2], aabs[0], aabs[1], + aabs[2]); } /** @@ -335,6 +351,7 @@ void pairs_single_grav(double *dim, long long int pid, * @param N number of intervals in [0,1]. */ void density_dump(int N) { + int k; float r2[4] = {0.0f, 0.0f, 0.0f, 0.0f}, hi[4], hj[4]; struct part /**pi[4], *pj[4],*/ Pi[4], Pj[4]; diff --git a/src/tools.h b/src/tools.h index 2a4462f274d8e9acd1f2d8e79996ad92c630d404..97f036994bc0d1e2b4a95d806cfdbd253664a260 100644 --- a/src/tools.h +++ b/src/tools.h @@ -33,7 +33,8 @@ void factor(int value, int *f1, int *f2); void density_dump(int N); void pairs_single_grav(double *dim, long long int pid, - struct gpart *restrict parts, int N, int periodic); + struct gpart *restrict gparts, const struct part *parts, + int N, int periodic); void pairs_single_density(double *dim, long long int pid, struct part *restrict parts, int N, int periodic);
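
For reference, the two signatures changed in single_io.h and engine.h above meet at start-up roughly like this (a sketch: error handling and the unrelated stages of a real main() are omitted):

#include <stddef.h>

struct part; struct gpart; struct engine;

void read_ic_single(char *fileName, double dim[3], struct part **parts,
                    struct gpart **gparts, size_t *Ngas, size_t *Ngparts,
                    int *periodic, int *flag_entropy, int dry_run);
void engine_init_particles(struct engine *e, int flag_entropy_ICs);

void startup(struct engine *e, char *fileName) {
  double dim[3];
  struct part *parts = NULL;
  struct gpart *gparts = NULL;
  size_t Ngas = 0, Ngpart = 0;
  int periodic = 0, flag_entropy = 0;

  /* The reader now reports whether 'InternalEnergy' already holds entropy... */
  read_ic_single(fileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic,
                 &flag_entropy, /* dry_run= */ 0);

  /* ...so the first step only converts u -> A when it has to. */
  engine_init_particles(e, flag_entropy);
}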