diff --git a/.gitignore b/.gitignore index 8137ea759b24b3f4ec9909a460da4bcb47b0a1ac..5a986acbd59a818b151540fb9303eadb4f926f77 100644 --- a/.gitignore +++ b/.gitignore @@ -25,17 +25,30 @@ examples/swift_mindt examples/swift_mindt_mpi examples/swift_mpi -tests/testVectorize -tests/brute_force.dat -tests/swift_dopair.dat +tests/testPair +tests/brute_force_standard.dat +tests/swift_dopair_standard.dat +tests/brute_force_perturbed.dat +tests/swift_dopair_perturbed.dat +tests/test27cells +tests/brute_force_27_standard.dat +tests/swift_dopair_27_standard.dat +tests/brute_force_27_perturbed.dat +tests/swift_dopair_27_perturbed.dat tests/testGreetings tests/testReading tests/input.hdf5 tests/testSingle tests/testTimeIntegration tests/testSPHStep +tests/testKernel +tests/testParser +tests/parser_output.yml theory/latex/swift.pdf +theory/kernel/kernels.pdf +theory/kernel/kernel_derivatives.pdf +theory/kernel/kernel_definitions.pdf m4/libtool.m4 m4/ltoptions.m4 diff --git a/README b/README index 320df3f8ca6880776d338408c2c71ea82b1414c8..0c57e3f5656268c71bb7732af933302cbde9547b 100644 --- a/README +++ b/README @@ -1,4 +1,4 @@ - Welcome to the cosmological code + Welcome to the cosmological hydrodynamical code ______ _________________ / ___/ | / / _/ ___/_ __/ \__ \| | /| / // // /_ / / @@ -6,8 +6,26 @@ /____/ |__/|__/___/_/ /_/ SPH With Inter-dependent Fine-grained Tasking -Website: www.swiftsim.com -Twitter: @SwiftSimulation + Website: www.swiftsim.com + Twitter: @SwiftSimulation -See INSTALL.swift for instructions. +See INSTALL.swift for install instructions. +Usage: swift [OPTION] PARAMFILE + +Valid options are: + -c Run with cosmological time integration + -d Dry run. Read the parameter file, allocate memory but does not read + the particles from ICs and exit before the start of time integration. + Allows user to check validity of parameter and IC files as well as memory limits. 
+ -e Enable floating-point exceptions (debugging mode) + -f {int} Overwrite the CPU frequency (Hz) to be used for time measurements + -g Run with an external gravitational potential + -G Run with self-gravity + -s Run with SPH + -v [12] Increase the level of verbosity 1: MPI-rank 0 writes + 2: All MPI-ranks write + -y {int} Time-step frequency at which task graphs are dumped + -h Print this help message and exit + +See the file examples/parameter_example.yml for an example of parameter file. diff --git a/configure.ac b/configure.ac index e5d44fda300f15088c282b93b25499ecb242e24f..11ad6550d87f6764570f48449719292bcec3704d 100644 --- a/configure.ac +++ b/configure.ac @@ -287,11 +287,11 @@ AC_SUBST([METIS_LIBS]) AC_SUBST([METIS_INCS]) AM_CONDITIONAL([HAVEMETIS],[test -n "$METIS_LIBS"]) -# Check for zlib. -AC_CHECK_LIB([z],[gzopen],[ - AC_DEFINE([HAVE_LIBZ],[1],[Set to 1 if zlib is installed.]) - LDFLAGS="$LDFLAGS -lz" - ],[]) +# # Check for zlib. +# AC_CHECK_LIB([z],[gzopen],[ +# AC_DEFINE([HAVE_LIBZ],[1],[Set to 1 if zlib is installed.]) +# LDFLAGS="$LDFLAGS -lz" +# ],[]) # Check for HDF5. This is required. diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 8f61a060b37b0e62189160d0a8c61e713cfd3b8f..802d8c31c251e006711934b6d30ace6c47eec4ac 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -759,7 +759,9 @@ WARN_LOGFILE = # spaces. # Note: If this tag is empty the current directory is searched. -INPUT = @top_srcdir@ @top_srcdir@/src @top_srcdir@/src/hydro/Minimal @top_srcdir@/src/gravity/Default +INPUT = @top_srcdir@ @top_srcdir@/src @top_srcdir@/tests @top_srcdir@/examples +INPUT += @top_srcdir@/src/hydro/Minimal @top_srcdir@/src/gravity/Default +INPUT += @top_srcdir@/src/riemann # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. 
Doxygen uses diff --git a/examples/BigCosmoVolume/makeIC.py b/examples/BigCosmoVolume/makeIC.py index 0994e1c95e053defe7766122c52bc405c7239776..3020feaf753f817f039d2fd09c4fa4f7fb69b896 100644 --- a/examples/BigCosmoVolume/makeIC.py +++ b/examples/BigCosmoVolume/makeIC.py @@ -77,7 +77,7 @@ indices = indices < numPart coords = coords[indices,:] v = v[indices,:] m = m[indices] -h = h[indices] +h = h[indices] / 1.825742 # Correct from Gadget definition of h to physical definition u = u[indices] ids = ids[indices] diff --git a/examples/CosmoVolume/cosmoVolume.yml b/examples/CosmoVolume/cosmoVolume.yml new file mode 100644 index 0000000000000000000000000000000000000000..20d5febb280748a208633f75351d523b79286035 --- /dev/null +++ b/examples/CosmoVolume/cosmoVolume.yml @@ -0,0 +1,48 @@ + +# Define the system of units to use internally. +UnitSystem: + UnitMass_in_cgs: 1 # Grams + UnitLength_in_cgs: 1 # Centimeters + UnitVelocity_in_cgs: 1 # Centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Parameters for the task scheduling +Scheduler: + nr_threads: 16 # The number of threads per MPI rank to use. + nr_queues: 0 # The number of task queues to use. Use 0 to let the system decide. + cell_max_size: 8000000 # Maximal number of interactions per task (this is the default value). + cell_sub_size: 5000 # Maximal number of interactions per sub-task (this is the default value). + cell_split_size: 400 # Maximal number of particles per cell (this is the default value). + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 1. # The end time of the simulation (in internal units). + dt_min: 1e-6 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). 
+ +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2349 # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel). + delta_neighbours: 1. # The tolerance for the targeted number of neighbours. + CFL_condition: 0.1 # Courant-Friedrichs-Lewy condition for time integration. + max_ghost_iterations: 30 # Maximal number of iterations allowed to converge towards the smoothing length. + max_smoothing_length: 0.6 # Maximal smoothing length allowed (in internal units). + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./cosmoVolume.hdf5 # The file to read + h_scaling: 1. # A scaling factor to apply to all smoothing lengths in the ICs. + shift_x: 0. # A shift to apply to all particles read from the ICs (in internal units). + shift_y: 0. + shift_z: 0. + +# Parameters governing domain decomposition +DomainDecomposition: + initial_type: m # The initial strategy ("g", "m", "w", or "v"). See documentation for details. + initial_grid_x: 10 # Grid size if the 'g' strategy is chosen. + initial_grid_y: 10 + initial_grid_z: 10 + repartition_type: b # The re-decomposition strategy ("n", "b", "v", "e" or "x"). See documentation for details. + diff --git a/examples/CosmoVolume/run.sh b/examples/CosmoVolume/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..a788a35c76a7c0b205297a7de922a9a7e833243a --- /dev/null +++ b/examples/CosmoVolume/run.sh @@ -0,0 +1,10 @@ +#!/bin/bash + + # Generate the initial conditions if they are not present. +if [ ! -e cosmoVolume.hdf5 ] +then + echo "Fetching initial conditions for the cosmo volume example..." 
+ ./getIC.sh +fi + +../swift -s cosmoVolume.yml diff --git a/examples/GreshoVortex/makeIC.py b/examples/GreshoVortex/makeIC.py index 6aceeed559324f97b0b1e388ff0c3524498b52e4..12edcb6e8154ec6f865d28a6daeb02d385d14bbf 100644 --- a/examples/GreshoVortex/makeIC.py +++ b/examples/GreshoVortex/makeIC.py @@ -30,6 +30,7 @@ factor = 3 boxSize = [ 1.0 , 1.0, 1.0/factor ] L = 120 # Number of particles along one axis gamma = 5./3. # Gas adiabatic index +eta = 1.2349 # 48 ngbs with cubic spline kernel rho = 1 # Gas density P0 = 0. # Constant additional pressure (should have no impact on the dynamics) fileName = "greshoVortex.hdf5" @@ -73,7 +74,7 @@ for i in range(L): v[index,1] = v_phi * (x - boxSize[0] / 2) / r v[index,2] = 0. m[index] = mass - h[index] = 2.251 * boxSize[0] / L + h[index] = eta * boxSize[0] / L P = P0 if r < 0.2: P = P + 5. + 12.5*r2 @@ -105,6 +106,14 @@ grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] grp = file.create_group("/RuntimePars") grp.attrs["PeriodicBoundariesOn"] = periodic +#Units +grp = file.create_group("/Units") +grp.attrs["Unit length in cgs (U_L)"] = 1. +grp.attrs["Unit mass in cgs (U_M)"] = 1. +grp.attrs["Unit time in cgs (U_t)"] = 1. +grp.attrs["Unit current in cgs (U_I)"] = 1. +grp.attrs["Unit temperature in cgs (U_T)"] = 1. + #Particle group grp = file.create_group("/PartType0") ds = grp.create_dataset('Coordinates', (numPart, 3), 'd') diff --git a/examples/MultiTypes/makeIC.py b/examples/MultiTypes/makeIC.py index 3a41910c22c260086b5384b248a5c86ab6340a5e..cf889f9b6eab502f692cd6c8b4506c31664ecdcb 100644 --- a/examples/MultiTypes/makeIC.py +++ b/examples/MultiTypes/makeIC.py @@ -32,6 +32,7 @@ Lgas = int(sys.argv[1]) # Number of particles along one axis rhoGas = 2. # Density P = 1. # Pressure gamma = 5./3. # Gas adiabatic index +eta = 1.2349 # 48 ngbs with cubic spline kernel rhoDM = 1. 
Ldm = int(sys.argv[2]) # Number of particles along one axis @@ -61,11 +62,18 @@ grp.attrs["NumFilesPerSnapshot"] = 1 grp.attrs["MassTable"] = [0.0, massDM, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 - #Runtime parameters grp = file.create_group("/RuntimePars") grp.attrs["PeriodicBoundariesOn"] = periodic +#Units +grp = file.create_group("/Units") +grp.attrs["Unit length in cgs (U_L)"] = 1. +grp.attrs["Unit mass in cgs (U_M)"] = 1. +grp.attrs["Unit time in cgs (U_t)"] = 1. +grp.attrs["Unit current in cgs (U_I)"] = 1. +grp.attrs["Unit temperature in cgs (U_T)"] = 1. + # Gas Particle group grp = file.create_group("/PartType0") @@ -80,7 +88,7 @@ ds = grp.create_dataset('Masses', (numGas,1), 'f') ds[()] = m m = zeros(1) -h = full((numGas, 1), 1.1255 * boxSize / Lgas) +h = full((numGas, 1), eta * boxSize / Lgas) ds = grp.create_dataset('SmoothingLength', (numGas,1), 'f') ds[()] = h h = zeros(1) diff --git a/examples/PerturbedBox/makeIC.py b/examples/PerturbedBox/makeIC.py index 69c1a69199c9a5262f5ae6c4e95ca14699300fd4..ee1d845fc2149892909a54bf588046b0b1691b03 100644 --- a/examples/PerturbedBox/makeIC.py +++ b/examples/PerturbedBox/makeIC.py @@ -90,6 +90,14 @@ grp.attrs["NumPart_Total"] = numPart grp = file.create_group("/RuntimePars") grp.attrs["PeriodicBoundariesOn"] = periodic +#Units +grp = file.create_group("/Units") +grp.attrs["Unit length in cgs (U_L)"] = 1. +grp.attrs["Unit mass in cgs (U_M)"] = 1. +grp.attrs["Unit time in cgs (U_t)"] = 1. +grp.attrs["Unit current in cgs (U_I)"] = 1. +grp.attrs["Unit temperature in cgs (U_T)"] = 1. 
+ #Particle group grp = file.create_group("/PartType0") ds = grp.create_dataset('Coordinates', (numPart, 3), 'd') diff --git a/examples/SedovBlast/makeIC.py b/examples/SedovBlast/makeIC.py index 75ff81165df51780848e3d8ac679a6dbeb17a039..e64942e8e92ee6fe67142f841f566019b1a668be 100644 --- a/examples/SedovBlast/makeIC.py +++ b/examples/SedovBlast/makeIC.py @@ -33,6 +33,7 @@ P = 1.e-5 # Pressure E0= 1.e2 # Energy of the explosion pert = 0.1 gamma = 5./3. # Gas adiabatic index +eta = 1.2349 # 48 ngbs with cubic spline kernel fileName = "sedov.hdf5" @@ -67,7 +68,7 @@ for i in range(L): v[index,1] = 0. v[index,2] = 0. m[index] = mass - h[index] = 1.1255 * boxSize / L + h[index] = eta * boxSize / L u[index] = internalEnergy ids[index] = index + 1 if sqrt((x - boxSize/2.)**2 + (y - boxSize/2.)**2 + (z - boxSize/2.)**2) < 2.01 * boxSize/L: @@ -98,6 +99,14 @@ grp.attrs["Flag_Entropy_ICs"] = 0 grp = file.create_group("/RuntimePars") grp.attrs["PeriodicBoundariesOn"] = periodic +#Units +grp = file.create_group("/Units") +grp.attrs["Unit length in cgs (U_L)"] = 1. +grp.attrs["Unit mass in cgs (U_M)"] = 1. +grp.attrs["Unit time in cgs (U_t)"] = 1. +grp.attrs["Unit current in cgs (U_I)"] = 1. +grp.attrs["Unit temperature in cgs (U_T)"] = 1. + #Particle group grp = file.create_group("/PartType0") grp.create_dataset('Coordinates', data=coords, dtype='d') diff --git a/examples/SedovBlast/makeIC_fcc.py b/examples/SedovBlast/makeIC_fcc.py index 17f07440909cb5478d09a5b7a1444c72af2f3a47..0d3a017a9b7f3b30b61e723e3d1646d7797b40a4 100644 --- a/examples/SedovBlast/makeIC_fcc.py +++ b/examples/SedovBlast/makeIC_fcc.py @@ -33,6 +33,7 @@ P = 1.e-5 # Pressure E0= 1.e2 # Energy of the explosion pert = 0.025 gamma = 5./3. # Gas adiabatic index +eta = 1.2349 # 48 ngbs with cubic spline kernel fileName = "sedov.hdf5" @@ -70,7 +71,7 @@ for i in range(L): v[index,1] = 0. v[index,2] = 0. 
m[index] = mass - h[index] = 1.1255 * hbox + h[index] = eta * hbox u[index] = internalEnergy ids[index] = index + 1 if sqrt((x - boxSize/2.)**2 + (y - boxSize/2.)**2 + (z - boxSize/2.)**2) < 1.2 * hbox: @@ -101,6 +102,14 @@ grp.attrs["Flag_Entropy_ICs"] = 0 grp = file.create_group("/RuntimePars") grp.attrs["PeriodicBoundariesOn"] = periodic +#Units +grp = file.create_group("/Units") +grp.attrs["Unit length in cgs (U_L)"] = 1. +grp.attrs["Unit mass in cgs (U_M)"] = 1. +grp.attrs["Unit time in cgs (U_t)"] = 1. +grp.attrs["Unit current in cgs (U_I)"] = 1. +grp.attrs["Unit temperature in cgs (U_T)"] = 1. + #Particle group grp = file.create_group("/PartType0") grp.create_dataset('Coordinates', data=coords, dtype='d') diff --git a/examples/SedovBlast/run.sh b/examples/SedovBlast/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..58646cf42eecc3f31fdb8a63ca2108c02d9580ba --- /dev/null +++ b/examples/SedovBlast/run.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Generate the initial conditions if they are not present. +if [ ! -e sedov.hdf5 ] +then + echo "Generating initial conditions for the SedovBlast example..." + python makeIC_fcc.py +fi + +../swift -s sedov.yml diff --git a/examples/SedovBlast/sedov.yml b/examples/SedovBlast/sedov.yml new file mode 100644 index 0000000000000000000000000000000000000000..f354ef5679eb5b6176ab90298bb307c6c2b27f0e --- /dev/null +++ b/examples/SedovBlast/sedov.yml @@ -0,0 +1,48 @@ + +# Define the system of units to use internally. +UnitSystem: + UnitMass_in_cgs: 1 # Grams + UnitLength_in_cgs: 1 # Centimeters + UnitVelocity_in_cgs: 1 # Centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Parameters for the task scheduling +Scheduler: + nr_threads: 16 # The number of threads per MPI rank to use. + nr_queues: 0 # The number of task queues to use. Use 0 to let the system decide. + cell_max_size: 8000000 # Maximal number of interactions per task (this is the default value). 
+ cell_sub_size: 5000 # Maximal number of interactions per sub-task (this is the default value). + cell_split_size: 400 # Maximal number of particles per cell (this is the default value). + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 1. # The end time of the simulation (in internal units). + dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2349 # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel). + delta_neighbours: 1. # The tolerance for the targeted number of neighbours. + CFL_condition: 0.1 # Courant-Friedrichs-Lewy condition for time integration. + max_ghost_iterations: 30 # Maximal number of iterations allowed to converge towards the smoothing length. + max_smoothing_length: 1. # Maximal smoothing length allowed (in internal units). + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./sedov.hdf5 # The file to read + h_scaling: 1. # A scaling factor to apply to all smoothing lengths in the ICs. + shift_x: 0. # A shift to apply to all particles read from the ICs (in internal units). + shift_y: 0. + shift_z: 0. + +# Parameters governing domain decomposition +DomainDecomposition: + initial_type: m # The initial strategy ("g", "m", "w", or "v"). See documentation for details. + initial_grid_x: 10 # Grid size if the 'g' strategy is chosen. + initial_grid_y: 10 + initial_grid_z: 10 + repartition_type: b # The re-decomposition strategy ("n", "b", "v", "e" or "x"). See documentation for details. 
+ diff --git a/examples/SodShock/makeIC.py b/examples/SodShock/makeIC.py index 0ac0564116f8a6ceb57b4f41d23eb9907df0440d..8ae19050c11c0712579b44646c8870d7574d113b 100644 --- a/examples/SodShock/makeIC.py +++ b/examples/SodShock/makeIC.py @@ -43,14 +43,14 @@ vol = boxSize[0] * boxSize[1] * boxSize[2] glass1 = h5py.File("glass_001.hdf5") pos1 = glass1["/PartType0/Coordinates"][:,:] pos1 = pos1 / factor # Particles are in [0:0.25, 0:0.25, 0:0.25] - +glass_h1 = glass1["/PartType0/SmoothingLength"][:] / factor #Read in high density glass # glass2 = h5py.File("../Glass/glass_50000.hdf5") glass2 = h5py.File("glass_002.hdf5") pos2 = glass2["/PartType0/Coordinates"][:,:] pos2 = pos2 / factor # Particles are in [0:0.25, 0:0.25, 0:0.25] - +glass_h2 = glass2["/PartType0/SmoothingLength"][:] / factor #Generate high density region rho1 = 1. @@ -61,9 +61,10 @@ coord1 = append(coord1, coord1 + [0.25, 0, 0], 0) # coord1 = append(coord1, pos1 + [0, 0.5, 0.5], 0) N1 = size(coord1)/3 v1 = zeros((N1, 3)) -h1 = ones(N1) * 2.251 * 0.5 * vol / (size(pos1)/3)**(1./3.) u1 = ones(N1) * P1 / ((gamma - 1.) * rho1) m1 = ones(N1) * vol * 0.5 * rho1 / N1 +h1 = append(glass_h1, glass_h1, 0) +h1 = append(h1, h1, 0) #Generate low density region rho2 = 0.25 @@ -74,9 +75,10 @@ coord2 = append(coord2, coord2 + [0.25, 0, 0], 0) # coord2 = append(coord2, pos2 + [0, 0.5, 0.5], 0) N2 = size(coord2)/3 v2 = zeros((N2, 3)) -h2 = ones(N2) * 2.251 * 0.5 * vol / (size(pos2)/3)**(1./3.) u2 = ones(N2) * P2 / ((gamma - 1.) 
* rho2) m2 = ones(N2) * vol * 0.5 * rho2 / N2 +h2 = append(glass_h2, glass_h2, 0) +h2 = append(h2, h2, 0) #Merge arrays numPart = N1 + N2 @@ -89,8 +91,8 @@ ids = zeros(numPart, dtype='L') for i in range(1, numPart+1): ids[i-1] = i -#Final operations -h /= 2 +#Final operation since we come from Gadget-2 cubic spline ICs +h /= 1.825742 #File file = h5py.File(fileName, 'w') @@ -110,6 +112,14 @@ grp.attrs["Flag_Entropy_ICs"] = 0 grp = file.create_group("/RuntimePars") grp.attrs["PeriodicBoundariesOn"] = periodic +#Units +grp = file.create_group("/Units") +grp.attrs["Unit length in cgs (U_L)"] = 1. +grp.attrs["Unit mass in cgs (U_M)"] = 1. +grp.attrs["Unit time in cgs (U_t)"] = 1. +grp.attrs["Unit current in cgs (U_I)"] = 1. +grp.attrs["Unit temperature in cgs (U_T)"] = 1. + #Particle group grp = file.create_group("/PartType0") grp.create_dataset('Coordinates', data=coords, dtype='d') diff --git a/examples/SodShock/run.sh b/examples/SodShock/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..646f1e3a337170e2e406c24e7505e42b81de364b --- /dev/null +++ b/examples/SodShock/run.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Generate the initial conditions if they are not present. +if [ ! -e sodShock.hdf5 ] +then + echo "Generating initial conditions for the SodShock example..." + python makeIC.py +fi + +../swift -s sodShock.yml diff --git a/examples/SodShock/sodShock.yml b/examples/SodShock/sodShock.yml new file mode 100644 index 0000000000000000000000000000000000000000..5fe7be7b9fc13bb5bc67556d79d8ff9d9eff81d9 --- /dev/null +++ b/examples/SodShock/sodShock.yml @@ -0,0 +1,48 @@ + +# Define the system of units to use internally. +UnitSystem: + UnitMass_in_cgs: 1 # Grams + UnitLength_in_cgs: 1 # Centimeters + UnitVelocity_in_cgs: 1 # Centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Parameters for the task scheduling +Scheduler: + nr_threads: 16 # The number of threads per MPI rank to use. 
+ nr_queues: 0 # The number of task queues to use. Use 0 to let the system decide. + cell_max_size: 8000000 # Maximal number of interactions per task (this is the default value). + cell_sub_size: 5000 # Maximal number of interactions per sub-task. + cell_split_size: 400 # Maximal number of particles per cell (this is the default value). + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 1. # The end time of the simulation (in internal units). + dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2349 # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel). + delta_neighbours: 1. # The tolerance for the targeted number of neighbours. + CFL_condition: 0.1 # Courant-Friedrichs-Lewy condition for time integration. + max_ghost_iterations: 30 # Maximal number of iterations allowed to converge towards the smoothing length. + max_smoothing_length: 0.01 # Maximal smoothing length allowed (in internal units). + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./sodShock.hdf5 # The file to read + h_scaling: 1. # A scaling factor to apply to all smoothing lengths in the ICs. + shift_x: 0. # A shift to apply to all particles read from the ICs (in internal units). + shift_y: 0. + shift_z: 0. + +# Parameters governing domain decomposition +DomainDecomposition: + initial_type: m # The initial strategy ("g", "m", "w", or "v"). See documentation for details. + initial_grid_x: 10 # Grid size if the 'g' strategy is chosen. + initial_grid_y: 10 + initial_grid_z: 10 + repartition_type: b # The re-decomposition strategy ("n", "b", "v", "e" or "x"). See documentation for details. 
+ diff --git a/examples/UniformBox/makeIC.py b/examples/UniformBox/makeIC.py index c175349e658799cbcb30dfe2619a1594bafc18b9..1484f60596e68734f0f98685ab2ab845f2e0b407 100644 --- a/examples/UniformBox/makeIC.py +++ b/examples/UniformBox/makeIC.py @@ -32,6 +32,7 @@ L = int(sys.argv[1]) # Number of particles along one axis rho = 2. # Density P = 1. # Pressure gamma = 5./3. # Gas adiabatic index +eta = 1.2349 # 48 ngbs with cubic spline kernel fileName = "uniformBox.hdf5" #--------------------------------------------------- @@ -55,11 +56,18 @@ grp.attrs["NumFilesPerSnapshot"] = 1 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 - #Runtime parameters grp = file.create_group("/RuntimePars") grp.attrs["PeriodicBoundariesOn"] = periodic +#Units +grp = file.create_group("/Units") +grp.attrs["Unit length in cgs (U_L)"] = 1. +grp.attrs["Unit mass in cgs (U_M)"] = 1. +grp.attrs["Unit time in cgs (U_t)"] = 1. +grp.attrs["Unit current in cgs (U_I)"] = 1. +grp.attrs["Unit temperature in cgs (U_T)"] = 1. + #Particle group grp = file.create_group("/PartType0") @@ -73,7 +81,7 @@ ds = grp.create_dataset('Masses', (numPart,1), 'f') ds[()] = m m = zeros(1) -h = full((numPart, 1), 1.1255 * boxSize / L) +h = full((numPart, 1), eta * boxSize / L) ds = grp.create_dataset('SmoothingLength', (numPart,1), 'f') ds[()] = h h = zeros(1) diff --git a/examples/UniformBox/makeICbig.py b/examples/UniformBox/makeICbig.py index e475fdcbd9f3c4811e3dcfdf20bbd321be3d8b29..bd5cf627fb535595b3abb224cbc8de50589f12cf 100644 --- a/examples/UniformBox/makeICbig.py +++ b/examples/UniformBox/makeICbig.py @@ -32,6 +32,7 @@ N = int(sys.argv[2]) # Write N particles at a time to avoid requiring a lot of rho = 2. # Density P = 1. # Pressure gamma = 5./3. 
# Gas adiabatic index +eta = 1.2349 # 48 ngbs with cubic spline kernel fileName = "uniformBox_%d.hdf5"%L #--------------------------------------------------- @@ -62,11 +63,19 @@ grp.attrs["NumFilesPerSnapshot"] = 1 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 - #Runtime parameters grp = file.create_group("/RuntimePars") grp.attrs["PeriodicBoundariesOn"] = periodic +#Units +grp = file.create_group("/Units") +grp.attrs["Unit length in cgs (U_L)"] = 1. +grp.attrs["Unit mass in cgs (U_M)"] = 1. +grp.attrs["Unit time in cgs (U_t)"] = 1. +grp.attrs["Unit current in cgs (U_I)"] = 1. +grp.attrs["Unit temperature in cgs (U_T)"] = 1. + + #Particle group grp = file.create_group("/PartType0") @@ -89,7 +98,7 @@ for n in range(n_iterations): ds_m[offset:offset+N] = m m = zeros(1) - h = full((N, 1), 1.1255 * boxSize / L) + h = full((N, 1), eta * boxSize / L) ds_h[offset:offset+N] = h h = zeros(1) @@ -122,7 +131,7 @@ m = full((remainder, 1), mass) ds_m[offset:offset+remainder] = m m = zeros(1) -h = full((remainder, 1), 1.1255 * boxSize / L) +h = full((remainder, 1), eta * boxSize / L) ds_h[offset:offset+remainder] = h h = zeros(1) @@ -139,7 +148,7 @@ coords = zeros((remainder, 3)) coords[:,0] = z[:,0] * boxSize / L + boxSize / (2*L) coords[:,1] = y[:,0] * boxSize / L + boxSize / (2*L) coords[:,2] = x[:,0] * boxSize / L + boxSize / (2*L) -ds_x[offset:offset+remainder,:] = coords +ds_x[offset:offset+remainder,:] = coords print "Done", offset+remainder,"/", numPart diff --git a/examples/UniformBox/run.sh b/examples/UniformBox/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..ca78b0ac0425bf1b3f6dd9d30bfc95d35083739f --- /dev/null +++ b/examples/UniformBox/run.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Generate the initial conditions if they are not present. +if [ ! -e uniformBox.hdf5 ] +then + echo "Generating initial conditions for the uniform box example..." 
+ python makeIC.py 100 +fi + +../swift -s uniformBox.yml diff --git a/examples/UniformBox/uniformBox.yml b/examples/UniformBox/uniformBox.yml new file mode 100644 index 0000000000000000000000000000000000000000..2d5512815b60511b5dbc373df43fae4658272093 --- /dev/null +++ b/examples/UniformBox/uniformBox.yml @@ -0,0 +1,48 @@ + +# Define the system of units to use internally. +UnitSystem: + UnitMass_in_cgs: 1 # Grams + UnitLength_in_cgs: 1 # Centimeters + UnitVelocity_in_cgs: 1 # Centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Parameters for the task scheduling +Scheduler: + nr_threads: 16 # The number of threads per MPI rank to use. + nr_queues: 0 # The number of task queues to use. Use 0 to let the system decide. + cell_max_size: 8000000 # Maximal number of interactions per task (this is the default value). + cell_sub_size: 5000 # Maximal number of interactions per sub-task (this is the default value). + cell_split_size: 400 # Maximal number of particles per cell (this is the default value). + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 1. # The end time of the simulation (in internal units). + dt_min: 1e-6 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2349 # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel). + delta_neighbours: 1. # The tolerance for the targeted number of neighbours. + CFL_condition: 0.1 # Courant-Friedrichs-Lewy condition for time integration. + max_ghost_iterations: 30 # Maximal number of iterations allowed to converge towards the smoothing length. + max_smoothing_length: 0.1 # Maximal smoothing length allowed (in internal units). 
+ +# Parameters related to the initial conditions +InitialConditions: + file_name: ./uniformBox.hdf5 # The file to read + h_scaling: 1. # A scaling factor to apply to all smoothing lengths in the ICs. + shift_x: 0. # A shift to apply to all particles read from the ICs (in internal units). + shift_y: 0. + shift_z: 0. + +# Parameters governing domain decomposition +DomainDecomposition: + initial_type: m # The initial strategy ("g", "m", "w", or "v"). See documentation for details. + initial_grid_x: 10 # Grid size if the 'g' strategy is chosen. + initial_grid_y: 10 + initial_grid_z: 10 + repartition_type: b # The re-decomposition strategy ("n", "b", "v", "e" or "x"). See documentation for details. + diff --git a/examples/UniformDMBox/makeIC.py b/examples/UniformDMBox/makeIC.py index 061b4d0ad1959d9e25356aff80e78adb9c1c4faa..449d780fb31bc23dd194f772be45d35e6b0bbe3f 100644 --- a/examples/UniformDMBox/makeIC.py +++ b/examples/UniformDMBox/makeIC.py @@ -52,11 +52,19 @@ grp.attrs["NumFilesPerSnapshot"] = 1 grp.attrs["MassTable"] = [0.0, mass, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 - #Runtime parameters grp = file.create_group("/RuntimePars") grp.attrs["PeriodicBoundariesOn"] = periodic +#Units +grp = file.create_group("/Units") +grp.attrs["Unit length in cgs (U_L)"] = 1. +grp.attrs["Unit mass in cgs (U_M)"] = 1. +grp.attrs["Unit time in cgs (U_t)"] = 1. +grp.attrs["Unit current in cgs (U_I)"] = 1. +grp.attrs["Unit temperature in cgs (U_T)"] = 1. + + #Particle group grp = file.create_group("/PartType1") diff --git a/examples/main.c b/examples/main.c index c88f92a07a747c327692b5e0fbbc7dc07b93ac0c..5cfae5efba9157ba7b727115b03ac467287edc3d 100644 --- a/examples/main.c +++ b/examples/main.c @@ -23,20 +23,11 @@ #include "../config.h" /* Some standard headers. 
*/ +#include <fenv.h> +#include <unistd.h> #include <stdio.h> #include <stdlib.h> -#include <unistd.h> #include <string.h> -#include <pthread.h> -#include <math.h> -#include <float.h> -#include <limits.h> -#include <fenv.h> - -/* Conditional headers. */ -#ifdef HAVE_LIBZ -#include <zlib.h> -#endif /* MPI headers. */ #ifdef WITH_MPI @@ -51,58 +42,55 @@ #define ENGINE_POLICY engine_policy_none #endif +/** + * @brief Prints the usage line and the list of valid command-line options to stdout. + */ +void print_help_message(void) { + + printf("\nUsage: swift [OPTION] PARAMFILE\n\n"); + printf("Valid options are:\n"); + printf(" %2s %8s %s\n", "-c", "", "Run with cosmological time integration"); + printf( + " %2s %8s %s\n", "-d", "", + "Dry run. Read the parameter file, allocate memory but does not read "); + printf( + " %2s %8s %s\n", "", "", + "the particles from ICs and exit before the start of time integration."); + printf(" %2s %8s %s\n", "", "", + "Allows user to check validity of parameter and IC files as well as " + "memory limits."); + printf(" %2s %8s %s\n", "-e", "", + "Enable floating-point exceptions (debugging mode)"); + printf(" %2s %8s %s\n", "-f", "{int}", + "Overwrite the CPU frequency (Hz) to be used for time measurements"); + printf(" %2s %8s %s\n", "-g", "", + "Run with an external gravitational potential"); + printf(" %2s %8s %s\n", "-G", "", "Run with self-gravity"); + printf(" %2s %8s %s\n", "-s", "", "Run with SPH"); + printf(" %2s %8s %s\n", "-v", "[12]", + "Increase the level of verbosity 1: MPI-rank 0 writes "); + printf(" %2s %8s %s\n", "", "", "2: All MPI-ranks write"); + printf(" %2s %8s %s\n", "-y", "{int}", + "Time-step frequency at which task graphs are dumped"); + printf(" %2s %8s %s\n", "-h", "", "Print this help message and exit"); + printf( + "\nSee the file examples/parameter_example.yml for an example of " + "parameter file.\n"); +} + /** * @brief Main routine that loads a few particles and generates some output. 
* */ - int main(int argc, char *argv[]) { - int c, icount, periodic = 1; - size_t Ngas = 0, Ngpart = 0; - long long N_total[2] = {0, 0}; - int nr_threads = 1, nr_queues = -1; - int dump_tasks = 0; - int data[2]; - double dim[3] = {1.0, 1.0, 1.0}, shift[3] = {0.0, 0.0, 0.0}; - double h_max = -1.0, scaling = 1.0; - double time_end = DBL_MAX; - struct part *parts = NULL; - struct gpart *gparts = NULL; - struct space s; - struct engine e; - struct UnitSystem us; struct clocks_time tic, toc; - char ICfileName[200] = ""; - char dumpfile[30]; - float dt_max = 0.0f, dt_min = 0.0f; - int nr_nodes = 1, myrank = 0; - FILE *file_thread; - int with_outputs = 1; - int verbose = 0, talking; - unsigned long long cpufreq = 0; - -#ifdef WITH_MPI - struct partition initial_partition; - enum repartition_type reparttype = REPART_NONE; - - initial_partition.type = INITPART_GRID; - initial_partition.grid[0] = 1; - initial_partition.grid[1] = 1; - initial_partition.grid[2] = 1; -#ifdef HAVE_METIS - /* Defaults make use of METIS. */ - reparttype = REPART_METIS_BOTH; - initial_partition.type = INITPART_METIS_NOWEIGHT; -#endif -#endif -/* Choke on FP-exceptions. */ -// feenableexcept( FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW ); + int nr_nodes = 1, myrank = 0; #ifdef WITH_MPI /* Start by initializing MPI. 
*/ - int res, prov; + int res = 0, prov = 0; if ((res = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &prov)) != MPI_SUCCESS) error("Call to MPI_Init failed with error %i.", res); @@ -117,304 +105,242 @@ int main(int argc, char *argv[]) { if ((res = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN)) != MPI_SUCCESS) error("Call to MPI_Comm_set_errhandler failed with error %i.", res); - if (myrank == 0) message("MPI is up and running with %i node(s).", nr_nodes); + if (myrank == 0) + printf("[0000][00000.0] MPI is up and running with %i node(s).\n", + nr_nodes); + if (nr_nodes == 1) { + message("WARNING: you are running with one MPI rank."); + message("WARNING: you should use the non-MPI version of this program."); + } fflush(stdout); - - /* Set a default grid so that grid[0]*grid[1]*grid[2] == nr_nodes. */ - factor(nr_nodes, &initial_partition.grid[0], &initial_partition.grid[1]); - factor(nr_nodes / initial_partition.grid[1], &initial_partition.grid[0], - &initial_partition.grid[2]); - factor(initial_partition.grid[0] * initial_partition.grid[1], - &initial_partition.grid[1], &initial_partition.grid[0]); #endif - /* Initialize CPU frequency, this also starts time. */ - clocks_set_cpufreq(cpufreq); - - /* Greeting message */ - if (myrank == 0) greetings(); - #if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE) if ((ENGINE_POLICY) & engine_policy_setaffinity) { /* Ensure the NUMA node on which we initialise (first touch) everything * doesn't change before engine_init allocates NUMA-local workers. - * Otherwise, - * we may be scheduled elsewhere between the two times. + * Otherwise, we may be scheduled elsewhere between the two times. 
*/ cpu_set_t affinity; CPU_ZERO(&affinity); CPU_SET(sched_getcpu(), &affinity); if (sched_setaffinity(0, sizeof(cpu_set_t), &affinity) != 0) { - message("failed to set entry thread's affinity"); - } else { - message("set entry thread's affinity"); + error("failed to set entry thread's affinity"); } } #endif - /* Init the space. */ - bzero(&s, sizeof(struct space)); + /* Welcome to SWIFT, you made the right choice */ + if (myrank == 0) greetings(); - /* Parse the options */ - while ((c = getopt(argc, argv, "a:c:d:e:f:h:m:oP:q:R:s:t:v:w:y:z:")) != -1) - switch (c) { - case 'a': - if (sscanf(optarg, "%lf", &scaling) != 1) - error("Error parsing cutoff scaling."); - if (myrank == 0) message("scaling cutoff by %.3f.", scaling); - fflush(stdout); - break; + int dry_run = 0; + int dump_tasks = 0; + int with_cosmology = 0; + int with_external_gravity = 0; + int with_self_gravity = 0; + int with_hydro = 0; + int with_fp_exceptions = 0; + int verbose = 0; + char paramFileName[200] = ""; + unsigned long long cpufreq = 0; + + /* Parse the parameters */ + int c; + while ((c = getopt(argc, argv, "cdef:gGhsv:y")) != -1) switch (c) { case 'c': - if (sscanf(optarg, "%lf", &time_end) != 1) - error("Error parsing final time."); - if (myrank == 0) message("time_end set to %.3e.", time_end); - fflush(stdout); + with_cosmology = 1; break; case 'd': - if (sscanf(optarg, "%f", &dt_min) != 1) - error("Error parsing minimal timestep."); - if (myrank == 0) message("dt_min set to %e.", dt_min); - fflush(stdout); + dry_run = 1; break; case 'e': - if (sscanf(optarg, "%f", &dt_max) != 1) - error("Error parsing maximal timestep."); - if (myrank == 0) message("dt_max set to %e.", dt_max); - fflush(stdout); + with_fp_exceptions = 1; break; case 'f': - if (!strcpy(ICfileName, optarg)) error("Error parsing IC file name."); - break; - case 'h': - if (sscanf(optarg, "%llu", &cpufreq) != 1) - error("Error parsing CPU frequency."); - if (myrank == 0) message("CPU frequency set to %llu.", cpufreq); - 
fflush(stdout); - break; - case 'm': - if (sscanf(optarg, "%lf", &h_max) != 1) error("Error parsing h_max."); - if (myrank == 0) message("maximum h set to %e.", h_max); - fflush(stdout); - break; - case 'o': - with_outputs = 0; - break; - case 'P': -/* Partition type is one of "g", "m", "w", or "v"; "g" can be - * followed by three numbers defining the grid. */ -#ifdef WITH_MPI - switch (optarg[0]) { - case 'g': - initial_partition.type = INITPART_GRID; - if (strlen(optarg) > 2) { - if (sscanf(optarg, "g %i %i %i", &initial_partition.grid[0], - &initial_partition.grid[1], - &initial_partition.grid[2]) != 3) - error("Error parsing grid."); - } - break; -#ifdef HAVE_METIS - case 'm': - initial_partition.type = INITPART_METIS_NOWEIGHT; - break; - case 'w': - initial_partition.type = INITPART_METIS_WEIGHT; - break; -#endif - case 'v': - initial_partition.type = INITPART_VECTORIZE; - break; + if (sscanf(optarg, "%llu", &cpufreq) != 1) { + if (myrank == 0) printf("Error parsing CPU frequency (-f).\n"); + if (myrank == 0) print_help_message(); + return 1; } -#endif break; - case 'q': - if (sscanf(optarg, "%d", &nr_queues) != 1) - error("Error parsing number of queues."); + case 'g': + with_external_gravity = 1; break; - case 'R': -/* Repartition type "n", "b", "v", "e" or "x". - * Note only none is available without METIS. 
*/ -#ifdef WITH_MPI - switch (optarg[0]) { - case 'n': - reparttype = REPART_NONE; - break; -#ifdef HAVE_METIS - case 'b': - reparttype = REPART_METIS_BOTH; - break; - case 'v': - reparttype = REPART_METIS_VERTEX; - break; - case 'e': - reparttype = REPART_METIS_EDGE; - break; - case 'x': - reparttype = REPART_METIS_VERTEX_EDGE; - break; -#endif - } -#endif + case 'G': + with_self_gravity = 1; break; + case 'h': + if (myrank == 0) print_help_message(); + return 0; case 's': - if (sscanf(optarg, "%lf %lf %lf", &shift[0], &shift[1], &shift[2]) != 3) - error("Error parsing shift."); - if (myrank == 0) - message("will shift parts by [ %.3f %.3f %.3f ].", shift[0], shift[1], - shift[2]); - break; - case 't': - if (sscanf(optarg, "%d", &nr_threads) != 1) - error("Error parsing number of threads."); + with_hydro = 1; break; case 'v': - /* verbose = 1: MPI rank 0 writes - verbose = 2: all MPI ranks write */ - if (sscanf(optarg, "%d", &verbose) != 1) - error("Error parsing verbosity level."); - break; - case 'w': - if (sscanf(optarg, "%d", &space_subsize) != 1) - error("Error parsing sub size."); - if (myrank == 0) message("sub size set to %i.", space_subsize); + if (sscanf(optarg, "%d", &verbose) != 1) { + if (myrank == 0) printf("Error parsing verbosity level (-v).\n"); + if (myrank == 0) print_help_message(); + return 1; + } break; case 'y': - if (sscanf(optarg, "%d", &dump_tasks) != 1) - error("Error parsing dump_tasks (-y)"); - break; - case 'z': - if (sscanf(optarg, "%d", &space_splitsize) != 1) - error("Error parsing split size."); - if (myrank == 0) message("split size set to %i.", space_splitsize); + if (sscanf(optarg, "%d", &dump_tasks) != 1) { + if (myrank == 0) printf("Error parsing dump_tasks (-y). 
\n"); + if (myrank == 0) print_help_message(); + return 1; + } break; case '?': - error("Unknown option."); + if (myrank == 0) print_help_message(); + return 1; break; } + if (optind == argc - 1) { + if (!strcpy(paramFileName, argv[optind++])) + error("Error reading parameter file name."); + } else if (optind > argc - 1) { + if (myrank == 0) printf("Error: A parameter file name must be provided\n"); + if (myrank == 0) print_help_message(); + return 1; + } else { + if (myrank == 0) printf("Error: Too many parameters given\n"); + if (myrank == 0) print_help_message(); + return 1; + } + if (!with_self_gravity && !with_hydro && !with_external_gravity) { + if (myrank == 0) + printf("Error: At least one of -s, -g or -G must be chosen.\n"); + if (myrank == 0) print_help_message(); + return 1; + } -#ifdef WITH_MPI + /* Genesis 1.1: And then, there was time ! */ + clocks_set_cpufreq(cpufreq); + + if (myrank == 0 && dry_run) + message( + "Executing a dry run. No i/o or time integration will be performed."); + + /* Report CPU frequency. */ if (myrank == 0) { - message("Running with %i thread(s) per node.", nr_threads); - message("Using initial partition %s", - initial_partition_name[initial_partition.type]); - if (initial_partition.type == INITPART_GRID) - message("grid set to [ %i %i %i ].", initial_partition.grid[0], - initial_partition.grid[1], initial_partition.grid[2]); - message("Using %s repartitioning", repartition_name[reparttype]); + cpufreq = clocks_get_cpufreq(); + message("CPU frequency used for tick conversion: %llu Hz", cpufreq); + } - if (nr_nodes == 1) { - message("WARNING: you are running with one MPI rank."); - message("WARNING: you should use the non-MPI version of this program."); - } - fflush(stdout); + /* Do we choke on FP-exceptions ? 
*/ + if (with_fp_exceptions) { + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); + if (myrank == 0) message("Floating point exceptions will be reported."); } -#else - if (myrank == 0) message("Running with %i thread(s).", nr_threads); -#endif /* How large are the parts? */ if (myrank == 0) { - message("sizeof(struct part) is %li bytes.", (long int)sizeof(struct part)); - message("sizeof(struct xpart) is %li bytes.", - (long int)sizeof(struct xpart)); - message("sizeof(struct gpart) is %li bytes.", - (long int)sizeof(struct gpart)); + message("sizeof(struct part) is %4zi bytes.", sizeof(struct part)); + message("sizeof(struct xpart) is %4zi bytes.", sizeof(struct xpart)); + message("sizeof(struct gpart) is %4zi bytes.", sizeof(struct gpart)); + } + + /* How vocal are we ? */ + const int talking = (verbose == 1 && myrank == 0) || (verbose == 2); + + /* Read the parameter file */ + struct swift_params *params = malloc(sizeof(struct swift_params)); + if (params == NULL) error("Error allocating memory for the parameter file."); + if (myrank == 0) { + message("Reading parameters from file '%s'", paramFileName); + parser_read_file(paramFileName, params); + // parser_print_params(¶ms); + parser_write_params_to_file(params, "used_parameters.yml"); } +#ifdef WITH_MPI + /* Broadcast the parameter file */ + MPI_Bcast(params, sizeof(struct swift_params), MPI_BYTE, 0, MPI_COMM_WORLD); +#endif /* Initialize unit system */ - initUnitSystem(&us); + struct UnitSystem us; + units_init(&us, params); if (myrank == 0) { message("Unit system: U_M = %e g.", us.UnitMass_in_cgs); message("Unit system: U_L = %e cm.", us.UnitLength_in_cgs); message("Unit system: U_t = %e s.", us.UnitTime_in_cgs); message("Unit system: U_I = %e A.", us.UnitCurrent_in_cgs); message("Unit system: U_T = %e K.", us.UnitTemperature_in_cgs); - message("Density units: %e a^%f h^%f.", - conversionFactor(&us, UNIT_CONV_DENSITY), - aFactor(&us, UNIT_CONV_DENSITY), hFactor(&us, UNIT_CONV_DENSITY)); - 
message("Entropy units: %e a^%f h^%f.", - conversionFactor(&us, UNIT_CONV_ENTROPY), - aFactor(&us, UNIT_CONV_ENTROPY), hFactor(&us, UNIT_CONV_ENTROPY)); } - /* Report CPU frequency. */ +/* Prepare the domain decomposition scheme */ +#ifdef WITH_MPI + struct partition initial_partition; + enum repartition_type reparttype; + partition_init(&initial_partition, &reparttype, params, nr_nodes); + + /* Let's report what we did */ if (myrank == 0) { - cpufreq = clocks_get_cpufreq(); - message("CPU frequency used for tick conversion: %llu Hz", cpufreq); + message("Using initial partition %s", + initial_partition_name[initial_partition.type]); + if (initial_partition.type == INITPART_GRID) + message("grid set to [ %i %i %i ].", initial_partition.grid[0], + initial_partition.grid[1], initial_partition.grid[2]); + message("Using %s repartitioning", repartition_name[reparttype]); } +#endif - /* Check we have sensible time step bounds */ - if (dt_min > dt_max) - error("Minimal time step size must be large than maximal time step size "); - - /* Check whether an IC file has been provided */ - if (strcmp(ICfileName, "") == 0) - error("An IC file name must be provided via the option -f"); - - /* Read particles and space information from (GADGET) IC */ - + /* Read particles and space information from (GADGET) ICs */ + char ICfileName[200] = ""; + parser_get_param_string(params, "InitialConditions:file_name", ICfileName); + if (myrank == 0) message("Reading ICs from file '%s'", ICfileName); + struct part *parts = NULL; + struct gpart *gparts = NULL; + size_t Ngas = 0, Ngpart = 0; + double dim[3] = {0., 0., 0.}; + int periodic = 0; if (myrank == 0) clocks_gettime(&tic); #if defined(WITH_MPI) #if defined(HAVE_PARALLEL_HDF5) - read_ic_parallel(ICfileName, dim, &parts, &Ngas, &periodic, myrank, nr_nodes, - MPI_COMM_WORLD, MPI_INFO_NULL); + read_ic_parallel(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic, + myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, dry_run); #else - 
read_ic_serial(ICfileName, dim, &parts, &Ngas, &periodic, myrank, nr_nodes, - MPI_COMM_WORLD, MPI_INFO_NULL); + read_ic_serial(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic, + myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, dry_run); #endif #else - read_ic_single(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic); + read_ic_single(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic, + dry_run); #endif - if (myrank == 0) { clocks_gettime(&toc); - message("reading particle properties took %.3f %s.", - clocks_diff(&tic, &toc), clocks_getunit()); + message("Reading initial conditions took %.3f %s.", clocks_diff(&tic, &toc), + clocks_getunit()); fflush(stdout); } + /* Discard gparts if we don't have gravity + * (Better implementation of i/o will come)*/ + if (!with_external_gravity && !with_self_gravity) { + free(gparts); + gparts = NULL; + for (size_t k = 0; k < Ngas; ++k) parts[k].gpart = NULL; + Ngpart = 0; + } + + /* Get the total number of particles across all nodes. 
*/ + long long N_total[2] = {0, 0}; #if defined(WITH_MPI) long long N_long[2] = {Ngas, Ngpart}; MPI_Reduce(&N_long, &N_total, 2, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); - if (myrank == 0) - message("Read %lld gas particles and %lld DM particles from the ICs", - N_total[0], N_total[1]); #else N_total[0] = Ngas; - N_total[1] = Ngpart - Ngas; - message("Read %lld gas particles and %lld DM particles from the ICs", - N_total[0], N_total[1]); + N_total[1] = Ngpart; #endif + if (myrank == 0) + message("Read %lld gas particles and %lld gparts from the ICs.", N_total[0], + N_total[1]); - /* Apply h scaling */ - if (scaling != 1.0) - for (size_t k = 0; k < Ngas; k++) parts[k].h *= scaling; - - /* Apply shift */ - if (shift[0] != 0 || shift[1] != 0 || shift[2] != 0) { - for (size_t k = 0; k < Ngas; k++) { - parts[k].x[0] += shift[0]; - parts[k].x[1] += shift[1]; - parts[k].x[2] += shift[2]; - } - for (size_t k = 0; k < Ngpart; k++) { - gparts[k].x[0] += shift[0]; - gparts[k].x[1] += shift[1]; - gparts[k].x[2] += shift[2]; - } - } - - /* Set default number of queues. */ - if (nr_queues < 0) nr_queues = nr_threads; - - /* How vocal are we ? */ - talking = (verbose == 1 && myrank == 0) || (verbose == 2); - - /* Initialize the space with this data. */ + /* Initialize the space with these data. */ if (myrank == 0) clocks_gettime(&tic); - space_init(&s, dim, parts, gparts, Ngas, Ngpart, periodic, h_max, - myrank == 0); - if (myrank == 0 && verbose) { + struct space s; + space_init(&s, params, dim, parts, gparts, Ngas, Ngpart, periodic, talking, + dry_run); + if (myrank == 0) { clocks_gettime(&toc); message("space_init took %.3f %s.", clocks_diff(&tic, &toc), clocks_getunit()); @@ -431,45 +357,47 @@ int main(int argc, char *argv[]) { message("%zi parts in %i cells.", s.nr_parts, s.tot_cells); message("%zi gparts in %i cells.", s.nr_gparts, s.tot_cells); message("maximum depth is %d.", s.maxdepth); - // message( "cutoffs in [ %g %g ]." 
, s.h_min , s.h_max ); fflush(stdout); } /* Verify that each particle is in it's proper cell. */ - if (myrank == 0) { - icount = 0; + if (talking && !dry_run) { + int icount = 0; space_map_cells_pre(&s, 0, &map_cellcheck, &icount); message("map_cellcheck picked up %i parts.", icount); } - if (myrank == 0) { - data[0] = s.maxdepth; - data[1] = 0; + /* Verify the maximal depth of cells. */ + if (talking && !dry_run) { + int data[2] = {s.maxdepth, 0}; space_map_cells_pre(&s, 0, &map_maxdepth, data); message("nr of cells at depth %i is %i.", data[0], data[1]); } - /* Initialize the engine with this space. */ + /* Construct the engine policy */ + int engine_policies = ENGINE_POLICY | engine_policy_steal; + if (with_hydro) engine_policies |= engine_policy_hydro; + if (with_self_gravity) engine_policies |= engine_policy_self_gravity; + if (with_external_gravity) engine_policies |= engine_policy_external_gravity; + if (with_cosmology) engine_policies |= engine_policy_cosmology; + + /* Initialize the engine with the space and policies. */ if (myrank == 0) clocks_gettime(&tic); - if (myrank == 0) message("nr_nodes is %i.", nr_nodes); - engine_init(&e, &s, dt_max, nr_threads, nr_queues, nr_nodes, myrank, - ENGINE_POLICY | engine_policy_steal | engine_policy_hydro, 0, - time_end, dt_min, dt_max, talking); - if (myrank == 0 && verbose) { + struct engine e; + engine_init(&e, &s, params, nr_nodes, myrank, engine_policies, talking); + if (myrank == 0) { clocks_gettime(&toc); message("engine_init took %.3f %s.", clocks_diff(&tic, &toc), clocks_getunit()); fflush(stdout); } -#ifdef WITH_MPI - /* Split the space. */ - engine_split(&e, &initial_partition); - engine_redistribute(&e); -#endif + /* Now that everything is ready, no need for the parameters any more */ + free(params); + params = NULL; - if (with_outputs) { - /* Write the state of the system as it is before starting time integration. 
- */ + int with_outputs = 1; + if (with_outputs && !dry_run) { + /* Write the state of the system before starting time integration. */ if (myrank == 0) clocks_gettime(&tic); #if defined(WITH_MPI) #if defined(HAVE_PARALLEL_HDF5) @@ -495,26 +423,42 @@ int main(int argc, char *argv[]) { for (k = 0; k < runner_hist_N; k++) runner_hist_bins[k] = 0; #endif + /* Get some info to the user. */ if (myrank == 0) { message( "Running on %lld gas particles and %lld DM particles until t=%.3e with " "%i threads and %i queues (dt_min=%.3e, dt_max=%.3e)...", - N_total[0], N_total[1], time_end, e.nr_threads, e.sched.nr_queues, + N_total[0], N_total[1], e.timeEnd, e.nr_threads, e.sched.nr_queues, e.dt_min, e.dt_max); fflush(stdout); } + /* Time to say good-bye if this was not a serious run. */ + if (dry_run) { +#ifdef WITH_MPI + if ((res = MPI_Finalize()) != MPI_SUCCESS) + error("call to MPI_Finalize failed with error %i.", res); +#endif + if (myrank == 0) + message("Time integration ready to start. End of dry-run."); + return 0; + } + +#ifdef WITH_MPI + /* Split the space. */ + engine_split(&e, &initial_partition); + engine_redistribute(&e); +#endif + /* Initialise the particles */ engine_init_particles(&e); /* Legend */ if (myrank == 0) - printf( - "# Step Time time-step Number of updates CPU Wall-clock time " - "[%s]\n", - clocks_getunit()); + printf("# %6s %14s %14s %10s %10s %16s [%s]\n", "Step", "Time", "Time-step", + "Updates", "g-Updates", "Wall-clock time", clocks_getunit()); - /* Let loose a runner on the space. */ + /* Main simulation loop */ for (int j = 0; !engine_is_done(&e); j++) { /* Repartition the space amongst the nodes? */ @@ -557,7 +501,9 @@ int main(int argc, char *argv[]) { #ifdef WITH_MPI /* Make sure output file is empty, only on one rank. 
*/ - sprintf(dumpfile, "thread_info_MPI-step%d.dat", j); + char dumpfile[30]; + snprintf(dumpfile, 30, "thread_info_MPI-step%d.dat", j); + FILE *file_thread; if (myrank == 0) { file_thread = fopen(dumpfile, "w"); fclose(file_thread); @@ -602,7 +548,9 @@ int main(int argc, char *argv[]) { } #else - sprintf(dumpfile, "thread_info-step%d.dat", j); + char dumpfile[30]; + snprintf(dumpfile, 30, "thread_info-step%d.dat", j); + FILE *file_thread; file_thread = fopen(dumpfile, "w"); for (int l = 0; l < e.sched.nr_tasks; l++) if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit) @@ -616,26 +564,6 @@ int main(int argc, char *argv[]) { fclose(file_thread); #endif } - - /* Dump a line of aggregate output. */ - /* if (myrank == 0) { */ - /* printf("%i %e %.16e %.16e %.16e %.3e %.3e %i %.3e %.3e", j, e.time, - */ - /* e.ekin + e.epot, e.ekin, e.epot, e.dt, e.dt_step, - * e.count_step, */ - /* e.dt_min, e.dt_max); */ - /* for (k = 0; k < timer_count; k++) */ - /* printf(" %.3f", clocks_from_ticks(timers[k]); */ - /* printf("\n"); */ - /* fflush(stdout); */ - /* } */ - - /* if (myrank == 0) { */ - /* printf("%i %e", j, e.time); */ - /* printf(" %.3f", clocks_from_ticks(timers[timer_count - 1]); */ - /* printf("\n"); */ - /* fflush(stdout); */ - /* } */ } /* Print the values of the runner histogram. */ @@ -673,7 +601,7 @@ int main(int argc, char *argv[]) { } #ifdef WITH_MPI - if (MPI_Finalize() != MPI_SUCCESS) + if ((res = MPI_Finalize()) != MPI_SUCCESS) error("call to MPI_Finalize failed with error %i.", res); #endif diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml new file mode 100644 index 0000000000000000000000000000000000000000..b91e99baf383a399b72bfb73f1791ab7ac6f3d91 --- /dev/null +++ b/examples/parameter_example.yml @@ -0,0 +1,48 @@ + +# Define the system of units to use internally. 
+UnitSystem: + UnitMass_in_cgs: 1 # Grams + UnitLength_in_cgs: 1 # Centimeters + UnitVelocity_in_cgs: 1 # Centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Parameters for the task scheduling +Scheduler: + nr_threads: 2 # The number of threads per MPI rank to use. + nr_queues: 0 # The number of task queues to use. Use 0 to let the system decide. + cell_max_size: 8000000 # Maximal number of interactions per task (this is the default value). + cell_sub_size: 8000000 # Maximal number of interactions per sub-task (this is the default value). + cell_split_size: 400 # Maximal number of particles per cell (this is the default value). + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 1. # The end time of the simulation (in internal units). + dt_min: 1e-6 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2349 # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel). + delta_neighbours: 1. # The tolerance for the targeted number of neighbours. + CFL_condition: 0.1 # Courant-Friedrichs-Lewy condition for time integration. + max_ghost_iterations: 30 # Maximal number of iterations allowed to converge towards the smoothing length. + max_smoothing_length: 3. # Maximal smoothing length allowed (in internal units). + +# Parameters related to the initial conditions +InitialConditions: + file_name: SedovBlast/sedov.hdf5 # The file to read + h_scaling: 1. # A scaling factor to apply to all smoothing lengths in the ICs. + shift_x: 0. # A shift to apply to all particles read from the ICs (in internal units). + shift_y: 0. + shift_z: 0. 
+ +# Parameters governing domain decomposition +DomainDecomposition: + initial_type: m # The initial strategy ("g", "m", "w", or "v"). See documentation for details. + initial_grid_x: 10 # Grid size if the 'g' strategy is chosen. + initial_grid_y: 10 + initial_grid_z: 10 + repartition_type: b # The re-decomposition strategy ("n", "b", "v", "e" or "x"). See documentation for details. + diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py index eaff41ebae1bad0f1307d23a3204186ecbc63b2f..895c32ef9c3d1490e6d30b7dc79e40171a228ee9 100755 --- a/examples/plot_tasks.py +++ b/examples/plot_tasks.py @@ -60,7 +60,7 @@ pl.rcParams.update(PLOT_PARAMS) # Tasks and subtypes. Indexed as in tasks.h. TASKTYPES = ["none", "sort", "self", "pair", "sub", "init", "ghost", "drift", "kick", "send", "recv", "grav_pp", "grav_mm", "grav_up", "grav_down", - "psort", "split_cell", "rewait", "count"] + "part_sort", "gpart_sort", "split_cell", "rewait", "count"] TASKCOLOURS = {"none": "black", "sort": "lightblue", @@ -77,7 +77,8 @@ TASKCOLOURS = {"none": "black", "grav_mm": "mediumturquoise", "grav_up": "mediumvioletred", "grav_down": "mediumnightblue", - "psort": "steelblue", + "part_sort": "steelblue", + "gpart_sort": "teal" , "split_cell": "seagreen", "rewait": "olive", "count": "powerblue"} diff --git a/examples/plot_tasks_MPI.py b/examples/plot_tasks_MPI.py index b7d1823ad746d6a10b5e67fc9f7315b13be4649f..d59fe6417b524b8cb3cf8f6117fca3b8b3f3c780 100755 --- a/examples/plot_tasks_MPI.py +++ b/examples/plot_tasks_MPI.py @@ -66,7 +66,7 @@ pl.rcParams.update(PLOT_PARAMS) # Tasks and subtypes. Indexed as in tasks.h. 
TASKTYPES = ["none", "sort", "self", "pair", "sub", "init", "ghost", "drift", "kick", "send", "recv", "grav_pp", "grav_mm", "grav_up", "grav_down", - "psort", "split_cell", "rewait", "count"] + "part_sort", "gpart_sort", "split_cell", "rewait", "count"] TASKCOLOURS = {"none": "black", "sort": "lightblue", @@ -83,7 +83,8 @@ TASKCOLOURS = {"none": "black", "grav_mm": "mediumturquoise", "grav_up": "mediumvioletred", "grav_down": "mediumnightblue", - "psort": "steelblue", + "part_sort": "steelblue", + "gpart_sort": "teal", "split_cell": "seagreen", "rewait": "olive", "count": "powerblue"} diff --git a/examples/runs.sh b/examples/runs.sh deleted file mode 100755 index 339d8659675843f2491068ed8d30b528cb147c34..0000000000000000000000000000000000000000 --- a/examples/runs.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Set some global stuff -export OMP_WAIT_POLICY=PASSIVE - -# Generate the initial conditions if they are not present. -if [ ! -e SodShock/sodShock.hdf5 ] -then - echo "Generating initial conditions for the SodShock example..." - cd SodShock - python makeIC.py - cd .. -fi -if [ ! -e SedovBlast/sedov.hdf5 ] -then - echo "Generating initial conditions for the SedovBlast example..." - cd SedovBlast/ - python makeIC_fcc.py - cd .. -fi -if [ ! -e CosmoVolume/cosmoVolume.hdf5 ] -then - echo "Downloading initial conditions for the CosmoVolume example..." - cd CosmoVolume - ./getIC.sh - cd .. -fi - - -# Loop over number of cores -for cpu in {1..32} -do - - # Sod-Shock runs - if [ ! -e SodShock_${cpu}.dump ] - then - ./swift -t $cpu -f SodShock/sodShock.hdf5 -m 0.01 -w 5000 -c 1. -d 1e-7 -e 0.01 > SodShock_fixed_${cpu}.dump - fi - - # Sedov blast - if [ ! -e SedovBlast_${cpu}.dump ] - then - ./swift -t $cpu -f SedovBlast/sedov.hdf5 -m 0.02 -w 5000 -c 1. -d 1e-7 -e 0.01 > SedovBlast_fixed_${cpu}.dump - fi - - # Cosmological volume - if [ ! -e CosmoVolume_${cpu}.dump ] - then - ./swift -t $cpu -f CosmoVolume/cosmoVolume.hdf5 -m 0.6 -w 5000 -c 1. 
-d 1e-7 -e 0.01 > CosmoVolume_fixed_${cpu}.dump - fi - -done - diff --git a/src/Makefile.am b/src/Makefile.am index f44d47819672d10445fd969fe2ff20dbcb49463b..a96f35b3cf0d8a23aec4f8c0f8d16bec8638cbcd 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -35,17 +35,17 @@ endif # List required headers include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ engine.h swift.h serial_io.h timers.h debug.h scheduler.h proxy.h parallel_io.h \ - common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h + common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h # Common source files AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \ units.c common_io.c single_io.c multipole.c version.c map.c \ - kernel.c tools.c part.c partition.c clocks.c + kernel_hydro.c kernel_gravity.c tools.c part.c partition.c clocks.c parser.c # Include files for distribution, not installation. -nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel.h vector.h \ - runner_doiact.h runner_doiact_grav.h units.h intrinsics.h minmax.h \ +nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ + vector.h runner_doiact.h runner_doiact_grav.h units.h intrinsics.h minmax.h \ gravity.h gravity_io.h \ gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \ gravity/Default/gravity_debug.h gravity/Default/gravity_part.h \ diff --git a/src/cell.c b/src/cell.c index df11782048dfa80c697f53feefe8fabc104eb23b..61acfaaea7a0af01a78ab773541564e9a2723f4e 100644 --- a/src/cell.c +++ b/src/cell.c @@ -45,6 +45,7 @@ /* Local headers. 
*/ #include "atomic.h" #include "error.h" +#include "gravity.h" #include "hydro.h" #include "space.h" #include "timers.h" @@ -89,14 +90,18 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { c->ti_end_min = pc->ti_end_min; c->ti_end_max = pc->ti_end_max; c->count = pc->count; + c->gcount = pc->gcount; c->tag = pc->tag; - /* Fill the progeny recursively, depth-first. */ + /* Number of new cells created. */ int count = 1; + + /* Fill the progeny recursively, depth-first. */ for (int k = 0; k < 8; k++) if (pc->progeny[k] >= 0) { struct cell *temp = space_getcell(s); temp->count = 0; + temp->gcount = 0; temp->loc[0] = c->loc[0]; temp->loc[1] = c->loc[1]; temp->loc[2] = c->loc[2]; @@ -122,7 +127,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { } /** - * @brief Link the cells recursively to the given part array. + * @brief Link the cells recursively to the given #part array. * * @param c The #cell. * @param parts The #part array. @@ -130,7 +135,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { * @return The number of particles linked. */ -int cell_link(struct cell *c, struct part *parts) { +int cell_link_parts(struct cell *c, struct part *parts) { c->parts = parts; @@ -139,14 +144,40 @@ int cell_link(struct cell *c, struct part *parts) { int offset = 0; for (int k = 0; k < 8; k++) { if (c->progeny[k] != NULL) - offset += cell_link(c->progeny[k], &parts[offset]); + offset += cell_link_parts(c->progeny[k], &parts[offset]); } } - /* Return the total number of unpacked cells. */ + /* Return the total number of linked particles. */ return c->count; } +/** + * @brief Link the cells recursively to the given #gpart array. + * + * @param c The #cell. + * @param gparts The #gpart array. + * + * @return The number of particles linked. + */ + +int cell_link_gparts(struct cell *c, struct gpart *gparts) { + + c->gparts = gparts; + + /* Fill the progeny recursively, depth-first. 
*/ + if (c->split) { + int offset = 0; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) + offset += cell_link_gparts(c->progeny[k], &gparts[offset]); + } + } + + /* Return the total number of linked particles. */ + return c->gcount; +} + /** * @brief Pack the data of the given cell and all it's sub-cells. * @@ -164,6 +195,7 @@ int cell_pack(struct cell *c, struct pcell *pc) { pc->ti_end_min = c->ti_end_min; pc->ti_end_max = c->ti_end_max; pc->count = c->count; + pc->gcount = c->gcount; c->tag = pc->tag = atomic_inc(&cell_next_tag) % cell_max_tag; /* Fill in the progeny, depth-first recursion. */ @@ -574,6 +606,27 @@ void cell_init_parts(struct cell *c, void *data) { c->ti_end_max = 0; } +/** + * @brief Initialises all g-particles to a valid state even if the ICs were + *stupid + * + * @param c Cell to act upon + * @param data Unused parameter + */ +void cell_init_gparts(struct cell *c, void *data) { + + struct gpart *gp = c->gparts; + const int gcount = c->gcount; + + for (int i = 0; i < gcount; ++i) { + gp[i].ti_begin = 0; + gp[i].ti_end = 0; + gravity_first_init_gpart(&gp[i]); + } + c->ti_end_min = 0; + c->ti_end_max = 0; +} + /** * @brief Converts hydro quantities to a valid state after the initial density *calculation diff --git a/src/cell.h b/src/cell.h index b0451b311fda9c300427da6b3a9a25955090d799..a471eac44bfd3533c4220ab8c5ff2ddec724e87f 100644 --- a/src/cell.h +++ b/src/cell.h @@ -44,7 +44,7 @@ struct pcell { int ti_end_min, ti_end_max; /* Number of particles in this cell. */ - int count; + int count, gcount; /* tag used for MPI communication. */ int tag; @@ -141,7 +141,7 @@ struct cell { double mass, e_pot, e_int, e_kin; /* Number of particles updated in this cell. */ - int updated; + int updated, g_updated; /* Linking pointer for "memory management". 
*/ struct cell *next; @@ -175,8 +175,10 @@ void cell_gunlocktree(struct cell *c); int cell_pack(struct cell *c, struct pcell *pc); int cell_unpack(struct pcell *pc, struct cell *c, struct space *s); int cell_getsize(struct cell *c); -int cell_link(struct cell *c, struct part *parts); +int cell_link_parts(struct cell *c, struct part *parts); +int cell_link_gparts(struct cell *c, struct gpart *gparts); void cell_init_parts(struct cell *c, void *data); +void cell_init_gparts(struct cell *c, void *data); void cell_convert_hydro(struct cell *c, void *data); void cell_clean_links(struct cell *c, void *data); diff --git a/src/common_io.c b/src/common_io.c index f6a4803333581b69671e3adc223b46122ec5364c..2a635723d5bd4db7bce0a0172e8c083bf479ac32 100644 --- a/src/common_io.c +++ b/src/common_io.c @@ -42,9 +42,12 @@ /* Local includes. */ #include "const.h" #include "error.h" -#include "kernel.h" +#include "kernel_hydro.h" #include "version.h" +const char* particle_type_names[NUM_PARTICLE_TYPES] = { + "Gas", "DM", "Boundary", "Dummy", "Star", "BH"}; + /** * @brief Converts a C data type to the HDF5 equivalent. 
* @@ -279,15 +282,15 @@ void writeUnitSystem(hid_t h_file, struct UnitSystem* us) { if (h_grpunit < 0) error("Error while creating Unit System group"); writeAttribute_d(h_grpunit, "Unit mass in cgs (U_M)", - getBaseUnit(us, UNIT_MASS)); + units_get_base_unit(us, UNIT_MASS)); writeAttribute_d(h_grpunit, "Unit length in cgs (U_L)", - getBaseUnit(us, UNIT_LENGTH)); + units_get_base_unit(us, UNIT_LENGTH)); writeAttribute_d(h_grpunit, "Unit time in cgs (U_t)", - getBaseUnit(us, UNIT_TIME)); + units_get_base_unit(us, UNIT_TIME)); writeAttribute_d(h_grpunit, "Unit current in cgs (U_I)", - getBaseUnit(us, UNIT_CURRENT)); + units_get_base_unit(us, UNIT_CURRENT)); writeAttribute_d(h_grpunit, "Unit temperature in cgs (U_T)", - getBaseUnit(us, UNIT_TEMPERATURE)); + units_get_base_unit(us, UNIT_TEMPERATURE)); H5Gclose(h_grpunit); } @@ -402,52 +405,68 @@ void createXMFfile() { *snapshot * * @param xmfFile The file to write in. - * @param Nparts The number of particles. * @param hdfFileName The name of the HDF5 file corresponding to this output. * @param time The current simulation time. 
*/ -void writeXMFheader(FILE* xmfFile, long long Nparts, char* hdfFileName, - float time) { +void writeXMFoutputheader(FILE* xmfFile, char* hdfFileName, float time) { /* Write end of file */ + fprintf(xmfFile, "<!-- XMF description for file: %s -->\n", hdfFileName); fprintf(xmfFile, "<Grid GridType=\"Collection\" CollectionType=\"Spatial\">\n"); fprintf(xmfFile, "<Time Type=\"Single\" Value=\"%f\"/>\n", time); - fprintf(xmfFile, "<Grid Name=\"Gas\" GridType=\"Uniform\">\n"); - fprintf(xmfFile, - "<Topology TopologyType=\"Polyvertex\" Dimensions=\"%lld\"/>\n", - Nparts); - fprintf(xmfFile, "<Geometry GeometryType=\"XYZ\">\n"); - fprintf(xmfFile, - "<DataItem Dimensions=\"%lld 3\" NumberType=\"Double\" " - "Precision=\"8\" " - "Format=\"HDF\">%s:/PartType0/Coordinates</DataItem>\n", - Nparts, hdfFileName); - fprintf(xmfFile, "</Geometry>"); } /** * @brief Writes the end of the XMF file (closes all open markups) * * @param xmfFile The file to write in. + * @param output The number of this output. + * @param time The current simulation time. 
*/ -void writeXMFfooter(FILE* xmfFile) { +void writeXMFoutputfooter(FILE* xmfFile, int output, float time) { /* Write end of the section of this time step */ - fprintf(xmfFile, "\n</Grid>\n"); - fprintf(xmfFile, "</Grid>\n"); - fprintf(xmfFile, "\n</Grid>\n"); + fprintf(xmfFile, + "\n</Grid> <!-- End of meta-data for output=%03i, time=%f -->\n", + output, time); + fprintf(xmfFile, "\n</Grid> <!-- timeSeries -->\n"); fprintf(xmfFile, "</Domain>\n"); fprintf(xmfFile, "</Xdmf>\n"); fclose(xmfFile); } +void writeXMFgroupheader(FILE* xmfFile, char* hdfFileName, size_t N, + enum PARTICLE_TYPE ptype) { + fprintf(xmfFile, "\n<Grid Name=\"%s\" GridType=\"Uniform\">\n", + particle_type_names[ptype]); + fprintf(xmfFile, + "<Topology TopologyType=\"Polyvertex\" Dimensions=\"%zi\"/>\n", N); + fprintf(xmfFile, "<Geometry GeometryType=\"XYZ\">\n"); + fprintf(xmfFile, + "<DataItem Dimensions=\"%zi 3\" NumberType=\"Double\" " + "Precision=\"8\" " + "Format=\"HDF\">%s:/PartType%d/Coordinates</DataItem>\n", + N, hdfFileName, ptype); + fprintf(xmfFile, + "</Geometry>\n <!-- Done geometry for %s, start of particle fields " + "list -->\n", + particle_type_names[ptype]); +} + +void writeXMFgroupfooter(FILE* xmfFile, enum PARTICLE_TYPE ptype) { + fprintf(xmfFile, "</Grid> <!-- End of meta-data for parttype=%s -->\n", + particle_type_names[ptype]); +} + /** * @brief Writes the lines corresponding to an array of the HDF5 output * * @param xmfFile The file in which to write * @param fileName The name of the HDF5 file associated to this XMF descriptor. + * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param name The name of the array in the HDF5 file. * @param N The number of particles. * @param dim The dimension of the quantity (1 for scalars, 3 for vectors). @@ -455,21 +474,21 @@ void writeXMFfooter(FILE* xmfFile) { * * @todo Treat the types in a better way. 
*/ -void writeXMFline(FILE* xmfFile, char* fileName, char* name, long long N, - int dim, enum DATA_TYPE type) { +void writeXMFline(FILE* xmfFile, char* fileName, char* partTypeGroupName, + char* name, size_t N, int dim, enum DATA_TYPE type) { fprintf(xmfFile, "<Attribute Name=\"%s\" AttributeType=\"%s\" Center=\"Node\">\n", name, dim == 1 ? "Scalar" : "Vector"); if (dim == 1) fprintf(xmfFile, - "<DataItem Dimensions=\"%lld\" NumberType=\"Double\" " - "Precision=\"%d\" Format=\"HDF\">%s:/PartType0/%s</DataItem>\n", - N, type == FLOAT ? 4 : 8, fileName, name); + "<DataItem Dimensions=\"%zi\" NumberType=\"Double\" " + "Precision=\"%d\" Format=\"HDF\">%s:%s/%s</DataItem>\n", + N, type == FLOAT ? 4 : 8, fileName, partTypeGroupName, name); else fprintf(xmfFile, - "<DataItem Dimensions=\"%lld %d\" NumberType=\"Double\" " - "Precision=\"%d\" Format=\"HDF\">%s:/PartType0/%s</DataItem>\n", - N, dim, type == FLOAT ? 4 : 8, fileName, name); + "<DataItem Dimensions=\"%zi %d\" NumberType=\"Double\" " + "Precision=\"%d\" Format=\"HDF\">%s:%s/%s</DataItem>\n", + N, dim, type == FLOAT ? 4 : 8, fileName, partTypeGroupName, name); fprintf(xmfFile, "</Attribute>\n"); } @@ -483,13 +502,14 @@ void writeXMFline(FILE* xmfFile, char* fileName, char* name, long long N, * @param gparts The array of #gpart freshly read in. * @param Ndm The number of DM particles read in. */ -void prepare_dm_gparts(struct gpart* gparts, size_t Ndm) { +void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm) { /* Let's give all these gparts a negative id */ for (size_t i = 0; i < Ndm; ++i) { /* 0 or negative ids are not allowed */ - if (gparts[i].id <= 0) error("0 or negative ID for DM particle"); + if (gparts[i].id <= 0) + error("0 or negative ID for DM particle %zd: ID=%lld", i, gparts[i].id); gparts[i].id = -gparts[i].id; } @@ -507,8 +527,9 @@ void prepare_dm_gparts(struct gpart* gparts, size_t Ndm) { * @param Ngas The number of gas particles read in. * @param Ndm The number of DM particles read in. 
*/ -void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts, - size_t Ngas, size_t Ndm) { +void duplicate_hydro_gparts(struct part* const parts, + struct gpart* const gparts, size_t Ngas, + size_t Ndm) { for (size_t i = 0; i < Ngas; ++i) { @@ -537,16 +558,19 @@ void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts, * @param dmparts The array of #gpart containg DM particles to be filled. * @param Ndm The number of DM particles. */ -void collect_dm_gparts(struct gpart* gparts, size_t Ntot, struct gpart* dmparts, - size_t Ndm) { +void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot, + struct gpart* const dmparts, size_t Ndm) { size_t count = 0; /* Loop over all gparts */ for (size_t i = 0; i < Ntot; ++i) { + /* message("i=%zd count=%zd id=%lld part=%p", i, count, gparts[i].id, + * gparts[i].part); */ + /* And collect the DM ones */ - if (gparts[i].id < 0) { + if (gparts[i].id < 0LL) { memcpy(&dmparts[count], &gparts[i], sizeof(struct gpart)); dmparts[count].id = -dmparts[count].id; count++; diff --git a/src/common_io.h b/src/common_io.h index 2623a03f9a25ce0e650dde4f698da6eb49177e26..b7f3a1a317d69937dde8692eead8f00c75649477 100644 --- a/src/common_io.h +++ b/src/common_io.h @@ -24,6 +24,7 @@ #include "../config.h" /* Includes. 
*/ +#include "kernel_hydro.h" #include "part.h" #include "units.h" @@ -70,14 +71,20 @@ enum PARTICLE_TYPE { NUM_PARTICLE_TYPES }; +extern const char* particle_type_names[]; + +#define FILENAME_BUFFER_SIZE 150 +#define PARTICLE_GROUP_BUFFER_SIZE 20 + hid_t hdf5Type(enum DATA_TYPE type); size_t sizeOfType(enum DATA_TYPE type); -void collect_dm_gparts(struct gpart* gparts, size_t Ntot, struct gpart* dmparts, - size_t Ndm); -void prepare_dm_gparts(struct gpart* gparts, size_t Ndm); -void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts, - size_t Ngas, size_t Ndm); +void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot, + struct gpart* const dmparts, size_t Ndm); +void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm); +void duplicate_hydro_gparts(struct part* const parts, + struct gpart* const gparts, size_t Ngas, + size_t Ndm); void readAttribute(hid_t grp, char* name, enum DATA_TYPE type, void* data); @@ -92,10 +99,13 @@ void writeAttribute_s(hid_t grp, char* name, const char* str); void createXMFfile(); FILE* prepareXMFfile(); -void writeXMFfooter(FILE* xmfFile); -void writeXMFheader(FILE* xmfFile, long long N, char* hdfFileName, float time); -void writeXMFline(FILE* xmfFile, char* fileName, char* name, long long N, - int dim, enum DATA_TYPE type); +void writeXMFoutputheader(FILE* xmfFile, char* hdfFileName, float time); +void writeXMFoutputfooter(FILE* xmfFile, int outputCount, float time); +void writeXMFgroupheader(FILE* xmfFile, char* hdfFileName, size_t N, + enum PARTICLE_TYPE ptype); +void writeXMFgroupfooter(FILE* xmfFile, enum PARTICLE_TYPE ptype); +void writeXMFline(FILE* xmfFile, char* fileName, char* partTypeGroupName, + char* name, size_t N, int dim, enum DATA_TYPE type); void writeCodeDescription(hid_t h_file); void writeSPHflavour(hid_t h_file); diff --git a/src/const.h b/src/const.h index 3bd9edff8227a87d040ec7309998364c946307af..6a52ec4796a4904629a57ffa8b32a3107bde263e 100644 --- a/src/const.h +++ b/src/const.h @@ 
-70,9 +70,4 @@ #define GADGET2_SPH //#define DEFAULT_SPH -/* System of units */ -#define const_unit_length_in_cgs 1 /* 3.08567810e16 /\* 1Mpc *\/ */ -#define const_unit_mass_in_cgs 1 /* 1.9891e33 /\* 1 M_sun *\/ */ -#define const_unit_velocity_in_cgs 1 /* 1e5 /\* km s^-1 *\/ */ - #endif /* SWIFT_CONST_H */ diff --git a/src/debug.c b/src/debug.c index 4c1434118c98aab7def28d3a53493767d249d774..53a03d66aee2c169a555ed00a2efa2d5b984066a 100644 --- a/src/debug.c +++ b/src/debug.c @@ -60,7 +60,7 @@ void printParticle(struct part *parts, struct xpart *xparts, long long int id, /* Look for the particle. */ for (size_t i = 0; i < N; i++) if (parts[i].id == id) { - printf("## Particle[%zd]:\n id=%lld", i, parts[i].id); + printf("## Particle[%zd]:\n id=%lld ", i, parts[i].id); hydro_debug_particle(&parts[i], &xparts[i]); found = 1; break; @@ -76,12 +76,12 @@ void printgParticle(struct gpart *gparts, long long int id, size_t N) { /* Look for the particle. */ for (size_t i = 0; i < N; i++) if (gparts[i].id == -id) { - printf("## gParticle[%zd] (DM) :\n id=%lld", i, -gparts[i].id); + printf("## gParticle[%zd] (DM) :\n id=%lld ", i, -gparts[i].id); gravity_debug_particle(&gparts[i]); found = 1; break; } else if (gparts[i].id > 0 && gparts[i].part->id == id) { - printf("## gParticle[%zd] (hydro) :\n id=%lld", i, gparts[i].id); + printf("## gParticle[%zd] (hydro) :\n id=%lld ", i, gparts[i].id); gravity_debug_particle(&gparts[i]); found = 1; break; diff --git a/src/engine.c b/src/engine.c index c34214c05b6fb45991208dd78689b58ba5d9731f..e49d6da779d4333a00a60da920144d92a9241305 100644 --- a/src/engine.c +++ b/src/engine.c @@ -56,10 +56,11 @@ #include "partition.h" #include "timers.h" -const char *engine_policy_names[12] = { - "none", "rand", "steal", "keep", - "block", "fix_dt", "cpu_tight", "mpi", - "numa_affinity", "hydro", "self_gravity", "external_gravity"}; +const char *engine_policy_names[13] = { + "none", "rand", "steal", "keep", + "block", "fix_dt", "cpu_tight", "mpi", + 
"numa_affinity", "hydro", "self_gravity", "external_gravity", + "cosmology_integration"}; /** The rank of the engine as a global variable (for messages). */ int engine_rank; @@ -87,14 +88,17 @@ struct link *engine_addlink(struct engine *e, struct link *l, struct task *t) { } /** - * @brief Generate the ghost and kick tasks for a hierarchy of cells. + * @brief Generate the ghosts all the O(Npart) tasks for a hierarchy of cells. + * + * Tasks are only created here. The dependencies will be added later on. * * @param e The #engine. * @param c The #cell. * @param super The super #cell. */ -void engine_mkghosts(struct engine *e, struct cell *c, struct cell *super) { +void engine_make_ghost_tasks(struct engine *e, struct cell *c, + struct cell *super) { struct scheduler *s = &e->sched; @@ -128,46 +132,64 @@ void engine_mkghosts(struct engine *e, struct cell *c, struct cell *super) { /* Recurse. */ if (c->split) for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) engine_mkghosts(e, c->progeny[k], super); + if (c->progeny[k] != NULL) + engine_make_ghost_tasks(e, c->progeny[k], super); } /** * @brief Redistribute the particles amongst the nodes according * to their cell's node IDs. * + * The strategy here is as follows: + * 1) Each node counts the number of particles it has to send to each other + * node. + * 2) The number of particles of each type is then exchanged. + * 3) The particles to send are placed in a temporary buffer in which the + * part-gpart links are preserved. + * 4) Each node allocates enough space for the new particles. + * 5) (Asynchronous) communications are issued to transfer the data. + * + * * @param e The #engine. 
*/ - void engine_redistribute(struct engine *e) { #ifdef WITH_MPI - int nr_nodes = e->nr_nodes, nodeID = e->nodeID; + const int nr_nodes = e->nr_nodes; + const int nodeID = e->nodeID; struct space *s = e->s; - int my_cells = 0; - int *cdim = s->cdim; struct cell *cells = s->cells; - int nr_cells = s->nr_cells; + const int nr_cells = s->nr_cells; + const int *cdim = s->cdim; + const double ih[3] = {s->ih[0], s->ih[1], s->ih[2]}; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + struct part *parts = s->parts; + struct xpart *xparts = s->xparts; + struct gpart *gparts = s->gparts; ticks tic = getticks(); - /* Start by sorting the particles according to their nodes and - getting the counts. The counts array is indexed as - count[from * nr_nodes + to]. */ - int *counts; - size_t *dest; - double ih[3], dim[3]; - ih[0] = s->ih[0]; - ih[1] = s->ih[1]; - ih[2] = s->ih[2]; - dim[0] = s->dim[0]; - dim[1] = s->dim[1]; - dim[2] = s->dim[2]; - if ((counts = (int *)malloc(sizeof(int) *nr_nodes *nr_nodes)) == NULL || - (dest = (size_t *)malloc(sizeof(size_t) * s->nr_parts)) == NULL) - error("Failed to allocate count and dest buffers."); + /* Allocate temporary arrays to store the counts of particles to be sent + and the destination of each particle */ + int *counts, *g_counts; + if ((counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL) + error("Failed to allocate count temporary buffer."); + if ((g_counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL) + error("Failed to allocate gcount temporary buffer."); bzero(counts, sizeof(int) * nr_nodes * nr_nodes); - struct part *parts = s->parts; + bzero(g_counts, sizeof(int) * nr_nodes * nr_nodes); + + // Allocate the destination index arrays. 
+ int *dest, *g_dest; + if ((dest = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL) + error("Failed to allocate dest temporary buffer."); + if ((g_dest = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL) + error("Failed to allocate g_dest temporary buffer."); + + /* Get destination of each particle */ for (size_t k = 0; k < s->nr_parts; k++) { + + /* Periodic boundary conditions */ for (int j = 0; j < 3; j++) { if (parts[k].x[j] < 0.0) parts[k].x[j] += dim[j]; @@ -180,36 +202,121 @@ void engine_redistribute(struct engine *e) { error("Bad cell id %i for part %i at [%.3e,%.3e,%.3e].", cid, k, parts[k].x[0], parts[k].x[1], parts[k].x[2]); */ dest[k] = cells[cid].nodeID; + + /* The counts array is indexed as count[from * nr_nodes + to]. */ counts[nodeID * nr_nodes + dest[k]] += 1; } + + /* Sort the particles according to their destination node. */ space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1, e->verbose); + /* We need to re-link the gpart partners of parts. */ + int current_dest = dest[0]; + size_t count_this_dest = 0; + for (size_t k = 0; k < s->nr_parts; ++k) { + if (s->parts[k].gpart != NULL) { + + /* As the addresses will be invalidated by the communications, we will */ + /* instead store the absolute index from the start of the sub-array */ + /* of particles to be sent to a given node. */ + /* Recall that gparts without partners have a negative id. */ + /* We will restore the pointers on the receiving node later on.
*/ + if (dest[k] != current_dest) { + current_dest = dest[k]; + count_this_dest = 0; + } + + /* Debug */ + /* if(s->parts[k].gpart->id < 0) */ + /* error("Trying to link a partnerless gpart !"); */ + + s->parts[k].gpart->id = count_this_dest; + count_this_dest++; + } + } + + /* Get destination of each g-particle */ + for (size_t k = 0; k < s->nr_gparts; k++) { + + /* Periodic boundary conditions */ + for (int j = 0; j < 3; j++) { + if (gparts[k].x[j] < 0.0) + gparts[k].x[j] += dim[j]; + else if (gparts[k].x[j] >= dim[j]) + gparts[k].x[j] -= dim[j]; + } + const int cid = cell_getid(cdim, gparts[k].x[0] * ih[0], + gparts[k].x[1] * ih[1], gparts[k].x[2] * ih[2]); + /* if (cid < 0 || cid >= s->nr_cells) + error("Bad cell id %i for part %i at [%.3e,%.3e,%.3e].", + cid, k, g_parts[k].x[0], g_parts[k].x[1], g_parts[k].x[2]); */ + g_dest[k] = cells[cid].nodeID; + + /* The counts array is indexed as count[from * nr_nodes + to]. */ + g_counts[nodeID * nr_nodes + g_dest[k]] += 1; + } + + /* Sort the gparticles according to their cell index. */ + space_gparts_sort(s, g_dest, s->nr_gparts, 0, nr_nodes - 1, e->verbose); + /* Get all the counts from all the nodes. */ if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) error("Failed to allreduce particle transfer counts."); - /* Get the new number of parts for this node, be generous in allocating. */ - size_t nr_parts = 0; + /* Get all the g_counts from all the nodes. */ + if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT, + MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to allreduce gparticle transfer counts."); + + /* Each node knows how many parts and gparts will be transferred to every + other node. 
We can start preparing to receive data */ + + /* Get the new number of parts and gparts for this node */ + size_t nr_parts = 0, nr_gparts = 0; for (int k = 0; k < nr_nodes; k++) nr_parts += counts[k * nr_nodes + nodeID]; + for (int k = 0; k < nr_nodes; k++) + nr_gparts += g_counts[k * nr_nodes + nodeID]; + + /* Allocate the new arrays with some extra margin */ struct part *parts_new = NULL; - struct xpart *xparts_new = NULL, *xparts = s->xparts; + struct xpart *xparts_new = NULL; + struct gpart *gparts_new = NULL; if (posix_memalign((void **)&parts_new, part_align, - sizeof(struct part) * nr_parts * 1.2) != 0 || - posix_memalign((void **)&xparts_new, part_align, - sizeof(struct xpart) * nr_parts * 1.2) != 0) + sizeof(struct part) * nr_parts * + engine_redistribute_alloc_margin) != 0) error("Failed to allocate new part data."); - - /* Emit the sends and recvs for the particle data. */ + if (posix_memalign((void **)&xparts_new, xpart_align, + sizeof(struct xpart) * nr_parts * + engine_redistribute_alloc_margin) != 0) + error("Failed to allocate new xpart data."); + if (posix_memalign((void **)&gparts_new, gpart_align, + sizeof(struct gpart) * nr_gparts * + engine_redistribute_alloc_margin) != 0) + error("Failed to allocate new gpart data."); + + /* Prepare MPI requests for the asynchronous communications */ MPI_Request *reqs; - if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 4 * nr_nodes)) == + if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 6 * nr_nodes)) == NULL) error("Failed to allocate MPI request list."); - for (int k = 0; k < 4 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL; - for (size_t offset_send = 0, offset_recv = 0, k = 0; k < nr_nodes; k++) { - int ind_send = nodeID * nr_nodes + k; - int ind_recv = k * nr_nodes + nodeID; + for (int k = 0; k < 6 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL; + + /* Emit the sends and recvs for the particle and gparticle data. 
*/ + size_t offset_send = 0, offset_recv = 0; + size_t g_offset_send = 0, g_offset_recv = 0; + for (int k = 0; k < nr_nodes; k++) { + + /* Indices in the count arrays of the node of interest */ + const int ind_send = nodeID * nr_nodes + k; + const int ind_recv = k * nr_nodes + nodeID; + + /* Are we sending any part/xpart ? */ if (counts[ind_send] > 0) { + + /* message("Sending %d part to node %d", counts[ind_send], k); */ + + /* If the send is to the same node, just copy */ if (k == nodeID) { memcpy(&parts_new[offset_recv], &s->parts[offset_send], sizeof(struct part) * counts[ind_recv]); @@ -217,36 +324,73 @@ void engine_redistribute(struct engine *e) { sizeof(struct xpart) * counts[ind_recv]); offset_send += counts[ind_send]; offset_recv += counts[ind_recv]; + + /* Else, emit some communications */ } else { - if (MPI_Isend(&s->parts[offset_send], counts[ind_send], - e->part_mpi_type, k, 2 * ind_send + 0, MPI_COMM_WORLD, - &reqs[4 * k]) != MPI_SUCCESS) - error("Failed to isend parts to node %zi.", k); - if (MPI_Isend(&s->xparts[offset_send], counts[ind_send], - e->xpart_mpi_type, k, 2 * ind_send + 1, MPI_COMM_WORLD, - &reqs[4 * k + 1]) != MPI_SUCCESS) - error("Failed to isend xparts to node %zi.", k); + if (MPI_Isend(&s->parts[offset_send], counts[ind_send], part_mpi_type, + k, 3 * ind_send + 0, MPI_COMM_WORLD, + &reqs[6 * k]) != MPI_SUCCESS) + error("Failed to isend parts to node %i.", k); + if (MPI_Isend(&s->xparts[offset_send], counts[ind_send], xpart_mpi_type, + k, 3 * ind_send + 1, MPI_COMM_WORLD, + &reqs[6 * k + 1]) != MPI_SUCCESS) + error("Failed to isend xparts to node %i.", k); offset_send += counts[ind_send]; } } + + /* Are we sending any gpart ? 
*/ + if (g_counts[ind_send] > 0) { + + /* message("Sending %d gpart to node %d", g_counts[ind_send], k); */ + + /* If the send is to the same node, just copy */ + if (k == nodeID) { + memcpy(&gparts_new[g_offset_recv], &s->gparts[g_offset_send], + sizeof(struct gpart) * g_counts[ind_recv]); + g_offset_send += g_counts[ind_send]; + g_offset_recv += g_counts[ind_recv]; + + /* Else, emit some communications */ + } else { + if (MPI_Isend(&s->gparts[g_offset_send], g_counts[ind_send], + gpart_mpi_type, k, 3 * ind_send + 2, MPI_COMM_WORLD, + &reqs[6 * k + 2]) != MPI_SUCCESS) + error("Failed to isend gparts to node %i.", k); + g_offset_send += g_counts[ind_send]; + } + } + + /* Now emit the corresponding Irecv() */ + + /* Are we receiving any part/xpart from this node ? */ if (k != nodeID && counts[ind_recv] > 0) { - if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], e->part_mpi_type, - k, 2 * ind_recv + 0, MPI_COMM_WORLD, - &reqs[4 * k + 2]) != MPI_SUCCESS) - error("Failed to emit irecv of parts from node %zi.", k); - if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv], - e->xpart_mpi_type, k, 2 * ind_recv + 1, MPI_COMM_WORLD, - &reqs[4 * k + 3]) != MPI_SUCCESS) - error("Failed to emit irecv of parts from node %zi.", k); + if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], part_mpi_type, k, + 3 * ind_recv + 0, MPI_COMM_WORLD, + &reqs[6 * k + 3]) != MPI_SUCCESS) + error("Failed to emit irecv of parts from node %i.", k); + if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv], xpart_mpi_type, + k, 3 * ind_recv + 1, MPI_COMM_WORLD, + &reqs[6 * k + 4]) != MPI_SUCCESS) + error("Failed to emit irecv of xparts from node %i.", k); offset_recv += counts[ind_recv]; } + + /* Are we receiving any gpart from this node ? 
*/ + if (k != nodeID && g_counts[ind_recv] > 0) { + if (MPI_Irecv(&gparts_new[g_offset_recv], g_counts[ind_recv], + gpart_mpi_type, k, 3 * ind_recv + 2, MPI_COMM_WORLD, + &reqs[6 * k + 5]) != MPI_SUCCESS) + error("Failed to emit irecv of gparts from node %i.", k); + g_offset_recv += g_counts[ind_recv]; + } } /* Wait for all the sends and recvs to tumble in. */ - MPI_Status stats[4 * nr_nodes]; + MPI_Status stats[6 * nr_nodes]; int res; - if ((res = MPI_Waitall(4 * nr_nodes, reqs, stats)) != MPI_SUCCESS) { - for (int k = 0; k < 4 * nr_nodes; k++) { + if ((res = MPI_Waitall(6 * nr_nodes, reqs, stats)) != MPI_SUCCESS) { + for (int k = 0; k < 6 * nr_nodes; k++) { char buff[MPI_MAX_ERROR_STRING]; int res; MPI_Error_string(stats[k].MPI_ERROR, buff, &res); @@ -255,35 +399,90 @@ void engine_redistribute(struct engine *e) { error("Failed during waitall for part data."); } + /* We now need to restore the part<->gpart links */ + size_t offset_parts = 0, offset_gparts = 0; + for (int node = 0; node < nr_nodes; ++node) { + + const int ind_recv = node * nr_nodes + nodeID; + const size_t count_parts = counts[ind_recv]; + const size_t count_gparts = g_counts[ind_recv]; + + /* Loop over the gparts received from that node */ + for (size_t k = offset_gparts; k < offset_gparts + count_gparts; ++k) { + + /* Does this gpart have a partner ? */ + if (gparts_new[k].id >= 0) { + + const size_t partner_index = offset_parts + gparts_new[k].id; + + /* Re-link */ + gparts_new[k].part = &parts_new[partner_index]; + gparts_new[k].part->gpart = &gparts_new[k]; + } + } + + offset_parts += count_parts; + offset_gparts += count_gparts; + } + /* Verify that all parts are in the right place. 
*/ - /* for ( k = 0 ; k < nr_parts ; k++ ) { - cid = cell_getid( cdim , parts_new[k].x[0]*ih[0] , parts_new[k].x[1]*ih[1] - , parts_new[k].x[2]*ih[2] ); + /* for ( int k = 0 ; k < nr_parts ; k++ ) { + int cid = cell_getid( cdim , parts_new[k].x[0]*ih[0], + parts_new[k].x[1]*ih[1], parts_new[k].x[2]*ih[2] ); if ( cells[ cid ].nodeID != nodeID ) - error( "Received particle (%i) that does not belong here (nodeID=%i)." - , k , cells[ cid ].nodeID ); - } */ + error( "Received particle (%i) that does not belong here + (nodeID=%i).", k , cells[ cid ].nodeID ); + } */ + + /* Verify that the links are correct */ + /* MATTHIEU: To be commented out once we are happy */ + for (size_t k = 0; k < nr_gparts; ++k) { + + if (gparts_new[k].id > 0) { + + if (gparts_new[k].part->gpart != &gparts_new[k]) + error("Linking problem !"); + + if (gparts_new[k].x[0] != gparts_new[k].part->x[0] || + gparts_new[k].x[1] != gparts_new[k].part->x[1] || + gparts_new[k].x[2] != gparts_new[k].part->x[2]) + error("Linked particles are not at the same position !"); + } + } + for (size_t k = 0; k < nr_parts; ++k) { + + if (parts_new[k].gpart != NULL) { + + if (parts_new[k].gpart->part != &parts_new[k]) error("Linking problem !"); + } + } /* Set the new part data, free the old. */ free(parts); free(xparts); + free(gparts); s->parts = parts_new; s->xparts = xparts_new; + s->gparts = gparts_new; s->nr_parts = nr_parts; - s->size_parts = 1.2 * nr_parts; + s->nr_gparts = nr_gparts; + s->size_parts = engine_redistribute_alloc_margin * nr_parts; + s->size_gparts = engine_redistribute_alloc_margin * nr_gparts; - /* Be verbose about what just happened. */ - for (int k = 0; k < nr_cells; k++) - if (cells[k].nodeID == nodeID) my_cells += 1; - if (e->verbose) - message("node %i now has %zi parts in %i cells.", nodeID, nr_parts, - my_cells); - - /* Clean up other stuff. */ + /* Clean up the temporary stuff. */ free(reqs); free(counts); free(dest); + /* Be verbose about what just happened. 
*/ + if (e->verbose) { + int my_cells = 0; + for (int k = 0; k < nr_cells; k++) + if (cells[k].nodeID == nodeID) my_cells += 1; + message("node %i now has %zi parts and %zi gparts in %i cells.", nodeID, + nr_parts, nr_gparts, my_cells); + } + if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -509,7 +708,7 @@ void engine_exchange_cells(struct engine *e) { /* Wait for each count to come in and start the recv. */ for (int k = 0; k < nr_proxies; k++) { - int pid; + int pid = MPI_UNDEFINED; if (MPI_Waitany(nr_proxies, reqs_in, &pid, &status) != MPI_SUCCESS || pid == MPI_UNDEFINED) error("MPI_Waitany failed."); @@ -529,7 +728,7 @@ void engine_exchange_cells(struct engine *e) { /* Wait for each pcell array to come in from the proxies. */ for (int k = 0; k < nr_proxies; k++) { - int pid; + int pid = MPI_UNDEFINED; if (MPI_Waitany(nr_proxies, reqs_in, &pid, &status) != MPI_SUCCESS || pid == MPI_UNDEFINED) error("MPI_Waitany failed."); @@ -545,31 +744,40 @@ void engine_exchange_cells(struct engine *e) { /* Count the number of particles we need to import and re-allocate the buffer if needed. 
*/ - int count_in = 0; + int count_parts_in = 0, count_gparts_in = 0; for (int k = 0; k < nr_proxies; k++) - for (int j = 0; j < e->proxies[k].nr_cells_in; j++) - count_in += e->proxies[k].cells_in[j]->count; - if (count_in > s->size_parts_foreign) { + for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { + count_parts_in += e->proxies[k].cells_in[j]->count; + count_gparts_in += e->proxies[k].cells_in[j]->gcount; + } + if (count_parts_in > s->size_parts_foreign) { if (s->parts_foreign != NULL) free(s->parts_foreign); - s->size_parts_foreign = 1.1 * count_in; + s->size_parts_foreign = 1.1 * count_parts_in; if (posix_memalign((void **)&s->parts_foreign, part_align, sizeof(struct part) * s->size_parts_foreign) != 0) error("Failed to allocate foreign part data."); } + if (count_gparts_in > s->size_gparts_foreign) { + if (s->gparts_foreign != NULL) free(s->gparts_foreign); + s->size_gparts_foreign = 1.1 * count_gparts_in; + if (posix_memalign((void **)&s->gparts_foreign, gpart_align, + sizeof(struct gpart) * s->size_gparts_foreign) != 0) + error("Failed to allocate foreign gpart data."); + } /* Unpack the cells and link to the particle data. */ struct part *parts = s->parts_foreign; + struct gpart *gparts = s->gparts_foreign; for (int k = 0; k < nr_proxies; k++) { for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { - cell_link(e->proxies[k].cells_in[j], parts); + cell_link_parts(e->proxies[k].cells_in[j], parts); + cell_link_gparts(e->proxies[k].cells_in[j], gparts); parts = &parts[e->proxies[k].cells_in[j]->count]; + gparts = &gparts[e->proxies[k].cells_in[j]->gcount]; } } s->nr_parts_foreign = parts - s->parts_foreign; - - /* Is the parts buffer large enough? */ - if (s->nr_parts_foreign > s->size_parts_foreign) - error("Foreign parts buffer too small."); + s->nr_gparts_foreign = gparts - s->gparts_foreign; /* Free the pcell buffer. 
*/ free(pcells); @@ -587,16 +795,24 @@ void engine_exchange_cells(struct engine *e) { * @brief Exchange straying parts with other nodes. * * @param e The #engine. - * @param offset The index in the parts array as of which the foreign parts - *reside. - * @param ind The ID of the foreign #cell. - * @param N The number of stray parts. + * @param offset_parts The index in the parts array as of which the foreign + * parts reside. + * @param ind_part The foreign #cell ID of each part. + * @param Npart The number of stray parts, contains the number of parts received + * on return. + * @param offset_gparts The index in the gparts array as of which the foreign + * gparts reside. + * @param ind_gpart The foreign #cell ID of each gpart. + * @param Ngpart The number of stray gparts, contains the number of gparts + * received on return. * - * @return The number of arrived parts copied to parts and xparts. + * Note that this function does not mess-up the linkage between parts and + * gparts, i.e. the received particles have correct linkage. */ -int engine_exchange_strays(struct engine *e, int offset, size_t *ind, - size_t N) { +void engine_exchange_strays(struct engine *e, size_t offset_parts, + int *ind_part, size_t *Npart, size_t offset_gparts, + int *ind_gpart, size_t *Ngpart) { #ifdef WITH_MPI @@ -606,25 +822,49 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind, /* Re-set the proxies. */ for (int k = 0; k < e->nr_proxies; k++) e->proxies[k].nr_parts_out = 0; - /* Put the parts into the corresponding proxies. */ - for (size_t k = 0; k < N; k++) { - const int node_id = e->s->cells[ind[k]].nodeID; + /* Put the parts and gparts into the corresponding proxies. */ + for (size_t k = 0; k < *Npart; k++) { + /* Get the target node and proxy ID.
*/ + const int node_id = e->s->cells[ind_part[k]].nodeID; if (node_id < 0 || node_id >= e->nr_nodes) error("Bad node ID %i.", node_id); const int pid = e->proxy_ind[node_id]; - if (pid < 0) + if (pid < 0) { error( "Do not have a proxy for the requested nodeID %i for part with " "id=%llu, x=[%e,%e,%e].", - node_id, s->parts[offset + k].id, s->parts[offset + k].x[0], - s->parts[offset + k].x[1], s->parts[offset + k].x[2]); - proxy_parts_load(&e->proxies[pid], &s->parts[offset + k], - &s->xparts[offset + k], 1); + node_id, s->parts[offset_parts + k].id, + s->parts[offset_parts + k].x[0], s->parts[offset_parts + k].x[1], + s->parts[offset_parts + k].x[2]); + } + + /* Re-link the associated gpart with the buffer offset of the part. */ + if (s->parts[offset_parts + k].gpart != NULL) { + s->parts[offset_parts + k].gpart->id = e->proxies[pid].nr_parts_in; + } + + /* Load the part and xpart into the proxy. */ + proxy_parts_load(&e->proxies[pid], &s->parts[offset_parts + k], + &s->xparts[offset_parts + k], 1); + } + for (size_t k = 0; k < *Ngpart; k++) { + const int node_id = e->s->cells[ind_gpart[k]].nodeID; + if (node_id < 0 || node_id >= e->nr_nodes) + error("Bad node ID %i.", node_id); + const int pid = e->proxy_ind[node_id]; + if (pid < 0) + error( + "Do not have a proxy for the requested nodeID %i for part with " + "id=%lli, x=[%e,%e,%e].", + node_id, s->gparts[offset_parts + k].id, + s->gparts[offset_gparts + k].x[0], s->gparts[offset_parts + k].x[1], + s->gparts[offset_gparts + k].x[2]); + proxy_gparts_load(&e->proxies[pid], &s->gparts[offset_gparts + k], 1); } /* Launch the proxies. 
*/ - MPI_Request reqs_in[2 * engine_maxproxies]; - MPI_Request reqs_out[2 * engine_maxproxies]; + MPI_Request reqs_in[3 * engine_maxproxies]; + MPI_Request reqs_out[3 * engine_maxproxies]; for (int k = 0; k < e->nr_proxies; k++) { proxy_parts_exch1(&e->proxies[k]); reqs_in[k] = e->proxies[k].req_parts_count_in; @@ -633,7 +873,7 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind, /* Wait for each count to come in and start the recv. */ for (int k = 0; k < e->nr_proxies; k++) { - int pid; + int pid = MPI_UNDEFINED; if (MPI_Waitany(e->nr_proxies, reqs_in, &pid, MPI_STATUS_IGNORE) != MPI_SUCCESS || pid == MPI_UNDEFINED) @@ -648,11 +888,18 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind, /* Count the total number of incoming particles and make sure we have enough space to accommodate them. */ - size_t count_in = 0; - for (int k = 0; k < e->nr_proxies; k++) count_in += e->proxies[k].nr_parts_in; - if (e->verbose) message("sent out %zi particles, got %zi back.", N, count_in); - if (offset + count_in > s->size_parts) { - s->size_parts = (offset + count_in) * 1.05; + int count_parts_in = 0; + int count_gparts_in = 0; + for (int k = 0; k < e->nr_proxies; k++) { + count_parts_in += e->proxies[k].nr_parts_in; + count_gparts_in += e->proxies[k].nr_gparts_in; + } + if (e->verbose) { + message("sent out %zi/%zi parts/gparts, got %i/%i back.", *Npart, *Ngpart, + count_parts_in, count_gparts_in); + } + if (offset_parts + count_parts_in > s->size_parts) { + s->size_parts = (offset_parts + count_parts_in) * engine_parts_size_grow; struct part *parts_new = NULL; struct xpart *xparts_new = NULL; if (posix_memalign((void **)&parts_new, part_align, @@ -660,37 +907,61 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind, posix_memalign((void **)&xparts_new, part_align, sizeof(struct xpart) * s->size_parts) != 0) error("Failed to allocate new part data."); - memcpy(parts_new, s->parts, sizeof(struct part) * offset); - 
memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset); + memcpy(parts_new, s->parts, sizeof(struct part) * offset_parts); + memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset_parts); free(s->parts); free(s->xparts); s->parts = parts_new; s->xparts = xparts_new; } + if (offset_gparts + count_gparts_in > s->size_gparts) { + s->size_gparts = (offset_gparts + count_gparts_in) * engine_parts_size_grow; + struct gpart *gparts_new = NULL; + if (posix_memalign((void **)&gparts_new, gpart_align, + sizeof(struct gpart) * s->size_gparts) != 0) + error("Failed to allocate new gpart data."); + memcpy(gparts_new, s->gparts, sizeof(struct gpart) * offset_gparts); + free(s->gparts); + s->gparts = gparts_new; + } /* Collect the requests for the particle data from the proxies. */ int nr_in = 0, nr_out = 0; for (int k = 0; k < e->nr_proxies; k++) { if (e->proxies[k].nr_parts_in > 0) { - reqs_in[2 * k] = e->proxies[k].req_parts_in; - reqs_in[2 * k + 1] = e->proxies[k].req_xparts_in; + reqs_in[3 * k] = e->proxies[k].req_parts_in; + reqs_in[3 * k + 1] = e->proxies[k].req_xparts_in; + nr_in += 2; + } else { + reqs_in[3 * k] = reqs_in[3 * k + 1] = MPI_REQUEST_NULL; + } + if (e->proxies[k].nr_gparts_in > 0) { + reqs_in[3 * k + 2] = e->proxies[k].req_gparts_in; nr_in += 1; - } else - reqs_in[2 * k] = reqs_in[2 * k + 1] = MPI_REQUEST_NULL; + } else { + reqs_in[3 * k + 2] = MPI_REQUEST_NULL; + } if (e->proxies[k].nr_parts_out > 0) { - reqs_out[2 * k] = e->proxies[k].req_parts_out; - reqs_out[2 * k + 1] = e->proxies[k].req_xparts_out; + reqs_out[3 * k] = e->proxies[k].req_parts_out; + reqs_out[3 * k + 1] = e->proxies[k].req_xparts_out; + nr_out += 2; + } else { + reqs_out[3 * k] = reqs_out[3 * k + 1] = MPI_REQUEST_NULL; + } + if (e->proxies[k].nr_gparts_out > 0) { + reqs_out[3 * k + 2] = e->proxies[k].req_gparts_out; nr_out += 1; - } else - reqs_out[2 * k] = reqs_out[2 * k + 1] = MPI_REQUEST_NULL; + } else { + reqs_out[3 * k + 2] = MPI_REQUEST_NULL; + } } /* Wait for each part 
array to come in and collect the new parts from the proxies. */ - size_t count = 0; - for (int k = 0; k < 2 * (nr_in + nr_out); k++) { + int count_parts = 0, count_gparts = 0; + for (int k = 0; k < nr_in; k++) { int err, pid; - if ((err = MPI_Waitany(2 * e->nr_proxies, reqs_in, &pid, + if ((err = MPI_Waitany(3 * e->nr_proxies, reqs_in, &pid, MPI_STATUS_IGNORE)) != MPI_SUCCESS) { char buff[MPI_MAX_ERROR_STRING]; int res; @@ -698,26 +969,46 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind, error("MPI_Waitany failed (%s).", buff); } if (pid == MPI_UNDEFINED) break; - // message( "request from proxy %i has arrived." , pid ); - if (reqs_in[pid & ~1] == MPI_REQUEST_NULL && - reqs_in[pid | 1] == MPI_REQUEST_NULL) { + // message( "request from proxy %i has arrived." , pid / 3 ); + pid = 3 * (pid / 3); + + /* If all the requests for a given proxy have arrived... */ + if (reqs_in[pid + 0] == MPI_REQUEST_NULL && + reqs_in[pid + 1] == MPI_REQUEST_NULL && + reqs_in[pid + 2] == MPI_REQUEST_NULL) { + /* Copy the particle data to the part/xpart/gpart arrays. */ struct proxy *p = &e->proxies[pid >> 1]; - memcpy(&s->parts[offset + count], p->parts_in, + memcpy(&s->parts[offset_parts + count_parts], p->parts_in, sizeof(struct part) * p->nr_parts_in); - memcpy(&s->xparts[offset + count], p->xparts_in, + memcpy(&s->xparts[offset_parts + count_parts], p->xparts_in, sizeof(struct xpart) * p->nr_parts_in); + memcpy(&s->gparts[offset_gparts + count_gparts], p->gparts_in, + sizeof(struct gpart) * p->nr_gparts_in); /* for (int k = offset; k < offset + count; k++) message( "received particle %lli, x=[%.3e %.3e %.3e], h=%.3e, from node %i.", s->parts[k].id, s->parts[k].x[0], s->parts[k].x[1], s->parts[k].x[2], s->parts[k].h, p->nodeID); */ - count += p->nr_parts_in; + + /* Re-link the gparts. 
*/ + for (int k = 0; k < p->nr_gparts_in; k++) { + struct gpart *gp = &s->gparts[offset_gparts + count_gparts + k]; + if (gp->id >= 0) { + struct part *p = &s->parts[offset_gparts + count_parts + gp->id]; + gp->part = p; + p->gpart = gp; + } + } + + /* Advance the counters. */ + count_parts += p->nr_parts_in; + count_gparts += p->nr_gparts_in; } } /* Wait for all the sends to have finished too. */ if (nr_out > 0) - if (MPI_Waitall(2 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != + if (MPI_Waitall(3 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) error("MPI_Waitall on sends failed."); @@ -726,49 +1017,51 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind, clocks_getunit()); /* Return the number of harvested parts. */ - return count; + *Npart = count_parts; + *Ngpart = count_gparts; #else error("SWIFT was not compiled with MPI support."); - return 0; #endif } /** - * @brief Fill the #space's task list. + * @brief Constructs the top-level pair tasks for the first hydro loop over + *neighbours * - * @param e The #engine we are working with. + * Here we construct all the tasks for all possible neighbouring non-empty + * local cells in the hierarchy. No dependencies are being added thus far. + * Additional loop over neighbours can later be added by simply duplicating + * all the tasks created by this function. + * + * @param e The #engine. */ - -void engine_maketasks(struct engine *e) { +void engine_make_hydroloop_tasks(struct engine *e) { struct space *s = e->s; struct scheduler *sched = &e->sched; - struct cell *cells = s->cells; - const int nr_cells = s->nr_cells; const int nodeID = e->nodeID; const int *cdim = s->cdim; - const ticks tic = getticks(); - - /* Re-set the scheduler. */ - scheduler_reset(sched, s->tot_cells * engine_maxtaskspercell); - - /* Add the space sorting tasks. 
*/ - for (int i = 0; i < e->nr_threads; i++) - scheduler_addtask(sched, task_type_psort, task_subtype_none, i, 0, NULL, - NULL, 0); + struct cell *cells = s->cells; /* Run through the highest level of cells and add pairs. */ - for (int i = 0; i < cdim[0]; i++) - for (int j = 0; j < cdim[1]; j++) + for (int i = 0; i < cdim[0]; i++) { + for (int j = 0; j < cdim[1]; j++) { for (int k = 0; k < cdim[2]; k++) { - int cid = cell_getid(cdim, i, j, k); - if (cells[cid].count == 0) continue; + + /* Get the cell */ + const int cid = cell_getid(cdim, i, j, k); struct cell *ci = &cells[cid]; + + /* Skip cells without hydro particles */ if (ci->count == 0) continue; + + /* If the cells is local build a self-interaction */ if (ci->nodeID == nodeID) scheduler_addtask(sched, task_type_self, task_subtype_density, 0, 0, ci, NULL, 0); + + /* Now loop over all the neighbours of this cell */ for (int ii = -1; ii < 2; ii++) { int iii = i + ii; if (!s->periodic && (iii < 0 || iii >= cdim[0])) continue; @@ -781,67 +1074,43 @@ void engine_maketasks(struct engine *e) { int kkk = k + kk; if (!s->periodic && (kkk < 0 || kkk >= cdim[2])) continue; kkk = (kkk + cdim[2]) % cdim[2]; - int cjd = cell_getid(cdim, iii, jjj, kkk); + + /* Get the neighbouring cell */ + const int cjd = cell_getid(cdim, iii, jjj, kkk); struct cell *cj = &cells[cjd]; + + /* Is that neighbour local and does it have particles ? */ if (cid >= cjd || cj->count == 0 || (ci->nodeID != nodeID && cj->nodeID != nodeID)) continue; - int sid = sortlistID[(kk + 1) + 3 * ((jj + 1) + 3 * (ii + 1))]; + + /* Construct the pair task */ + const int sid = + sortlistID[(kk + 1) + 3 * ((jj + 1) + 3 * (ii + 1))]; scheduler_addtask(sched, task_type_pair, task_subtype_density, sid, 0, ci, cj, 1); } } } } + } + } +} - /* /\* Add the gravity mm tasks. 
*\/ */ - /* for (int i = 0; i < nr_cells; i++) */ - /* if (cells[i].gcount > 0) { */ - /* scheduler_addtask(sched, task_type_grav_mm, task_subtype_none, -1, 0, - */ - /* &cells[i], NULL, 0); */ - /* for (int j = i + 1; j < nr_cells; j++) */ - /* if (cells[j].gcount > 0) */ - /* scheduler_addtask(sched, task_type_grav_mm, task_subtype_none, -1, - * 0, */ - /* &cells[i], &cells[j], 0); */ - /* } */ - - /* Split the tasks. */ - scheduler_splittasks(sched); - - /* Allocate the list of cell-task links. The maximum number of links - is the number of cells (s->tot_cells) times the number of neighbours (27) - times the number of interaction types (2, density and force). */ - if (e->links != NULL) free(e->links); - e->size_links = s->tot_cells * 27 * 2; - if ((e->links = malloc(sizeof(struct link) * e->size_links)) == NULL) - error("Failed to allocate cell-task links."); - e->nr_links = 0; +/** + * @brief Counts the tasks associated with one cell and constructs the links + * + * For each hydrodynamic task, construct the links with the corresponding cell. + * Similarly, construct the dependencies for all the sorting tasks. + * + * @param e The #engine. + */ +void engine_count_and_link_tasks(struct engine *e) { - /* /\* Add the gravity up/down tasks at the top-level cells and push them - * down. *\/ */ - /* for (int k = 0; k < nr_cells; k++) */ - /* if (cells[k].nodeID == nodeID && cells[k].gcount > 0) { */ - - /* /\* Create tasks at top level. *\/ */ - /* struct task *up = */ - /* scheduler_addtask(sched, task_type_grav_up, task_subtype_none, 0, - * 0, */ - /* &cells[k], NULL, 0); */ - /* struct task *down = */ - /* scheduler_addtask(sched, task_type_grav_down, task_subtype_none, 0, - * 0, */ - /* &cells[k], NULL, 0); */ - - /* /\* Push tasks down the cell hierarchy. 
*\/ */ - /* engine_addtasks_grav(e, &cells[k], up, down); */ - /* } */ + struct scheduler *sched = &e->sched; + const int nr_tasks = sched->nr_tasks; - /* Count the number of tasks associated with each cell and - store the density tasks in each cell, and make each sort - depend on the sorts of its progeny. */ - for (int k = 0; k < sched->nr_tasks; k++) { + for (int k = 0; k < nr_tasks; k++) { /* Get the current task. */ struct task *t = &sched->tasks[k]; @@ -896,16 +1165,27 @@ void engine_maketasks(struct engine *e) { /* } */ /* } */ } +} - /* Append a ghost task to each cell, and add kick tasks to the - super cells. */ - for (int k = 0; k < nr_cells; k++) engine_mkghosts(e, &cells[k], NULL); +/** + * @brief Duplicates the first hydro loop and construct all the + * dependencies for the hydro part + * + * This is done by looping over all the previously constructed tasks + * and adding another task involving the same cells but this time + * corresponding to the second hydro loop over neighbours. + * With all the relevant tasks for a given cell available, we construct + * all the dependencies for that cell. + * + * @param e The #engine. + */ +void engine_make_extra_hydroloop_tasks(struct engine *e) { - /* Run through the tasks and make force tasks for each density task. - Each force task depends on the cell ghosts and unlocks the kick task - of its super-cell. */ - int sched_nr_tasks = sched->nr_tasks; - for (int k = 0; k < sched_nr_tasks; k++) { + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + const int nr_tasks = sched->nr_tasks; + + for (int k = 0; k < nr_tasks; k++) { /* Get a pointer to the task. */ struct task *t = &sched->tasks[k]; @@ -915,20 +1195,39 @@ void engine_maketasks(struct engine *e) { /* Self-interaction? 
*/ if (t->type == task_type_self && t->subtype == task_subtype_density) { - scheduler_addunlock(sched, t->ci->super->init, t); - scheduler_addunlock(sched, t, t->ci->super->ghost); + + /* Start by constructing the task for the second hydro loop */ struct task *t2 = scheduler_addtask( sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL, 0); - scheduler_addunlock(sched, t->ci->super->ghost, t2); - scheduler_addunlock(sched, t2, t->ci->super->kick); + + /* Add the link between the new loop and the cell */ t->ci->force = engine_addlink(e, t->ci->force, t2); atomic_inc(&t->ci->nr_force); + + /* Now, build all the dependencies for the hydro */ + /* init --> t (density loop) --> ghost --> t2 (force loop) --> kick */ + scheduler_addunlock(sched, t->ci->super->init, t); + scheduler_addunlock(sched, t, t->ci->super->ghost); + scheduler_addunlock(sched, t->ci->super->ghost, t2); + scheduler_addunlock(sched, t2, t->ci->super->kick); } /* Otherwise, pair interaction? */ else if (t->type == task_type_pair && t->subtype == task_subtype_density) { + + /* Start by constructing the task for the second hydro loop */ struct task *t2 = scheduler_addtask( sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj, 0); + + /* Add the link between the new loop and both cells */ + t->ci->force = engine_addlink(e, t->ci->force, t2); + atomic_inc(&t->ci->nr_force); + t->cj->force = engine_addlink(e, t->cj->force, t2); + atomic_inc(&t->cj->nr_force); + + /* Now, build all the dependencies for the hydro for the cells */ + /* that are local and are not descendant of the same super-cells */ + /* init --> t (density loop) --> ghost --> t2 (force loop) --> kick */ if (t->ci->nodeID == nodeID) { scheduler_addunlock(sched, t->ci->super->init, t); scheduler_addunlock(sched, t, t->ci->super->ghost); @@ -941,17 +1240,27 @@ void engine_maketasks(struct engine *e) { scheduler_addunlock(sched, t->cj->super->ghost, t2); scheduler_addunlock(sched, t2, t->cj->super->kick); } - t->ci->force = 
engine_addlink(e, t->ci->force, t2); - atomic_inc(&t->ci->nr_force); - t->cj->force = engine_addlink(e, t->cj->force, t2); - atomic_inc(&t->cj->nr_force); } /* Otherwise, sub interaction? */ else if (t->type == task_type_sub && t->subtype == task_subtype_density) { + + /* Start by constructing the task for the second hydro loop */ struct task *t2 = scheduler_addtask(sched, task_type_sub, task_subtype_force, t->flags, 0, t->ci, t->cj, 0); + + /* Add the link between the new loop and both cells */ + t->ci->force = engine_addlink(e, t->ci->force, t2); + atomic_inc(&t->ci->nr_force); + if (t->cj != NULL) { + t->cj->force = engine_addlink(e, t->cj->force, t2); + atomic_inc(&t->cj->nr_force); + } + + /* Now, build all the dependencies for the hydro for the cells */ + /* that are local and are not descendant of the same super-cells */ + /* init --> t (density loop) --> ghost --> t2 (force loop) --> kick */ if (t->ci->nodeID == nodeID) { scheduler_addunlock(sched, t, t->ci->super->ghost); scheduler_addunlock(sched, t->ci->super->ghost, t2); @@ -963,40 +1272,166 @@ void engine_maketasks(struct engine *e) { scheduler_addunlock(sched, t->cj->super->ghost, t2); scheduler_addunlock(sched, t2, t->cj->super->kick); } - t->ci->force = engine_addlink(e, t->ci->force, t2); - atomic_inc(&t->ci->nr_force); - if (t->cj != NULL) { - t->cj->force = engine_addlink(e, t->cj->force, t2); - atomic_inc(&t->cj->nr_force); - } } /* /\* Kick tasks should rely on the grav_down tasks of their cell. *\/ */ /* else if (t->type == task_type_kick && t->ci->grav_down != NULL) */ /* scheduler_addunlock(sched, t->ci->grav_down, t); */ } +} -/* Add the communication tasks if MPI is being used. */ -#ifdef WITH_MPI +/** + * @brief Constructs the top-level pair tasks for the gravity M-M interactions + * + * Correct implementation is still lacking here. + * + * @param e The #engine. 
+ */ +void engine_make_gravityinteraction_tasks(struct engine *e) { + + struct space *s = e->s; + struct scheduler *sched = &e->sched; + const int nr_cells = s->nr_cells; + struct cell *cells = s->cells; - /* Loop over the proxies. */ - for (int pid = 0; pid < e->nr_proxies; pid++) { + /* Loop over all cells. */ + for (int i = 0; i < nr_cells; i++) { - /* Get a handle on the proxy. */ - struct proxy *p = &e->proxies[pid]; + /* If it has gravity particles, add a self-task */ + if (cells[i].gcount > 0) { + scheduler_addtask(sched, task_type_grav_mm, task_subtype_none, -1, 0, + &cells[i], NULL, 0); - /* Loop through the proxy's incoming cells and add the - recv tasks. */ - for (int k = 0; k < p->nr_cells_in; k++) - engine_addtasks_recv(e, p->cells_in[k], NULL, NULL); + /* Loop over all remainding cells */ + for (int j = i + 1; j < nr_cells; j++) { - /* Loop through the proxy's outgoing cells and add the - send tasks. */ - for (int k = 0; k < p->nr_cells_out; k++) - engine_addtasks_send(e, p->cells_out[k], p->cells_in[0]); + /* If that other cell has gravity parts, add a pair interaction */ + if (cells[j].gcount > 0) { + scheduler_addtask(sched, task_type_grav_mm, task_subtype_none, -1, 0, + &cells[i], &cells[j], 0); + } + } + } } +} -#endif +/** + * @brief Constructs the gravity tasks building the multipoles and propagating + *them to the children + * + * Correct implementation is still lacking here. + * + * @param e The #engine. + */ +void engine_make_gravityrecursive_tasks(struct engine *e) { + + struct space *s = e->s; + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + const int nr_cells = s->nr_cells; + struct cell *cells = s->cells; + + for (int k = 0; k < nr_cells; k++) { + + /* Only do this for local cells containing gravity particles */ + if (cells[k].nodeID == nodeID && cells[k].gcount > 0) { + + /* Create tasks at top level. 
*/ + struct task *up = + scheduler_addtask(sched, task_type_grav_up, task_subtype_none, 0, 0, + &cells[k], NULL, 0); + struct task *down = + scheduler_addtask(sched, task_type_grav_down, task_subtype_none, 0, 0, + &cells[k], NULL, 0); + + /* Push tasks down the cell hierarchy. */ + engine_addtasks_grav(e, &cells[k], up, down); + } + } +} + +/** + * @brief Fill the #space's task list. + * + * @param e The #engine we are working with. + */ +void engine_maketasks(struct engine *e) { + + struct space *s = e->s; + struct scheduler *sched = &e->sched; + struct cell *cells = s->cells; + const int nr_cells = s->nr_cells; + const ticks tic = getticks(); + + /* Re-set the scheduler. */ + scheduler_reset(sched, s->tot_cells * engine_maxtaskspercell); + + /* Add the space sorting tasks. */ + for (int i = 0; i < e->nr_threads; i++) { + scheduler_addtask(sched, task_type_part_sort, task_subtype_none, i, 0, NULL, + NULL, 0); + scheduler_addtask(sched, task_type_gpart_sort, task_subtype_none, i, 0, + NULL, NULL, 0); + } + + /* Construct the firt hydro loop over neighbours */ + engine_make_hydroloop_tasks(e); + + /* Add the gravity mm tasks. */ + if ((e->policy & engine_policy_self_gravity) == engine_policy_self_gravity) + engine_make_gravityinteraction_tasks(e); + + /* Split the tasks. */ + scheduler_splittasks(sched); + + /* Allocate the list of cell-task links. The maximum number of links + is the number of cells (s->tot_cells) times the number of neighbours (27) + times the number of interaction types (2, density and force). */ + if (e->links != NULL) free(e->links); + e->size_links = s->tot_cells * 27 * 2; + if ((e->links = malloc(sizeof(struct link) * e->size_links)) == NULL) + error("Failed to allocate cell-task links."); + e->nr_links = 0; + + /* Add the gravity up/down tasks at the top-level cells and push them down. 
*/ + if ((e->policy & engine_policy_self_gravity) == engine_policy_self_gravity) + engine_make_gravityrecursive_tasks(e); + + /* Count the number of tasks associated with each cell and + store the density tasks in each cell, and make each sort + depend on the sorts of its progeny. */ + engine_count_and_link_tasks(e); + + /* Append a ghost task to each cell, and add kick tasks to the + super cells. */ + for (int k = 0; k < nr_cells; k++) + engine_make_ghost_tasks(e, &cells[k], NULL); + + /* Run through the tasks and make force tasks for each density task. + Each force task depends on the cell ghosts and unlocks the kick task + of its super-cell. */ + engine_make_extra_hydroloop_tasks(e); + + /* Add the communication tasks if MPI is being used. */ + if ((e->policy & engine_policy_mpi) == engine_policy_mpi) { + + /* Loop over the proxies. */ + for (int pid = 0; pid < e->nr_proxies; pid++) { + + /* Get a handle on the proxy. */ + struct proxy *p = &e->proxies[pid]; + + /* Loop through the proxy's incoming cells and add the + recv tasks. */ + for (int k = 0; k < p->nr_cells_in; k++) + engine_addtasks_recv(e, p->cells_in[k], NULL, NULL); + + /* Loop through the proxy's outgoing cells and add the + send tasks. */ + for (int k = 0; k < p->nr_cells_out; k++) + engine_addtasks_send(e, p->cells_out[k], p->cells_in[0]); + } + } /* Set the unlocks per task. */ scheduler_set_unlocks(sched); @@ -1024,9 +1459,10 @@ void engine_maketasks(struct engine *e) { int engine_marktasks(struct engine *e) { struct scheduler *s = &e->sched; - const int nr_tasks = s->nr_tasks, *ind = s->tasks_ind; + const int ti_end = e->ti_current; + const int nr_tasks = s->nr_tasks; + const int *const ind = s->tasks_ind; struct task *tasks = s->tasks; - const float ti_end = e->ti_current; const ticks tic = getticks(); /* Much less to do here if we're on a fixed time-step. 
*/ @@ -1126,6 +1562,7 @@ int engine_marktasks(struct engine *e) { else if (t->type == task_type_kick) { t->skip = (t->ci->ti_end_min > ti_end); t->ci->updated = 0; + t->ci->g_updated = 0; } /* Drift? */ @@ -1182,6 +1619,7 @@ void engine_print_task_counts(struct engine *e) { printf(" skipped=%i ]\n", counts[task_type_count]); fflush(stdout); message("nr_parts = %zi.", e->s->nr_parts); + message("nr_gparts = %zi.", e->s->nr_gparts); } /** @@ -1192,7 +1630,7 @@ void engine_print_task_counts(struct engine *e) { void engine_rebuild(struct engine *e) { - ticks tic = getticks(); + const ticks tic = getticks(); /* Clear the forcerebuild flag, whatever it was. */ e->forcerebuild = 0; @@ -1213,7 +1651,7 @@ void engine_rebuild(struct engine *e) { error("engine_marktasks failed after space_rebuild."); /* Print the status of the system */ - engine_print_task_counts(e); + if (e->verbose) engine_print_task_counts(e); if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -1235,7 +1673,7 @@ void engine_prepare(struct engine *e) { /* Collect the values of rebuild from all nodes. */ #ifdef WITH_MPI - int buff; + int buff = 0; if (MPI_Allreduce(&rebuild, &buff, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD) != MPI_SUCCESS) error("Failed to aggregate the rebuild flag across nodes."); @@ -1311,7 +1749,7 @@ void engine_collect_kick(struct cell *c) { if (c->kick != NULL) return; /* Counters for the different quantities. 
*/ - int updated = 0; + int updated = 0, g_updated = 0; double e_kin = 0.0, e_int = 0.0, e_pot = 0.0; float mom[3] = {0.0f, 0.0f, 0.0f}, ang[3] = {0.0f, 0.0f, 0.0f}; int ti_end_min = max_nr_timesteps, ti_end_max = 0; @@ -1334,6 +1772,7 @@ void engine_collect_kick(struct cell *c) { ti_end_min = min(ti_end_min, cp->ti_end_min); ti_end_max = max(ti_end_max, cp->ti_end_max); updated += cp->updated; + g_updated += cp->g_updated; e_kin += cp->e_kin; e_int += cp->e_int; e_pot += cp->e_pot; @@ -1351,6 +1790,7 @@ void engine_collect_kick(struct cell *c) { c->ti_end_min = ti_end_min; c->ti_end_max = ti_end_max; c->updated = updated; + c->g_updated = g_updated; c->e_kin = e_kin; c->e_int = e_int; c->e_pot = e_pot; @@ -1414,7 +1854,15 @@ void engine_init_particles(struct engine *e) { /* Make sure all particles are ready to go */ /* i.e. clean-up any stupid state in the ICs */ - space_map_cells_pre(s, 1, cell_init_parts, NULL); + if ((e->policy & engine_policy_hydro) == engine_policy_hydro) { + space_map_cells_pre(s, 1, cell_init_parts, NULL); + } + if (((e->policy & engine_policy_self_gravity) == + engine_policy_self_gravity) || + ((e->policy & engine_policy_external_gravity) == + engine_policy_external_gravity)) { + space_map_cells_pre(s, 1, cell_init_gparts, NULL); + } engine_prepare(e); @@ -1488,7 +1936,7 @@ void engine_init_particles(struct engine *e) { */ void engine_step(struct engine *e) { - int updates = 0; + int updates = 0, g_updates = 0; int ti_end_min = max_nr_timesteps, ti_end_max = 0; double e_pot = 0.0, e_int = 0.0, e_kin = 0.0; float mom[3] = {0.0, 0.0, 0.0}; @@ -1515,6 +1963,7 @@ void engine_step(struct engine *e) { e_int += c->e_int; e_pot += c->e_pot; updates += c->updated; + g_updates += c->g_updated; mom[0] += c->mom[0]; mom[1] += c->mom[1]; mom[2] += c->mom[2]; @@ -1526,7 +1975,8 @@ void engine_step(struct engine *e) { /* Aggregate the data from the different nodes. 
*/ #ifdef WITH_MPI { - int in_i[4], out_i[4]; + int in_i[1], out_i[1]; + in_i[0] = 0; out_i[0] = ti_end_min; if (MPI_Allreduce(out_i, in_i, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD) != MPI_SUCCESS) @@ -1539,18 +1989,20 @@ void engine_step(struct engine *e) { ti_end_max = in_i[0]; } { - double in_d[4], out_d[4]; + double in_d[5], out_d[5]; out_d[0] = updates; - out_d[1] = e_kin; - out_d[2] = e_int; - out_d[3] = e_pot; - if (MPI_Allreduce(out_d, in_d, 4, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) != + out_d[1] = g_updates; + out_d[2] = e_kin; + out_d[3] = e_int; + out_d[4] = e_pot; + if (MPI_Allreduce(out_d, in_d, 5, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) error("Failed to aggregate energies."); updates = in_d[0]; - e_kin = in_d[1]; - e_int = in_d[2]; - e_pot = in_d[3]; + g_updates = in_d[1]; + e_kin = in_d[2]; + e_int = in_d[3]; + e_pot = in_d[4]; } #endif @@ -1575,8 +2027,8 @@ void engine_step(struct engine *e) { if (e->nodeID == 0) { /* Print some information to the screen */ - printf("%d %e %e %d %.3f\n", e->step, e->time, e->timeStep, updates, - e->wallclock_time); + printf(" %6d %14e %14e %10d %10d %21.3f\n", e->step, e->time, e->timeStep, + updates, g_updates, e->wallclock_time); fflush(stdout); /* Write some energy statistics */ @@ -1779,7 +2231,7 @@ void engine_split(struct engine *e, struct partition *initial_partition) { engine_makeproxies(e); /* Re-allocate the local parts. 
*/ - if (e->nodeID == 0) + if (e->verbose) message("Re-allocating parts array from %zi to %zi.", s->size_parts, (size_t)(s->nr_parts * 1.2)); s->size_parts = s->nr_parts * 1.2; @@ -1787,7 +2239,7 @@ void engine_split(struct engine *e, struct partition *initial_partition) { struct xpart *xparts_new = NULL; if (posix_memalign((void **)&parts_new, part_align, sizeof(struct part) * s->size_parts) != 0 || - posix_memalign((void **)&xparts_new, part_align, + posix_memalign((void **)&xparts_new, xpart_align, sizeof(struct xpart) * s->size_parts) != 0) error("Failed to allocate new part data."); memcpy(parts_new, s->parts, sizeof(struct part) * s->nr_parts); @@ -1796,6 +2248,50 @@ void engine_split(struct engine *e, struct partition *initial_partition) { free(s->xparts); s->parts = parts_new; s->xparts = xparts_new; + + /* Re-link the gparts. */ + for (size_t k = 0; k < s->nr_parts; k++) + if (s->parts[k].gpart != NULL) s->parts[k].gpart->part = &s->parts[k]; + + /* Re-allocate the local gparts. */ + if (e->verbose) + message("Re-allocating gparts array from %zi to %zi.", s->size_gparts, + (size_t)(s->nr_gparts * 1.2)); + s->size_gparts = s->nr_gparts * 1.2; + struct gpart *gparts_new = NULL; + if (posix_memalign((void **)&gparts_new, gpart_align, + sizeof(struct gpart) * s->size_gparts) != 0) + error("Failed to allocate new gpart data."); + memcpy(gparts_new, s->gparts, sizeof(struct gpart) * s->nr_gparts); + free(s->gparts); + s->gparts = gparts_new; + + /* Re-link the parts. 
*/ + for (size_t k = 0; k < s->nr_gparts; k++) + if (s->gparts[k].id > 0) s->gparts[k].part->gpart = &s->gparts[k]; + + /* Verify that the links are correct */ + /* MATTHIEU: To be commented out once we are happy */ + for (size_t k = 0; k < s->nr_gparts; ++k) { + + if (s->gparts[k].id > 0) { + + if (s->gparts[k].part->gpart != &s->gparts[k]) error("Linking problem !"); + + if (s->gparts[k].x[0] != s->gparts[k].part->x[0] || + s->gparts[k].x[1] != s->gparts[k].part->x[1] || + s->gparts[k].x[2] != s->gparts[k].part->x[2]) + error("Linked particles are not at the same position !"); + } + } + for (size_t k = 0; k < s->nr_parts; ++k) { + + if (s->parts[k].gpart != NULL) { + + if (s->parts[k].gpart->part != &s->parts[k]) error("Linking problem !"); + } + } + #else error("SWIFT was not compiled with MPI support."); #endif @@ -1825,30 +2321,25 @@ static bool hyperthreads_present(void) { * * @param e The #engine. * @param s The #space in which this #runner will run. - * @param dt The initial time step to use. - * @param nr_threads The number of threads to spawn. - * @param nr_queues The number of task queues to create. + * @param params The parsed parameter file. * @param nr_nodes The number of MPI ranks. * @param nodeID The MPI rank of this node. * @param policy The queuing policy to use. - * @param timeBegin Time at the begininning of the simulation. - * @param timeEnd Time at the end of the simulation. - * @param dt_min Minimal allowed timestep (unsed with fixdt policy) - * @param dt_max Maximal allowed timestep * @param verbose Is this #engine talkative ? 
*/ -void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, - int nr_queues, int nr_nodes, int nodeID, int policy, - float timeBegin, float timeEnd, float dt_min, float dt_max, - int verbose) { +void engine_init(struct engine *e, struct space *s, + const struct swift_params *params, int nr_nodes, int nodeID, + int policy, int verbose) { + + /* Clean-up everything */ + bzero(e, sizeof(struct engine)); /* Store the values. */ e->s = s; - e->nr_threads = nr_threads; + e->nr_threads = parser_get_param_int(params, "Scheduler:nr_threads"); e->policy = policy; e->step = 0; - e->nullstep = 0; e->nr_nodes = nr_nodes; e->nodeID = nodeID; e->proxy_ind = NULL; @@ -1857,23 +2348,29 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, e->forcerepart = REPART_NONE; e->links = NULL; e->nr_links = 0; - e->timeBegin = timeBegin; - e->timeEnd = timeEnd; - e->timeOld = timeBegin; - e->time = timeBegin; + e->timeBegin = parser_get_param_double(params, "TimeIntegration:time_begin"); + e->timeEnd = parser_get_param_double(params, "TimeIntegration:time_end"); + e->timeOld = e->timeBegin; + e->time = e->timeBegin; e->ti_old = 0; e->ti_current = 0; e->timeStep = 0.; - e->dt_min = dt_min; - e->dt_max = dt_max; + e->dt_min = parser_get_param_double(params, "TimeIntegration:dt_min"); + e->dt_max = parser_get_param_double(params, "TimeIntegration:dt_max"); e->file_stats = NULL; e->verbose = verbose; + e->count_step = 0; e->wallclock_time = 0.f; engine_rank = nodeID; /* Make the space link back to the engine. 
*/ s->e = e; + /* Get the number of queues */ + int nr_queues = parser_get_param_int(params, "Scheduler:nr_queues"); + if (nr_queues <= 0) nr_queues = e->nr_threads; + s->nr_queues = nr_queues; + #if defined(HAVE_SETAFFINITY) const int nr_cores = sysconf(_SC_NPROCESSORS_ONLN); int cpuid[nr_cores]; @@ -1969,22 +2466,29 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, engine_print_policy(e); /* Print information about the hydro scheme */ - if (e->nodeID == 0) message("Hydrodynamic scheme: %s", SPH_IMPLEMENTATION); + if ((e->policy & engine_policy_hydro) == engine_policy_hydro) { + if (e->nodeID == 0) message("Hydrodynamic scheme: %s.", SPH_IMPLEMENTATION); + if (e->nodeID == 0) + message("Hydrodynamic kernel: %s with %.2f +/- %.2f neighbours.", + kernel_name, kernel_nwneigh, const_delta_nwneigh); + } /* Check we have sensible time bounds */ - if (timeBegin >= timeEnd) + if (e->timeBegin >= e->timeEnd) error( "Final simulation time (t_end = %e) must be larger than the start time " "(t_beg = %e)", - timeEnd, timeBegin); + e->timeEnd, e->timeBegin); - /* Check we have sensible time step bounds */ + /* Check we have sensible time-step values */ if (e->dt_min > e->dt_max) error( - "Minimal time step size must be smaller than maximal time step size "); + "Minimal time-step size (%e) must be smaller than maximal time-step " + "size (%e)", + e->dt_min, e->dt_max); /* Deal with timestep */ - e->timeBase = (timeEnd - timeBegin) / max_nr_timesteps; + e->timeBase = (e->timeEnd - e->timeBegin) / max_nr_timesteps; e->ti_current = 0; /* Fixed time-step case */ @@ -2003,12 +2507,12 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, if (e->nodeID == 0) { message("Absolute minimal timestep size: %e", e->timeBase); - float dt_min = timeEnd - timeBegin; + float dt_min = e->timeEnd - e->timeBegin; while (dt_min > e->dt_min) dt_min /= 2.f; message("Minimal timestep size (on time-line): %e", dt_min); - float dt_max = timeEnd - 
timeBegin; + float dt_max = e->timeEnd - e->timeBegin; while (dt_max > e->dt_max) dt_max /= 2.f; message("Maximal timestep size (on time-line): %e", dt_max); @@ -2027,8 +2531,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, /* Construct types for MPI communications */ #ifdef WITH_MPI - part_create_mpi_type(&e->part_mpi_type); - xpart_create_mpi_type(&e->xpart_mpi_type); + part_create_mpi_types(); #endif /* First of all, init the barrier and lock it. */ @@ -2043,23 +2546,26 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, e->barrier_launchcount = 0; /* Init the scheduler with enough tasks for the initial sorting tasks. */ - int nr_tasks = 2 * s->tot_cells + e->nr_threads; + const int nr_tasks = 2 * s->tot_cells + 2 * e->nr_threads; scheduler_init(&e->sched, e->s, nr_tasks, nr_queues, scheduler_flag_steal, e->nodeID); - s->nr_queues = nr_queues; /* Create the sorting tasks. */ - for (int i = 0; i < e->nr_threads; i++) - scheduler_addtask(&e->sched, task_type_psort, task_subtype_none, i, 0, NULL, - NULL, 0); + for (int i = 0; i < e->nr_threads; i++) { + scheduler_addtask(&e->sched, task_type_part_sort, task_subtype_none, i, 0, + NULL, NULL, 0); + + scheduler_addtask(&e->sched, task_type_gpart_sort, task_subtype_none, i, 0, + NULL, NULL, 0); + } scheduler_ranktasks(&e->sched); /* Allocate and init the threads. */ - if ((e->runners = - (struct runner *)malloc(sizeof(struct runner) * nr_threads)) == NULL) + if ((e->runners = (struct runner *)malloc(sizeof(struct runner) * + e->nr_threads)) == NULL) error("Failed to allocate threads array."); - for (int k = 0; k < nr_threads; k++) { + for (int k = 0; k < e->nr_threads; k++) { e->runners[k].id = k; e->runners[k].e = e; e->barrier_running += 1; @@ -2071,7 +2577,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, /* Set a reasonable queue ID. 
*/ e->runners[k].cpuid = cpuid[k % nr_cores]; - if (nr_queues < nr_threads) + if (nr_queues < e->nr_threads) e->runners[k].qid = cpuid[k % nr_cores] * nr_queues / nr_cores; else e->runners[k].qid = k; @@ -2090,7 +2596,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, #endif } else { e->runners[k].cpuid = k; - e->runners[k].qid = k * nr_queues / nr_threads; + e->runners[k].qid = k * nr_queues / e->nr_threads; } // message( "runner %i on cpuid=%i with qid=%i." , e->runners[k].id , // e->runners[k].cpuid , e->runners[k].qid ); diff --git a/src/engine.h b/src/engine.h index 741ae1f553494e435394f529606b4cb794b0e3d2..e1c3f61d1293fc01e24b9bcb0673d75fa3ce4648 100644 --- a/src/engine.h +++ b/src/engine.h @@ -38,6 +38,7 @@ #include "scheduler.h" #include "space.h" #include "task.h" +#include "parser.h" #include "partition.h" /* Some constants. */ @@ -53,7 +54,8 @@ enum engine_policy { engine_policy_setaffinity = (1 << 7), engine_policy_hydro = (1 << 8), engine_policy_self_gravity = (1 << 9), - engine_policy_external_gravity = (1 << 10) + engine_policy_external_gravity = (1 << 10), + engine_policy_cosmology = (1 << 11) }; extern const char *engine_policy_names[]; @@ -62,6 +64,8 @@ extern const char *engine_policy_names[]; #define engine_maxtaskspercell 96 #define engine_maxproxies 64 #define engine_tasksreweight 10 +#define engine_parts_size_grow 1.05 +#define engine_redistribute_alloc_margin 1.2 /* The rank of the engine as a global variable (for messages). */ extern int engine_rank; @@ -124,7 +128,7 @@ struct engine { FILE *file_stats; /* The current step number. */ - int step, nullstep; + int step; /* The number of particles updated in the previous step. */ int count_step; @@ -160,20 +164,13 @@ struct engine { /* Are we talkative ? */ int verbose; - -#ifdef WITH_MPI - /* MPI data type for the particle transfers */ - MPI_Datatype part_mpi_type; - MPI_Datatype xpart_mpi_type; -#endif }; /* Function prototypes. 
*/ void engine_barrier(struct engine *e, int tid); -void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, - int nr_queues, int nr_nodes, int nodeID, int policy, - float timeBegin, float timeEnd, float dt_min, float dt_max, - int verbose); +void engine_init(struct engine *e, struct space *s, + const struct swift_params *params, int nr_nodes, int nodeID, + int policy, int verbose); void engine_launch(struct engine *e, int nr_runners, unsigned int mask, unsigned int submask); void engine_prepare(struct engine *e); @@ -182,7 +179,9 @@ void engine_init_particles(struct engine *e); void engine_step(struct engine *e); void engine_maketasks(struct engine *e); void engine_split(struct engine *e, struct partition *initial_partition); -int engine_exchange_strays(struct engine *e, int offset, size_t *ind, size_t N); +void engine_exchange_strays(struct engine *e, size_t offset_parts, + int *ind_part, size_t *Npart, size_t offset_gparts, + int *ind_gpart, size_t *Ngpart); void engine_rebuild(struct engine *e); void engine_repartition(struct engine *e); void engine_makeproxies(struct engine *e); diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h index 82bc52ad3e05794c8c05896075edc463a69197ff..92a9f64c1f84a9e949f4c0e9485f892b5c808cdc 100644 --- a/src/gravity/Default/gravity.h +++ b/src/gravity/Default/gravity.h @@ -22,14 +22,61 @@ /** * @brief Computes the gravity time-step of a given particle * - * @param p Pointer to the particle data - * @param xp Pointer to the extended particle data + * @param gp Pointer to the g-particle data * */ -__attribute__((always_inline)) INLINE static float gravity_compute_timestep( - struct part* p, struct xpart* xp) { +__attribute__((always_inline)) + INLINE static float gravity_compute_timestep(struct gpart* gp) { /* Currently no limit is imposed */ return FLT_MAX; } + +/** + * @brief Initialises the g-particles for the first time + * + * This function is called only once just after the ICs have been 
+ * read in to do some conversions. + * + * @param gp The particle to act upon + */ +__attribute__((always_inline)) + INLINE static void gravity_first_init_gpart(struct gpart* gp) {} + +/** + * @brief Prepares a g-particle for the gravity calculation + * + * Zeroes all the relevant arrays in preparation for the sums taking place in + * the variaous tasks + * + * @param gp The particle to act upon + */ +__attribute__((always_inline)) + INLINE static void gravity_init_part(struct gpart* gp) { + + /* Zero the acceleration */ + gp->a_grav[0] = 0.f; + gp->a_grav[1] = 0.f; + gp->a_grav[2] = 0.f; +} + +/** + * @brief Finishes the gravity calculation. + * + * Multiplies the forces and accelerations by the appropiate constants + * + * @param gp The particle to act upon + */ +__attribute__((always_inline)) + INLINE static void gravity_end_force(struct gpart* gp) {} + +/** + * @brief Kick the additional variables + * + * @param gp The particle to act upon + * @param dt The time-step for this kick + * @param half_dt The half time-step for this kick + */ +__attribute__((always_inline)) INLINE static void gravity_kick_extra( + struct gpart* gp, float dt, float half_dt) {} diff --git a/src/gravity/Default/gravity_debug.h b/src/gravity/Default/gravity_debug.h index 98e0c40a5700b4da70f27fb0955592bb5d2287c3..654745bfeb70dddba772af9e23797713376377a7 100644 --- a/src/gravity/Default/gravity_debug.h +++ b/src/gravity/Default/gravity_debug.h @@ -24,5 +24,5 @@ __attribute__((always_inline)) "v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " "mass=%.3e t_begin=%d, t_end=%d\n", p->x[0], p->x[1], p->x[2], p->v_full[0], p->v_full[1], p->v_full[2], - p->a[0], p->a[1], p->a[2], p->mass, p->ti_begin, p->ti_end); + p->a_grav[0], p->a_grav[1], p->a_grav[2], p->mass, p->ti_begin, p->ti_end); } diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h index e62be446e8263bf02e3fd73f902b28cb1c3b16cf..62023345f174eb8cb9bae4d4438bdd50c9969494 100644 --- 
a/src/gravity/Default/gravity_iact.h +++ b/src/gravity/Default/gravity_iact.h @@ -22,19 +22,12 @@ /* Includes. */ #include "const.h" -#include "kernel.h" +#include "kernel_gravity.h" #include "vector.h" -/** - * @file runner_iact_grav.h - * @brief Gravity interaction functions. - * - */ - /** * @brief Gravity potential */ - __attribute__((always_inline)) INLINE static void runner_iact_grav( float r2, float *dx, struct gpart *pi, struct gpart *pj) { @@ -56,8 +49,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav( /* Aggregate the accelerations. */ for (k = 0; k < 3; k++) { w = acc * dx[k]; - pi->a[k] -= w * mj; - pj->a[k] += w * mi; + pi->a_grav[k] -= w * mj; + pj->a_grav[k] += w * mi; } } @@ -107,8 +100,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_grav( ai.v = w.v * mj.v; aj.v = w.v * mi.v; for (j = 0; j < VEC_SIZE; j++) { - pi[j]->a[k] -= ai.f[j]; - pj[j]->a[k] += aj.f[j]; + pi[j]->a_grav[k] -= ai.f[j]; + pj[j]->a_grav[k] += aj.f[j]; } } diff --git a/src/gravity/Default/gravity_io.h b/src/gravity/Default/gravity_io.h index d707d69631e65eed8ad21a7fa9601c07d3c71263..129c4b39828ca73d2d80d79edbdaa8ec4d5a9e01 100644 --- a/src/gravity/Default/gravity_io.h +++ b/src/gravity/Default/gravity_io.h @@ -48,6 +48,8 @@ __attribute__((always_inline)) INLINE static void darkmatter_read_particles( * * @param h_grp The HDF5 group in which to write the arrays. * @param fileName The name of the file (unsued in MPI mode). + * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param xmfFile The XMF file to write to (unused in MPI mode). * @param Ndm The number of DM particles on that MPI rank. 
* @param Ndm_total The total number of g-particles (only used in MPI mode) @@ -59,17 +61,20 @@ __attribute__((always_inline)) INLINE static void darkmatter_read_particles( * */ __attribute__((always_inline)) INLINE static void darkmatter_write_particles( - hid_t h_grp, char* fileName, FILE* xmfFile, int Ndm, long long Ndm_total, - int mpi_rank, long long offset, struct gpart* gparts, - struct UnitSystem* us) { + hid_t h_grp, char* fileName, char* partTypeGroupName, FILE* xmfFile, + int Ndm, long long Ndm_total, int mpi_rank, long long offset, + struct gpart* gparts, struct UnitSystem* us) { /* Write arrays */ - writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, Ndm, 3, gparts, - Ndm_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, Ndm, 1, gparts, - Ndm_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS); - writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, Ndm, 3, gparts, - Ndm_total, mpi_rank, offset, v_full, us, UNIT_CONV_SPEED); - writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, Ndm, 1, gparts, - Ndm_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Coordinates", DOUBLE, + Ndm, 3, gparts, Ndm_total, mpi_rank, offset, x, us, + UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Masses", FLOAT, Ndm, + 1, gparts, Ndm_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Velocities", FLOAT, + Ndm, 3, gparts, Ndm_total, mpi_rank, offset, v_full, us, + UNIT_CONV_SPEED); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", + ULONGLONG, Ndm, 1, gparts, Ndm_total, mpi_rank, offset, id, us, + UNIT_CONV_NO_UNITS); } diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h index 7ce7b81892582f2a90f7dd07f7f244c0d4ed8afb..0dfdb82e4ec11c9153f77439027d7e4451ded7f4 100644 --- 
a/src/gravity/Default/gravity_part.h +++ b/src/gravity/Default/gravity_part.h @@ -29,7 +29,7 @@ struct gpart { float v_full[3]; /* Particle acceleration. */ - float a[3]; + float a_grav[3]; /* Particle mass. */ float mass; @@ -50,4 +50,4 @@ struct gpart { struct part* part; }; -} __attribute__((aligned(part_align))); +} __attribute__((aligned(gpart_align))); diff --git a/src/hydro/Default/hydro.h b/src/hydro/Default/hydro.h index fca4a346047d7dce0741924a69e95fdad5a5ce45..03953b07ad4e172d96b6e3382814e036a538e2bd 100644 --- a/src/hydro/Default/hydro.h +++ b/src/hydro/Default/hydro.h @@ -91,13 +91,16 @@ __attribute__((always_inline)) const float ih2 = ih * ih; const float ih4 = ih2 * ih2; - /* Final operation on the density. */ - p->rho = ih * ih2 * (p->rho + p->mass * kernel_root); - p->rho_dh = (p->rho_dh - 3.0f * p->mass * kernel_root) * ih4; - p->density.wcount = - (p->density.wcount + kernel_root) * (4.0f / 3.0 * M_PI * kernel_gamma3); - p->density.wcount_dh = - p->density.wcount_dh * ih * (4.0f / 3.0 * M_PI * kernel_gamma3); + /* Final operation on the density (add self-contribution). */ + p->rho += p->mass * kernel_root; + p->rho_dh -= 3.0f * p->mass * kernel_root * kernel_igamma; + p->density.wcount += kernel_root; + + /* Finish the calculation by inserting the missing h-factors */ + p->rho *= ih * ih2; + p->rho_dh *= ih4; + p->density.wcount *= (4.0f / 3.0f * M_PI * kernel_gamma3); + p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma4); } /** diff --git a/src/hydro/Default/hydro_iact.h b/src/hydro/Default/hydro_iact.h index b5b631501b2f9c398cf1f7e5ee32fd5c962ba86e..4f85299b9d61b3a66389bac3527a63068ab96db9 100644 --- a/src/hydro/Default/hydro_iact.h +++ b/src/hydro/Default/hydro_iact.h @@ -22,7 +22,7 @@ /* Includes. 
*/ #include "const.h" -#include "kernel.h" +#include "kernel_hydro.h" #include "part.h" #include "vector.h" diff --git a/src/hydro/Default/hydro_io.h b/src/hydro/Default/hydro_io.h index 958bf5a1869718b57678246ff3b1985e54145824..0e9ad46ddc1d4e8c8d3ffdbf3e81262ec49a7092 100644 --- a/src/hydro/Default/hydro_io.h +++ b/src/hydro/Default/hydro_io.h @@ -56,6 +56,8 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * * @param h_grp The HDF5 group in which to write the arrays. * @param fileName The name of the file (unsued in MPI mode). + * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param xmfFile The XMF file to write to (unused in MPI mode). * @param N The number of particles on that MPI rank. * @param N_total The total number of particles (only used in MPI mode) @@ -67,26 +69,31 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * */ __attribute__((always_inline)) INLINE static void hydro_write_particles( - hid_t h_grp, char* fileName, FILE* xmfFile, int N, long long N_total, - int mpi_rank, long long offset, struct part* parts, struct UnitSystem* us) { + hid_t h_grp, char* fileName, char* partTypeGroupName, FILE* xmfFile, int N, + long long N_total, int mpi_rank, long long offset, struct part* parts, + struct UnitSystem* us) { /* Write arrays */ - writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, N, 3, parts, - N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); - writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, mass, us, UNIT_CONV_MASS); - writeArray(h_grp, fileName, xmfFile, "SmoothingLength", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "InternalEnergy", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, u, us, 
UNIT_CONV_ENERGY_PER_UNIT_MASS); - writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, N, 1, parts, - N_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS); - writeArray(h_grp, fileName, xmfFile, "Acceleration", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, a_hydro, us, UNIT_CONV_ACCELERATION); - writeArray(h_grp, fileName, xmfFile, "Density", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Coordinates", DOUBLE, + N, 3, parts, N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Velocities", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Masses", FLOAT, N, 1, + parts, N_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "SmoothingLength", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, h, us, + UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "InternalEnergy", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, u, us, + UNIT_CONV_ENERGY_PER_UNIT_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", + ULONGLONG, N, 1, parts, N_total, mpi_rank, offset, id, us, + UNIT_CONV_NO_UNITS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Acceleration", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, a_hydro, us, + UNIT_CONV_ACCELERATION); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Density", FLOAT, N, + 1, parts, N_total, mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); } /** diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h index 8cc553363122099c748e3e3e1941611e986c8581..22c5734ed5762400285521b30f9aa60795c45325 100644 --- a/src/hydro/Gadget2/hydro.h +++ b/src/hydro/Gadget2/hydro.h @@ -101,7 +101,7 @@ __attribute__((always_inline)) p->rho *= ih * ih2; p->rho_dh *= ih4; 
p->density.wcount *= (4.0f / 3.0f * M_PI * kernel_gamma3); - p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma3); + p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma4); const float irho = 1.f / p->rho; diff --git a/src/hydro/Gadget2/hydro_debug.h b/src/hydro/Gadget2/hydro_debug.h index 46e156bb99015069f9958aeea05954e2be6db5e0..a4d1f7dd4397ebfc850b582e1ca81fc0d4edb76a 100644 --- a/src/hydro/Gadget2/hydro_debug.h +++ b/src/hydro/Gadget2/hydro_debug.h @@ -23,13 +23,13 @@ __attribute__((always_inline)) "x=[%.3e,%.3e,%.3e], " "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " "h=%.3e, " - "wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, P=%.3e, S=%.3e, " + "wcount=%d, wcount_dh=%.3e, m=%.3e, dh_drho=%.3e, rho=%.3e, P=%.3e, S=%.3e, " "dS/dt=%.3e, c=%.3e\n" "divV=%.3e, curlV=%.3e, rotV=[%.3e,%.3e,%.3e] \n " "v_sig=%e dh/dt=%.3e t_begin=%d, t_end=%d\n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], - p->h, (int)p->density.wcount, p->mass, p->rho_dh, p->rho, + p->h, (int)p->density.wcount, p->density.wcount_dh, p->mass, p->rho_dh, p->rho, p->force.pressure, p->entropy, p->entropy_dt, p->force.soundspeed, p->div_v, p->force.curl_v, p->density.rot_v[0], p->density.rot_v[1], p->density.rot_v[2], p->force.v_sig, p->h_dt, p->ti_begin, p->ti_end); diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index d31b6be383b80a2698b63d27308f6fee9b23518f..d988c678affcf4ca722a965a7e52a7c120b4a924 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -22,7 +22,7 @@ /* Includes. 
*/ #include "const.h" -#include "kernel.h" +#include "kernel_hydro.h" #include "part.h" #include "vector.h" @@ -93,8 +93,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( dv[2] = pi->v[2] - pj->v[2]; const float dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2]; - pi->div_v += faci * dvdr; - pj->div_v += facj * dvdr; + pi->div_v -= faci * dvdr; + pj->div_v -= facj * dvdr; /* Compute dv cross r */ curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1]; @@ -211,10 +211,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /* Balsara term */ const float balsara_i = fabsf(pi->div_v) / - (fabsf(pi->div_v) + pi->force.curl_v + 0.0001 * ci / fac_mu / hi); + (fabsf(pi->div_v) + pi->force.curl_v + 0.0001f * ci / fac_mu / hi); const float balsara_j = fabsf(pj->div_v) / - (fabsf(pj->div_v) + pj->force.curl_v + 0.0001 * cj / fac_mu / hj); + (fabsf(pj->div_v) + pj->force.curl_v + 0.0001f * cj / fac_mu / hj); /* Are the particles moving towards each others ? */ const float omega_ij = fminf(dvdr, 0.f); @@ -309,10 +309,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Balsara term */ const float balsara_i = fabsf(pi->div_v) / - (fabsf(pi->div_v) + pi->force.curl_v + 0.0001 * ci / fac_mu / hi); + (fabsf(pi->div_v) + pi->force.curl_v + 0.0001f * ci / fac_mu / hi); const float balsara_j = fabsf(pj->div_v) / - (fabsf(pj->div_v) + pj->force.curl_v + 0.0001 * cj / fac_mu / hj); + (fabsf(pj->div_v) + pj->force.curl_v + 0.0001f * cj / fac_mu / hj); /* Are the particles moving towards each others ? 
*/ const float omega_ij = fminf(dvdr, 0.f); diff --git a/src/hydro/Gadget2/hydro_io.h b/src/hydro/Gadget2/hydro_io.h index 17c3d3013644c3572f3c26fc3e270b1c1bc465ed..c1c59dfa4980a2843e7e13bee4c964c9b254cae6 100644 --- a/src/hydro/Gadget2/hydro_io.h +++ b/src/hydro/Gadget2/hydro_io.h @@ -56,6 +56,8 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * * @param h_grp The HDF5 group in which to write the arrays. * @param fileName The name of the file (unsued in MPI mode). + * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param xmfFile The XMF file to write to (unused in MPI mode). * @param N The number of particles on that MPI rank. * @param N_total The total number of particles (only used in MPI mode) @@ -67,27 +69,31 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * */ __attribute__((always_inline)) INLINE static void hydro_write_particles( - hid_t h_grp, char* fileName, FILE* xmfFile, int N, long long N_total, - int mpi_rank, long long offset, struct part* parts, struct UnitSystem* us) { + hid_t h_grp, char* fileName, char* partTypeGroupName, FILE* xmfFile, int N, + long long N_total, int mpi_rank, long long offset, struct part* parts, + struct UnitSystem* us) { /* Write arrays */ - writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, N, 3, parts, - N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); - writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, mass, us, UNIT_CONV_MASS); - writeArray(h_grp, fileName, xmfFile, "SmoothingLength", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "InternalEnergy", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, entropy, us, + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, 
"Coordinates", DOUBLE, + N, 3, parts, N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Velocities", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Masses", FLOAT, N, 1, + parts, N_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "SmoothingLength", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, h, us, + UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "InternalEnergy", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, entropy, us, UNIT_CONV_ENTROPY_PER_UNIT_MASS); - writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, N, 1, parts, - N_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS); - writeArray(h_grp, fileName, xmfFile, "Acceleration", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, a_hydro, us, UNIT_CONV_ACCELERATION); - writeArray(h_grp, fileName, xmfFile, "Density", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", + ULONGLONG, N, 1, parts, N_total, mpi_rank, offset, id, us, + UNIT_CONV_NO_UNITS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Acceleration", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, a_hydro, us, + UNIT_CONV_ACCELERATION); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Density", FLOAT, N, + 1, parts, N_total, mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); } /** diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h index f4e3f1a70625430d9bd891c5f7596d71e7b8b231..7db3c275ce7e3389610e8297c287cbd5301c6c64 100644 --- a/src/hydro/Minimal/hydro.h +++ b/src/hydro/Minimal/hydro.h @@ -101,7 +101,12 @@ __attribute__((always_inline)) p->rho *= ih * ih2; p->rho_dh *= ih4; p->density.wcount *= (4.0f / 3.0f * M_PI * kernel_gamma3); - p->density.wcount_dh *= ih * (4.0f 
/ 3.0f * M_PI * kernel_gamma3); + p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma4); + + const float irho = 1.f / p->rho; + + /* Compute the derivative term */ + p->rho_dh = 1.f / (1.f + 0.33333333f * p->h * p->rho_dh * irho); } /** diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h index 6afb9d8d38a4fc7f1d38b7286720ddb7f3c51ab4..3427ec538613842f8fbcf0d8ba5f9ba5a0b8d540 100644 --- a/src/hydro/Minimal/hydro_iact.h +++ b/src/hydro/Minimal/hydro_iact.h @@ -16,12 +16,12 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ -#ifndef SWIFT_RUNNER_IACT_H -#define SWIFT_RUNNER_IACT_H +#ifndef SWIFT_RUNNER_IACT_MINIMAL_H +#define SWIFT_RUNNER_IACT_MINIMAL_H /* Includes. */ #include "const.h" -#include "kernel.h" +#include "kernel_hydro.h" #include "part.h" #include "vector.h" @@ -38,33 +38,31 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { - float r = sqrtf(r2); - float xi, xj; - float h_inv; float wi, wj, wi_dx, wj_dx; - float mi, mj; + + const float r = sqrtf(r2); /* Get the masses. */ - mi = pi->mass; - mj = pj->mass; + const float mi = pi->mass; + const float mj = pj->mass; /* Compute density of pi. */ - h_inv = 1.0 / hi; - xi = r * h_inv; + const float hi_inv = 1.f / hi; + const float xi = r * hi_inv; kernel_deval(xi, &wi, &wi_dx); pi->rho += mj * wi; - pi->rho_dh -= mj * (3.0 * wi + xi * wi_dx); + pi->rho_dh -= mj * (3.f * wi + xi * wi_dx); pi->density.wcount += wi; pi->density.wcount_dh -= xi * wi_dx; /* Compute density of pj. 
*/ - h_inv = 1.f / hj; - xj = r * h_inv; + const float hj_inv = 1.f / hj; + const float xj = r * hj_inv; kernel_deval(xj, &wj, &wj_dx); pj->rho += mi * wj; - pj->rho_dh -= mi * (3.0 * wj + xj * wj_dx); + pj->rho_dh -= mi * (3.f * wj + xj * wj_dx); pj->density.wcount += wj; pj->density.wcount_dh -= xj * wj_dx; } @@ -76,24 +74,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { - float r; - float xi; - float h_inv; float wi, wi_dx; - float mj; /* Get the masses. */ - mj = pj->mass; + const float mj = pj->mass; /* Get r and r inverse. */ - r = sqrtf(r2); + const float r = sqrtf(r2); - h_inv = 1.f / hi; - xi = r * h_inv; + const float h_inv = 1.f / hi; + const float xi = r * h_inv; kernel_deval(xi, &wi, &wi_dx); pi->rho += mj * wi; - pi->rho_dh -= mj * (3.0 * wi + xi * wi_dx); + pi->rho_dh -= mj * (3.f * wi + xi * wi_dx); pi->density.wcount += wi; pi->density.wcount_dh -= xi * wi_dx; } @@ -148,7 +142,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /* Compute sound speeds */ const float ci = sqrtf(const_hydro_gamma * pressurei / rhoi); const float cj = sqrtf(const_hydro_gamma * pressurej / rhoj); - float v_sig = ci + cj + 3.f * omega_ij; + const float v_sig = ci + cj + 3.f * omega_ij; /* SPH acceleration term */ const float sph_term = (P_over_rho_i * wi_dr + P_over_rho_j * wj_dr) * r_inv; @@ -225,7 +219,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Compute sound speeds */ const float ci = sqrtf(const_hydro_gamma * pressurei / rhoi); const float cj = sqrtf(const_hydro_gamma * pressurej / rhoj); - float v_sig = ci + cj + 3.f * omega_ij; + const float v_sig = ci + cj + 3.f * omega_ij; /* SPH acceleration term */ const float sph_term = (P_over_rho_i * wi_dr + P_over_rho_j * wj_dr) * r_inv; @@ -245,4 +239,4 @@ 
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); } -#endif /* SWIFT_RUNNER_IACT_H */ +#endif /* SWIFT_RUNNER_IACT_MINIMAL_H */ diff --git a/src/hydro/Minimal/hydro_io.h b/src/hydro/Minimal/hydro_io.h index 2c56fb489ab84ca7c30426b54cf95e26e3821084..afe5de83f423e43b4d2480cca1ac3e84d6c549de 100644 --- a/src/hydro/Minimal/hydro_io.h +++ b/src/hydro/Minimal/hydro_io.h @@ -56,6 +56,8 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * * @param h_grp The HDF5 group in which to write the arrays. * @param fileName The name of the file (unsued in MPI mode). + * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param xmfFile The XMF file to write to (unused in MPI mode). * @param N The number of particles on that MPI rank. * @param N_total The total number of particles (only used in MPI mode) @@ -67,26 +69,31 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * */ __attribute__((always_inline)) INLINE static void hydro_write_particles( - hid_t h_grp, char* fileName, FILE* xmfFile, int N, long long N_total, - int mpi_rank, long long offset, struct part* parts, struct UnitSystem* us) { + hid_t h_grp, char* fileName, char* partTypeGroupName, FILE* xmfFile, int N, + long long N_total, int mpi_rank, long long offset, struct part* parts, + struct UnitSystem* us) { /* Write arrays */ - writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, N, 3, parts, - N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); - writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, mass, us, UNIT_CONV_MASS); - writeArray(h_grp, fileName, xmfFile, "SmoothingLength", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH); - writeArray(h_grp, 
fileName, xmfFile, "InternalEnergy", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, u, us, UNIT_CONV_ENERGY_PER_UNIT_MASS); - writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, N, 1, parts, - N_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS); - writeArray(h_grp, fileName, xmfFile, "Acceleration", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, a_hydro, us, UNIT_CONV_ACCELERATION); - writeArray(h_grp, fileName, xmfFile, "Density", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Coordinates", DOUBLE, + N, 3, parts, N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Velocities", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Masses", FLOAT, N, 1, + parts, N_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "SmoothingLength", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, h, us, + UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "InternalEnergy", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, u, us, + UNIT_CONV_ENERGY_PER_UNIT_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", + ULONGLONG, N, 1, parts, N_total, mpi_rank, offset, id, us, + UNIT_CONV_NO_UNITS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Acceleration", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, a_hydro, us, + UNIT_CONV_ACCELERATION); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Density", FLOAT, N, + 1, parts, N_total, mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); } /** diff --git a/src/kernel.h b/src/kernel.h deleted file mode 100644 index aead6a95adc35028834d671448223a31a57fc2b6..0000000000000000000000000000000000000000 --- a/src/kernel.h +++ /dev/null @@ -1,617 +0,0 @@ 
-/******************************************************************************* - * This file is part of SWIFT. - * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) - * Matthieu Schaller (matthieu.schaller@durham.ac.uk) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - ******************************************************************************/ -#ifndef SWIFT_KERNEL_H -#define SWIFT_KERNEL_H - -/* Includes. */ -#include "const.h" -#include "inline.h" -#include "vector.h" - -/** - * @file kernel.h - * @brief SPH kernel functions. Compute W(x,h) and the gradient of W(x,h), - * as well as the blending function used for gravity. - */ - -/* Gravity kernel stuff - * ----------------------------------------------------------------------------------------------- - */ - -/* The gravity kernel is defined as a degree 6 polynomial in the distance - r. The resulting value should be post-multiplied with r^-3, resulting - in a polynomial with terms ranging from r^-3 to r^3, which are - sufficient to model both the direct potential as well as the splines - near the origin. */ - -/* Coefficients for the gravity kernel. 
*/ -#define kernel_grav_degree 6 -#define kernel_grav_ivals 2 -#define kernel_grav_scale (2 * const_iepsilon) -static float kernel_grav_coeffs - [(kernel_grav_degree + 1) * (kernel_grav_ivals + 1)] = { - 32.0f * const_iepsilon6, -192.0f / 5.0f * const_iepsilon5, - 0.0f, 32.0f / 3.0f * const_iepsilon3, - 0.0f, 0.0f, - 0.0f, -32.0f / 3.0f * const_iepsilon6, - 192.0f / 5.0f * const_iepsilon5, -48.0f * const_iepsilon4, - 64.0f / 3.0f * const_iepsilon3, 0.0f, - 0.0f, -1.0f / 15.0f, - 0.0f, 0.0f, - 0.0f, 0.0f, - 0.0f, 0.0f, - 1.0f}; - -/** - * @brief Computes the gravity cubic spline for a given distance x. - */ - -__attribute__((always_inline)) INLINE static void kernel_grav_eval(float x, - float *W) { - int ind = fmin(x * kernel_grav_scale, kernel_grav_ivals); - float *coeffs = &kernel_grav_coeffs[ind * (kernel_grav_degree + 1)]; - float w = coeffs[0] * x + coeffs[1]; - for (int k = 2; k <= kernel_grav_degree; k++) w = x * w + coeffs[k]; - *W = w; -} - -#ifdef VECTORIZE - -/** - * @brief Computes the gravity cubic spline for a given distance x (Vectorized - * version). - */ - -__attribute__((always_inline)) - INLINE static void kernel_grav_eval_vec(vector *x, vector *w) { - - vector ind, c[kernel_grav_degree + 1]; - int j, k; - - /* Load x and get the interval id. */ - ind.m = vec_ftoi(vec_fmin(x->v * vec_set1(kernel_grav_scale), - vec_set1((float)kernel_grav_ivals))); - - /* load the coefficients. */ - for (k = 0; k < VEC_SIZE; k++) - for (j = 0; j < kernel_grav_degree + 1; j++) - c[j].f[k] = kernel_grav_coeffs[ind.i[k] * (kernel_grav_degree + 1) + j]; - - /* Init the iteration for Horner's scheme. */ - w->v = (c[0].v * x->v) + c[1].v; - - /* And we're off! */ - for (int k = 2; k <= kernel_grav_degree; k++) w->v = (x->v * w->v) + c[k].v; -} - -#endif - -/* Blending function stuff - * -------------------------------------------------------------------------------------------- - */ - -/* Coefficients for the blending function. 
*/ -#define blender_degree 3 -#define blender_ivals 3 -#define blender_scale 4.0f -static float blender_coeffs[(blender_degree + 1) * (blender_ivals + 1)] = { - 0.0f, 0.0f, 0.0f, 1.0f, -32.0f, 24.0f, -6.0f, 1.5f, - -32.0f, 72.0f, -54.0f, 13.5f, 0.0f, 0.0f, 0.0f, 0.0f}; - -/** - * @brief Computes the cubic spline blender for a given distance x. - */ - -__attribute__((always_inline)) INLINE static void blender_eval(float x, - float *W) { - int ind = fmin(x * blender_scale, blender_ivals); - float *coeffs = &blender_coeffs[ind * (blender_degree + 1)]; - float w = coeffs[0] * x + coeffs[1]; - for (int k = 2; k <= blender_degree; k++) w = x * w + coeffs[k]; - *W = w; -} - -/** - * @brief Computes the cubic spline blender and its derivative for a given - * distance x. - */ - -__attribute__((always_inline)) INLINE static void blender_deval(float x, - float *W, - float *dW_dx) { - int ind = fminf(x * blender_scale, blender_ivals); - float *coeffs = &blender_coeffs[ind * (blender_degree + 1)]; - float w = coeffs[0] * x + coeffs[1]; - float dw_dx = coeffs[0]; - for (int k = 2; k <= blender_degree; k++) { - dw_dx = dw_dx * x + w; - w = x * w + coeffs[k]; - } - *W = w; - *dW_dx = dw_dx; -} - -#ifdef VECTORIZE - -/** - * @brief Computes the cubic spline blender and its derivative for a given - * distance x (Vectorized version). Gives a sensible answer only if x<2. - */ - -__attribute__((always_inline)) INLINE static void blender_eval_vec(vector *x, - vector *w) { - - vector ind, c[blender_degree + 1]; - int j, k; - - /* Load x and get the interval id. */ - ind.m = vec_ftoi( - vec_fmin(x->v * vec_set1(blender_scale), vec_set1((float)blender_ivals))); - - /* load the coefficients. */ - for (k = 0; k < VEC_SIZE; k++) - for (j = 0; j < blender_degree + 1; j++) - c[j].f[k] = blender_coeffs[ind.i[k] * (blender_degree + 1) + j]; - - /* Init the iteration for Horner's scheme. */ - w->v = (c[0].v * x->v) + c[1].v; - - /* And we're off! 
*/ - for (int k = 2; k <= blender_degree; k++) w->v = (x->v * w->v) + c[k].v; -} - -/** - * @brief Computes the cubic spline blender and its derivative for a given - * distance x (Vectorized version). Gives a sensible answer only if x<2. - */ - -__attribute__((always_inline)) - INLINE static void blender_deval_vec(vector *x, vector *w, vector *dw_dx) { - - vector ind, c[blender_degree + 1]; - int j, k; - - /* Load x and get the interval id. */ - ind.m = vec_ftoi( - vec_fmin(x->v * vec_set1(blender_scale), vec_set1((float)blender_ivals))); - - /* load the coefficients. */ - for (k = 0; k < VEC_SIZE; k++) - for (j = 0; j < blender_degree + 1; j++) - c[j].f[k] = blender_coeffs[ind.i[k] * (blender_degree + 1) + j]; - - /* Init the iteration for Horner's scheme. */ - w->v = (c[0].v * x->v) + c[1].v; - dw_dx->v = c[0].v; - - /* And we're off! */ - for (int k = 2; k <= blender_degree; k++) { - dw_dx->v = (dw_dx->v * x->v) + w->v; - w->v = (x->v * w->v) + c[k].v; - } -} - -#endif - -/* -------------------------------------------------------------------------------------------------------------------- - */ - -#if defined(CUBIC_SPLINE_KERNEL) - -/* -------------------------------------------------------------------------------------------------------------------- - */ - -/* Coefficients for the kernel. 
*/ -#define kernel_name "Cubic spline" -#define kernel_degree 3 -#define kernel_ivals 2 -#define kernel_gamma 2.0f -#define kernel_gamma2 4.0f -#define kernel_gamma3 8.0f -#define kernel_igamma 0.5f -#define kernel_nwneigh \ - (4.0 / 3.0 * M_PI *const_eta_kernel *const_eta_kernel *const_eta_kernel * \ - 6.0858f) -static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] - __attribute__((aligned(16))) = { - 3.0 / 4.0 * M_1_PI, -3.0 / 2.0 * M_1_PI, 0.0, M_1_PI, - -0.25 * M_1_PI, 3.0 / 2.0 * M_1_PI, -3.0 * M_1_PI, M_2_PI, - 0.0, 0.0, 0.0, 0.0}; -#define kernel_root (kernel_coeffs[kernel_degree]) -#define kernel_wroot (4.0 / 3.0 * M_PI *kernel_coeffs[kernel_degree]) - -/** - * @brief Computes the cubic spline kernel and its derivative for a given - * distance x. Gives a sensible answer only if x<2. - */ - -__attribute__((always_inline)) INLINE static void kernel_deval(float x, - float *W, - float *dW_dx) { - int ind = fminf(x, kernel_ivals); - float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)]; - float w = coeffs[0] * x + coeffs[1]; - float dw_dx = coeffs[0]; - for (int k = 2; k <= kernel_degree; k++) { - dw_dx = dw_dx * x + w; - w = x * w + coeffs[k]; - } - *W = w; - *dW_dx = dw_dx; -} - -#ifdef VECTORIZE - -/** - * @brief Computes the cubic spline kernel and its derivative for a given - * distance x (Vectorized version). Gives a sensible answer only if x<2. - */ - -__attribute__((always_inline)) - INLINE static void kernel_deval_vec(vector *x, vector *w, vector *dw_dx) { - - vector ind, c[kernel_degree + 1]; - int j, k; - - /* Load x and get the interval id. */ - ind.m = vec_ftoi(vec_fmin(x->v, vec_set1((float)kernel_ivals))); - - /* load the coefficients. */ - for (k = 0; k < VEC_SIZE; k++) - for (j = 0; j < kernel_degree + 1; j++) - c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j]; - - /* Init the iteration for Horner's scheme. */ - w->v = (c[0].v * x->v) + c[1].v; - dw_dx->v = c[0].v; - - /* And we're off! 
*/ - for (int k = 2; k <= kernel_degree; k++) { - dw_dx->v = (dw_dx->v * x->v) + w->v; - w->v = (x->v * w->v) + c[k].v; - } -} - -#endif - -/** - * @brief Computes the cubic spline kernel for a given distance x. Gives a - * sensible answer only if x<2. - */ - -__attribute__((always_inline)) INLINE static void kernel_eval(float x, - float *W) { - int ind = fmin(x, kernel_ivals); - float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)]; - float w = coeffs[0] * x + coeffs[1]; - for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k]; - *W = w; -} - -/* -------------------------------------------------------------------------------------------------------------------- - */ - -#elif defined(QUARTIC_SPLINE_KERNEL) - -/* -------------------------------------------------------------------------------------------------------------------- - */ - -/* Coefficients for the kernel. */ -#define kernel_name "Quartic spline" -#define kernel_degree 4 -#define kernel_ivals 3 -#define kernel_gamma 2.5f -#define kernel_gamma2 6.25f -#define kernel_gamma3 15.625f -#define kernel_igamma 0.4f -#define kernel_nwneigh \ - (4.0 / 3.0 * M_PI *const_eta_kernel *const_eta_kernel *const_eta_kernel * \ - 8.2293f) -static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] - __attribute__((aligned(16))) = { - 3.0 / 10.0 * M_1_PI, 0.0, -3.0 / 4.0 * M_1_PI, - 0.0, 23.0 / 32.0 * M_1_PI, -1.0 / 5.0 * M_1_PI, - M_1_PI, -3.0 / 2.0 * M_1_PI, 0.25 * M_1_PI, - 11.0 / 16.0 * M_1_PI, 1.0 / 20.0 * M_1_PI, -0.5 * M_1_PI, - 15.0 / 8.0 * M_1_PI, -25.0 / 8.0 * M_1_PI, 125.0 / 64.0 * M_1_PI, - 0.0, 0.0, 0.0, - 0.0, 0.0}; -#define kernel_root (kernel_coeffs[kernel_degree]) -#define kernel_wroot (4.0 / 3.0 * M_PI *kernel_coeffs[kernel_degree]) - -/** - * @brief Computes the quartic spline kernel and its derivative for a given - * distance x. 
Gives a sensible answer only if x<2.5 - */ - -__attribute__((always_inline)) INLINE static void kernel_deval(float x, - float *W, - float *dW_dx) { - int ind = fminf(x + 0.5, kernel_ivals); - float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)]; - float w = coeffs[0] * x + coeffs[1]; - float dw_dx = coeffs[0]; - for (int k = 2; k <= kernel_degree; k++) { - dw_dx = dw_dx * x + w; - w = x * w + coeffs[k]; - } - *W = w; - *dW_dx = dw_dx; -} - -#ifdef VECTORIZE - -/** - * @brief Computes the quartic spline kernel and its derivative for a given - * distance x (Vectorized version). Gives a sensible answer only if x<2.5 - */ - -__attribute__((always_inline)) - INLINE static void kernel_deval_vec(vector *x, vector *w, vector *dw_dx) { - - vector ind, c[kernel_degree + 1]; - int j, k; - - /* Load x and get the interval id. */ - ind.m = vec_ftoi(vec_fmin(x->v + 0.5f, vec_set1((float)kernel_ivals))); - - /* load the coefficients. */ - for (k = 0; k < VEC_SIZE; k++) - for (j = 0; j < kernel_degree + 1; j++) - c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j]; - - /* Init the iteration for Horner's scheme. */ - w->v = (c[0].v * x->v) + c[1].v; - dw_dx->v = c[0].v; - - /* And we're off! */ - for (int k = 2; k <= kernel_degree; k++) { - dw_dx->v = (dw_dx->v * x->v) + w->v; - w->v = (x->v * w->v) + c[k].v; - } -} - -#endif - -/** - * @brief Computes the quartic spline kernel for a given distance x. 
Gives a - * sensible answer only if x<2.5 - */ - -__attribute__((always_inline)) INLINE static void kernel_eval(float x, - float *W) { - int ind = fmin(x + 0.5f, kernel_ivals); - float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)]; - float w = coeffs[0] * x + coeffs[1]; - for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k]; - *W = w; -} - -/* -------------------------------------------------------------------------------------------------------------------- - */ - -#elif defined(QUINTIC_SPLINE_KERNEL) - -/* -------------------------------------------------------------------------------------------------------------------- - */ - -/* Coefficients for the kernel. */ -#define kernel_name "Quintic spline" -#define kernel_degree 5 -#define kernel_ivals 3 -#define kernel_gamma 3.f -#define kernel_gamma2 9.f -#define kernel_gamma3 27.f -#define kernel_igamma 1.0f / 3.0f -#define kernel_nwneigh \ - (4.0 / 3.0 * M_PI *const_eta_kernel *const_eta_kernel *const_eta_kernel * \ - 10.5868f) -static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] - __attribute__((aligned(16))) = { - -1.0 / 12.0 * M_1_PI, 1.0 / 4.0 * M_1_PI, 0.0, - -1.0 / 2.0 * M_1_PI, 0.0, 11.0 / 20.0 * M_1_PI, - 1.0 / 24.0 * M_1_PI, -3.0 / 8.0 * M_1_PI, 5.0 / 4.0 * M_1_PI, - -7.0 / 4.0 * M_1_PI, 5.0 / 8.0 * M_1_PI, 17.0 / 40.0 * M_1_PI, - -1.0 / 120.0 * M_1_PI, 1.0 / 8.0 * M_1_PI, -3.0 / 4.0 * M_1_PI, - 9.0 / 4.0 * M_1_PI, -27.0 / 8.0 * M_1_PI, 81.0 / 40.0 * M_1_PI, - 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0}; -#define kernel_root (kernel_coeffs[kernel_degree]) -#define kernel_wroot (4.0 / 3.0 * M_PI *kernel_coeffs[kernel_degree]) - -/** - * @brief Computes the quintic spline kernel and its derivative for a given - * distance x. Gives a sensible answer only if x<3. 
- */ - -__attribute__((always_inline)) INLINE static void kernel_deval(float x, - float *W, - float *dW_dx) { - int ind = fminf(x, kernel_ivals); - float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)]; - float w = coeffs[0] * x + coeffs[1]; - float dw_dx = coeffs[0]; - for (int k = 2; k <= kernel_degree; k++) { - dw_dx = dw_dx * x + w; - w = x * w + coeffs[k]; - } - *W = w; - *dW_dx = dw_dx; -} - -#ifdef VECTORIZE - -/** - * @brief Computes the quintic spline kernel and its derivative for a given - * distance x (Vectorized version). Gives a sensible answer only if x<3. - */ - -__attribute__((always_inline)) - INLINE static void kernel_deval_vec(vector *x, vector *w, vector *dw_dx) { - - vector ind, c[kernel_degree + 1]; - int j, k; - - /* Load x and get the interval id. */ - ind.m = vec_ftoi(vec_fmin(x->v, vec_set1((float)kernel_ivals))); - - /* load the coefficients. */ - for (k = 0; k < VEC_SIZE; k++) - for (j = 0; j < kernel_degree + 1; j++) - c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j]; - - /* Init the iteration for Horner's scheme. */ - w->v = (c[0].v * x->v) + c[1].v; - dw_dx->v = c[0].v; - - /* And we're off! */ - for (int k = 2; k <= kernel_degree; k++) { - dw_dx->v = (dw_dx->v * x->v) + w->v; - w->v = (x->v * w->v) + c[k].v; - } -} - -#endif - -/** - * @brief Computes the quintic spline kernel for a given distance x. Gives a - * sensible answer only if x<3. 
- */ - -__attribute__((always_inline)) INLINE static void kernel_eval(float x, - float *W) { - int ind = fmin(x, kernel_ivals); - float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)]; - float w = coeffs[0] * x + coeffs[1]; - for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k]; - *W = w; -} - -/* -------------------------------------------------------------------------------------------------------------------- - */ - -#elif defined(WENDLAND_C2_KERNEL) - -/* -------------------------------------------------------------------------------------------------------------------- - */ - -/* Coefficients for the kernel. */ -#define kernel_name "Wendland C2" -#define kernel_degree 5 -#define kernel_ivals 1 -#define kernel_gamma 2.f -#define kernel_gamma2 4.f -#define kernel_gamma3 8.f -#define kernel_igamma 0.5f -#define kernel_nwneigh \ - (4.0 / 3.0 * M_PI *const_eta_kernel *const_eta_kernel *const_eta_kernel * \ - 7.261825f) -static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] - __attribute__((aligned(16))) = { - 0.05222272f, -0.39167037f, 1.04445431f, -1.04445431f, 0.f, 0.41778173f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; -#define kernel_root (kernel_coeffs[kernel_degree]) -#define kernel_wroot (4.0 / 3.0 * M_PI *kernel_coeffs[kernel_degree]) - -/** - * @brief Computes the quintic spline kernel and its derivative for a given - * distance x. Gives a sensible answer only if x<1. - */ - -__attribute__((always_inline)) INLINE static void kernel_deval(float x, - float *W, - float *dW_dx) { - int ind = fminf(0.5f * x, kernel_ivals); - float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)]; - float w = coeffs[0] * x + coeffs[1]; - float dw_dx = coeffs[0]; - for (int k = 2; k <= kernel_degree; k++) { - dw_dx = dw_dx * x + w; - w = x * w + coeffs[k]; - } - *W = w; - *dW_dx = dw_dx; -} - -#ifdef VECTORIZE - -/** - * @brief Computes the Wendland C2 kernel and its derivative for a given - * distance x (Vectorized version). 
Gives a sensible answer only if x<1. - */ - -__attribute__((always_inline)) - INLINE static void kernel_deval_vec(vector *x, vector *w, vector *dw_dx) { - - vector ind, c[kernel_degree + 1]; - int j, k; - - /* Load x and get the interval id. */ - ind.m = vec_ftoi(vec_fmin(0.5f * x->v, vec_set1((float)kernel_ivals))); - - /* load the coefficients. */ - for (k = 0; k < VEC_SIZE; k++) - for (j = 0; j < kernel_degree + 1; j++) - c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j]; - - /* Init the iteration for Horner's scheme. */ - w->v = (c[0].v * x->v) + c[1].v; - dw_dx->v = c[0].v; - - /* And we're off! */ - for (int k = 2; k <= kernel_degree; k++) { - dw_dx->v = (dw_dx->v * x->v) + w->v; - w->v = (x->v * w->v) + c[k].v; - } -} - -#endif - -/** - * @brief Computes the Wendland C2 kernel for a given distance x. Gives a - * sensible answer only if x<1. - */ - -__attribute__((always_inline)) INLINE static void kernel_eval(float x, - float *W) { - int ind = fmin(0.5f * x, kernel_ivals); - float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)]; - float w = coeffs[0] * x + coeffs[1]; - for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k]; - *W = w; -} - -/* -------------------------------------------------------------------------------------------------------------------- - */ - -#else - -/* -------------------------------------------------------------------------------------------------------------------- - */ - -#error "A kernel function must be chosen in const.h !!" 
- -#endif // Kernel choice - -/* Some cross-check functions */ -void SPH_kernel_dump(int N); -void gravity_kernel_dump(float r_max, int N); - -#endif // SWIFT_KERNEL_H diff --git a/src/kernel.c b/src/kernel_gravity.c similarity index 78% rename from src/kernel.c rename to src/kernel_gravity.c index 58f5b0c9fdaa62663c65d5af18afe0a15a813834..639a964c813ef7fd95008857ee17b7dd5ffafb27 100644 --- a/src/kernel.c +++ b/src/kernel_gravity.c @@ -21,32 +21,7 @@ #include <math.h> #include <stdio.h> -#include "kernel.h" - -/** - * @brief Test the SPH kernel function by dumping it in the interval [0,1]. - * - * @param N number of intervals in [0,1]. - */ -void SPH_kernel_dump(int N) { - - int k; - float x, w, dw_dx; - float x4[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float w4[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - // float dw_dx4[4] __attribute__ ((aligned (16))); - - for (k = 0; k <= N; k++) { - x = ((float)k) / N; - x4[3] = x4[2]; - x4[2] = x4[1]; - x4[1] = x4[0]; - x4[0] = x; - kernel_deval(x, &w, &dw_dx); - // kernel_deval_vec( (vector *)x4 , (vector *)w4 , (vector *)dw_dx4 ); - printf(" %e %e %e %e %e %e %e\n", x, w, dw_dx, w4[0], w4[1], w4[2], w4[3]); - } -} +#include "kernel_gravity.h" /** * @brief The Gadget-2 gravity kernel function diff --git a/src/kernel_gravity.h b/src/kernel_gravity.h new file mode 100644 index 0000000000000000000000000000000000000000..7fd4b061a7e94be01a11b06ad23d9113f579ebb8 --- /dev/null +++ b/src/kernel_gravity.h @@ -0,0 +1,209 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_KERNEL_GRAVITY_H +#define SWIFT_KERNEL_GRAVITY_H + +/* Includes. */ +#include "const.h" +#include "inline.h" +#include "vector.h" + +/* Gravity kernel stuff + * ----------------------------------------------------------------------------------------------- + */ + +/* The gravity kernel is defined as a degree 6 polynomial in the distance + r. The resulting value should be post-multiplied with r^-3, resulting + in a polynomial with terms ranging from r^-3 to r^3, which are + sufficient to model both the direct potential as well as the splines + near the origin. */ + +/* Coefficients for the gravity kernel. */ +#define kernel_grav_degree 6 +#define kernel_grav_ivals 2 +#define kernel_grav_scale (2 * const_iepsilon) +static float kernel_grav_coeffs + [(kernel_grav_degree + 1) * (kernel_grav_ivals + 1)] = { + 32.0f * const_iepsilon6, -192.0f / 5.0f * const_iepsilon5, + 0.0f, 32.0f / 3.0f * const_iepsilon3, + 0.0f, 0.0f, + 0.0f, -32.0f / 3.0f * const_iepsilon6, + 192.0f / 5.0f * const_iepsilon5, -48.0f * const_iepsilon4, + 64.0f / 3.0f * const_iepsilon3, 0.0f, + 0.0f, -1.0f / 15.0f, + 0.0f, 0.0f, + 0.0f, 0.0f, + 0.0f, 0.0f, + 1.0f}; + +/** + * @brief Computes the gravity cubic spline for a given distance x. 
+ */ + +__attribute__((always_inline)) INLINE static void kernel_grav_eval(float x, + float *W) { + int ind = fmin(x * kernel_grav_scale, kernel_grav_ivals); + float *coeffs = &kernel_grav_coeffs[ind * (kernel_grav_degree + 1)]; + float w = coeffs[0] * x + coeffs[1]; + for (int k = 2; k <= kernel_grav_degree; k++) w = x * w + coeffs[k]; + *W = w; +} + +#ifdef VECTORIZE + +/** + * @brief Computes the gravity cubic spline for a given distance x (Vectorized + * version). + */ + +__attribute__((always_inline)) + INLINE static void kernel_grav_eval_vec(vector *x, vector *w) { + + vector ind, c[kernel_grav_degree + 1]; + int j, k; + + /* Load x and get the interval id. */ + ind.m = vec_ftoi(vec_fmin(x->v * vec_set1(kernel_grav_scale), + vec_set1((float)kernel_grav_ivals))); + + /* load the coefficients. */ + for (k = 0; k < VEC_SIZE; k++) + for (j = 0; j < kernel_grav_degree + 1; j++) + c[j].f[k] = kernel_grav_coeffs[ind.i[k] * (kernel_grav_degree + 1) + j]; + + /* Init the iteration for Horner's scheme. */ + w->v = (c[0].v * x->v) + c[1].v; + + /* And we're off! */ + for (int k = 2; k <= kernel_grav_degree; k++) w->v = (x->v * w->v) + c[k].v; +} + +#endif + +/* Blending function stuff + * -------------------------------------------------------------------------------------------- + */ + +/* Coefficients for the blending function. */ +#define blender_degree 3 +#define blender_ivals 3 +#define blender_scale 4.0f +static float blender_coeffs[(blender_degree + 1) * (blender_ivals + 1)] = { + 0.0f, 0.0f, 0.0f, 1.0f, -32.0f, 24.0f, -6.0f, 1.5f, + -32.0f, 72.0f, -54.0f, 13.5f, 0.0f, 0.0f, 0.0f, 0.0f}; + +/** + * @brief Computes the cubic spline blender for a given distance x. 
+ */ + +__attribute__((always_inline)) INLINE static void blender_eval(float x, + float *W) { + int ind = fmin(x * blender_scale, blender_ivals); + float *coeffs = &blender_coeffs[ind * (blender_degree + 1)]; + float w = coeffs[0] * x + coeffs[1]; + for (int k = 2; k <= blender_degree; k++) w = x * w + coeffs[k]; + *W = w; +} + +/** + * @brief Computes the cubic spline blender and its derivative for a given + * distance x. + */ + +__attribute__((always_inline)) INLINE static void blender_deval(float x, + float *W, + float *dW_dx) { + int ind = fminf(x * blender_scale, blender_ivals); + float *coeffs = &blender_coeffs[ind * (blender_degree + 1)]; + float w = coeffs[0] * x + coeffs[1]; + float dw_dx = coeffs[0]; + for (int k = 2; k <= blender_degree; k++) { + dw_dx = dw_dx * x + w; + w = x * w + coeffs[k]; + } + *W = w; + *dW_dx = dw_dx; +} + +#ifdef VECTORIZE + +/** + * @brief Computes the cubic spline blender and its derivative for a given + * distance x (Vectorized version). Gives a sensible answer only if x<2. + */ + +__attribute__((always_inline)) INLINE static void blender_eval_vec(vector *x, + vector *w) { + + vector ind, c[blender_degree + 1]; + int j, k; + + /* Load x and get the interval id. */ + ind.m = vec_ftoi( + vec_fmin(x->v * vec_set1(blender_scale), vec_set1((float)blender_ivals))); + + /* load the coefficients. */ + for (k = 0; k < VEC_SIZE; k++) + for (j = 0; j < blender_degree + 1; j++) + c[j].f[k] = blender_coeffs[ind.i[k] * (blender_degree + 1) + j]; + + /* Init the iteration for Horner's scheme. */ + w->v = (c[0].v * x->v) + c[1].v; + + /* And we're off! */ + for (int k = 2; k <= blender_degree; k++) w->v = (x->v * w->v) + c[k].v; +} + +/** + * @brief Computes the cubic spline blender and its derivative for a given + * distance x (Vectorized version). Gives a sensible answer only if x<2. 
+ */ + +__attribute__((always_inline)) + INLINE static void blender_deval_vec(vector *x, vector *w, vector *dw_dx) { + + vector ind, c[blender_degree + 1]; + int j, k; + + /* Load x and get the interval id. */ + ind.m = vec_ftoi( + vec_fmin(x->v * vec_set1(blender_scale), vec_set1((float)blender_ivals))); + + /* load the coefficients. */ + for (k = 0; k < VEC_SIZE; k++) + for (j = 0; j < blender_degree + 1; j++) + c[j].f[k] = blender_coeffs[ind.i[k] * (blender_degree + 1) + j]; + + /* Init the iteration for Horner's scheme. */ + w->v = (c[0].v * x->v) + c[1].v; + dw_dx->v = c[0].v; + + /* And we're off! */ + for (int k = 2; k <= blender_degree; k++) { + dw_dx->v = (dw_dx->v * x->v) + w->v; + w->v = (x->v * w->v) + c[k].v; + } +} + +#endif + +void gravity_kernel_dump(float r_max, int N); + +#endif // SWIFT_KERNEL_GRAVITY_H diff --git a/src/kernel_hydro.c b/src/kernel_hydro.c new file mode 100644 index 0000000000000000000000000000000000000000..18a930d8ff7f792b2f9606787a6e4c547770629a --- /dev/null +++ b/src/kernel_hydro.c @@ -0,0 +1,49 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk), + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ + +#include <math.h> +#include <stdio.h> + +#include "kernel_hydro.h" + +/** + * @brief Test the SPH kernel function by dumping it in the interval [0,1]. + * + * @param N number of intervals in [0,1]. + */ +void hydro_kernel_dump(int N) { + + int k; + float x, w, dw_dx; + float x4[4] = {0.0f, 0.0f, 0.0f, 0.0f}; + float w4[4] = {0.0f, 0.0f, 0.0f, 0.0f}; + // float dw_dx4[4] __attribute__ ((aligned (16))); + + for (k = 0; k <= N; k++) { + x = ((float)k) / N; + x4[3] = x4[2]; + x4[2] = x4[1]; + x4[1] = x4[0]; + x4[0] = x; + kernel_deval(x, &w, &dw_dx); + // kernel_deval_vec( (vector *)x4 , (vector *)w4 , (vector *)dw_dx4 ); + printf(" %e %e %e %e %e %e %e\n", x, w, dw_dx, w4[0], w4[1], w4[2], w4[3]); + } +} diff --git a/src/kernel_hydro.h b/src/kernel_hydro.h new file mode 100644 index 0000000000000000000000000000000000000000..66f51391fb9504ba30363b1980aaad1fcc9174b7 --- /dev/null +++ b/src/kernel_hydro.h @@ -0,0 +1,218 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ +#ifndef SWIFT_KERNEL_HYDRO_H +#define SWIFT_KERNEL_HYDRO_H + +/* Includes. */ +#include "const.h" +#include "error.h" +#include "inline.h" +#include "vector.h" + +/* ------------------------------------------------------------------------- */ +#if defined(CUBIC_SPLINE_KERNEL) + +/* Coefficients for the kernel. */ +#define kernel_name "Cubic spline (M4)" +#define kernel_degree 3 /* Degree of the polynomial */ +#define kernel_ivals 2 /* Number of branches */ +#define kernel_gamma 1.825742 +#define kernel_constant 16. * M_1_PI +static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] + __attribute__((aligned(16))) = {3.f, -3.f, 0.f, 0.5f, /* 0 < u < 0.5 */ + -1.f, 3.f, -3.f, 1.f, /* 0.5 < u < 1 */ + 0.f, 0.f, 0.f, 0.f}; /* 1 < u */ + +/* ------------------------------------------------------------------------- */ +#elif defined(QUARTIC_SPLINE_KERNEL) + +/* Coefficients for the kernel. */ +#define kernel_name "Quartic spline (M5)" +#define kernel_degree 4 +#define kernel_ivals 5 +#define kernel_gamma 2.018932 +#define kernel_constant 15625. * M_1_PI / 512. +static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] + __attribute__((aligned(16))) = { + 6.f, 0.f, -2.4f, 0.f, 0.368f, /* 0 < u < 0.2 */ + -4.f, 8.f, -4.8f, 0.32f, 0.352f, /* 0.2 < u < 0.4 */ + -4.f, 8.f, -4.8f, 0.32f, 0.352f, /* 0.4 < u < 0.6 */ + 1.f, -4.f, 6.f, -4.f, 1.f, /* 0.6 < u < 0.8 */ + 1.f, -4.f, 6.f, -4.f, 1.f, /* 0.8 < u < 1 */ + 0.f, 0.f, 0.f, 0.f, 0.f}; /* 1 < u */ + +/* ------------------------------------------------------------------------- */ +#elif defined(QUINTIC_SPLINE_KERNEL) + +/* Coefficients for the kernel. */ +#define kernel_name "Quintic spline (M6)" +#define kernel_degree 5 +#define kernel_ivals 3 +#define kernel_gamma 2.195775 +#define kernel_constant 2187. * M_1_PI / 40. 
+static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] + __attribute__((aligned(16))) = { + -10.f, 10.f, 0.f, + -2.2222222f, 0.f, 0.271604938f, /* 0 < u < 1/3 */ + 5.f, -15.f, 16.666667f, + -7.77777777f, 0.925925f, 0.209876543f, /* 1/3 < u < 2/3 */ + -1.f, 5.f, -10.f, + 10.f, -5.f, 1.f, /* 2/3 < u < 1. */ + 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f}; /* 1 < u */ + +/* ------------------------------------------------------------------------- */ +#elif defined(WENDLAND_C2_KERNEL) + +/* Coefficients for the kernel. */ +#define kernel_name "Wendland C2" +#define kernel_degree 5 +#define kernel_ivals 1 +#define kernel_gamma 1.936492 +#define kernel_constant 21. * M_1_PI / 2. +static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] + __attribute__((aligned(16))) = { + 4.f, -15.f, 20.f, -10.f, 0.f, 1.f, /* 0 < u < 1 */ + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}; /* 1 < u */ + +/* ------------------------------------------------------------------------- */ +#elif defined(WENDLAND_C4_KERNEL) + +/* Coefficients for the kernel. */ +#define kernel_name "Wendland C4" +#define kernel_degree 8 +#define kernel_ivals 1 +#define kernel_gamma 2.207940 +#define kernel_constant 495. * M_1_PI / 32. +static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] + __attribute__((aligned(16))) = { + 11.666667f, -64.f, 140.f, -149.333333f, 70.f, + 0.f, -9.3333333f, 0.f, 1.f, /* 0 < u < 1 */ + 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f}; /* 1 < u */ + +/* ------------------------------------------------------------------------- */ +#elif defined(WENDLAND_C6_KERNEL) + +/* Coefficients for the kernel. */ +#define kernel_name "Wendland C6" +#define kernel_degree 11 +#define kernel_ivals 1 +#define kernel_gamma 2.449490 +#define kernel_constant 1365. * M_1_PI / 64. 
+static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] + __attribute__((aligned(16))) = { + 32.f, -231.f, 704.f, -1155.f, 1056.f, -462.f, + 0.f, 66.f, 0.f, -11.f, 0.f, 1.f, /* 0 < u < 1 */ + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}; /* 1 < u */ + +/* ------------------------------------------------------------------------- */ +#else + +#error "A kernel function must be chosen in const.h !!" + +/* ------------------------------------------------------------------------- */ +#endif + +/* Ok, now comes the real deal. */ + +/* First some powers of gamma = H/h */ +#define kernel_gamma2 kernel_gamma *kernel_gamma +#define kernel_gamma3 kernel_gamma2 *kernel_gamma +#define kernel_gamma4 kernel_gamma3 *kernel_gamma +#define kernel_igamma 1. / kernel_gamma +#define kernel_igamma2 kernel_igamma *kernel_igamma +#define kernel_igamma3 kernel_igamma2 *kernel_igamma +#define kernel_igamma4 kernel_igamma3 *kernel_igamma + +/* Some powers of eta */ +#define kernel_eta3 const_eta_kernel *const_eta_kernel *const_eta_kernel + +/* The number of neighbours (i.e. N_ngb) */ +#define kernel_nwneigh 4.0 * M_PI *kernel_gamma3 *kernel_eta3 / 3.0 + +/* Kernel self contribution (i.e. W(0,h)) */ +#define kernel_root \ + (kernel_coeffs[kernel_degree]) * kernel_constant *kernel_igamma3 + +/** + * @brief Computes the kernel function and its derivative. + * + * Return 0 if $u > \\gamma = H/h$ + * + * @param u The ratio of the distance to the smoothing length $u = x/h$. + * @param W (return) The value of the kernel function $W(x,h)$. + * @param dW_dx (return) The norm of the gradient of $|\\nabla W(x,h)|$. 
+ */ +__attribute__((always_inline)) INLINE static void kernel_deval( + float u, float *const W, float *const dW_dx) { + + /* Go to the range [0,1[ from [0,H[ */ + const float x = u * (float)kernel_igamma; + + /* Pick the correct branch of the kernel */ + const int ind = (int)fminf(x * (float)kernel_ivals, kernel_ivals); + const float *const coeffs = &kernel_coeffs[ind * (kernel_degree + 1)]; + + /* First two terms of the polynomial ... */ + float w = coeffs[0] * x + coeffs[1]; + float dw_dx = coeffs[0]; + + /* ... and the rest of them */ + for (int k = 2; k <= kernel_degree; k++) { + dw_dx = dw_dx * x + w; + w = x * w + coeffs[k]; + } + + /* Return everything */ + *W = w * (float)kernel_constant * (float)kernel_igamma3; + *dW_dx = dw_dx * (float)kernel_constant * (float)kernel_igamma4; +} + +/** + * @brief Computes the kernel function. + * + * @param u The ratio of the distance to the smoothing length $u = x/h$. + * @param W (return) The value of the kernel function $W(x,h)$. + */ +__attribute__((always_inline)) INLINE static void kernel_eval(float u, + float *const W) { + /* Go to the range [0,1[ from [0,H[ */ + const float x = u * (float)kernel_igamma; + + /* Pick the correct branch of the kernel */ + const int ind = (int)fminf(x * (float)kernel_ivals, kernel_ivals); + const float *const coeffs = &kernel_coeffs[ind * (kernel_degree + 1)]; + + /* First two terms of the polynomial ... */ + float w = coeffs[0] * x + coeffs[1]; + + /* ... and the rest of them */ + for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k]; + + /* Return everything */ + *W = w * (float)kernel_constant * (float)kernel_igamma3; +} + +/* Some cross-check functions */ +void hydro_kernel_dump(int N); + +#endif // SWIFT_KERNEL_HYDRO_H diff --git a/src/multipole.h b/src/multipole.h index 91ba6df965ce9d3b088d538411b7f0a8555ba0e4..85ba44d3ce95d958b721d435ccd26b72e30a79c1 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -25,7 +25,7 @@ /* Includes. 
*/ #include "const.h" #include "inline.h" -#include "kernel.h" +#include "kernel_gravity.h" #include "part.h" /* Some constants. */ @@ -127,7 +127,7 @@ __attribute__((always_inline)) INLINE static void multipole_iact_mp( /* Compute the forces on both multipoles. */ #if multipole_order == 1 - for (k = 0; k < 3; k++) p->a[k] += dx[k] * acc; + for (k = 0; k < 3; k++) p->a_grav[k] += dx[k] * acc; #else #error( "Multipoles of order %i not yet implemented." , multipole_order ) #endif diff --git a/src/parallel_io.c b/src/parallel_io.c index cffa99a0fd75566ec3e850076d15e104504eeb40..d1c739b59021f38b2259f82dd06c547e0e7c147d 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -178,9 +178,10 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, * * Calls #error() if an error occurs. */ -void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, - enum DATA_TYPE type, int N, int dim, long long N_total, - int mpi_rank, long long offset, char* part_c, +void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, + char* partTypeGroupName, char* name, enum DATA_TYPE type, + int N, int dim, long long N_total, int mpi_rank, + long long offset, char* part_c, size_t partSize, struct UnitSystem* us, enum UnitConversionFactor convFactor) { hid_t h_data = 0, h_err = 0, h_memspace = 0, h_filespace = 0, h_plist_id = 0; @@ -189,7 +190,6 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, int i = 0, rank = 0; const size_t typeSize = sizeOfType(type); const size_t copySize = typeSize * dim; - const size_t partSize = sizeof(struct part); char* temp_c = 0; char buffer[150]; @@ -269,14 +269,16 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, } /* Write XMF description for this data set */ - if (mpi_rank == 0) writeXMFline(xmfFile, fileName, name, N_total, dim, type); + if (mpi_rank == 0) + writeXMFline(xmfFile, fileName, partTypeGroupName, name, N_total, dim, + type); /* Write unit 
conversion factors for this data set */ - conversionString(buffer, us, convFactor); + units_conversion_string(buffer, us, convFactor); writeAttribute_d(h_data, "CGS conversion factor", - conversionFactor(us, convFactor)); - writeAttribute_f(h_data, "h-scale exponent", hFactor(us, convFactor)); - writeAttribute_f(h_data, "a-scale exponent", aFactor(us, convFactor)); + units_conversion_factor(us, convFactor)); + writeAttribute_f(h_data, "h-scale exponent", units_h_factor(us, convFactor)); + writeAttribute_f(h_data, "a-scale exponent", units_a_factor(us, convFactor)); writeAttribute_s(h_data, "Conversion factor", buffer); /* Free and close everything */ @@ -328,14 +330,16 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @param convFactor The UnitConversionFactor for this array * */ -#define writeArray(grp, fileName, xmfFile, name, type, N, dim, part, N_total, \ - mpi_rank, offset, field, us, convFactor) \ - writeArrayBackEnd(grp, fileName, xmfFile, name, type, N, dim, N_total, \ - mpi_rank, offset, (char*)(&(part[0]).field), us, \ - convFactor) +#define writeArray(grp, fileName, xmfFile, pTypeGroupName, name, type, N, dim, \ + part, N_total, mpi_rank, offset, field, us, convFactor) \ + writeArrayBackEnd(grp, fileName, xmfFile, pTypeGroupName, name, type, N, \ + dim, N_total, mpi_rank, offset, (char*)(&(part[0]).field), \ + sizeof(part[0]), us, convFactor) /* Import the right hydro definition */ #include "hydro_io.h" +/* Import the right gravity definition */ +#include "gravity_io.h" /** * @brief Reads an HDF5 initial condition file (GADGET-3 type) in parallel @@ -345,6 +349,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @param parts (output) The array of #part read from the file. * @param N (output) The number of particles read from the file. * @param periodic (output) 1 if the volume is periodic, 0 if not. + * @param dry_run If 1, don't read the particle. Only allocates the arrays. 
* * Opens the HDF5 file fileName and reads the particles contained * in the parts array. N is the returned number of particles found @@ -357,16 +362,17 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * */ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, - size_t* N, int* periodic, int mpi_rank, int mpi_size, - MPI_Comm comm, MPI_Info info) { + struct gpart** gparts, size_t* Ngas, size_t* Ngparts, + int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, + MPI_Info info, int dry_run) { hid_t h_file = 0, h_grp = 0; - double boxSize[3] = { - 0.0, -1.0, -1.0}; /* GADGET has only cubic boxes (in cosmological mode) */ - int numParticles[6] = { - 0}; /* GADGET has 6 particle types. We only keep the type 0*/ - int numParticles_highWord[6] = {0}; - long long offset = 0; - long long N_total = 0; + /* GADGET has only cubic boxes (in cosmological mode) */ + double boxSize[3] = {0.0, -1.0, -1.0}; + int numParticles[NUM_PARTICLE_TYPES] = {0}; + int numParticles_highWord[NUM_PARTICLE_TYPES] = {0}; + size_t N[NUM_PARTICLE_TYPES] = {0}; + long long N_total[NUM_PARTICLE_TYPES] = {0}; + long long offset[NUM_PARTICLE_TYPES] = {0}; /* Open file */ /* message("Opening file '%s' as IC.", fileName); */ @@ -398,58 +404,118 @@ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, readAttribute(h_grp, "NumPart_Total", UINT, numParticles); readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord); - N_total = ((long long)numParticles[0]) + - ((long long)numParticles_highWord[0] << 32); + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) + N_total[ptype] = ((long long)numParticles[ptype]) + + ((long long)numParticles_highWord[ptype] << 32); + dim[0] = boxSize[0]; dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1]; dim[2] = (boxSize[2] < 0) ? boxSize[0] : boxSize[2]; - /* message("Found %d particles in a %speriodic box of size [%f %f %f].", */ - /* N_total, (periodic ? 
"": "non-"), dim[0], dim[1], dim[2]); */ + /* message("Found %d particles in a %speriodic box of size + * [%f %f %f].", */ + /* N_total, (periodic ? "": "non-"), dim[0], + * dim[1], dim[2]); */ /* Divide the particles among the tasks. */ - offset = mpi_rank * N_total / mpi_size; - *N = (mpi_rank + 1) * N_total / mpi_size - offset; + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) { + offset[ptype] = mpi_rank * N_total[ptype] / mpi_size; + N[ptype] = (mpi_rank + 1) * N_total[ptype] / mpi_size - offset[ptype]; + } /* Close header */ H5Gclose(h_grp); - /* Allocate memory to store particles */ - if (posix_memalign((void*)parts, part_align, *N * sizeof(struct part)) != 0) + /* Allocate memory to store SPH particles */ + *Ngas = N[0]; + if (posix_memalign((void*)parts, part_align, (*Ngas) * sizeof(struct part)) != + 0) error("Error while allocating memory for particles"); - bzero(*parts, *N * sizeof(struct part)); + bzero(*parts, *Ngas * sizeof(struct part)); - /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) / + /* Allocate memory to store all particles */ + const size_t Ndm = N[1]; + *Ngparts = N[1] + N[0]; + if (posix_memalign((void*)gparts, gpart_align, + *Ngparts * sizeof(struct gpart)) != 0) + error( + "Error while allocating memory for gravity " + "particles"); + bzero(*gparts, *Ngparts * sizeof(struct gpart)); + + /* message("Allocated %8.2f MB for particles.", *N * + * sizeof(struct part) / * (1024.*1024.)); */ - /* Open SPH particles group */ - /* message("Reading particle arrays..."); */ - h_grp = H5Gopen(h_file, "/PartType0", H5P_DEFAULT); - if (h_grp < 0) error("Error while opening particle group.\n"); + /* message("BoxSize = %lf", dim[0]); */ + /* message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, + * *Ngparts); */ - /* Read particle fields into the particle structure */ - hydro_read_particles(h_grp, *N, N_total, offset, *parts); + /* Loop over all particle types */ + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; 
ptype++) { - /* Close particle group */ - H5Gclose(h_grp); + /* Don't do anything if no particle of this kind */ + if (N_total[ptype] == 0) continue; + + /* Open the particle group in the file */ + char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; + snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d", + ptype); + h_grp = H5Gopen(h_file, partTypeGroupName, H5P_DEFAULT); + if (h_grp < 0) { + error("Error while opening particle group %s.", partTypeGroupName); + } + + /* Read particle fields into the particle structure */ + switch (ptype) { + + case GAS: + if (!dry_run) + hydro_read_particles(h_grp, N[ptype], N_total[ptype], offset[ptype], + *parts); + break; + + case DM: + if (!dry_run) + darkmatter_read_particles(h_grp, N[ptype], N_total[ptype], + offset[ptype], *gparts); + break; + + default: + error("Particle Type %d not yet supported. Aborting", ptype); + } + + /* Close particle group */ + H5Gclose(h_grp); + } + + /* Prepare the DM particles */ + if (!dry_run) prepare_dm_gparts(*gparts, Ndm); + + /* Now duplicate the hydro particle into gparts */ + if (!dry_run) duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm); + + /* message("Done Reading particles..."); */ /* Close property handler */ H5Pclose(h_plist_id); /* Close file */ H5Fclose(h_file); - - /* message("Done Reading particles..."); */ } /** - * @brief Writes an HDF5 output file (GADGET-3 type) with its XMF descriptor + * @brief Writes an HDF5 output file (GADGET-3 type) with + *its XMF descriptor * * @param e The engine containing all the system. - * @param us The UnitSystem used for the conversion of units in the output + * @param us The UnitSystem used for the conversion of units + *in the output * - * Creates an HDF5 output file and writes the particles contained - * in the engine. If such a file already exists, it is erased and replaced + * Creates an HDF5 output file and writes the particles + *contained + * in the engine. 
If such a file already exists, it is + *erased and replaced * by the new one. * The companion XMF file is also updated accordingly. * @@ -459,23 +525,27 @@ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, void write_output_parallel(struct engine* e, struct UnitSystem* us, int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info) { - hid_t h_file = 0, h_grp = 0, h_grpsph = 0; - int N = e->s->nr_parts; + const size_t Ngas = e->s->nr_parts; + const size_t Ntot = e->s->nr_gparts; int periodic = e->s->periodic; - unsigned int numParticles[6] = {N, 0}; - unsigned int numParticlesHighWord[6] = {0}; - unsigned int flagEntropy[6] = {0}; - long long N_total = 0, offset = 0; - double offset_d = 0., N_d = 0., N_total_d = 0.; int numFiles = 1; struct part* parts = e->s->parts; - FILE* xmfFile = 0; + struct gpart* gparts = e->s->gparts; + struct gpart* dmparts = NULL; static int outputCount = 0; + FILE* xmfFile = 0; + + /* Number of particles of each type */ + // const size_t Ndm = Ntot - Ngas; + + /* MATTHIEU: Temporary fix to preserve master */ + const size_t Ndm = Ntot > 0 ? 
Ntot - Ngas : 0; + /* MATTHIEU: End temporary fix */ /* File name */ - char fileName[200]; - sprintf(fileName, "output_%03i.hdf5", outputCount); + char fileName[FILENAME_BUFFER_SIZE]; + snprintf(fileName, FILENAME_BUFFER_SIZE, "output_%03i.hdf5", outputCount); /* First time, we need to create the XMF file */ if (outputCount == 0 && mpi_rank == 0) createXMFfile(); @@ -491,21 +561,26 @@ void write_output_parallel(struct engine* e, struct UnitSystem* us, error("Error while opening file '%s'.", fileName); } - /* Compute offset in the file and total number of particles */ - /* Done using double to allow for up to 2^50=10^15 particles */ - N_d = (double)N; - MPI_Exscan(&N_d, &offset_d, 1, MPI_DOUBLE, MPI_SUM, comm); - N_total_d = offset_d + N_d; - MPI_Bcast(&N_total_d, 1, MPI_DOUBLE, mpi_size - 1, comm); - if (N_total_d > 1.e15) - error( - "Error while computing the offset for parallel output: Simulation has " - "more than 10^15 particles.\n"); - N_total = (long long)N_total_d; - offset = (long long)offset_d; + /* Compute offset in the file and total number of + * particles */ + size_t N[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0}; + long long N_total[NUM_PARTICLE_TYPES] = {0}; + long long offset[NUM_PARTICLE_TYPES] = {0}; + MPI_Exscan(&N, &offset, NUM_PARTICLE_TYPES, MPI_LONG_LONG, MPI_SUM, comm); + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) + N_total[ptype] = offset[ptype] + N[ptype]; + + /* The last rank now has the correct N_total. 
Let's + * broadcast from there */ + MPI_Bcast(&N_total, 6, MPI_LONG_LONG, mpi_size - 1, comm); - /* Write the part of the XMF file corresponding to this specific output */ - if (mpi_rank == 0) writeXMFheader(xmfFile, N_total, fileName, e->time); + /* Now everybody konws its offset and the total number of + * particles of each + * type */ + + /* Write the part of the XMF file corresponding to this + * specific output */ + if (mpi_rank == 0) writeXMFoutputheader(xmfFile, fileName, e->time); /* Open header to write simulation properties */ /* message("Writing runtime parameters..."); */ @@ -526,19 +601,28 @@ void write_output_parallel(struct engine* e, struct UnitSystem* us, /* Print the relevant information and print status */ writeAttribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3); - writeAttribute(h_grp, "NumPart_ThisFile", UINT, numParticles, 6); double dblTime = e->time; writeAttribute(h_grp, "Time", DOUBLE, &dblTime, 1); /* GADGET-2 legacy values */ - numParticles[0] = (unsigned int)N_total; - writeAttribute(h_grp, "NumPart_Total", UINT, numParticles, 6); - numParticlesHighWord[0] = (unsigned int)(N_total >> 32); + /* Number of particles of each type */ + unsigned int numParticles[NUM_PARTICLE_TYPES] = {0}; + unsigned int numParticlesHighWord[NUM_PARTICLE_TYPES] = {0}; + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) { + numParticles[ptype] = (unsigned int)N_total[ptype]; + numParticlesHighWord[ptype] = (unsigned int)(N_total[ptype] >> 32); + } + writeAttribute(h_grp, "NumPart_ThisFile", LONGLONG, N_total, + NUM_PARTICLE_TYPES); + writeAttribute(h_grp, "NumPart_Total", UINT, numParticles, + NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticlesHighWord, - 6); + NUM_PARTICLE_TYPES); double MassTable[6] = {0., 0., 0., 0., 0., 0.}; - writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, 6); - writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, 6); + writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, 
NUM_PARTICLE_TYPES); + unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0}; + writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, + NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1); /* Close header */ @@ -556,21 +640,71 @@ void write_output_parallel(struct engine* e, struct UnitSystem* us, /* Print the system of Units */ writeUnitSystem(h_file, us); - /* Create SPH particles group */ - /* message("Writing particle arrays..."); */ - h_grp = - H5Gcreate(h_file, "/PartType0", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (h_grp < 0) error("Error while creating particle group.\n"); + /* Loop over all particle types */ + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) { + + /* Don't do anything if no particle of this kind */ + if (N_total[ptype] == 0) continue; + + /* Add the global information for that particle type to + * the XMF meta-file */ + if (mpi_rank == 0) + writeXMFgroupheader(xmfFile, fileName, N_total[ptype], ptype); + + /* Open the particle group in the file */ + char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; + snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d", + ptype); + h_grp = H5Gcreate(h_file, partTypeGroupName, H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp < 0) { + error("Error while opening particle group %s.", partTypeGroupName); + } - /* Write particle fields from the particle structure */ - hydro_write_particles(h_grp, fileName, xmfFile, N, N_total, mpi_rank, offset, - parts, us); + /* Read particle fields into the particle structure */ + switch (ptype) { - /* Close particle group */ - H5Gclose(h_grp); + case GAS: + hydro_write_particles(h_grp, fileName, partTypeGroupName, xmfFile, + N[ptype], N_total[ptype], mpi_rank, offset[ptype], + parts, us); + + break; + + case DM: + /* Allocate temporary array */ + if (posix_memalign((void*)&dmparts, gpart_align, + Ndm * sizeof(struct gpart)) != 0) + error( + "Error while allocating temporart memory for " + "DM particles"); 
+ bzero(dmparts, Ndm * sizeof(struct gpart)); + + /* Collect the DM particles from gpart */ + collect_dm_gparts(gparts, Ntot, dmparts, Ndm); + + /* Write DM particles */ + darkmatter_write_particles(h_grp, fileName, partTypeGroupName, xmfFile, + N[ptype], N_total[ptype], mpi_rank, + offset[ptype], dmparts, us); + + /* Free temporary array */ + free(dmparts); + break; + + default: + error("Particle Type %d not yet supported. Aborting", ptype); + } + + /* Close particle group */ + H5Gclose(h_grp); + + /* Close this particle group in the XMF file as well */ + if (mpi_rank == 0) writeXMFgroupfooter(xmfFile, ptype); + } /* Write LXMF file descriptor */ - if (mpi_rank == 0) writeXMFfooter(xmfFile); + if (mpi_rank == 0) writeXMFoutputfooter(xmfFile, outputCount, e->time); /* message("Done writing particles..."); */ diff --git a/src/parallel_io.h b/src/parallel_io.h index a0589944ec845c712abde1e64e305980748db0e7..f3691cb29b8d5e7f17382f1f81ba230c3898a929 100644 --- a/src/parallel_io.h +++ b/src/parallel_io.h @@ -32,8 +32,9 @@ #if defined(HAVE_HDF5) && defined(WITH_MPI) && defined(HAVE_PARALLEL_HDF5) void read_ic_parallel(char* fileName, double dim[3], struct part** parts, - size_t* N, int* periodic, int mpi_rank, int mpi_size, - MPI_Comm comm, MPI_Info info); + struct gpart** gparts, size_t* Ngas, size_t* Ngparts, + int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, + MPI_Info info, int dry_run); void write_output_parallel(struct engine* e, struct UnitSystem* us, int mpi_rank, int mpi_size, MPI_Comm comm, diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000000000000000000000000000000000000..0f767bc434ef596df403fb12d3ae0f77ea546df3 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,493 @@ +/******************************************************************************* + * This file is part of SWIFT. 
+ * Copyright (c) 2016 James Willis (james.s.willis@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +/* Needs to be included so that strtok returns char * instead of a int *. */ +#include <string.h> +#include <stdlib.h> +#include <ctype.h> + +/* This object's header. */ +#include "parser.h" + +/* Local headers. */ +#include "error.h" + +#define PARSER_COMMENT_STRING "#" +#define PARSER_COMMENT_CHAR '#' +#define PARSER_VALUE_CHAR ':' +#define PARSER_VALUE_STRING ":" +#define PARSER_START_OF_FILE "---" +#define PARSER_END_OF_FILE "..." + +/* Private functions. */ +static int count_char(const char *str, char val); +static int is_empty(const char *str); +static int count_indentation(const char *str); +static void parse_line(char *line, struct swift_params *params); +static void parse_value(char *line, struct swift_params *params); +static void parse_section_param(char *line, int *isFirstParam, + char *sectionName, struct swift_params *params); + +static int lineNumber = 0; + +/** + * @brief Reads an input file and stores each parameter in a structure. 
+ * + * @param file_name Name of file to be read + * @param params Structure to be populated from file + */ + +void parser_read_file(const char *file_name, struct swift_params *params) { + /* Open file for reading */ + FILE *file = fopen(file_name, "r"); + + /* Line to parsed. */ + char line[PARSER_MAX_LINE_SIZE]; + + /* Initialise parameter count. */ + params->count = 0; + + /* Check if parameter file exits. */ + if (file == NULL) { + error("Error opening parameter file: %s", file_name); + } + + /* Read until the end of the file is reached.*/ + while (!feof(file)) { + if (fgets(line, PARSER_MAX_LINE_SIZE, file) != NULL) { + lineNumber++; + parse_line(line, params); + } + } + + fclose(file); +} + +/** + * @brief Counts the number of times a specific character appears in a string. + * + * @param str String to be checked + * @param val Character to be counted + * + * @return Number of occurrences of val inside str + */ + +static int count_char(const char *str, char val) { + int count = 0; + + /* Check if the line contains the character */ + while (*str) { + if (*str++ == val) ++count; + } + + return count; +} + +/** + * @brief Counts the number of white spaces that prefix a string. + * + * @param str String to be checked + * + * @return Number of white spaces prefixing str + */ + +static int count_indentation(const char *str) { + int count = 0; + + /* Check if the line contains the character */ + while (*(++str) == ' ') { + count++; + } + return count; +} + +/** + * @brief Checks if a string is empty. + * + * @param str String to be checked + * + * @return Returns 1 if str is empty, 0 otherwise + */ + +static int is_empty(const char *str) { + int retParam = 1; + while (*str != '\0') { + if (!isspace(*str)) { + retParam = 0; + break; + } + str++; + } + + return retParam; +} + +/** + * @brief Parses a line from a file and stores any parameters in a structure. + * + * @param line Line to be parsed. + * @param params Structure to be populated from file. 
+ */ +static void parse_line(char *line, struct swift_params *params) { + /* Parse line if it doesn't begin with a comment. */ + if (*line != PARSER_COMMENT_CHAR) { + char trim_line[PARSER_MAX_LINE_SIZE]; + char tmp_str[PARSER_MAX_LINE_SIZE]; + char *token; + + /* Remove comments at the end of a line. */ + token = strtok(line, PARSER_COMMENT_STRING); + strcpy(tmp_str, token); + + /* Check if the line is just white space. */ + if (!is_empty(tmp_str)) { + /* Trim '\n' characters from string. */ + token = strtok(tmp_str, "\n"); + strcpy(trim_line, token); + + /* Check if the line contains a value and parse it. */ + if (strchr(trim_line, PARSER_VALUE_CHAR)) { + parse_value(trim_line, params); + } + /* Check for invalid lines,not including the start and end of file. */ + /* Note: strcmp returns 0 if both strings are the same.*/ + else if (strcmp(trim_line, PARSER_START_OF_FILE) && + strcmp(trim_line, PARSER_END_OF_FILE)) { + error("Invalid line:%d '%s'.", lineNumber, trim_line); + } + } + } +} + +/** + * @brief Performs error checking and stores a parameter in a structure. + * + * @param line Line containing the parameter + * @param params Structure to be written to + * + */ + +static void parse_value(char *line, struct swift_params *params) { + static int inSection = 0; + static char section[PARSER_MAX_LINE_SIZE]; /* Keeps track of current section + name. */ + static int isFirstParam = 1; + char tmpStr[PARSER_MAX_LINE_SIZE]; + + char *token; + + /* Check for more than one value on the same line. */ + if (count_char(line, PARSER_VALUE_CHAR) > 1) { + error("Inavlid line:%d '%s', only one value allowed per line.", lineNumber, + line); + } + + /* Check that standalone parameters have correct indentation. 
*/ + if (!inSection && *line == ' ') { + error( + "Invalid line:%d '%s', standalone parameter defined with incorrect " + "indentation.", + lineNumber, line); + } + + /* Check that it is a parameter inside a section.*/ + if (*line == ' ' || *line == '\t') { + parse_section_param(line, &isFirstParam, section, params); + } else {/*Else it is the start of a new section or standalone parameter. */ + /* Take first token as the parameter name. */ + token = strtok(line, " :\t"); + strcpy(tmpStr, token); + + /* Take second token as the parameter value. */ + token = strtok(NULL, " #\n"); + + /* If second token is NULL then the line must be a section heading. */ + if (token == NULL) { + strcat(tmpStr, PARSER_VALUE_STRING); + strcpy(section, tmpStr); + inSection = 1; + isFirstParam = 1; + } else { + /* Must be a standalone parameter so no need to prefix name with a + * section. */ + strcpy(params->data[params->count].name, tmpStr); + strcpy(params->data[params->count++].value, token); + inSection = 0; + isFirstParam = 1; + } + } +} + +/** + * @brief Parses a parameter that appears in a section and stores it in a + *structure. + * + * @param line Line containing the parameter + * @param isFirstParam Shows if the first parameter of a section has been found + * @param sectionName String containing the current section name + * @param params Structure to be written to + * + */ + +static void parse_section_param(char *line, int *isFirstParam, + char *sectionName, + struct swift_params *params) { + static int sectionIndent = 0; + char tmpStr[PARSER_MAX_LINE_SIZE]; + char paramName[PARSER_MAX_LINE_SIZE]; + char *token; + + /* Count indentation of each parameter and check that it + * is consistent with the first parameter in the section. 
*/ + if (*isFirstParam) { + sectionIndent = count_indentation(line); + *isFirstParam = 0; + } else if (count_indentation(line) != sectionIndent) { + error("Invalid line:%d '%s', parameter has incorrect indentation.", + lineNumber, line); + } + + /* Take first token as the parameter name and trim leading white space. */ + token = strtok(line, " :\t"); + strcpy(tmpStr, token); + + /* Take second token as the parameter value. */ + token = strtok(NULL, " #\n"); + + /* Prefix the parameter name with its section name and + * copy it into the parameter structure. */ + strcpy(paramName, sectionName); + strcat(paramName, tmpStr); + strcpy(params->data[params->count].name, paramName); + strcpy(params->data[params->count++].value, token); +} + +/** + * @brief Retrieve integer parameter from structure. + * + * @param params Structure that holds the parameters + * @param name Name of the parameter to be found + * @return Value of the parameter found + */ +int parser_get_param_int(const struct swift_params *params, const char *name) { + + char str[PARSER_MAX_LINE_SIZE]; + int retParam = 0; + + for (int i = 0; i < params->count; i++) { + /*strcmp returns 0 if both strings are the same.*/ + if (!strcmp(name, params->data[i].name)) { + /* Check that exactly one number is parsed. */ + if (sscanf(params->data[i].value, "%d%s", &retParam, str) != 1) { + error( + "Tried parsing int '%s' but found '%s' with illegal integer " + "characters '%s'.", + params->data[i].name, params->data[i].value, str); + } + + return retParam; + } + } + + error("Cannot find '%s' in the structure.", name); + return 0; +} + +/** + * @brief Retrieve char parameter from structure. 
+ * + * @param params Structure that holds the parameters + * @param name Name of the parameter to be found + * @return Value of the parameter found + */ +char parser_get_param_char(const struct swift_params *params, + const char *name) { + + char str[PARSER_MAX_LINE_SIZE]; + char retParam = 0; + + for (int i = 0; i < params->count; i++) { + /*strcmp returns 0 if both strings are the same.*/ + if (!strcmp(name, params->data[i].name)) { + /* Check that exactly one number is parsed. */ + if (sscanf(params->data[i].value, "%c%s", &retParam, str) != 1) { + error( + "Tried parsing char '%s' but found '%s' with illegal char " + "characters '%s'.", + params->data[i].name, params->data[i].value, str); + } + + return retParam; + } + } + + error("Cannot find '%s' in the structure.", name); + return 0; +} + +/** + * @brief Retrieve float parameter from structure. + * + * @param params Structure that holds the parameters + * @param name Name of the parameter to be found + * @return Value of the parameter found + */ +float parser_get_param_float(const struct swift_params *params, + const char *name) { + + char str[PARSER_MAX_LINE_SIZE]; + float retParam = 0.f; + + for (int i = 0; i < params->count; i++) { + /*strcmp returns 0 if both strings are the same.*/ + if (!strcmp(name, params->data[i].name)) { + /* Check that exactly one number is parsed. */ + if (sscanf(params->data[i].value, "%f%s", &retParam, str) != 1) { + error( + "Tried parsing float '%s' but found '%s' with illegal float " + "characters '%s'.", + params->data[i].name, params->data[i].value, str); + } + + return retParam; + } + } + + error("Cannot find '%s' in the structure.", name); + return 0.f; +} + +/** + * @brief Retrieve double parameter from structure. 
+ * + * @param params Structure that holds the parameters + * @param name Name of the parameter to be found + * @return Value of the parameter found + */ +double parser_get_param_double(const struct swift_params *params, + const char *name) { + + char str[PARSER_MAX_LINE_SIZE]; + double retParam = 0.; + + for (int i = 0; i < params->count; i++) { + /*strcmp returns 0 if both strings are the same.*/ + if (!strcmp(name, params->data[i].name)) { + /* Check that exactly one number is parsed. */ + if (sscanf(params->data[i].value, "%lf%s", &retParam, str) != 1) { + error( + "Tried parsing double '%s' but found '%s' with illegal double " + "characters '%s'.", + params->data[i].name, params->data[i].value, str); + } + return retParam; + } + } + + error("Cannot find '%s' in the structure.", name); + return 0.; +} + +/** + * @brief Retrieve string parameter from structure. + * + * @param params Structure that holds the parameters + * @param name Name of the parameter to be found + * @param retParam (return) Value of the parameter found + */ +void parser_get_param_string(const struct swift_params *params, + const char *name, char *retParam) { + for (int i = 0; i < params->count; i++) { + /*strcmp returns 0 if both strings are the same.*/ + if (!strcmp(name, params->data[i].name)) { + strcpy(retParam, params->data[i].value); + return; + } + } + + error("Cannot find '%s' in the structure.", name); +} + +/** + * @brief Prints the contents of the parameter structure. + * + * @param params Structure that holds the parameters + */ +void parser_print_params(const struct swift_params *params) { + printf("\n--------------------------\n"); + printf("| SWIFT Parameter File |\n"); + printf("--------------------------\n"); + + for (int i = 0; i < params->count; i++) { + printf("Parameter name: %s\n", params->data[i].name); + printf("Parameter value: %s\n", params->data[i].value); + } +} + +/** + * @brief Write the contents of the parameter structure to a file in YAML + *format. 
+ * + * @param params Structure that holds the parameters + * @param file_name Name of file to be written + */ +void parser_write_params_to_file(const struct swift_params *params, + const char *file_name) { + FILE *file = fopen(file_name, "w"); + char section[PARSER_MAX_LINE_SIZE]; + char param_name[PARSER_MAX_LINE_SIZE]; + char *token; + + /* Start of file identifier in YAML. */ + fprintf(file, "%s\n", PARSER_START_OF_FILE); + + for (int i = 0; i < params->count; i++) { + /* Check that the parameter name contains a section name. */ + if (strchr(params->data[i].name, PARSER_VALUE_CHAR)) { + /* Copy the parameter name into a temporary string and find the section + * name. */ + strcpy(param_name, params->data[i].name); + token = strtok(param_name, PARSER_VALUE_STRING); + + /* If a new section name is found print it to the file. */ + if (strcmp(token, section)) { + strcpy(section, token); + fprintf(file, "\n%s%c\n", section, PARSER_VALUE_CHAR); + } + + /* Remove white space from parameter name and write it to the file. */ + token = strtok(NULL, " #\n"); + + fprintf(file, "\t%s%c %s\n", token, PARSER_VALUE_CHAR, + params->data[i].value); + } else { + fprintf(file, "\n%s%c %s\n", params->data[i].name, PARSER_VALUE_CHAR, + params->data[i].value); + } + } + + /* End of file identifier in YAML. */ + fprintf(file, PARSER_END_OF_FILE); + + fclose(file); +} diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000000000000000000000000000000000000..7b2088ae12cdd5136a96baeabd01dd80255c8a3b --- /dev/null +++ b/src/parser.h @@ -0,0 +1,55 @@ +/******************************************************************************* + * This file is part of SWIFT. 
+ * Copyright (c) 2016 James Willis (james.s.willis@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_PARSER_H +#define SWIFT_PARSER_H + +/* Config parameters. */ +#include "../config.h" + +/* Some constants. */ +#define PARSER_MAX_LINE_SIZE 256 +#define PARSER_MAX_NO_OF_PARAMS 512 + +/* A parameter in the input file */ +struct parameter { + char name[PARSER_MAX_LINE_SIZE]; + char value[PARSER_MAX_LINE_SIZE]; +}; + +/* The array of parameters read from a file */ +struct swift_params { + struct parameter data[PARSER_MAX_NO_OF_PARAMS]; + int count; +}; + +/* Public API. 
*/ +void parser_read_file(const char *file_name, struct swift_params *params); +void parser_print_params(const struct swift_params *params); +void parser_write_params_to_file(const struct swift_params *params, + const char *file_name); +char parser_get_param_char(const struct swift_params *params, const char *name); +int parser_get_param_int(const struct swift_params *params, const char *name); +float parser_get_param_float(const struct swift_params *params, + const char *name); +double parser_get_param_double(const struct swift_params *params, + const char *name); +void parser_get_param_string(const struct swift_params *params, + const char *name, char *retParam); + +#endif /* SWIFT_PARSER_H */ diff --git a/src/part.c b/src/part.c index 6a99325ef23a7062fafb387fa3f3bd6b2203d057..b89abdde40fe8c7a57d1e9ac9e18fece83ba1f21 100644 --- a/src/part.c +++ b/src/part.c @@ -26,33 +26,21 @@ #endif /* This object's header. */ +#include "error.h" #include "part.h" #ifdef WITH_MPI -/** - * @brief Registers and returns an MPI type for the particles - * - * @param part_type The type container - */ -void part_create_mpi_type(MPI_Datatype* part_type) { - - /* This is not the recommended way of doing this. - One should define the structure field by field - But as long as we don't do serialization via MPI-IO - we don't really care. - Also we would have to modify this function everytime something - is added to the part structure. */ - MPI_Type_contiguous(sizeof(struct part) / sizeof(unsigned char), MPI_BYTE, - part_type); - MPI_Type_commit(part_type); -} +/* MPI data type for the particle transfers */ +MPI_Datatype part_mpi_type; +MPI_Datatype xpart_mpi_type; +MPI_Datatype gpart_mpi_type; +#endif +#ifdef WITH_MPI /** - * @brief Registers and returns an MPI type for the xparticles - * - * @param xpart_type The type container + * @brief Registers MPI particle types. 
*/ -void xpart_create_mpi_type(MPI_Datatype* xpart_type) { +void part_create_mpi_types() { /* This is not the recommended way of doing this. One should define the structure field by field @@ -60,9 +48,20 @@ void xpart_create_mpi_type(MPI_Datatype* xpart_type) { we don't really care. Also we would have to modify this function everytime something is added to the part structure. */ - MPI_Type_contiguous(sizeof(struct xpart) / sizeof(unsigned char), MPI_BYTE, - xpart_type); - MPI_Type_commit(xpart_type); + if (MPI_Type_contiguous(sizeof(struct part) / sizeof(unsigned char), MPI_BYTE, + &part_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&part_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for parts."); + } + if (MPI_Type_contiguous(sizeof(struct xpart) / sizeof(unsigned char), + MPI_BYTE, &xpart_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&xpart_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for xparts."); + } + if (MPI_Type_contiguous(sizeof(struct gpart) / sizeof(unsigned char), + MPI_BYTE, &gpart_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&gpart_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for gparts."); + } } - #endif diff --git a/src/part.h b/src/part.h index 865403e8c2c157dc5a8ff7a32bc41be676d7919b..5d4c9c88a1acadea3d23a3df618c04da389fb61d 100644 --- a/src/part.h +++ b/src/part.h @@ -35,8 +35,8 @@ /* Some constants. 
*/ #define part_align 64 -#define gpart_align 32 #define xpart_align 32 +#define gpart_align 32 /* Import the right particle definition */ #if defined(MINIMAL_SPH) @@ -52,8 +52,12 @@ #include "./gravity/Default/gravity_part.h" #ifdef WITH_MPI -void part_create_mpi_type(MPI_Datatype* part_type); -void xpart_create_mpi_type(MPI_Datatype* xpart_type); +/* MPI data type for the particle transfers */ +extern MPI_Datatype part_mpi_type; +extern MPI_Datatype xpart_mpi_type; +extern MPI_Datatype gpart_mpi_type; + +void part_create_mpi_types(); #endif #endif /* SWIFT_PART_H */ diff --git a/src/partition.c b/src/partition.c index 7dbbb9552e603adee45097a379200f1493ce3349..1e5202df7df45fab9182ad625ae145e6fd221ebd 100644 --- a/src/partition.c +++ b/src/partition.c @@ -35,7 +35,7 @@ #include <stdio.h> #include <stdlib.h> #include <strings.h> -#include <values.h> +#include <float.h> /* MPI headers. */ #ifdef WITH_MPI @@ -52,6 +52,7 @@ #include "error.h" #include "partition.h" #include "space.h" +#include "tools.h" /* Maximum weight used for METIS. */ #define metis_maxweight 10000.0f @@ -424,7 +425,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, * assume the same graph structure as used in the part_ calls). */ int nr_cells = s->nr_cells; struct cell *cells = s->cells; - float wscale = 1e-3, vscale = 1e-3, wscale_buff; + float wscale = 1e-3, vscale = 1e-3, wscale_buff = 0.0; int wtot = 0; int wmax = 1e9 / nr_nodes; int wmin; @@ -659,6 +660,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, split_metis(s, nr_nodes, celllist); /* Clean up. */ + free(inds); if (bothweights) free(weights_v); free(weights_e); free(celllist); @@ -907,6 +909,111 @@ void partition_initial_partition(struct partition *initial_partition, } } +/** + * @brief Initialises the partition and re-partition scheme from the parameter + *file + * + * @param partition The #partition scheme to initialise. 
+ * @param reparttype The repartition scheme to initialise. + * @param params The parsed parameter file. + * @param nr_nodes The number of MPI nodes we are running on. + */ +void partition_init(struct partition *partition, + enum repartition_type *reparttype, + const struct swift_params *params, int nr_nodes) { + +#ifdef WITH_MPI + +/* Defaults make use of METIS if available */ +#ifdef HAVE_METIS + *reparttype = REPART_METIS_BOTH; + partition->type = INITPART_METIS_NOWEIGHT; +#else + *reparttype = REPART_NONE; + partition->type = INITPART_GRID; +#endif + + /* Set a default grid so that grid[0]*grid[1]*grid[2] == nr_nodes. */ + factor(nr_nodes, &partition->grid[0], &partition->grid[1]); + factor(nr_nodes / partition->grid[1], &partition->grid[0], + &partition->grid[2]); + factor(partition->grid[0] * partition->grid[1], &partition->grid[1], + &partition->grid[0]); + + /* Now let's check what the user wants as an initial domain*/ + const char part_type = + parser_get_param_char(params, "DomainDecomposition:initial_type"); + + switch (part_type) { + case 'g': + partition->type = INITPART_GRID; + break; + case 'v': + partition->type = INITPART_VECTORIZE; + break; +#ifdef HAVE_METIS + case 'm': + partition->type = INITPART_METIS_NOWEIGHT; + break; + case 'w': + partition->type = INITPART_METIS_WEIGHT; + break; + default: + message("Invalid choice of initial partition type '%c'.", part_type); + error("Permitted values are: 'g','m','v' or 'w'."); +#else + default: + message("Invalid choice of initial partition type '%c'.", part_type); + error("Permitted values are: 'g' or 'v' when compiled without metis."); +#endif + } + + /* In case of grid, read more parameters */ + if (part_type == 'g') { + partition->grid[0] = + parser_get_param_int(params, "DomainDecomposition:initial_grid_x"); + partition->grid[1] = + parser_get_param_int(params, "DomainDecomposition:initial_grid_y"); + partition->grid[2] = + parser_get_param_int(params, "DomainDecomposition:initial_grid_z"); + } + + 
/* Now let's check what the user wants as a repartition strategy */ + const char repart_type = + parser_get_param_char(params, "DomainDecomposition:repartition_type"); + + switch (repart_type) { + case 'n': + *reparttype = REPART_NONE; + break; +#ifdef HAVE_METIS + case 'b': + *reparttype = REPART_METIS_BOTH; + break; + case 'e': + *reparttype = REPART_METIS_EDGE; + break; + case 'v': + *reparttype = REPART_METIS_VERTEX; + break; + case 'x': + *reparttype = REPART_METIS_VERTEX_EDGE; + break; + default: + message("Invalid choice of re-partition type '%c'.", repart_type); + error("Permitted values are: 'b','e','n', 'v' or 'x'."); +#else + default: + message("Invalid choice of re-partition type '%c'.", repart_type); + error("Permitted values are: 'n' when compiled without metis."); +#endif + } + +#else + error("SWIFT was not compiled with MPI support"); +#endif +} + /* General support */ /* =============== */ diff --git a/src/partition.h b/src/partition.h index 08906c765bbcf71b084829502e632e3159ebd0bf..b2a132ed48e48573949d16291f72218990589158 100644 --- a/src/partition.h +++ b/src/partition.h @@ -19,6 +19,7 @@ #ifndef SWIFT_PARTITION_H #define SWIFT_PARTITION_H +#include "parser.h" #include "space.h" #include "task.h" @@ -58,4 +59,8 @@ void partition_initial_partition(struct partition *initial_partition, int partition_space_to_space(double *oldh, double *oldcdim, int *oldnodeID, struct space *s); +void partition_init(struct partition *partition, + enum repartition_type *reparttypestruct, + const struct swift_params *params, int nr_nodes); + #endif /* SWIFT_PARTITION_H */ diff --git a/src/proxy.c b/src/proxy.c index 7d2e546bf945ca18c2195ea2801d1b2058cb2f58..02263a5653bdcdd2d1bf0a86523ed1a599d4bf21 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -50,11 +50,9 @@ void proxy_cells_exch1(struct proxy *p) { #ifdef WITH_MPI - int k, ind; - /* Get the number of pcells we will need to send. 
*/ p->size_pcells_out = 0; - for (k = 0; k < p->nr_cells_out; k++) + for (int k = 0; k < p->nr_cells_out; k++) p->size_pcells_out += p->cells_out[k]->pcell_size; /* Send the number of pcells. */ @@ -70,7 +68,7 @@ void proxy_cells_exch1(struct proxy *p) { if ((p->pcells_out = malloc(sizeof(struct pcell) * p->size_pcells_out)) == NULL) error("Failed to allocate pcell_out buffer."); - for (ind = 0, k = 0; k < p->nr_cells_out; k++) { + for (int ind = 0, k = 0; k < p->nr_cells_out; k++) { memcpy(&p->pcells_out[ind], p->cells_out[k]->pcell, sizeof(struct pcell) * p->cells_out[k]->pcell_size); ind += p->cells_out[k]->pcell_size; @@ -131,16 +129,14 @@ void proxy_cells_exch2(struct proxy *p) { void proxy_addcell_in(struct proxy *p, struct cell *c) { - int k; - struct cell **temp; - /* Check if the cell is already registered with the proxy. */ - for (k = 0; k < p->nr_cells_in; k++) + for (int k = 0; k < p->nr_cells_in; k++) if (p->cells_in[k] == c) return; /* Do we need to grow the number of in cells? */ if (p->nr_cells_in == p->size_cells_in) { p->size_cells_in *= proxy_buffgrow; + struct cell **temp; if ((temp = malloc(sizeof(struct cell *) * p->size_cells_in)) == NULL) error("Failed to allocate incoming cell list."); memcpy(temp, p->cells_in, sizeof(struct cell *) * p->nr_cells_in); @@ -162,16 +158,14 @@ void proxy_addcell_in(struct proxy *p, struct cell *c) { void proxy_addcell_out(struct proxy *p, struct cell *c) { - int k; - struct cell **temp; - /* Check if the cell is already registered with the proxy. */ - for (k = 0; k < p->nr_cells_out; k++) + for (int k = 0; k < p->nr_cells_out; k++) if (p->cells_out[k] == c) return; /* Do we need to grow the number of out cells? 
*/ if (p->nr_cells_out == p->size_cells_out) { p->size_cells_out *= proxy_buffgrow; + struct cell **temp; if ((temp = malloc(sizeof(struct cell *) * p->size_cells_out)) == NULL) error("Failed to allocate outgoing cell list."); memcpy(temp, p->cells_out, sizeof(struct cell *) * p->nr_cells_out); @@ -195,20 +189,21 @@ void proxy_parts_exch1(struct proxy *p) { #ifdef WITH_MPI /* Send the number of particles. */ - if (MPI_Isend(&p->nr_parts_out, 1, MPI_INT, p->nodeID, + p->buff_out[0] = p->nr_parts_out; + p->buff_out[1] = p->nr_gparts_out; + if (MPI_Isend(p->buff_out, 2, MPI_INT, p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD, &p->req_parts_count_out) != MPI_SUCCESS) error("Failed to isend nr of parts."); - // message( "isent particle count (%i) from node %i to node %i." , - // p->nr_parts_out , p->mynodeID , p->nodeID ); fflush(stdout); + /* message( "isent particle counts [%i, %i] from node %i to node %i." , + p->buff_out[0], p->buff_out[1], p->mynodeID , p->nodeID ); fflush(stdout); */ /* Send the particle buffers. 
*/ if (p->nr_parts_out > 0) { - if (MPI_Isend(p->parts_out, sizeof(struct part) * p->nr_parts_out, MPI_BYTE, - p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_parts, + if (MPI_Isend(p->parts_out, p->nr_parts_out, part_mpi_type, p->nodeID, + p->mynodeID * proxy_tag_shift + proxy_tag_parts, MPI_COMM_WORLD, &p->req_parts_out) != MPI_SUCCESS || - MPI_Isend(p->xparts_out, sizeof(struct xpart) * p->nr_parts_out, - MPI_BYTE, p->nodeID, + MPI_Isend(p->xparts_out, p->nr_parts_out, xpart_mpi_type, p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_xparts, MPI_COMM_WORLD, &p->req_xparts_out) != MPI_SUCCESS) error("Failed to isend part data."); @@ -219,14 +214,20 @@ void proxy_parts_exch1(struct proxy *p) { p->parts_out[k].id, p->parts_out[k].x[0], p->parts_out[k].x[1], p->parts_out[k].x[2], p->parts_out[k].h, p->nodeID);*/ } + if (p->nr_gparts_out > 0) { + if (MPI_Isend(p->gparts_out, p->nr_gparts_out, gpart_mpi_type, p->nodeID, + p->mynodeID * proxy_tag_shift + proxy_tag_gparts, + MPI_COMM_WORLD, &p->req_gparts_out) != MPI_SUCCESS) + error("Failed to isend part data."); + // message( "isent gpart data (%i) to node %i." , p->nr_parts_out , + // p->nodeID ); fflush(stdout); + } /* Receive the number of particles. */ - if (MPI_Irecv(&p->nr_parts_in, 1, MPI_INT, p->nodeID, + if (MPI_Irecv(p->buff_in, 2, MPI_INT, p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD, &p->req_parts_count_in) != MPI_SUCCESS) error("Failed to irecv nr of parts."); -// message( "irecv particle count on node %i from node %i." , p->mynodeID , -// p->nodeID ); fflush(stdout); #else error("SWIFT was not compiled with MPI support."); @@ -237,6 +238,10 @@ void proxy_parts_exch2(struct proxy *p) { #ifdef WITH_MPI + /* Unpack the incoming parts counts. */ + p->nr_parts_in = p->buff_in[0]; + p->nr_gparts_in = p->buff_in[1]; + /* Is there enough space in the buffer? 
*/ if (p->nr_parts_in > p->size_parts_in) { do { @@ -250,19 +255,36 @@ void proxy_parts_exch2(struct proxy *p) { p->size_parts_in)) == NULL) error("Failed to re-allocate parts_in buffers."); } + if (p->nr_gparts_in > p->size_gparts_in) { + do { + p->size_gparts_in *= proxy_buffgrow; + } while (p->nr_gparts_in > p->size_gparts_in); + free(p->gparts_in); + if ((p->gparts_in = (struct gpart *)malloc(sizeof(struct gpart) * + p->size_gparts_in)) == NULL) + error("Failed to re-allocate gparts_in buffers."); + } /* Receive the particle buffers. */ if (p->nr_parts_in > 0) { - if (MPI_Irecv(p->parts_in, sizeof(struct part) * p->nr_parts_in, MPI_BYTE, - p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_parts, - MPI_COMM_WORLD, &p->req_parts_in) != MPI_SUCCESS || - MPI_Irecv(p->xparts_in, sizeof(struct xpart) * p->nr_parts_in, MPI_BYTE, - p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_xparts, + if (MPI_Irecv(p->parts_in, p->nr_parts_in, part_mpi_type, p->nodeID, + p->nodeID * proxy_tag_shift + proxy_tag_parts, MPI_COMM_WORLD, + &p->req_parts_in) != MPI_SUCCESS || + MPI_Irecv(p->xparts_in, p->nr_parts_in, xpart_mpi_type, p->nodeID, + p->nodeID * proxy_tag_shift + proxy_tag_xparts, MPI_COMM_WORLD, &p->req_xparts_in) != MPI_SUCCESS) error("Failed to irecv part data."); // message( "irecv particle data (%i) from node %i." , p->nr_parts_in , // p->nodeID ); fflush(stdout); } + if (p->nr_gparts_in > 0) { + if (MPI_Irecv(p->gparts_in, p->nr_gparts_in, gpart_mpi_type, p->nodeID, + p->nodeID * proxy_tag_shift + proxy_tag_gparts, + MPI_COMM_WORLD, &p->req_gparts_in) != MPI_SUCCESS) + error("Failed to irecv gpart data."); + // message( "irecv gpart data (%i) from node %i." , p->nr_gparts_in , + // p->nodeID ); fflush(stdout); + } #else error("SWIFT was not compiled with MPI support."); @@ -278,8 +300,8 @@ void proxy_parts_exch2(struct proxy *p) { * @param N The number of parts. 
*/ -void proxy_parts_load(struct proxy *p, struct part *parts, struct xpart *xparts, - int N) { +void proxy_parts_load(struct proxy *p, const struct part *parts, + const struct xpart *xparts, int N) { /* Is there enough space in the buffer? */ if (p->nr_parts_out + N > p->size_parts_out) { @@ -309,6 +331,37 @@ void proxy_parts_load(struct proxy *p, struct part *parts, struct xpart *xparts, p->nr_parts_out += N; } +/** + * @brief Load gparts onto a proxy for exchange. + * + * @param p The #proxy. + * @param gparts Pointer to an array of #gpart to send. + * @param N The number of gparts. + */ + +void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N) { + + /* Is there enough space in the buffer? */ + if (p->nr_gparts_out + N > p->size_gparts_out) { + do { + p->size_gparts_out *= proxy_buffgrow; + } while (p->nr_gparts_out + N > p->size_gparts_out); + struct gpart *tp; + if ((tp = (struct gpart *)malloc(sizeof(struct gpart) * + p->size_gparts_out)) == NULL) + error("Failed to re-allocate gparts_out buffers."); + memcpy(tp, p->gparts_out, sizeof(struct gpart) * p->nr_gparts_out); + free(p->gparts_out); + p->gparts_out = tp; + } + + /* Copy the gparts data to the buffer. */ + memcpy(&p->gparts_out[p->nr_gparts_out], gparts, sizeof(struct gpart) * N); + + /* Increase the counters. */ + p->nr_gparts_out += N; +} + /** * @brief Initialize the given proxy. * @@ -358,4 +411,20 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) { error("Failed to allocate parts_out buffers."); } p->nr_parts_out = 0; + + /* Allocate the gpart send and receive buffers, if needed. 
*/ + if (p->gparts_in == NULL) { + p->size_gparts_in = proxy_buffinit; + if ((p->gparts_in = (struct gpart *)malloc(sizeof(struct gpart) * + p->size_gparts_in)) == NULL) + error("Failed to allocate gparts_in buffers."); + } + p->nr_gparts_in = 0; + if (p->gparts_out == NULL) { + p->size_gparts_out = proxy_buffinit; + if ((p->gparts_out = (struct gpart *)malloc(sizeof(struct gpart) * + p->size_gparts_out)) == NULL) + error("Failed to allocate gparts_out buffers."); + } + p->nr_gparts_out = 0; } diff --git a/src/proxy.h b/src/proxy.h index 3cd33e0f0819ee1ecac53213630445b39c809dea..5a747187e05a78a109ce4523ebb3c9d5fe2ad717 100644 --- a/src/proxy.h +++ b/src/proxy.h @@ -32,7 +32,8 @@ #define proxy_tag_count 0 #define proxy_tag_parts 1 #define proxy_tag_xparts 2 -#define proxy_tag_cells 3 +#define proxy_tag_gparts 3 +#define proxy_tag_cells 4 /* Data structure for the proxy. */ struct proxy { @@ -53,14 +54,21 @@ struct proxy { /* The parts and xparts buffers for input and output. */ struct part *parts_in, *parts_out; struct xpart *xparts_in, *xparts_out; + struct gpart *gparts_in, *gparts_out; int size_parts_in, size_parts_out; int nr_parts_in, nr_parts_out; + int size_gparts_in, size_gparts_out; + int nr_gparts_in, nr_gparts_out; + + /* Buffer to hold the incoming/outgoing particle counts. */ + int buff_out[2], buff_in[2]; /* MPI request handles. */ #ifdef WITH_MPI MPI_Request req_parts_count_out, req_parts_count_in; MPI_Request req_parts_out, req_parts_in; MPI_Request req_xparts_out, req_xparts_in; + MPI_Request req_gparts_out, req_gparts_in; MPI_Request req_cells_count_out, req_cells_count_in; MPI_Request req_cells_out, req_cells_in; #endif @@ -68,8 +76,9 @@ struct proxy { /* Function prototypes. 
*/ void proxy_init(struct proxy *p, int mynodeID, int nodeID); -void proxy_parts_load(struct proxy *p, struct part *parts, struct xpart *xparts, - int N); +void proxy_parts_load(struct proxy *p, const struct part *parts, + const struct xpart *xparts, int N); +void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N); void proxy_parts_exch1(struct proxy *p); void proxy_parts_exch2(struct proxy *p); void proxy_addcell_in(struct proxy *p, struct cell *c); diff --git a/src/queue.c b/src/queue.c index a7321155100df9225526c2f19fac2b99531307e4..6b788d7376ba4bdc95f1b1d918ab52a9514e7b4a 100644 --- a/src/queue.c +++ b/src/queue.c @@ -136,9 +136,6 @@ struct task *queue_gettask(struct queue *q, const struct task *prev, lock_type *qlock = &q->lock; struct task *res = NULL; - /* If there are no tasks, leave immediately. */ - if (q->count == 0) return NULL; - /* Grab the task lock. */ if (blocking) { if (lock_lock(qlock) != 0) error("Locking the qlock failed.\n"); @@ -146,6 +143,12 @@ struct task *queue_gettask(struct queue *q, const struct task *prev, if (lock_trylock(qlock) != 0) return NULL; } + /* If there are no tasks, leave immediately. */ + if (q->count == 0) { + lock_unlock_blind(qlock); + return NULL; + } + /* Set some pointers we will use often. */ int *qtid = q->tid; struct task *qtasks = q->tasks; diff --git a/src/riemann/riemann_exact.h b/src/riemann/riemann_exact.h index 861dad9729794efb302638792fef6e3df43c700a..b768cde5f4f5dfd0463cc8a582a1af0a17607bbe 100644 --- a/src/riemann/riemann_exact.h +++ b/src/riemann/riemann_exact.h @@ -192,6 +192,8 @@ __attribute__((always_inline)) INLINE static GFLOAT riemann_guess_p( * * @param lower_limit Lower limit for the method (riemann_f(lower_limit) < 0) * @param upper_limit Upper limit for the method (riemann_f(upper_limit) > 0) + * @param lowf ??? Bert? + * @param upf ??? Bert? 
* @param error_tol Tolerance used to decide if the solution is converged * @param WL Left state vector * @param WR Right state vector diff --git a/src/runner.c b/src/runner.c index 7eedb6adc72755ba12faed5429edad43d3849451..e86a2129b013398647db416df2095a55fdb7417e 100644 --- a/src/runner.c +++ b/src/runner.c @@ -469,8 +469,10 @@ void runner_dogsort(struct runner *r, struct cell *c, int flags, int clock) { void runner_doinit(struct runner *r, struct cell *c, int timer) { - struct part *p, *parts = c->parts; + struct part *const parts = c->parts; + struct gpart *const gparts = c->gparts; const int count = c->count; + const int gcount = c->gcount; const int ti_current = r->e->ti_current; TIMER_TIC; @@ -486,7 +488,7 @@ void runner_doinit(struct runner *r, struct cell *c, int timer) { for (int i = 0; i < count; i++) { /* Get a direct pointer on the part. */ - p = &parts[i]; + struct part *const p = &parts[i]; if (p->ti_end <= ti_current) { @@ -494,6 +496,19 @@ void runner_doinit(struct runner *r, struct cell *c, int timer) { hydro_init_part(p); } } + + /* Loop over the gparts in this cell. */ + for (int i = 0; i < gcount; i++) { + + /* Get a direct pointer on the part. */ + struct gpart *const gp = &gparts[i]; + + if (gp->ti_end <= ti_current) { + + /* Get ready for a density calculation */ + gravity_init_part(gp); + } + } } if (timer) TIMER_TOC(timer_init); @@ -649,7 +664,7 @@ void runner_doghost(struct runner *r, struct cell *c) { } /** - * @brief Drift particles forward in time + * @brief Drift particles and g-particles forward in time * * @param r The runner thread. * @param c The cell. 
@@ -658,26 +673,39 @@ void runner_doghost(struct runner *r, struct cell *c) { void runner_dodrift(struct runner *r, struct cell *c, int timer) { const int nr_parts = c->count; + const int nr_gparts = c->gcount; const double timeBase = r->e->timeBase; const double dt = (r->e->ti_current - r->e->ti_old) * timeBase; - const float ti_old = r->e->ti_old; - const float ti_current = r->e->ti_current; - struct part *restrict p, *restrict parts = c->parts; - struct xpart *restrict xp, *restrict xparts = c->xparts; - float dx_max = 0.f, h_max = 0.f; - float w; + const int ti_old = r->e->ti_old; + const int ti_current = r->e->ti_current; + struct part *const parts = c->parts; + struct xpart *const xparts = c->xparts; + struct gpart *const gparts = c->gparts; + float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f; TIMER_TIC /* No children? */ if (!c->split) { - /* Loop over all the particles in the cell */ + /* Loop over all the g-particles in the cell */ + for (int k = 0; k < nr_gparts; ++k) { + + /* Get a handle on the gpart. */ + struct gpart *const gp = &gparts[k]; + + /* Drift... */ + gp->x[0] += gp->v_full[0] * dt; + gp->x[1] += gp->v_full[1] * dt; + gp->x[2] += gp->v_full[2] * dt; + } + + /* Loop over all the particles in the cell (more work for these !) */ for (int k = 0; k < nr_parts; k++) { /* Get a handle on the part. 
*/ - p = &parts[k]; - xp = &xparts[k]; + struct part *const p = &parts[k]; + struct xpart *const xp = &xparts[k]; /* Useful quantity */ const float h_inv = 1.0f / p->h; @@ -693,32 +721,34 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { p->v[2] += p->a_hydro[2] * dt; /* Predict smoothing length */ - w = p->h_dt * h_inv * dt; - if (fabsf(w) < 0.2f) - p->h *= approx_expf(w); /* 4th order expansion of exp(w) */ + const float w1 = p->h_dt * h_inv * dt; + if (fabsf(w1) < 0.2f) + p->h *= approx_expf(w1); /* 4th order expansion of exp(w) */ else - p->h *= expf(w); + p->h *= expf(w1); /* Predict density */ - w = -3.0f * p->h_dt * h_inv * dt; - if (fabsf(w) < 0.2f) - p->rho *= approx_expf(w); /* 4th order expansion of exp(w) */ + const float w2 = -3.0f * p->h_dt * h_inv * dt; + if (fabsf(w2) < 0.2f) + p->rho *= approx_expf(w2); /* 4th order expansion of exp(w) */ else - p->rho *= expf(w); + p->rho *= expf(w2); /* Predict the values of the extra fields */ hydro_predict_extra(p, xp, ti_old, ti_current, timeBase); - /* Compute motion since last cell construction */ - const float dx = - sqrtf((p->x[0] - xp->x_old[0]) * (p->x[0] - xp->x_old[0]) + - (p->x[1] - xp->x_old[1]) * (p->x[1] - xp->x_old[1]) + - (p->x[2] - xp->x_old[2]) * (p->x[2] - xp->x_old[2])); - dx_max = fmaxf(dx_max, dx); + /* Compute (square of) motion since last cell construction */ + const float dx2 = (p->x[0] - xp->x_old[0]) * (p->x[0] - xp->x_old[0]) + + (p->x[1] - xp->x_old[1]) * (p->x[1] - xp->x_old[1]) + + (p->x[2] - xp->x_old[2]) * (p->x[2] - xp->x_old[2]); + dx2_max = fmaxf(dx2_max, dx2); /* Maximal smoothing length */ h_max = fmaxf(p->h, h_max); } + + /* Now, get the maximal particle motion from its square */ + dx_max = sqrtf(dx2_max); } /* Otherwise, aggregate data from children. 
*/ @@ -758,37 +788,105 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { const double timeBase = r->e->timeBase; const double timeBase_inv = 1.0 / r->e->timeBase; const int count = c->count; + const int gcount = c->gcount; + struct part *const parts = c->parts; + struct xpart *const xparts = c->xparts; + struct gpart *const gparts = c->gparts; const int is_fixdt = (r->e->policy & engine_policy_fixdt) == engine_policy_fixdt; - int new_dti; - int dti_timeline; - - int updated = 0; + int updated = 0, g_updated = 0; int ti_end_min = max_nr_timesteps, ti_end_max = 0; double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, mass = 0.0; float mom[3] = {0.0f, 0.0f, 0.0f}; float ang[3] = {0.0f, 0.0f, 0.0f}; - float x[3], v_full[3]; - struct part *restrict p, *restrict parts = c->parts; - struct xpart *restrict xp, *restrict xparts = c->xparts; TIMER_TIC /* No children? */ if (!c->split) { + /* Loop over the g-particles and kick the active ones. */ + for (int k = 0; k < gcount; k++) { + + /* Get a handle on the part. 
*/ + struct gpart *const gp = &gparts[k]; + + /* If the g-particle has no counterpart and needs to be kicked */ + if (gp->id < 0 && (is_fixdt || gp->ti_end <= ti_current)) { + + /* First, finish the force calculation */ + gravity_end_force(gp); + + /* Now we are ready to compute the next time-step size */ + int new_dti; + + if (is_fixdt) { + + /* Now we have a time step, proceed with the kick */ + new_dti = global_dt_max * timeBase_inv; + + } else { + + /* Compute the next timestep (gravity condition) */ + float new_dt = gravity_compute_timestep(gp); + + /* Limit timestep within the allowed range */ + new_dt = fminf(new_dt, global_dt_max); + new_dt = fmaxf(new_dt, global_dt_min); + + /* Convert to integer time */ + new_dti = new_dt * timeBase_inv; + + /* Recover the current timestep */ + const int current_dti = gp->ti_end - gp->ti_begin; + + /* Limit timestep increase */ + if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti); + + /* Put this timestep on the time line */ + int dti_timeline = max_nr_timesteps; + while (new_dti < dti_timeline) dti_timeline /= 2; + + /* Now we have a time step, proceed with the kick */ + new_dti = dti_timeline; + } + + /* Compute the time step for this kick */ + const int ti_start = (gp->ti_begin + gp->ti_end) / 2; + const int ti_end = gp->ti_end + new_dti / 2; + const double dt = (ti_end - ti_start) * timeBase; + const double half_dt = (ti_end - gp->ti_end) * timeBase; + + /* Move particle forward in time */ + gp->ti_begin = gp->ti_end; + gp->ti_end = gp->ti_begin + new_dti; + + /* Kick particles in momentum space */ + gp->v_full[0] += gp->a_grav[0] * dt; + gp->v_full[1] += gp->a_grav[1] * dt; + gp->v_full[2] += gp->a_grav[2] * dt; + + /* Extra kick work */ + gravity_kick_extra(gp, dt, half_dt); + + /* Number of updated g-particles */ + g_updated++; + } + + /* Minimal time for next end of time-step */ + ti_end_min = min(gp->ti_end, ti_end_min); + ti_end_max = max(gp->ti_end, ti_end_max); + } + + /* Now do the hydro ones... 
*/ + /* Loop over the particles and kick the active ones. */ for (int k = 0; k < count; k++) { /* Get a handle on the part. */ - p = &parts[k]; - xp = &xparts[k]; - - const float m = p->mass; - x[0] = p->x[0]; - x[1] = p->x[1]; - x[2] = p->x[2]; + struct part *const p = &parts[k]; + struct xpart *const xp = &xparts[k]; /* If particle needs to be kicked */ if (is_fixdt || p->ti_end <= ti_current) { @@ -798,8 +896,10 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { /* And do the same of the extra variable */ hydro_end_force(p); + if (p->gpart != NULL) gravity_end_force(p->gpart); /* Now we are ready to compute the next time-step size */ + int new_dti; if (is_fixdt) { @@ -808,9 +908,13 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { } else { - /* Compute the next timestep */ + /* Compute the next timestep (hydro condition) */ const float new_dt_hydro = hydro_compute_timestep(p, xp); - const float new_dt_grav = gravity_compute_timestep(p, xp); + + /* Compute the next timestep (gravity condition) */ + float new_dt_grav = FLT_MAX; + if (p->gpart != NULL) + new_dt_grav = gravity_compute_timestep(p->gpart); float new_dt = fminf(new_dt_hydro, new_dt_grav); @@ -835,7 +939,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti); /* Put this timestep on the time line */ - dti_timeline = max_nr_timesteps; + int dti_timeline = max_nr_timesteps; while (new_dti < dti_timeline) dti_timeline /= 2; /* Now we have a time step, proceed with the kick */ @@ -845,34 +949,55 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { /* Compute the time step for this kick */ const int ti_start = (p->ti_begin + p->ti_end) / 2; const int ti_end = p->ti_end + new_dti / 2; - const float dt = (ti_end - ti_start) * timeBase; - const float half_dt = (ti_end - p->ti_end) * timeBase; + const double dt = (ti_end - ti_start) * timeBase; + const double half_dt = (ti_end - 
p->ti_end) * timeBase; /* Move particle forward in time */ p->ti_begin = p->ti_end; p->ti_end = p->ti_begin + new_dti; + if (p->gpart != NULL) { + p->gpart->ti_begin = p->ti_begin; + p->gpart->ti_end = p->ti_end; + } + + /* Get the total acceleration (hydro, plus gravity if there is a gpart) */ + float a_tot[3] = {p->a_hydro[0], p->a_hydro[1], p->a_hydro[2]}; + if (p->gpart != NULL) { + a_tot[0] += p->gpart->a_grav[0]; + a_tot[1] += p->gpart->a_grav[1]; + a_tot[2] += p->gpart->a_grav[2]; + } /* Kick particles in momentum space */ - xp->v_full[0] += p->a_hydro[0] * dt; - xp->v_full[1] += p->a_hydro[1] * dt; - xp->v_full[2] += p->a_hydro[2] * dt; + xp->v_full[0] += a_tot[0] * dt; + xp->v_full[1] += a_tot[1] * dt; + xp->v_full[2] += a_tot[2] * dt; + + if (p->gpart != NULL) { + p->gpart->v_full[0] = xp->v_full[0]; + p->gpart->v_full[1] = xp->v_full[1]; + p->gpart->v_full[2] = xp->v_full[2]; + } - p->v[0] = xp->v_full[0] - half_dt * p->a_hydro[0]; - p->v[1] = xp->v_full[1] - half_dt * p->a_hydro[1]; - p->v[2] = xp->v_full[2] - half_dt * p->a_hydro[2]; + /* Go back by half-step for the hydro velocity */ + p->v[0] = xp->v_full[0] - half_dt * a_tot[0]; + p->v[1] = xp->v_full[1] - half_dt * a_tot[1]; + p->v[2] = xp->v_full[2] - half_dt * a_tot[2]; /* Extra kick work */ hydro_kick_extra(p, xp, dt, half_dt); + if (p->gpart != NULL) gravity_kick_extra(p->gpart, dt, half_dt); /* Number of updated particles */ updated++; + if (p->gpart != NULL) g_updated++; } /* Now collect quantities for statistics */ - v_full[0] = xp->v_full[0]; - v_full[1] = xp->v_full[1]; - v_full[2] = xp->v_full[2]; + const double x[3] = {p->x[0], p->x[1], p->x[2]}; + const float v_full[3] = {xp->v_full[0], xp->v_full[1], xp->v_full[2]}; + const float m = p->mass; /* Collect mass */ mass += m; @@ -906,13 +1031,14 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { /* Loop over the progeny.
*/ for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) { - struct cell *cp = c->progeny[k]; + struct cell *const cp = c->progeny[k]; /* Recurse */ runner_dokick(r, cp, 0); /* And aggregate */ updated += cp->updated; + g_updated += cp->g_updated; e_kin += cp->e_kin; e_int += cp->e_int; e_pot += cp->e_pot; @@ -930,6 +1056,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { /* Store the values. */ c->updated = updated; + c->g_updated = g_updated; c->e_kin = e_kin; c->e_int = e_int; c->e_pot = e_pot; @@ -1057,9 +1184,12 @@ void *runner_main(void *data) { case task_type_grav_down: runner_dograv_down(r, t->ci); break; - case task_type_psort: + case task_type_part_sort: space_do_parts_sort(); break; + case task_type_gpart_sort: + space_do_gparts_sort(); + break; case task_type_split_cell: space_do_split(e->s, t->ci); break; diff --git a/src/runner_doiact.h b/src/runner_doiact.h index cf5d56e94169b44e6cd2974a3422a0bc5e4610ac..de339db6133fcc829bdc6ee0ce9e537b68982422 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -1235,7 +1235,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { #else /* Does pi need to be updated too? */ - if (pi->dt <= dt_step) { + if (pi->ti_end <= ti_current) { /* Add this interaction to the symmetric queue. */ r2q2[icount2] = r2; diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index f374339da75e31b39a5295fcd8bbc23c34d8d67d..02626295a49f314fef840bc044a476f5c9cf332d 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -267,9 +267,9 @@ void runner_dograv_down(struct runner *r, struct cell *c) { /* Apply the multipole acceleration to all gparts. 
*/ for (int k = 0; k < c->gcount; k++) { struct gpart *p = &c->gparts[k]; - p->a[0] += m->a[0]; - p->a[1] += m->a[1]; - p->a[2] += m->a[2]; + p->a_grav[0] += m->a[0]; + p->a_grav[1] += m->a[1]; + p->a_grav[2] += m->a[2]; } } } @@ -594,5 +594,4 @@ void runner_dosub_grav(struct runner *r, struct cell *ci, struct cell *cj, if (gettimer) TIMER_TOC(timer_dosub_grav); } - #endif /* SWIFT_RUNNER_DOIACT_GRAV_H */ diff --git a/src/scheduler.c b/src/scheduler.c index 722e344b5a86b5fbdc42c7038fd3cb00e44b2ee8..d1d343240b37f5afd5f41fecacf106b0e85f726f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -43,7 +43,7 @@ #include "cycle.h" #include "error.h" #include "intrinsics.h" -#include "kernel.h" +#include "kernel_hydro.h" #include "timers.h" /** @@ -95,39 +95,38 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta, void scheduler_splittasks(struct scheduler *s) { - int j, k, ind, sid, tid = 0, redo; - struct cell *ci, *cj; - double hi, hj, shift[3]; - struct task *t, *t_old; - // float dt_step = s->dt_step; - int pts[7][8] = {{-1, 12, 10, 9, 4, 3, 1, 0}, - {-1, -1, 11, 10, 5, 4, 2, 1}, - {-1, -1, -1, 12, 7, 6, 4, 3}, - {-1, -1, -1, -1, 8, 7, 5, 4}, - {-1, -1, -1, -1, -1, 12, 10, 9}, - {-1, -1, -1, -1, -1, -1, 11, 10}, - {-1, -1, -1, -1, -1, -1, -1, 12}}; - float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.1897, - 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.5788}; + const int pts[7][8] = {{-1, 12, 10, 9, 4, 3, 1, 0}, + {-1, -1, 11, 10, 5, 4, 2, 1}, + {-1, -1, -1, 12, 7, 6, 4, 3}, + {-1, -1, -1, -1, 8, 7, 5, 4}, + {-1, -1, -1, -1, -1, 12, 10, 9}, + {-1, -1, -1, -1, -1, -1, 11, 10}, + {-1, -1, -1, -1, -1, -1, -1, 12}}; + const float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, + 0.4025, 0.1897, 0.4025, 0.1897, 0.4025, + 0.5788, 0.4025, 0.5788}; /* Loop through the tasks... */ - redo = 0; - t_old = t = NULL; + int tid = 0, redo = 0; + struct task *t_old = NULL; while (1) { /* Get a pointer on the task. 
*/ + struct task *t = t_old; if (redo) { redo = 0; - t = t_old; } else { - if ((ind = atomic_inc(&tid)) < s->nr_tasks) + const int ind = atomic_inc(&tid); + if (ind < s->nr_tasks) t_old = t = &s->tasks[s->tasks_ind[ind]]; else break; } /* Skip sorting tasks. */ - if (t->type == task_type_psort) continue; + if (t->type == task_type_part_sort) continue; + + if (t->type == task_type_gpart_sort) continue; /* Empty task? */ if (t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) { @@ -161,7 +160,7 @@ void scheduler_splittasks(struct scheduler *s) { if (t->type == task_type_self) { /* Get a handle on the cell involved. */ - ci = t->ci; + struct cell *ci = t->ci; /* Foreign task? */ if (ci->nodeID != s->nodeID) { @@ -187,18 +186,18 @@ void scheduler_splittasks(struct scheduler *s) { redo = 1; /* Add the self task. */ - for (k = 0; ci->progeny[k] == NULL; k++) - ; - t->ci = ci->progeny[k]; - for (k += 1; k < 8; k++) + int first_child = 0; + while (ci->progeny[first_child] == NULL) first_child++; + t->ci = ci->progeny[first_child]; + for (int k = first_child + 1; k < 8; k++) if (ci->progeny[k] != NULL) scheduler_addtask(s, task_type_self, t->subtype, 0, 0, ci->progeny[k], NULL, 0); /* Make a task for each pair of progeny. */ - for (j = 0; j < 8; j++) + for (int j = 0; j < 8; j++) if (ci->progeny[j] != NULL) - for (k = j + 1; k < 8; k++) + for (int k = j + 1; k < 8; k++) if (ci->progeny[k] != NULL) scheduler_addtask(s, task_type_pair, t->subtype, pts[j][k], 0, ci->progeny[j], ci->progeny[k], 0); @@ -211,10 +210,10 @@ void scheduler_splittasks(struct scheduler *s) { else if (t->type == task_type_pair) { /* Get a handle on the cells involved. */ - ci = t->ci; - cj = t->cj; - hi = ci->dmin; - hj = cj->dmin; + struct cell *ci = t->ci; + struct cell *cj = t->cj; + const double hi = ci->dmin; + const double hj = cj->dmin; /* Foreign task? 
*/ if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) { @@ -224,7 +223,8 @@ void scheduler_splittasks(struct scheduler *s) { /* Get the sort ID, use space_getsid and not t->flags to make sure we get ci and cj swapped if needed. */ - sid = space_getsid(s->space, &ci, &cj, shift); + double shift[3]; + int sid = space_getsid(s->space, &ci, &cj, shift); /* Should this task be split-up? */ if (ci->split && cj->split && @@ -480,9 +480,9 @@ void scheduler_splittasks(struct scheduler *s) { /* Replace the current task. */ t->type = task_type_none; - for (j = 0; j < 8; j++) + for (int j = 0; j < 8; j++) if (ci->progeny[j] != NULL) - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (cj->progeny[k] != NULL) { t = scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, ci->progeny[j], cj->progeny[k], 0); @@ -521,8 +521,8 @@ void scheduler_splittasks(struct scheduler *s) { else if (t->type == task_type_grav_mm) { /* Get a handle on the cells involved. */ - ci = t->ci; - cj = t->cj; + struct cell *ci = t->ci; + struct cell *cj = t->cj; /* Self-interaction? */ if (cj == NULL) { @@ -546,7 +546,7 @@ void scheduler_splittasks(struct scheduler *s) { /* Split this task into tasks on its progeny. */ t->type = task_type_none; - for (j = 0; j < 8; j++) + for (int j = 0; j < 8; j++) if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) { if (t->type == task_type_none) { t->type = task_type_grav_mm; @@ -555,7 +555,7 @@ void scheduler_splittasks(struct scheduler *s) { } else t = scheduler_addtask(s, task_type_grav_mm, task_subtype_none, 0, 0, ci->progeny[j], NULL, 0); - for (k = j + 1; k < 8; k++) + for (int k = j + 1; k < 8; k++) if (ci->progeny[k] != NULL && ci->progeny[k]->gcount > 0) { if (t->type == task_type_none) { t->type = task_type_grav_mm; @@ -594,7 +594,7 @@ void scheduler_splittasks(struct scheduler *s) { /* Get the opening angle theta. 
*/ float dx[3], theta; - for (k = 0; k < 3; k++) { + for (int k = 0; k < 3; k++) { dx[k] = fabs(ci->loc[k] - cj->loc[k]); if (s->space->periodic && dx[k] > 0.5 * s->space->dim[k]) dx[k] = -dx[k] + s->space->dim[k]; @@ -615,9 +615,9 @@ void scheduler_splittasks(struct scheduler *s) { /* Split this task into tasks on its progeny. */ t->type = task_type_none; - for (j = 0; j < 8; j++) + for (int j = 0; j < 8; j++) if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) { - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (cj->progeny[k] != NULL && cj->progeny[k]->gcount > 0) { if (t->type == task_type_none) { t->type = task_type_grav_mm; @@ -663,17 +663,14 @@ struct task *scheduler_addtask(struct scheduler *s, int type, int subtype, int flags, int wait, struct cell *ci, struct cell *cj, int tight) { - int ind; - struct task *t; - /* Get the next free task. */ - ind = atomic_inc(&s->tasks_next); + const int ind = atomic_inc(&s->tasks_next); /* Overflow? */ if (ind >= s->size) error("Task list overflow."); /* Get a pointer to the new task. */ - t = &s->tasks[ind]; + struct task *t = &s->tasks[ind]; /* Copy the data. */ t->type = type; @@ -768,24 +765,24 @@ void scheduler_set_unlocks(struct scheduler *s) { void scheduler_ranktasks(struct scheduler *s) { - int i, j = 0, k, temp, left = 0, rank; - struct task *t, *tasks = s->tasks; - int *tid = s->tasks_ind, nr_tasks = s->nr_tasks; + struct task *tasks = s->tasks; + int *tid = s->tasks_ind; + const int nr_tasks = s->nr_tasks; /* Run through the tasks and get all the waits right. */ - for (i = 0, k = 0; k < nr_tasks; k++) { + for (int k = 0; k < nr_tasks; k++) { tid[k] = k; - for (j = 0; j < tasks[k].nr_unlock_tasks; j++) + for (int j = 0; j < tasks[k].nr_unlock_tasks; j++) tasks[k].unlock_tasks[j]->wait += 1; } /* Main loop. */ - for (j = 0, rank = 0; left < nr_tasks; rank++) { + for (int j = 0, rank = 0, left = 0; left < nr_tasks; rank++) { /* Load the tids of tasks with no waits. 
*/ - for (k = left; k < nr_tasks; k++) + for (int k = left; k < nr_tasks; k++) if (tasks[tid[k]].wait == 0) { - temp = tid[j]; + int temp = tid[j]; tid[j] = tid[k]; tid[k] = temp; j += 1; @@ -795,15 +792,16 @@ void scheduler_ranktasks(struct scheduler *s) { if (j == left) error("Unsatisfiable task dependencies detected."); /* Unlock the next layer of tasks. */ - for (i = left; i < j; i++) { - t = &tasks[tid[i]]; + for (int i = left; i < j; i++) { + struct task *t = &tasks[tid[i]]; t->rank = rank; tid[i] = t - tasks; if (tid[i] >= nr_tasks) error("Task index overshoot."); /* message( "task %i of type %s has rank %i." , i , (t->type == task_type_self) ? "self" : (t->type == task_type_pair) ? "pair" : "sort" , rank ); */ - for (k = 0; k < t->nr_unlock_tasks; k++) t->unlock_tasks[k]->wait -= 1; + for (int k = 0; k < t->nr_unlock_tasks; k++) + t->unlock_tasks[k]->wait -= 1; } /* The new left (no, not tony). */ @@ -825,8 +823,6 @@ void scheduler_ranktasks(struct scheduler *s) { void scheduler_reset(struct scheduler *s, int size) { - int k; - /* Do we need to re-allocate? */ if (size > s->size) { @@ -853,7 +849,7 @@ void scheduler_reset(struct scheduler *s, int size) { s->nr_unlocks = 0; /* Set the task pointers in the queues. 
*/ - for (k = 0; k < s->nr_queues; k++) s->queues[k].tasks = s->tasks; + for (int k = 0; k < s->nr_queues; k++) s->queues[k].tasks = s->tasks; } /** @@ -864,21 +860,23 @@ void scheduler_reset(struct scheduler *s, int size) { void scheduler_reweight(struct scheduler *s) { - int k, j, nr_tasks = s->nr_tasks, *tid = s->tasks_ind; - struct task *t, *tasks = s->tasks; - int nodeID = s->nodeID; - float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.1897, - 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.5788}; - float wscale = 0.001; + const int nr_tasks = s->nr_tasks; + int *tid = s->tasks_ind; + struct task *tasks = s->tasks; + const int nodeID = s->nodeID; + const float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, + 0.4025, 0.1897, 0.4025, 0.1897, 0.4025, + 0.5788, 0.4025, 0.5788}; + const float wscale = 0.001; // ticks tic; /* Run through the tasks backwards and set their waits and weights. */ // tic = getticks(); - for (k = nr_tasks - 1; k >= 0; k--) { - t = &tasks[tid[k]]; + for (int k = nr_tasks - 1; k >= 0; k--) { + struct task *t = &tasks[tid[k]]; t->weight = 0; - for (j = 0; j < t->nr_unlock_tasks; j++) + for (int j = 0; j < t->nr_unlock_tasks; j++) if (t->unlock_tasks[j]->weight > t->weight) t->weight = t->unlock_tasks[j]->weight; if (!t->implicit && t->tic > 0) @@ -959,8 +957,9 @@ void scheduler_reweight(struct scheduler *s) { void scheduler_start(struct scheduler *s, unsigned int mask, unsigned int submask) { - int nr_tasks = s->nr_tasks, *tid = s->tasks_ind; - struct task *t, *tasks = s->tasks; + const int nr_tasks = s->nr_tasks; + int *tid = s->tasks_ind; + struct task *tasks = s->tasks; // ticks tic; /* Store the masks */ @@ -986,8 +985,7 @@ void scheduler_start(struct scheduler *s, unsigned int mask, const int waiting_old = s->waiting; /* We are going to use the task structure in a modified way to pass - information - to the task. Don't do this at home ! + information to the task. Don't do this at home ! 
- ci and cj will give the range of tasks to which the waits will be applied - the flags will be used to transfer the mask - the rank will be used to transfer the submask @@ -1012,6 +1010,7 @@ void scheduler_start(struct scheduler *s, unsigned int mask, /* Wait for the rewait tasks to have executed. */ pthread_mutex_lock(&s->sleep_mutex); + pthread_cond_broadcast(&s->sleep_cond); while (s->waiting > waiting_old) { pthread_cond_wait(&s->sleep_cond, &s->sleep_mutex); } @@ -1025,7 +1024,7 @@ void scheduler_start(struct scheduler *s, unsigned int mask, /* Loop over the tasks and enqueue whoever is ready. */ // tic = getticks(); for (int k = 0; k < s->nr_tasks; k++) { - t = &tasks[tid[k]]; + struct task *t = &tasks[tid[k]]; if (atomic_dec(&t->wait) == 1 && ((1 << t->type) & s->mask) && ((1 << t->subtype) & s->submask) && !t->skip) { scheduler_enqueue(s, t); @@ -1033,6 +1032,11 @@ void scheduler_start(struct scheduler *s, unsigned int mask, } } + /* To be safe, fire off one last sleep_cond broadcast while holding sleep_mutex. */ + pthread_mutex_lock(&s->sleep_mutex); + pthread_cond_broadcast(&s->sleep_cond); + pthread_mutex_unlock(&s->sleep_mutex); + // message( "enqueueing tasks took %.3f %s." , // clocks_from_ticks( getticks() - tic ), clocks_getunit()); } @@ -1046,10 +1050,8 @@ void scheduler_start(struct scheduler *s, unsigned int mask, void scheduler_enqueue(struct scheduler *s, struct task *t) { + /* The target queue for this task. */ int qid = -1; -#ifdef WITH_MPI - int err; -#endif /* Fail if this task has already been enqueued before. */ if (t->rid >= 0) error("Task has already been enqueued."); @@ -1071,6 +1073,9 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { /* Otherwise, look for a suitable queue. */ else { +#ifdef WITH_MPI + int err; +#endif /* Find the previous owner for each task type, and do any pre-processing needed.
*/ @@ -1093,13 +1098,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_recv: #ifdef WITH_MPI - if ((err = MPI_Irecv(t->ci->parts, t->ci->count, s->part_mpi_type, - t->ci->nodeID, t->flags, MPI_COMM_WORLD, - &t->req)) != MPI_SUCCESS) { - char buff[MPI_MAX_ERROR_STRING]; - int len; - MPI_Error_string(err, buff, &len); - error("Failed to emit irecv for particle data (%s).", buff); + err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type, + t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + if (err != MPI_SUCCESS) { + mpi_error(err, "Failed to emit irecv for particle data."); } // message( "receiving %i parts with tag=%i from %i to %i." , // t->ci->count , t->flags , t->ci->nodeID , s->nodeID ); @@ -1111,13 +1113,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_send: #ifdef WITH_MPI - if ((err = MPI_Isend(t->ci->parts, t->ci->count, s->part_mpi_type, - t->cj->nodeID, t->flags, MPI_COMM_WORLD, - &t->req)) != MPI_SUCCESS) { - char buff[MPI_MAX_ERROR_STRING]; - int len; - MPI_Error_string(err, buff, &len); - error("Failed to emit isend for particle data (%s).", buff); + err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type, + t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + if (err != MPI_SUCCESS) { + mpi_error(err, "Failed to emit isend for particle data."); } // message( "sending %i parts with tag=%i from %i to %i." , // t->ci->count , t->flags , s->nodeID , t->cj->nodeID ); @@ -1133,7 +1132,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { if (qid >= s->nr_queues) error("Bad computed qid."); - /* If no previous owner, find the shortest queue. */ + /* If no previous owner, pick a random queue. */ if (qid < 0) qid = rand() % s->nr_queues; /* Increase the waiting counter. 
*/ @@ -1164,7 +1163,7 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) { for (int k = 0; k < t->nr_unlock_tasks; k++) { struct task *t2 = t->unlock_tasks[k]; - int res = atomic_dec(&t2->wait); + const int res = atomic_dec(&t2->wait); if (res < 1) { error("Negative wait!"); } else if (res == 1) { @@ -1203,7 +1202,7 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t) { they are ready. */ for (int k = 0; k < t->nr_unlock_tasks; k++) { struct task *t2 = t->unlock_tasks[k]; - int res = atomic_dec(&t2->wait); + const int res = atomic_dec(&t2->wait); if (res < 1) { error("Negative wait!"); } else if (res == 1) { @@ -1240,7 +1239,7 @@ struct task *scheduler_gettask(struct scheduler *s, int qid, const struct task *prev) { struct task *res = NULL; - int k, nr_queues = s->nr_queues; + const int nr_queues = s->nr_queues; unsigned int seed = qid; /* Check qid. */ @@ -1264,10 +1263,10 @@ struct task *scheduler_gettask(struct scheduler *s, int qid, /* If unsuccessful, try stealing from the other queues. 
*/ if (s->flags & scheduler_flag_steal) { int count = 0, qids[nr_queues]; - for (k = 0; k < nr_queues; k++) + for (int k = 0; k < nr_queues; k++) if (s->queues[k].count > 0) qids[count++] = k; - for (k = 0; k < scheduler_maxsteal && count > 0; k++) { - int ind = rand_r(&seed) % count; + for (int k = 0; k < scheduler_maxsteal && count > 0; k++) { + const int ind = rand_r(&seed) % count; TIMER_TIC res = queue_gettask(&s->queues[qids[ind]], prev, 0); TIMER_TOC(timer_qsteal); @@ -1287,7 +1286,10 @@ struct task *scheduler_gettask(struct scheduler *s, int qid, if (res == NULL) { #endif pthread_mutex_lock(&s->sleep_mutex); - if (s->waiting > 0) pthread_cond_wait(&s->sleep_cond, &s->sleep_mutex); + res = queue_gettask(&s->queues[qid], prev, 1); + if (res == NULL && s->waiting > 0) { + pthread_cond_wait(&s->sleep_cond, &s->sleep_mutex); + } pthread_mutex_unlock(&s->sleep_mutex); } } @@ -1352,12 +1354,6 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks, s->tasks = NULL; s->tasks_ind = NULL; scheduler_reset(s, nr_tasks); - -/* Construct types for MPI communications */ -#ifdef WITH_MPI - part_create_mpi_type(&s->part_mpi_type); - xpart_create_mpi_type(&s->xpart_mpi_type); -#endif } /** @@ -1366,7 +1362,7 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks, * @param s The #scheduler * @param fileName Name of the file to write to */ -void scheduler_print_tasks(struct scheduler *s, char *fileName) { +void scheduler_print_tasks(const struct scheduler *s, const char *fileName) { const int nr_tasks = s->nr_tasks, *tid = s->tasks_ind; struct task *t, *tasks = s->tasks; diff --git a/src/scheduler.h b/src/scheduler.h index 3f2d8c289d0d691d0d155b20ae0522c5830524aa..64c694aea295c13810a20b626055fc6c15eb0af8 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -100,12 +100,6 @@ struct scheduler { /* The node we are working on. 
*/ int nodeID; - -#ifdef WITH_MPI - /* MPI data type for the particle transfers */ - MPI_Datatype part_mpi_type; - MPI_Datatype xpart_mpi_type; -#endif }; /* Function prototypes. */ @@ -128,7 +122,7 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t); void scheduler_addunlock(struct scheduler *s, struct task *ta, struct task *tb); void scheduler_set_unlocks(struct scheduler *s); void scheduler_dump_queue(struct scheduler *s); -void scheduler_print_tasks(struct scheduler *s, char *fileName); +void scheduler_print_tasks(const struct scheduler *s, const char *fileName); void scheduler_do_rewait(struct task *t_begin, struct task *t_end, unsigned int mask, unsigned int submask); diff --git a/src/serial_io.c b/src/serial_io.c index 8e63db5cfad3a3b50fc7e350bbac6ce09708230a..10eab97f1bf118a842e274b521056d0d81b32db1 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -57,18 +57,18 @@ * @param dim The dimension of the data (1 for scalar, 3 for vector) * @param part_c A (char*) pointer on the first occurrence of the field of *interest in the parts array + * @param partSize The size in bytes of the particle structure. * @param importance If COMPULSORY, the data must be present in the IC file. If *OPTIONAL, the array will be zeroed when the data is not present. * * @todo A better version using HDF5 hyper-slabs to read the file directly into *the part array * will be written once the structures have been stabilized. - * - * Calls #error() if an error occurs. 
*/ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, int dim, long long N_total, long long offset, - char* part_c, enum DATA_IMPORTANCE importance) { + char* part_c, size_t partSize, + enum DATA_IMPORTANCE importance) { hid_t h_data = 0, h_err = 0, h_type = 0, h_memspace = 0, h_filespace = 0; hsize_t shape[2], offsets[2]; htri_t exist = 0; @@ -76,7 +76,6 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, int i = 0, rank = 0; const size_t typeSize = sizeOfType(type); const size_t copySize = typeSize * dim; - const size_t partSize = sizeof(struct part); char* temp_c = 0; /* Check whether the dataspace exists or not */ @@ -172,9 +171,10 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, * Routines writing an output file *-----------------------------------------------------------------------------*/ -void prepareArray(hid_t grp, char* fileName, FILE* xmfFile, char* name, - enum DATA_TYPE type, long long N_total, int dim, - struct UnitSystem* us, enum UnitConversionFactor convFactor) { +void prepareArray(hid_t grp, char* fileName, FILE* xmfFile, + char* partTypeGroupName, char* name, enum DATA_TYPE type, + long long N_total, int dim, struct UnitSystem* us, + enum UnitConversionFactor convFactor) { hid_t h_data = 0, h_err = 0, h_space = 0, h_prop = 0; int rank = 0; hsize_t shape[2]; @@ -234,14 +234,14 @@ void prepareArray(hid_t grp, char* fileName, FILE* xmfFile, char* name, } /* Write XMF description for this data set */ - writeXMFline(xmfFile, fileName, name, N_total, dim, type); + writeXMFline(xmfFile, fileName, partTypeGroupName, name, N_total, dim, type); /* Write unit conversion factors for this data set */ - conversionString(buffer, us, convFactor); + units_conversion_string(buffer, us, convFactor); writeAttribute_d(h_data, "CGS conversion factor", - conversionFactor(us, convFactor)); - writeAttribute_f(h_data, "h-scale exponent", hFactor(us, convFactor)); - writeAttribute_f(h_data, "a-scale 
exponent", aFactor(us, convFactor)); + units_conversion_factor(us, convFactor)); + writeAttribute_f(h_data, "h-scale exponent", units_h_factor(us, convFactor)); + writeAttribute_f(h_data, "a-scale exponent", units_a_factor(us, convFactor)); writeAttribute_s(h_data, "Conversion factor", buffer); H5Pclose(h_prop); @@ -255,21 +255,22 @@ void prepareArray(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @param grp The group in which to write. * @param fileName The name of the file in which the data is written * @param xmfFile The FILE used to write the XMF description + * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param name The name of the array to write. * @param type The #DATA_TYPE of the array. * @param N The number of particles to write. * @param dim The dimension of the data (1 for scalar, 3 for vector) * @param part_c A (char*) pointer on the first occurrence of the field of *interest in the parts array + * @param partSize The size in bytes of the particle structure. * @param us The UnitSystem currently in use - * @param convFactor The UnitConversionFactor for this array - * - * - * Calls #error() if an error occurs. 
+ * @param convFactor The UnitConversionFactor for this arrayo */ -void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, - enum DATA_TYPE type, int N, int dim, long long N_total, - int mpi_rank, long long offset, char* part_c, +void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, + char* partTypeGroupName, char* name, enum DATA_TYPE type, + int N, int dim, long long N_total, int mpi_rank, + long long offset, char* part_c, size_t partSize, struct UnitSystem* us, enum UnitConversionFactor convFactor) { @@ -279,15 +280,14 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, int i = 0, rank = 0; const size_t typeSize = sizeOfType(type); const size_t copySize = typeSize * dim; - const size_t partSize = sizeof(struct part); char* temp_c = 0; /* message("Writing '%s' array...", name); */ /* Prepare the arrays in the file */ if (mpi_rank == 0) - prepareArray(grp, fileName, xmfFile, name, type, N_total, dim, us, - convFactor); + prepareArray(grp, fileName, xmfFile, partTypeGroupName, name, type, N_total, + dim, us, convFactor); /* Allocate temporary buffer */ temp = malloc(N * dim * sizeOfType(type)); @@ -362,7 +362,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, #define readArray(grp, name, type, N, dim, part, N_total, offset, field, \ importance) \ readArrayBackEnd(grp, name, type, N, dim, N_total, offset, \ - (char*)(&(part[0]).field), importance) + (char*)(&(part[0]).field), sizeof(part[0]), importance) /** * @brief A helper macro to call the readArrayBackEnd function more easily. @@ -371,34 +371,48 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @param fileName Unused parameter in non-MPI mode * @param xmfFile Unused parameter in non-MPI mode * @param name The name of the array to write. + * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param type The #DATA_TYPE of the array. 
* @param N The number of particles to write. * @param dim The dimension of the data (1 for scalar, 3 for vector) * @param part A (char*) pointer on the first occurrence of the field of - *interest - *in the parts array + *interest in the parts array + * @param N_total Unused parameter in non-MPI mode + * @param mpi_rank Unused parameter in non-MPI mode + * @param offset Unused parameter in non-MPI mode * @param field The name (code name) of the field to read from. * @param us The UnitSystem currently in use * @param convFactor The UnitConversionFactor for this array * */ -#define writeArray(grp, fileName, xmfFile, name, type, N, dim, part, N_total, \ - mpi_rank, offset, field, us, convFactor) \ - writeArrayBackEnd(grp, fileName, xmfFile, name, type, N, dim, N_total, \ - mpi_rank, offset, (char*)(&(part[0]).field), us, \ - convFactor) +#define writeArray(grp, fileName, xmfFile, partTypeGroupName, name, type, N, \ + dim, part, N_total, mpi_rank, offset, field, us, \ + convFactor) \ + writeArrayBackEnd(grp, fileName, xmfFile, partTypeGroupName, name, type, N, \ + dim, N_total, mpi_rank, offset, (char*)(&(part[0]).field), \ + sizeof(part[0]), us, convFactor) /* Import the right hydro definition */ #include "hydro_io.h" +/* Import the right gravity definition */ +#include "gravity_io.h" /** * @brief Reads an HDF5 initial condition file (GADGET-3 type) * * @param fileName The file to read. * @param dim (output) The dimension of the volume read from the file. - * @param parts (output) The array of #part read from the file. - * @param N (output) The number of particles read from the file. + * @param parts (output) The array of #part (gas particles) read from the file. + * @param gparts (output) The array of #gpart read from the file. + * @param Ngas (output) The number of #part read from the file on that node. + * @param Ngparts (output) The number of #gpart read from the file on that node. * @param periodic (output) 1 if the volume is periodic, 0 if not. 
+ * @param mpi_rank The MPI rank of this node + * @param mpi_size The number of MPI ranks + * @param comm The MPI communicator + * @param info The MPI information object + * @param dry_run If 1, don't read the particle. Only allocates the arrays. * * Opens the HDF5 file fileName and reads the particles contained * in the parts array. N is the returned number of particles found @@ -407,21 +421,20 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @warning Can not read snapshot distributed over more than 1 file !!! * @todo Read snapshots distributed in more than one file. * - * Calls #error() if an error occurs. - * */ void read_ic_serial(char* fileName, double dim[3], struct part** parts, - size_t* N, int* periodic, int mpi_rank, int mpi_size, - MPI_Comm comm, MPI_Info info) { + struct gpart** gparts, size_t* Ngas, size_t* Ngparts, + int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, + MPI_Info info, int dry_run) { hid_t h_file = 0, h_grp = 0; - double boxSize[3] = {0.0, -1.0, -1.0}; /* GADGET has only cubic boxes (in cosmological mode) */ - int numParticles[6] = {0}; - /* GADGET has 6 particle types. We only keep the type 0*/ - int numParticles_highWord[6] = {0}; - long long offset = 0; - long long N_total = 0; - int rank; + double boxSize[3] = {0.0, -1.0, -1.0}; + /* GADGET has 6 particle types. 
We only keep the type 0 & 1 for now*/ + int numParticles[NUM_PARTICLE_TYPES] = {0}; + int numParticles_highWord[NUM_PARTICLE_TYPES] = {0}; + size_t N[NUM_PARTICLE_TYPES] = {0}; + long long N_total[NUM_PARTICLE_TYPES] = {0}; + long long offset[NUM_PARTICLE_TYPES] = {0}; /* First read some information about the content */ if (mpi_rank == 0) { @@ -453,8 +466,10 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, readAttribute(h_grp, "NumPart_Total", UINT, numParticles); readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord); - N_total = ((long long)numParticles[0]) + - ((long long)numParticles_highWord[0] << 32); + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) + N_total[ptype] = ((long long)numParticles[ptype]) + + ((long long)numParticles_highWord[ptype] << 32); + dim[0] = boxSize[0]; dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1]; dim[2] = (boxSize[2] < 0) ? boxSize[0] : boxSize[2]; @@ -474,22 +489,40 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, /* Now need to broadcast that information to all ranks. */ MPI_Bcast(periodic, 1, MPI_INT, 0, comm); - MPI_Bcast(&N_total, 1, MPI_LONG_LONG, 0, comm); + MPI_Bcast(&N_total, NUM_PARTICLE_TYPES, MPI_LONG_LONG, 0, comm); MPI_Bcast(dim, 3, MPI_DOUBLE, 0, comm); /* Divide the particles among the tasks. 
*/ - offset = mpi_rank * N_total / mpi_size; - *N = (mpi_rank + 1) * N_total / mpi_size - offset; + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) { + offset[ptype] = mpi_rank * N_total[ptype] / mpi_size; + N[ptype] = (mpi_rank + 1) * N_total[ptype] / mpi_size - offset[ptype]; + } - /* Allocate memory to store particles */ - if (posix_memalign((void*)parts, part_align, (*N) * sizeof(struct part)) != 0) + /* Allocate memory to store SPH particles */ + *Ngas = N[0]; + if (posix_memalign((void*)parts, part_align, (*Ngas) * sizeof(struct part)) != + 0) error("Error while allocating memory for particles"); - bzero(*parts, *N * sizeof(struct part)); + bzero(*parts, *Ngas * sizeof(struct part)); + + /* Allocate memory to store all particles */ + const size_t Ndm = N[1]; + *Ngparts = N[1] + N[0]; + if (posix_memalign((void*)gparts, gpart_align, + *Ngparts * sizeof(struct gpart)) != 0) + error("Error while allocating memory for gravity particles"); + bzero(*gparts, *Ngparts * sizeof(struct gpart)); + /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) / */ /* (1024.*1024.)); */ + /* message("BoxSize = %lf", dim[0]); */ + /* message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, *Ngparts); */ + + /* For dry runs, only need to do this on rank 0 */ + if (dry_run) mpi_size = 1; /* Now loop over ranks and read the data */ - for (rank = 0; rank < mpi_size; ++rank) { + for (int rank = 0; rank < mpi_size; ++rank) { /* Is it this rank's turn to read ? 
*/ if (rank == mpi_rank) { @@ -498,17 +531,43 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, if (h_file < 0) error("Error while opening file '%s' on rank %d.", fileName, mpi_rank); - /* Open SPH particles group */ - /* message("Reading particle arrays..."); */ - h_grp = H5Gopen(h_file, "/PartType0", H5P_DEFAULT); - if (h_grp < 0) - error("Error while opening particle group on rank %d.\n", mpi_rank); + /* Loop over all particle types */ + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) { - /* Read particle fields into the particle structure */ - hydro_read_particles(h_grp, *N, N_total, offset, *parts); + /* Don't do anything if no particle of this kind */ + if (N[ptype] == 0) continue; - /* Close particle group */ - H5Gclose(h_grp); + /* Open the particle group in the file */ + char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; + snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d", + ptype); + h_grp = H5Gopen(h_file, partTypeGroupName, H5P_DEFAULT); + if (h_grp < 0) { + error("Error while opening particle group %s.", partTypeGroupName); + } + + /* Read particle fields into the particle structure */ + switch (ptype) { + + case GAS: + if (!dry_run) + hydro_read_particles(h_grp, N[ptype], N_total[ptype], + offset[ptype], *parts); + break; + + case DM: + if (!dry_run) + darkmatter_read_particles(h_grp, N[ptype], N_total[ptype], + offset[ptype], *gparts); + break; + + default: + error("Particle Type %d not yet supported. 
Aborting", ptype); + } + + /* Close particle group */ + H5Gclose(h_grp); + } /* Close file */ H5Fclose(h_file); @@ -518,6 +577,12 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, MPI_Barrier(comm); } + /* Prepare the DM particles */ + if (!dry_run) prepare_dm_gparts(*gparts, Ndm); + + /* Now duplicate the hydro particle into gparts */ + if (!dry_run) duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm); + /* message("Done Reading particles..."); */ } @@ -525,7 +590,11 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, * @brief Writes an HDF5 output file (GADGET-3 type) with its XMF descriptor * * @param e The engine containing all the system. - * @param us The UnitSystem used for the conversion of units in the output + * @param us The UnitSystem used for the conversion of units in the output. + * @param mpi_rank The MPI rank of this node. + * @param mpi_size The number of MPI ranks. + * @param comm The MPI communicator. + * @param info The MPI information object * * Creates an HDF5 output file and writes the particles contained * in the engine. 
If such a file already exists, it is erased and replaced @@ -538,35 +607,40 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info) { hid_t h_file = 0, h_grp = 0, h_grpsph = 0; - int N = e->s->nr_parts; + const size_t Ngas = e->s->nr_parts; + const size_t Ntot = e->s->nr_gparts; int periodic = e->s->periodic; - int numParticles[6] = {N, 0}; - int numParticlesHighWord[6] = {0}; - unsigned int flagEntropy[6] = {0}; - long long N_total = 0, offset = 0; - double offset_d = 0., N_d = 0., N_total_d = 0.; int numFiles = 1; - int rank = 0; struct part* parts = e->s->parts; - FILE* xmfFile = 0; + struct gpart* gparts = e->s->gparts; + struct gpart* dmparts = NULL; static int outputCount = 0; + FILE* xmfFile = 0; + + /* Number of particles of each type */ + // const size_t Ndm = Ntot - Ngas; + + /* MATTHIEU: Temporary fix to preserve master */ + const size_t Ndm = Ntot > 0 ? 
Ntot - Ngas : 0; + /* MATTHIEU: End temporary fix */ /* File name */ - char fileName[200]; - sprintf(fileName, "output_%03i.hdf5", outputCount); + char fileName[FILENAME_BUFFER_SIZE]; + snprintf(fileName, FILENAME_BUFFER_SIZE, "output_%03i.hdf5", outputCount); /* Compute offset in the file and total number of particles */ - /* Done using double to allow for up to 2^50=10^15 particles */ - N_d = (double)N; - MPI_Exscan(&N_d, &offset_d, 1, MPI_DOUBLE, MPI_SUM, comm); - N_total_d = offset_d + N_d; - MPI_Bcast(&N_total_d, 1, MPI_DOUBLE, mpi_size - 1, comm); - if (N_total_d > 1.e15) - error( - "Error while computing the offset for parallel output: Simulation has " - "more than 10^15 particles.\n"); - N_total = (long long)N_total_d; - offset = (long long)offset_d; + size_t N[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0}; + long long N_total[NUM_PARTICLE_TYPES] = {0}; + long long offset[NUM_PARTICLE_TYPES] = {0}; + MPI_Exscan(&N, &offset, NUM_PARTICLE_TYPES, MPI_LONG_LONG, MPI_SUM, comm); + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) + N_total[ptype] = offset[ptype] + N[ptype]; + + /* The last rank now has the correct N_total. 
Let's broadcast from there */ + MPI_Bcast(&N_total, 6, MPI_LONG_LONG, mpi_size - 1, comm); + + /* Now everybody konws its offset and the total number of particles of each + * type */ /* Do common stuff first */ if (mpi_rank == 0) { @@ -578,7 +652,7 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, xmfFile = prepareXMFfile(); /* Write the part corresponding to this specific output */ - writeXMFheader(xmfFile, N_total, fileName, e->time); + writeXMFoutputheader(xmfFile, fileName, e->time); /* Open file */ /* message("Opening file '%s'.", fileName); */ @@ -610,15 +684,24 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, writeAttribute(h_grp, "Time", DOUBLE, &dblTime, 1); /* GADGET-2 legacy values */ - numParticles[0] = (unsigned int)N_total; - writeAttribute(h_grp, "NumPart_ThisFile", UINT, numParticles, 6); - writeAttribute(h_grp, "NumPart_Total", UINT, numParticles, 6); - numParticlesHighWord[0] = (unsigned int)(N_total >> 32); + /* Number of particles of each type */ + unsigned int numParticles[NUM_PARTICLE_TYPES] = {0}; + unsigned int numParticlesHighWord[NUM_PARTICLE_TYPES] = {0}; + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) { + numParticles[ptype] = (unsigned int)N_total[ptype]; + numParticlesHighWord[ptype] = (unsigned int)(N_total[ptype] >> 32); + } + writeAttribute(h_grp, "NumPart_ThisFile", LONGLONG, N_total, + NUM_PARTICLE_TYPES); + writeAttribute(h_grp, "NumPart_Total", UINT, numParticles, + NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticlesHighWord, - 6); + NUM_PARTICLE_TYPES); double MassTable[6] = {0., 0., 0., 0., 0., 0.}; - writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, 6); - writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, 6); + writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, NUM_PARTICLE_TYPES); + unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0}; + writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, 
+ NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1); /* Close header */ @@ -636,21 +719,32 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, /* Print the system of Units */ writeUnitSystem(h_file, us); - /* Create SPH particles group */ - /* message("Writing particle arrays..."); */ - h_grp = - H5Gcreate(h_file, "/PartType0", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (h_grp < 0) error("Error while creating particle group.\n"); + /* Loop over all particle types */ + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) { - /* Close particle group */ - H5Gclose(h_grp); + /* Don't do anything if no particle of this kind */ + if (N_total[ptype] == 0) continue; + + /* Open the particle group in the file */ + char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; + snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d", + ptype); + h_grp = H5Gcreate(h_file, partTypeGroupName, H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp < 0) { + error("Error while creating particle group.\n"); + } + + /* Close particle group */ + H5Gclose(h_grp); + } /* Close file */ H5Fclose(h_file); } /* Now loop over ranks and write the data */ - for (rank = 0; rank < mpi_size; ++rank) { + for (int rank = 0; rank < mpi_size; ++rank) { /* Is it this rank's turn to write ? 
*/ if (rank == mpi_rank) { @@ -659,18 +753,65 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, if (h_file < 0) error("Error while opening file '%s' on rank %d.", fileName, mpi_rank); - /* Open SPH particles group */ - /* message("Reading particle arrays..."); */ - h_grp = H5Gopen(h_file, "/PartType0", H5P_DEFAULT); - if (h_grp < 0) - error("Error while opening particle group on rank %d.\n", mpi_rank); + /* Loop over all particle types */ + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) { - /* Write particle fields from the particle structure */ - hydro_write_particles(h_grp, fileName, xmfFile, N, N_total, mpi_rank, - offset, parts, us); + /* Don't do anything if no particle of this kind */ + if (N_total[ptype] == 0) continue; - /* Close particle group */ - H5Gclose(h_grp); + /* Add the global information for that particle type to the XMF + * meta-file */ + if (mpi_rank == 0) + writeXMFgroupheader(xmfFile, fileName, N_total[ptype], ptype); + + /* Open the particle group in the file */ + char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; + snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d", + ptype); + h_grp = H5Gopen(h_file, partTypeGroupName, H5P_DEFAULT); + if (h_grp < 0) { + error("Error while opening particle group %s.", partTypeGroupName); + } + + /* Read particle fields into the particle structure */ + switch (ptype) { + + case GAS: + hydro_write_particles(h_grp, fileName, partTypeGroupName, xmfFile, + N[ptype], N_total[ptype], mpi_rank, + offset[ptype], parts, us); + + break; + + case DM: + /* Allocate temporary array */ + if (posix_memalign((void*)&dmparts, gpart_align, + Ndm * sizeof(struct gpart)) != 0) + error("Error while allocating temporart memory for DM particles"); + bzero(dmparts, Ndm * sizeof(struct gpart)); + + /* Collect the DM particles from gpart */ + collect_dm_gparts(gparts, Ntot, dmparts, Ndm); + + /* Write DM particles */ + darkmatter_write_particles(h_grp, fileName, 
partTypeGroupName, + xmfFile, N[ptype], N_total[ptype], + mpi_rank, offset[ptype], dmparts, us); + + /* Free temporary array */ + free(dmparts); + break; + + default: + error("Particle Type %d not yet supported. Aborting", ptype); + } + + /* Close particle group */ + H5Gclose(h_grp); + + /* Close this particle group in the XMF file as well */ + if (mpi_rank == 0) writeXMFgroupfooter(xmfFile, ptype); + } /* Close file */ H5Fclose(h_file); @@ -681,7 +822,7 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, } /* Write footer of LXMF file descriptor */ - if (mpi_rank == 0) writeXMFfooter(xmfFile); + if (mpi_rank == 0) writeXMFoutputfooter(xmfFile, outputCount, e->time); /* message("Done writing particles..."); */ ++outputCount; diff --git a/src/serial_io.h b/src/serial_io.h index 95f09f5977a97a359e978db7a1b71b02030d6a14..74ab8326dbeeb955e354687059cdd595657285f0 100644 --- a/src/serial_io.h +++ b/src/serial_io.h @@ -32,8 +32,9 @@ #if defined(HAVE_HDF5) && defined(WITH_MPI) && !defined(HAVE_PARALLEL_HDF5) void read_ic_serial(char* fileName, double dim[3], struct part** parts, - size_t* N, int* periodic, int mpi_rank, int mpi_size, - MPI_Comm comm, MPI_Info info); + struct gpart** gparts, size_t* Ngas, size_t* Ngparts, + int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, + MPI_Info info, int dry_run); void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info); diff --git a/src/single_io.c b/src/single_io.c index 59686a68b5d9e5ea41267ba7b3aad9391862fae4..1dc71087e102ff884dba7b7d4b6dcd6339335cac 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -39,9 +39,6 @@ #include "common_io.h" #include "error.h" -#define FILENAME_BUFFER_SIZE 150 -#define PARTICLE_GROUP_BUFFER_SIZE 20 - /*----------------------------------------------------------------------------- * Routines reading an IC file *-----------------------------------------------------------------------------*/ @@ 
-56,24 +53,23 @@ * @param dim The dimension of the data (1 for scalar, 3 for vector) * @param part_c A (char*) pointer on the first occurrence of the field of *interest in the parts array + * @param partSize The size in bytes of the particle structure. * @param importance If COMPULSORY, the data must be present in the IC file. If *OPTIONAL, the array will be zeroed when the data is not present. * * @todo A better version using HDF5 hyper-slabs to read the file directly into *the part array * will be written once the structures have been stabilized. - * - * Calls #error() if an error occurs. */ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, - int dim, char* part_c, enum DATA_IMPORTANCE importance) { + int dim, char* part_c, size_t partSize, + enum DATA_IMPORTANCE importance) { hid_t h_data = 0, h_err = 0, h_type = 0; htri_t exist = 0; void* temp; int i = 0; const size_t typeSize = sizeOfType(type); const size_t copySize = typeSize * dim; - const size_t partSize = sizeof(struct part); char* temp_c = 0; /* Check whether the dataspace exists or not */ @@ -141,23 +137,25 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, * @param grp The group in which to write. * @param fileName The name of the file in which the data is written * @param xmfFile The FILE used to write the XMF description + * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param name The name of the array to write. * @param type The #DATA_TYPE of the array. * @param N The number of particles to write. * @param dim The dimension of the data (1 for scalar, 3 for vector) * @param part_c A (char*) pointer on the first occurrence of the field of - *interest in the parts array + *interest in the parts array. + * @param partSize The size in bytes of the particle structure. 
* @param us The UnitSystem currently in use * @param convFactor The UnitConversionFactor for this array * * @todo A better version using HDF5 hyper-slabs to write the file directly from *the part array * will be written once the structures have been stabilized. - * - * Calls #error() if an error occurs. */ -void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, - enum DATA_TYPE type, int N, int dim, char* part_c, +void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, + char* partTypeGroupName, char* name, enum DATA_TYPE type, + int N, int dim, char* part_c, size_t partSize, struct UnitSystem* us, enum UnitConversionFactor convFactor) { hid_t h_data = 0, h_err = 0, h_space = 0, h_prop = 0; @@ -165,7 +163,6 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, int i = 0, rank = 0; const size_t typeSize = sizeOfType(type); const size_t copySize = typeSize * dim; - const size_t partSize = sizeof(struct part); char* temp_c = 0; hsize_t shape[2]; hsize_t chunk_shape[2]; @@ -204,7 +201,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, /* Make sure the chunks are not larger than the dataset */ if (chunk_shape[0] > N) chunk_shape[0] = N; - + /* Change shape of data space */ h_err = H5Sset_extent_simple(h_space, rank, shape, NULL); if (h_err < 0) { @@ -241,14 +238,14 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, } /* Write XMF description for this data set */ - writeXMFline(xmfFile, fileName, name, N, dim, type); + writeXMFline(xmfFile, fileName, partTypeGroupName, name, N, dim, type); /* Write unit conversion factors for this data set */ - conversionString(buffer, us, convFactor); + units_conversion_string(buffer, us, convFactor); writeAttribute_d(h_data, "CGS conversion factor", - conversionFactor(us, convFactor)); - writeAttribute_f(h_data, "h-scale exponent", hFactor(us, convFactor)); - writeAttribute_f(h_data, "a-scale exponent", aFactor(us, 
convFactor)); + units_conversion_factor(us, convFactor)); + writeAttribute_f(h_data, "h-scale exponent", units_h_factor(us, convFactor)); + writeAttribute_f(h_data, "a-scale exponent", units_a_factor(us, convFactor)); writeAttribute_s(h_data, "Conversion factor", buffer); /* Free and close everything */ @@ -276,7 +273,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, #define readArray(grp, name, type, N, dim, part, N_total, offset, field, \ importance) \ readArrayBackEnd(grp, name, type, N, dim, (char*)(&(part[0]).field), \ - importance) + sizeof(part[0]), importance) /** * @brief A helper macro to call the readArrayBackEnd function more easily. @@ -285,6 +282,8 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @param fileName The name of the file in which the data is written * @param xmfFile The FILE used to write the XMF description * @param name The name of the array to write. + * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param type The #DATA_TYPE of the array. * @param N The number of particles to write. 
* @param dim The dimension of the data (1 for scalar, 3 for vector) @@ -298,10 +297,12 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @param convFactor The UnitConversionFactor for this array * */ -#define writeArray(grp, fileName, xmfFile, name, type, N, dim, part, N_total, \ - mpi_rank, offset, field, us, convFactor) \ - writeArrayBackEnd(grp, fileName, xmfFile, name, type, N, dim, \ - (char*)(&(part[0]).field), us, convFactor) +#define writeArray(grp, fileName, xmfFile, partTypeGroupName, name, type, N, \ + dim, part, N_total, mpi_rank, offset, field, us, \ + convFactor) \ + writeArrayBackEnd(grp, fileName, xmfFile, partTypeGroupName, name, type, N, \ + dim, (char*)(&(part[0]).field), sizeof(part[0]), us, \ + convFactor) /* Import the right hydro definition */ #include "hydro_io.h" @@ -314,10 +315,11 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @param fileName The file to read. * @param dim (output) The dimension of the volume. * @param parts (output) Array of Gas particles. - * @param gparts (output) Array of DM particles. + * @param gparts (output) Array of #gpart particles. * @param Ngas (output) number of Gas particles read. - * @param Ngparts (output) The number of DM particles read. + * @param Ngparts (output) The number of #gpart read. * @param periodic (output) 1 if the volume is periodic, 0 if not. + * @param dry_run If 1, don't read the particle. Only allocates the arrays. * * Opens the HDF5 file fileName and reads the particles contained * in the parts array. N is the returned number of particles found @@ -326,17 +328,17 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @warning Can not read snapshot distributed over more than 1 file !!! * @todo Read snapshots distributed in more than one file. * - * Calls #error() if an error occurs. 
- * */ void read_ic_single(char* fileName, double dim[3], struct part** parts, struct gpart** gparts, size_t* Ngas, size_t* Ngparts, - int* periodic) { + int* periodic, int dry_run) { hid_t h_file = 0, h_grp = 0; /* GADGET has only cubic boxes (in cosmological mode) */ double boxSize[3] = {0.0, -1.0, -1.0}; /* GADGET has 6 particle types. We only keep the type 0 & 1 for now...*/ int numParticles[NUM_PARTICLE_TYPES] = {0}; + int numParticles_highWord[NUM_PARTICLE_TYPES] = {0}; + size_t N[NUM_PARTICLE_TYPES] = {0}; size_t Ndm; /* Open file */ @@ -365,9 +367,12 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, /* Read the relevant information and print status */ readAttribute(h_grp, "BoxSize", DOUBLE, boxSize); readAttribute(h_grp, "NumPart_Total", UINT, numParticles); + readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord); + + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) + N[ptype] = ((long long)numParticles[ptype]) + + ((long long)numParticles_highWord[ptype] << 32); - *Ngas = numParticles[0]; - Ndm = numParticles[1]; dim[0] = boxSize[0]; dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1]; dim[2] = (boxSize[2] < 0) ? 
boxSize[0] : boxSize[2]; @@ -378,16 +383,16 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, /* Close header */ H5Gclose(h_grp); - /* Total number of particles */ - *Ngparts = *Ngas + Ndm; - /* Allocate memory to store SPH particles */ + *Ngas = N[0]; if (posix_memalign((void*)parts, part_align, *Ngas * sizeof(struct part)) != 0) error("Error while allocating memory for SPH particles"); bzero(*parts, *Ngas * sizeof(struct part)); /* Allocate memory to store all particles */ + Ndm = N[1]; + *Ngparts = N[1] + N[0]; if (posix_memalign((void*)gparts, gpart_align, *Ngparts * sizeof(struct gpart)) != 0) error("Error while allocating memory for gravity particles"); @@ -396,16 +401,14 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) / * (1024.*1024.)); */ - /* Open SPH particles group */ - /* message("Reading particle arrays..."); */ - message("BoxSize = %lf", dim[0]); - message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, *Ngparts); + /* message("BoxSize = %lf", dim[0]); */ + /* message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, *Ngparts); */ /* Loop over all particle types */ for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) { /* Don't do anything if no particle of this kind */ - if (numParticles[ptype] == 0) continue; + if (N[ptype] == 0) continue; /* Open the particle group in the file */ char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; @@ -422,11 +425,11 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, switch (ptype) { case GAS: - hydro_read_particles(h_grp, *Ngas, *Ngas, 0, *parts); + if (!dry_run) hydro_read_particles(h_grp, *Ngas, *Ngas, 0, *parts); break; case DM: - darkmatter_read_particles(h_grp, Ndm, Ndm, 0, *gparts); + if (!dry_run) darkmatter_read_particles(h_grp, Ndm, Ndm, 0, *gparts); break; default: @@ -438,10 +441,10 @@ void read_ic_single(char* fileName, double dim[3], struct part** 
parts, } /* Prepare the DM particles */ - prepare_dm_gparts(*gparts, Ndm); + if (!dry_run) prepare_dm_gparts(*gparts, Ndm); /* Now duplicate the hydro particle into gparts */ - duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm); + if (!dry_run) duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm); /* message("Done Reading particles..."); */ @@ -476,10 +479,13 @@ void write_output_single(struct engine* e, struct UnitSystem* us) { static int outputCount = 0; /* Number of particles of each type */ - const size_t Ndm = Ntot - Ngas; - int numParticles[NUM_PARTICLE_TYPES] = /* Gadget-2 convention here */ - {Ngas, Ndm, 0}; /* Could use size_t instead */ - int numParticlesHighWord[NUM_PARTICLE_TYPES] = {0}; + // const size_t Ndm = Ntot - Ngas; + + /* MATTHIEU: Temporary fix to preserve master */ + const size_t Ndm = Ntot > 0 ? Ntot - Ngas : 0; + /* MATTHIEU: End temporary fix */ + + long long N_total[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0}; /* File name */ char fileName[FILENAME_BUFFER_SIZE]; @@ -493,7 +499,7 @@ void write_output_single(struct engine* e, struct UnitSystem* us) { xmfFile = prepareXMFfile(); /* Write the part corresponding to this specific output */ - writeXMFheader(xmfFile, Ngas, fileName, e->time); + writeXMFoutputheader(xmfFile, fileName, e->time); /* Open file */ /* message("Opening file '%s'.", fileName); */ @@ -521,19 +527,27 @@ void write_output_single(struct engine* e, struct UnitSystem* us) { /* Print the relevant information and print status */ writeAttribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3); - writeAttribute(h_grp, "NumPart_ThisFile", UINT, numParticles, - NUM_PARTICLE_TYPES); double dblTime = e->time; writeAttribute(h_grp, "Time", DOUBLE, &dblTime, 1); /* GADGET-2 legacy values */ + /* Number of particles of each type */ + unsigned int numParticles[NUM_PARTICLE_TYPES] = {0}; + unsigned int numParticlesHighWord[NUM_PARTICLE_TYPES] = {0}; + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) { + numParticles[ptype] = (unsigned 
int)N_total[ptype]; + numParticlesHighWord[ptype] = (unsigned int)(N_total[ptype] >> 32); + } + writeAttribute(h_grp, "NumPart_ThisFile", LONGLONG, N_total, + NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumPart_Total", UINT, numParticles, NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticlesHighWord, NUM_PARTICLE_TYPES); - double MassTable[NUM_PARTICLE_TYPES] = {0., 0., 0., 0., 0., 0.}; + double MassTable[NUM_PARTICLE_TYPES] = {0}; writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, NUM_PARTICLE_TYPES); - writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, numParticlesHighWord, + unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0}; + writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1); @@ -558,6 +572,9 @@ void write_output_single(struct engine* e, struct UnitSystem* us) { /* Don't do anything if no particle of this kind */ if (numParticles[ptype] == 0) continue; + /* Add the global information for that particle type to the XMF meta-file */ + writeXMFgroupheader(xmfFile, fileName, numParticles[ptype], ptype); + /* Open the particle group in the file */ char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d", @@ -574,8 +591,8 @@ void write_output_single(struct engine* e, struct UnitSystem* us) { switch (ptype) { case GAS: - hydro_write_particles(h_grp, fileName, xmfFile, Ngas, Ngas, 0, 0, parts, - us); + hydro_write_particles(h_grp, fileName, partTypeGroupName, xmfFile, Ngas, + Ngas, 0, 0, parts, us); break; case DM: @@ -589,8 +606,8 @@ void write_output_single(struct engine* e, struct UnitSystem* us) { collect_dm_gparts(gparts, Ntot, dmparts, Ndm); /* Write DM particles */ - darkmatter_write_particles(h_grp, fileName, xmfFile, Ndm, Ndm, 0, 0, - dmparts, us); + darkmatter_write_particles(h_grp, fileName, partTypeGroupName, xmfFile, + Ndm, Ndm, 0, 0, dmparts, us); /* Free 
temporary array */ free(dmparts); @@ -602,10 +619,13 @@ void write_output_single(struct engine* e, struct UnitSystem* us) { /* Close particle group */ H5Gclose(h_grp); + + /* Close this particle group in the XMF file as well */ + writeXMFgroupfooter(xmfFile, ptype); } /* Write LXMF file descriptor */ - writeXMFfooter(xmfFile); + writeXMFoutputfooter(xmfFile, outputCount, e->time); /* message("Done writing particles..."); */ diff --git a/src/single_io.h b/src/single_io.h index c5250280e82e1801b2a4a6136d404d09093dd0ec..587ebe07b6fa2b984b964baf282e7ceb1003ad29 100644 --- a/src/single_io.h +++ b/src/single_io.h @@ -28,7 +28,7 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, struct gpart** gparts, size_t* Ngas, size_t* Ndm, - int* periodic); + int* periodic, int dry_run); void write_output_single(struct engine* e, struct UnitSystem* us); diff --git a/src/space.c b/src/space.c index 62cc292588f7f57f0c91e2d8351ff8c5ee17a81a..17b1c72980c3e3343d4713c3088e1de072eacc3e 100644 --- a/src/space.c +++ b/src/space.c @@ -40,7 +40,7 @@ #include "atomic.h" #include "engine.h" #include "error.h" -#include "kernel.h" +#include "kernel_hydro.h" #include "lock.h" #include "minmax.h" #include "runner.h" @@ -99,12 +99,10 @@ const int sortlistID[27] = { int space_getsid(struct space *s, struct cell **ci, struct cell **cj, double *shift) { - int k, sid = 0, periodic = s->periodic; - struct cell *temp; - double dx[3]; - /* Get the relative distance between the pairs, wrapping. */ - for (k = 0; k < 3; k++) { + const int periodic = s->periodic; + double dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = (*cj)->loc[k] - (*ci)->loc[k]; if (periodic && dx[k] < -s->dim[k] / 2) shift[k] = s->dim[k]; @@ -116,15 +114,16 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj, } /* Get the sorting index. */ - for (k = 0; k < 3; k++) + int sid = 0; + for (int k = 0; k < 3; k++) sid = 3 * sid + ((dx[k] < 0.0) ? 0 : ((dx[k] > 0.0) ? 
2 : 1)); /* Switch the cells around? */ if (runner_flip[sid]) { - temp = *ci; + struct cell *temp = *ci; *ci = *cj; *cj = temp; - for (k = 0; k < 3; k++) shift[k] = -shift[k]; + for (int k = 0; k < 3; k++) shift[k] = -shift[k]; } sid = sortlistID[sid]; @@ -139,10 +138,8 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj, void space_rebuild_recycle(struct space *s, struct cell *c) { - int k; - if (c->split) - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) { space_rebuild_recycle(s, c->progeny[k]); space_recycle(s, c->progeny[k]); @@ -160,19 +157,19 @@ void space_rebuild_recycle(struct space *s, struct cell *c) { void space_regrid(struct space *s, double cell_max, int verbose) { - float h_max = s->cell_min / kernel_gamma / space_stretch, dmin; - int i, j, k, cdim[3], nr_parts = s->nr_parts; + float h_max = s->cell_min / kernel_gamma / space_stretch; + const size_t nr_parts = s->nr_parts; struct cell *restrict c; ticks tic = getticks(); /* Run through the parts and get the current h_max. */ // tic = getticks(); if (s->cells != NULL) { - for (k = 0; k < s->nr_cells; k++) { + for (int k = 0; k < s->nr_cells; k++) { if (s->cells[k].h_max > h_max) h_max = s->cells[k].h_max; } } else { - for (k = 0; k < nr_parts; k++) { + for (int k = 0; k < nr_parts; k++) { if (s->parts[k].h > h_max) h_max = s->parts[k].h; } s->h_max = h_max; @@ -192,7 +189,8 @@ void space_regrid(struct space *s, double cell_max, int verbose) { if (verbose) message("h_max is %.3e (cell_max=%.3e).", h_max, cell_max); /* Get the new putative cell dimensions. */ - for (k = 0; k < 3; k++) + int cdim[3]; + for (int k = 0; k < 3; k++) cdim[k] = floor(s->dim[k] / fmax(h_max * kernel_gamma * space_stretch, cell_max)); @@ -242,7 +240,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) { /* Free the old cells, if they were allocated. 
*/ if (s->cells != NULL) { - for (k = 0; k < s->nr_cells; k++) { + for (int k = 0; k < s->nr_cells; k++) { space_rebuild_recycle(s, &s->cells[k]); if (s->cells[k].sort != NULL) free(s->cells[k].sort); } @@ -251,12 +249,12 @@ void space_regrid(struct space *s, double cell_max, int verbose) { } /* Set the new cell dimensions only if smaller. */ - for (k = 0; k < 3; k++) { + for (int k = 0; k < 3; k++) { s->cdim[k] = cdim[k]; s->h[k] = s->dim[k] / cdim[k]; s->ih[k] = 1.0 / s->h[k]; } - dmin = fminf(s->h[0], fminf(s->h[1], s->h[2])); + const float dmin = fminf(s->h[0], fminf(s->h[1], s->h[2])); /* Allocate the highest level of cells. */ s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2]; @@ -264,13 +262,13 @@ void space_regrid(struct space *s, double cell_max, int verbose) { s->nr_cells * sizeof(struct cell)) != 0) error("Failed to allocate cells."); bzero(s->cells, s->nr_cells * sizeof(struct cell)); - for (k = 0; k < s->nr_cells; k++) + for (int k = 0; k < s->nr_cells; k++) if (lock_init(&s->cells[k].lock) != 0) error("Failed to init spinlock."); /* Set the cell location and sizes. */ - for (i = 0; i < cdim[0]; i++) - for (j = 0; j < cdim[1]; j++) - for (k = 0; k < cdim[2]; k++) { + for (int i = 0; i < cdim[0]; i++) + for (int j = 0; j < cdim[1]; j++) + for (int k = 0; k < cdim[2]; k++) { c = &s->cells[cell_getid(cdim, i, j, k)]; c->loc[0] = i * s->h[0]; c->loc[1] = j * s->h[1]; @@ -333,7 +331,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) { else { /* Free the old cells, if they were allocated. */ - for (k = 0; k < s->nr_cells; k++) { + for (int k = 0; k < s->nr_cells; k++) { space_rebuild_recycle(s, &s->cells[k]); s->cells[k].sorts = NULL; s->cells[k].nr_tasks = 0; @@ -370,7 +368,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) { void space_rebuild(struct space *s, double cell_max, int verbose) { - ticks tic = getticks(); + const ticks tic = getticks(); /* Be verbose about this. */ // message( "re)building space..." 
); fflush(stdout); @@ -382,23 +380,15 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { int nr_gparts = s->nr_gparts; struct cell *restrict cells = s->cells; - double ih[3], dim[3]; - int cdim[3]; - ih[0] = s->ih[0]; - ih[1] = s->ih[1]; - ih[2] = s->ih[2]; - dim[0] = s->dim[0]; - dim[1] = s->dim[1]; - dim[2] = s->dim[2]; - cdim[0] = s->cdim[0]; - cdim[1] = s->cdim[1]; - cdim[2] = s->cdim[2]; + const double ih[3] = {s->ih[0], s->ih[1], s->ih[2]}; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; /* Run through the particles and get their cell index. */ // tic = getticks(); const size_t ind_size = s->size_parts; - size_t *ind; - if ((ind = (size_t *)malloc(sizeof(size_t) * ind_size)) == NULL) + int *ind; + if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL) error("Failed to allocate temporary particle indices."); for (int k = 0; k < nr_parts; k++) { struct part *restrict p = &s->parts[k]; @@ -411,37 +401,91 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); cells[ind[k]].count++; } + // message( "getting particle indices took %.3f %s." , + // clocks_from_ticks(getticks() - tic), clocks_getunit()): + + /* Run through the gravity particles and get their cell index. */ + // tic = getticks(); + const size_t gind_size = s->size_gparts; + int *gind; + if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL) + error("Failed to allocate temporary g-particle indices."); + for (int k = 0; k < nr_gparts; k++) { + struct gpart *restrict gp = &s->gparts[k]; + for (int j = 0; j < 3; j++) + if (gp->x[j] < 0.0) + gp->x[j] += dim[j]; + else if (gp->x[j] >= dim[j]) + gp->x[j] -= dim[j]; + gind[k] = + cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]); + cells[gind[k]].gcount++; + } // message( "getting particle indices took %.3f %s." 
, -// clocks_from_ticks(getticks() - tic), clocks_getunit()): +// clocks_from_ticks(getticks() - tic), clocks_getunit()); #ifdef WITH_MPI /* Move non-local parts to the end of the list. */ - const int nodeID = s->e->nodeID; + const int local_nodeID = s->e->nodeID; for (int k = 0; k < nr_parts; k++) - if (cells[ind[k]].nodeID != nodeID) { + if (cells[ind[k]].nodeID != local_nodeID) { cells[ind[k]].count -= 1; nr_parts -= 1; - struct part tp = s->parts[k]; + const struct part tp = s->parts[k]; s->parts[k] = s->parts[nr_parts]; s->parts[nr_parts] = tp; - struct xpart txp = s->xparts[k]; + if (s->parts[k].gpart != NULL) { + s->parts[k].gpart->part = &s->parts[k]; + } + if (s->parts[nr_parts].gpart != NULL) { + s->parts[nr_parts].gpart->part = &s->parts[nr_parts]; + } + const struct xpart txp = s->xparts[k]; s->xparts[k] = s->xparts[nr_parts]; s->xparts[nr_parts] = txp; - int t = ind[k]; + const int t = ind[k]; ind[k] = ind[nr_parts]; ind[nr_parts] = t; } + /* Move non-local gparts to the end of the list. */ + for (int k = 0; k < nr_gparts; k++) + if (cells[gind[k]].nodeID != local_nodeID) { + cells[gind[k]].gcount -= 1; + nr_gparts -= 1; + const struct gpart tp = s->gparts[k]; + s->gparts[k] = s->gparts[nr_gparts]; + s->gparts[nr_gparts] = tp; + if (s->gparts[k].id > 0) { + s->gparts[k].part->gpart = &s->gparts[k]; + } + if (s->gparts[nr_gparts].id > 0) { + s->gparts[nr_gparts].part->gpart = &s->gparts[nr_gparts]; + } + const int t = gind[k]; + gind[k] = gind[nr_gparts]; + gind[nr_gparts] = t; + } + /* Exchange the strays, note that this potentially re-allocates the parts arrays. */ - s->nr_parts = - nr_parts + engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], - s->nr_parts - nr_parts); + /* TODO: This function also exchanges gparts, but this is shorted-out + until they are fully implemented. 
*/ + size_t nr_parts_exchanged = s->nr_parts - nr_parts; + size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts; + engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged, + nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged); + + /* Add post-processing, i.e. re-linking/creating of gparts here. */ + + /* Set the new particle counts. */ + s->nr_parts = nr_parts + nr_parts_exchanged; + s->nr_gparts = nr_gparts + nr_gparts_exchanged; /* Re-allocate the index array if needed.. */ if (s->nr_parts > ind_size) { - size_t *ind_new; - if ((ind_new = (size_t *)malloc(sizeof(size_t) * s->nr_parts)) == NULL) + int *ind_new; + if ((ind_new = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL) error("Failed to allocate temporary particle indices."); memcpy(ind_new, ind, sizeof(size_t) * nr_parts); free(ind); @@ -450,7 +494,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* Assign each particle to its cell. */ for (int k = nr_parts; k < s->nr_parts; k++) { - struct part *p = &s->parts[k]; + const struct part *const p = &s->parts[k]; ind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); cells[ind[k]].count += 1; @@ -481,65 +525,24 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* We no longer need the indices as of here. */ free(ind); - /* Run through the gravity particles and get their cell index. */ - // tic = getticks(); - const size_t gind_size = s->size_gparts; - size_t *gind; - if ((gind = (size_t *)malloc(sizeof(size_t) * gind_size)) == NULL) - error("Failed to allocate temporary g-particle indices."); - for (int k = 0; k < nr_gparts; k++) { - struct gpart *gp = &s->gparts[k]; - for (int j = 0; j < 3; j++) - if (gp->x[j] < 0.0) - gp->x[j] += dim[j]; - else if (gp->x[j] >= dim[j]) - gp->x[j] -= dim[j]; - gind[k] = - cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]); - cells[gind[k]].gcount++; - } -// message( "getting particle indices took %.3f %s." 
, -// clocks_from_ticks(getticks() - tic), clocks_getunit()); - #ifdef WITH_MPI - /* Move non-local gparts to the end of the list. */ - for (int k = 0; k < nr_gparts; k++) - if (cells[ind[k]].nodeID != nodeID) { - cells[ind[k]].gcount -= 1; - nr_gparts -= 1; - struct gpart tp = s->gparts[k]; - s->gparts[k] = s->gparts[nr_gparts]; - s->gparts[nr_gparts] = tp; - int t = ind[k]; - ind[k] = ind[nr_gparts]; - ind[nr_gparts] = t; - } - - /* Exchange the strays, note that this potentially re-allocates - the parts arrays. */ - // s->nr_gparts = - // nr_gparts + engine_exchange_strays(s->e, nr_gparts, &ind[nr_gparts], - // s->nr_gparts - nr_gparts); - if (nr_gparts > 0) - error("Need to implement the exchange of strays for the gparts"); - /* Re-allocate the index array if needed.. */ if (s->nr_gparts > gind_size) { - size_t *gind_new; - if ((gind_new = (size_t *)malloc(sizeof(size_t) * s->nr_gparts)) == NULL) + int *gind_new; + if ((gind_new = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL) error("Failed to allocate temporary g-particle indices."); - memcpy(gind_new, gind, sizeof(size_t) * nr_gparts); + memcpy(gind_new, gind, sizeof(int) * nr_gparts); free(gind); gind = gind_new; } /* Assign each particle to its cell. */ for (int k = nr_gparts; k < s->nr_gparts; k++) { - struct gpart *p = &s->gparts[k]; + const struct gpart *const p = &s->gparts[k]; gind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); - cells[gind[k]].count += 1; + cells[gind[k]].gcount += 1; /* if ( cells[ ind[k] ].nodeID != nodeID ) error( "Received part that does not belong to me (nodeID=%i)." , cells[ ind[k] ].nodeID ); */ @@ -549,7 +552,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { #endif /* Sort the parts according to their cells. */ - space_gparts_sort(s->gparts, gind, nr_gparts, 0, s->nr_cells - 1); + space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose); /* Re-link the parts. 
*/ for (int k = 0; k < nr_gparts; k++) @@ -558,6 +561,28 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* We no longer need the indices as of here. */ free(gind); + /* Verify that the links are correct */ + /* MATTHIEU: To be commented out once we are happy */ + for (size_t k = 0; k < nr_gparts; ++k) { + + if (s->gparts[k].id > 0) { + + if (s->gparts[k].part->gpart != &s->gparts[k]) error("Linking problem !"); + + if (s->gparts[k].x[0] != s->gparts[k].part->x[0] || + s->gparts[k].x[1] != s->gparts[k].part->x[1] || + s->gparts[k].x[2] != s->gparts[k].part->x[2]) + error("Linked particles are not at the same position !"); + } + } + for (size_t k = 0; k < nr_parts; ++k) { + + if (s->parts[k].gpart != NULL) { + + if (s->parts[k].gpart->part != &s->parts[k]) error("Linking problem !"); + } + } + /* Hook the cells up to the parts. */ // tic = getticks(); struct part *finger = s->parts; @@ -593,7 +618,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { */ void space_split(struct space *s, struct cell *cells, int verbose) { - ticks tic = getticks(); + const ticks tic = getticks(); for (int k = 0; k < s->nr_cells; k++) scheduler_addtask(&s->e->sched, task_type_split_cell, task_subtype_none, k, @@ -617,10 +642,10 @@ void space_split(struct space *s, struct cell *cells, int verbose) { * @param verbose Are we talkative ? */ -void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max, +void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, int verbose) { - ticks tic = getticks(); + const ticks tic = getticks(); /*Populate the global parallel_sort structure with the input data */ space_sort_struct.parts = s->parts; @@ -644,7 +669,7 @@ void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max, space_sort_struct.waiting = 1; /* Launch the sorting tasks. 
*/ - engine_launch(s->e, s->e->nr_threads, (1 << task_type_psort), 0); + engine_launch(s->e, s->e->nr_threads, (1 << task_type_part_sort), 0); /* Verify space_sort_struct. */ /* for (int i = 1; i < N; i++) @@ -665,7 +690,7 @@ void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max, void space_do_parts_sort() { /* Pointers to the sorting data. */ - size_t *ind = space_sort_struct.ind; + int *ind = space_sort_struct.ind; struct part *parts = space_sort_struct.parts; struct xpart *xparts = space_sort_struct.xparts; @@ -787,103 +812,140 @@ void space_do_parts_sort() { } /* main loop. */ } -void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min, - int max) { - - struct qstack { - volatile size_t i, j; - volatile int min, max; - volatile int ready; - }; - struct qstack *qstack; - int qstack_size = 2 * (max - min) + 10; - volatile unsigned int first, last, waiting; - - int pivot; - ptrdiff_t i, ii, j, jj, temp_i; - int qid; - struct gpart temp_p; - - /* for ( int k = 0 ; k < N ; k++ ) - if ( ind[k] > max || ind[k] < min ) - error( "ind[%i]=%i is not in [%i,%i]." , k , ind[k] , min , max ); */ - - /* Allocate the stack. */ - if ((qstack = malloc(sizeof(struct qstack) * qstack_size)) == NULL) - error("Failed to allocate qstack."); - - /* Init the interval stack. */ - qstack[0].i = 0; - qstack[0].j = N - 1; - qstack[0].min = min; - qstack[0].max = max; - qstack[0].ready = 1; - for (i = 1; i < qstack_size; i++) qstack[i].ready = 0; - first = 0; - last = 1; - waiting = 1; +/** + * @brief Sort the g-particles and condensed particles according to the given + *indices. + * + * @param s The #space. + * @param ind The indices with respect to which the gparts are sorted. + * @param N The number of gparts + * @param min Lowest index. + * @param max highest index. + * @param verbose Are we talkative ? 
+ */ +void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max, + int verbose) { + + const ticks tic = getticks(); + + /*Populate the global parallel_sort structure with the input data */ + space_sort_struct.gparts = s->gparts; + space_sort_struct.ind = ind; + space_sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads; + if ((space_sort_struct.stack = malloc(sizeof(struct qstack) * + space_sort_struct.stack_size)) == NULL) + error("Failed to allocate sorting stack."); + for (int i = 0; i < space_sort_struct.stack_size; i++) + space_sort_struct.stack[i].ready = 0; + + /* Add the first interval. */ + space_sort_struct.stack[0].i = 0; + space_sort_struct.stack[0].j = N - 1; + space_sort_struct.stack[0].min = min; + space_sort_struct.stack[0].max = max; + space_sort_struct.stack[0].ready = 1; + space_sort_struct.first = 0; + space_sort_struct.last = 1; + space_sort_struct.waiting = 1; + + /* Launch the sorting tasks. */ + engine_launch(s->e, s->e->nr_threads, (1 << task_type_gpart_sort), 0); + + /* Verify space_sort_struct. */ + /* for (int i = 1; i < N; i++) + if (ind[i - 1] > ind[i]) + error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1, + ind[i - 1], i, + ind[i], min, max); + message("Sorting succeeded."); */ + + /* Clean up. */ + free(space_sort_struct.stack); + + if (verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + +void space_do_gparts_sort() { + + /* Pointers to the sorting data. */ + int *ind = space_sort_struct.ind; + struct gpart *gparts = space_sort_struct.gparts; /* Main loop. */ - while (waiting > 0) { + while (space_sort_struct.waiting) { /* Grab an interval off the queue. */ - qid = (first++) % qstack_size; + int qid = + atomic_inc(&space_sort_struct.first) % space_sort_struct.stack_size; + + /* Wait for the entry to be ready, or for the sorting do be done. 
*/ + while (!space_sort_struct.stack[qid].ready) + if (!space_sort_struct.waiting) return; /* Get the stack entry. */ - i = qstack[qid].i; - j = qstack[qid].j; - min = qstack[qid].min; - max = qstack[qid].max; - qstack[qid].ready = 0; + ptrdiff_t i = space_sort_struct.stack[qid].i; + ptrdiff_t j = space_sort_struct.stack[qid].j; + int min = space_sort_struct.stack[qid].min; + int max = space_sort_struct.stack[qid].max; + space_sort_struct.stack[qid].ready = 0; /* Loop over sub-intervals. */ while (1) { /* Bring beer. */ - pivot = (min + max) / 2; + const int pivot = (min + max) / 2; + /* message("Working on interval [%i,%i] with min=%i, max=%i, pivot=%i.", + i, j, min, max, pivot); */ /* One pass of QuickSort's partitioning. */ - ii = i; - jj = j; + ptrdiff_t ii = i; + ptrdiff_t jj = j; while (ii < jj) { while (ii <= j && ind[ii] <= pivot) ii++; while (jj >= i && ind[jj] > pivot) jj--; if (ii < jj) { - temp_i = ind[ii]; + size_t temp_i = ind[ii]; ind[ii] = ind[jj]; ind[jj] = temp_i; - temp_p = gparts[ii]; + struct gpart temp_p = gparts[ii]; gparts[ii] = gparts[jj]; gparts[jj] = temp_p; } } /* Verify space_sort_struct. */ - /* for ( int k = i ; k <= jj ; k++ ) - if ( ind[k] > pivot ) { - message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, - N=%i." , k , ind[k] , pivot , i , j , N ); - error( "Partition failed (<=pivot)." ); - } - for ( int k = jj+1 ; k <= j ; k++ ) - if ( ind[k] <= pivot ) { - message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, - N=%i." , k , ind[k] , pivot , i , j , N ); - error( "Partition failed (>pivot)." 
); - } */ + /* for (int k = i; k <= jj; k++) + if (ind[k] > pivot) { + message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k, + ind[k], pivot, i, j); + error("Partition failed (<=pivot)."); + } + for (int k = jj + 1; k <= j; k++) + if (ind[k] <= pivot) { + message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k, + ind[k], pivot, i, j); + error("Partition failed (>pivot)."); + } */ /* Split-off largest interval. */ if (jj - i > j - jj + 1) { /* Recurse on the left? */ if (jj > i && pivot > min) { - qid = (last++) % qstack_size; - qstack[qid].i = i; - qstack[qid].j = jj; - qstack[qid].min = min; - qstack[qid].max = pivot; - qstack[qid].ready = 1; - if ((waiting++) >= qstack_size) error("Qstack overflow."); + qid = atomic_inc(&space_sort_struct.last) % + space_sort_struct.stack_size; + while (space_sort_struct.stack[qid].ready) + ; + space_sort_struct.stack[qid].i = i; + space_sort_struct.stack[qid].j = jj; + space_sort_struct.stack[qid].min = min; + space_sort_struct.stack[qid].max = pivot; + if (atomic_inc(&space_sort_struct.waiting) >= + space_sort_struct.stack_size) + error("Qstack overflow."); + space_sort_struct.stack[qid].ready = 1; } /* Recurse on the right? */ @@ -897,13 +959,18 @@ void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min, /* Recurse on the right? 
*/ if (pivot + 1 < max) { - qid = (last++) % qstack_size; - qstack[qid].i = jj + 1; - qstack[qid].j = j; - qstack[qid].min = pivot + 1; - qstack[qid].max = max; - qstack[qid].ready = 1; - if ((waiting++) >= qstack_size) error("Qstack overflow."); + qid = atomic_inc(&space_sort_struct.last) % + space_sort_struct.stack_size; + while (space_sort_struct.stack[qid].ready) + ; + space_sort_struct.stack[qid].i = jj + 1; + space_sort_struct.stack[qid].j = j; + space_sort_struct.stack[qid].min = pivot + 1; + space_sort_struct.stack[qid].max = max; + if (atomic_inc(&space_sort_struct.waiting) >= + space_sort_struct.stack_size) + error("Qstack overflow."); + space_sort_struct.stack[qid].ready = 1; } /* Recurse on the left? */ @@ -916,18 +983,9 @@ void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min, } /* loop over sub-intervals. */ - waiting--; + atomic_dec(&space_sort_struct.waiting); } /* main loop. */ - - /* Verify space_sort_struct. */ - /* for ( i = 1 ; i < N ; i++ ) - if ( ind[i-1] > ind[i] ) - error( "Sorting failed (ind[%i]=%i,ind[%i]=%i)." , i-1 , ind[i-1] , i - , ind[i] ); */ - - /* Clean up. */ - free(qstack); } /** @@ -1299,14 +1357,15 @@ struct cell *space_getcell(struct space *s) { * @brief Split the space into cells given the array of particles. * * @param s The #space to initialize. + * @param params The parsed parameter file. * @param dim Spatial dimensions of the domain. * @param parts Array of Gas particles. * @param gparts Array of Gravity particles. - * @param Ngas The number of Gas particles in the space. + * @param Npart The number of Gas particles in the space. * @param Ngpart The number of Gravity particles in the space. * @param periodic flag whether the domain is periodic or not. - * @param h_max The maximal interaction radius. * @param verbose Print messages to stdout or not + * @param dry_run If 1, just initialise stuff, don't do anything with the parts. 
* * Makes a grid of edge length > r_max and fills the particles * into the respective cells. Cells containing more than #space_splitsize @@ -1314,65 +1373,114 @@ struct cell *space_getcell(struct space *s) { * recursively. */ -void space_init(struct space *s, double dim[3], struct part *parts, - struct gpart *gparts, size_t Ngas, size_t Ngpart, int periodic, - double h_max, int verbose) { +void space_init(struct space *s, const struct swift_params *params, + double dim[3], struct part *parts, struct gpart *gparts, + size_t Npart, size_t Ngpart, int periodic, int verbose, + int dry_run) { + + /* Clean-up everything */ + bzero(s, sizeof(struct space)); /* Store everything in the space. */ s->dim[0] = dim[0]; s->dim[1] = dim[1]; s->dim[2] = dim[2]; s->periodic = periodic; - s->nr_parts = Ngas; - s->size_parts = Ngas; + s->nr_parts = Npart; + s->size_parts = Npart; s->parts = parts; s->nr_gparts = Ngpart; s->size_gparts = Ngpart; s->gparts = gparts; - s->cell_min = h_max; - s->nr_queues = 1; + s->cell_min = parser_get_param_double(params, "SPH:max_smoothing_length"); + s->nr_queues = 1; /* Temporary value until engine construction */ s->size_parts_foreign = 0; - /* Check that all the gas particle positions are reasonable, wrap if periodic. 
- */ - if (periodic) { - for (int k = 0; k < Ngas; k++) - for (int j = 0; j < 3; j++) { - while (parts[k].x[j] < 0) parts[k].x[j] += dim[j]; - while (parts[k].x[j] >= dim[j]) parts[k].x[j] -= dim[j]; - } - } else { - for (int k = 0; k < Ngas; k++) - for (int j = 0; j < 3; j++) - if (parts[k].x[j] < 0 || parts[k].x[j] >= dim[j]) - error("Not all particles are within the specified domain."); + /* Get the constants for the scheduler */ + space_maxsize = parser_get_param_int(params, "Scheduler:cell_max_size"); + space_subsize = parser_get_param_int(params, "Scheduler:cell_sub_size"); + space_splitsize = parser_get_param_int(params, "Scheduler:cell_split_size"); + if(verbose) + message("max_size set to %d, sub_size set to %d, split_size set to %d", + space_maxsize, space_subsize, space_splitsize); + + /* Check that we have enough cells */ + if (s->cell_min * 3 > dim[0] || s->cell_min * 3 > dim[1] || + s->cell_min * 3 > dim[2]) + error( + "Maximal smoothing length (%e) too large. Needs to be " + "smaller than 1/3 the simulation box size [%e %e %e]", + s->cell_min, dim[0], dim[1], dim[2]); + + /* Apply h scaling */ + const double scaling = + parser_get_param_double(params, "InitialConditions:h_scaling"); + if (scaling != 1.0 && !dry_run) { + message("Re-scaling smoothing lengths by a factor %e", scaling); + for (size_t k = 0; k < Npart; k++) parts[k].h *= scaling; } - /* Same for the gparts */ - if (periodic) { - for (int k = 0; k < Ngpart; k++) - for (int j = 0; j < 3; j++) { - while (gparts[k].x[j] < 0) gparts[k].x[j] += dim[j]; - while (gparts[k].x[j] >= dim[j]) gparts[k].x[j] -= dim[j]; - } - } else { - for (int k = 0; k < Ngpart; k++) - for (int j = 0; j < 3; j++) - if (gparts[k].x[j] < 0 || gparts[k].x[j] >= dim[j]) - error("Not all particles are within the specified domain."); + /* Apply shift */ + double shift[3] = {0.0, 0.0, 0.0}; + shift[0] = parser_get_param_double(params, "InitialConditions:shift_x"); + shift[1] = parser_get_param_double(params, 
"InitialConditions:shift_y"); + shift[2] = parser_get_param_double(params, "InitialConditions:shift_z"); + if ((shift[0] != 0 || shift[1] != 0 || shift[2] != 0) && !dry_run) { + message("Shifting particles by [%e %e %e]", shift[0], shift[1], shift[2]); + for (size_t k = 0; k < Npart; k++) { + parts[k].x[0] += shift[0]; + parts[k].x[1] += shift[1]; + parts[k].x[2] += shift[2]; + } + for (size_t k = 0; k < Ngpart; k++) { + gparts[k].x[0] += shift[0]; + gparts[k].x[1] += shift[1]; + gparts[k].x[2] += shift[2]; + } + } + + if (!dry_run) { + + /* Check that all the part positions are reasonable, wrap if periodic. */ + if (periodic) { + for (int k = 0; k < Npart; k++) + for (int j = 0; j < 3; j++) { + while (parts[k].x[j] < 0) parts[k].x[j] += dim[j]; + while (parts[k].x[j] >= dim[j]) parts[k].x[j] -= dim[j]; + } + } else { + for (int k = 0; k < Npart; k++) + for (int j = 0; j < 3; j++) + if (parts[k].x[j] < 0 || parts[k].x[j] >= dim[j]) + error("Not all particles are within the specified domain."); + } + + /* Same for the gparts */ + if (periodic) { + for (int k = 0; k < Ngpart; k++) + for (int j = 0; j < 3; j++) { + while (gparts[k].x[j] < 0) gparts[k].x[j] += dim[j]; + while (gparts[k].x[j] >= dim[j]) gparts[k].x[j] -= dim[j]; + } + } else { + for (int k = 0; k < Ngpart; k++) + for (int j = 0; j < 3; j++) + if (gparts[k].x[j] < 0 || gparts[k].x[j] >= dim[j]) + error("Not all g-particles are within the specified domain."); + } } /* Allocate the extra parts array. */ if (posix_memalign((void *)&s->xparts, xpart_align, - Ngas * sizeof(struct xpart)) != 0) + Npart * sizeof(struct xpart)) != 0) error("Failed to allocate xparts."); - bzero(s->xparts, Ngas * sizeof(struct xpart)); + bzero(s->xparts, Npart * sizeof(struct xpart)); /* Init the space lock. */ if (lock_init(&s->lock) != 0) error("Failed to create space spin-lock."); /* Build the cells and the tasks. 
*/ - space_regrid(s, h_max, verbose); + if (!dry_run) space_regrid(s, s->cell_min, verbose); } /** diff --git a/src/space.h b/src/space.h index 91485ff7e2ebe9da8ab927748589ae9f71320803..88e2f6f52774651217c4ff24e25f549d8ae1e347 100644 --- a/src/space.h +++ b/src/space.h @@ -24,6 +24,7 @@ /* Local includes. */ #include "cell.h" +#include "parser.h" #include "part.h" /* Forward-declare the engine to avoid cyclic includes. */ @@ -64,9 +65,6 @@ struct space { /* The minimum and maximum cutoff radii. */ double h_max, cell_min; - /* Current time step for particles. */ - float dt_step; - /* Current maximum displacement for particles. */ float dx_max; @@ -106,6 +104,8 @@ struct space { /* Buffers for parts that we will receive from foreign cells. */ struct part *parts_foreign; size_t nr_parts_foreign, size_parts_foreign; + struct gpart *gparts_foreign; + size_t nr_gparts_foreign, size_gparts_foreign; }; /* Interval stack necessary for parallel particle sorting. */ @@ -116,8 +116,9 @@ struct qstack { }; struct parallel_sort { struct part *parts; + struct gpart *gparts; struct xpart *xparts; - size_t *ind; + int *ind; struct qstack *stack; unsigned int stack_size; volatile unsigned int first, last, waiting; @@ -125,16 +126,17 @@ struct parallel_sort { extern struct parallel_sort space_sort_struct; /* function prototypes. 
*/ -void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max, +void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, int verbose); -void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min, - int max); +void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max, + int verbose); struct cell *space_getcell(struct space *s); int space_getsid(struct space *s, struct cell **ci, struct cell **cj, double *shift); -void space_init(struct space *s, double dim[3], struct part *parts, - struct gpart *gparts, size_t N, size_t Ngpart, int periodic, - double h_max, int verbose); +void space_init(struct space *s, const struct swift_params *params, + double dim[3], struct part *parts, struct gpart *gparts, + size_t Npart, size_t Ngpart, int periodic, int verbose, + int dry_run); void space_map_cells_pre(struct space *s, int full, void (*fun)(struct cell *c, void *data), void *data); void space_map_parts(struct space *s, @@ -150,5 +152,6 @@ void space_recycle(struct space *s, struct cell *c); void space_split(struct space *s, struct cell *cells, int verbose); void space_do_split(struct space *s, struct cell *c); void space_do_parts_sort(); +void space_do_gparts_sort(); void space_link_cleanup(struct space *s); #endif /* SWIFT_SPACE_H */ diff --git a/src/swift.h b/src/swift.h index 9ab090dccd195ff4927d3e614e446b36d273f824..e568a28c888295affc9ec45b6d059d34f5b4bf04 100644 --- a/src/swift.h +++ b/src/swift.h @@ -27,7 +27,6 @@ #include "cell.h" #include "clocks.h" #include "const.h" -#include "const.h" #include "cycle.h" #include "debug.h" #include "engine.h" @@ -38,7 +37,9 @@ #include "map.h" #include "multipole.h" #include "parallel_io.h" +#include "parser.h" #include "part.h" +#include "partition.h" #include "queue.h" #include "runner.h" #include "scheduler.h" @@ -47,9 +48,8 @@ #include "space.h" #include "task.h" #include "timers.h" -#include "units.h" #include "tools.h" -#include "partition.h" 
+#include "units.h" #include "version.h" #endif /* SWIFT_SWIFT_H */ diff --git a/src/task.c b/src/task.c index 69109f9e6d4fe8730a317db46ea3862e65ab90b2..5f1475a46e4626e1f51db673d73fd84f86e6edb6 100644 --- a/src/task.c +++ b/src/task.c @@ -43,9 +43,10 @@ /* Task type names. */ const char *taskID_names[task_type_count] = { - "none", "sort", "self", "pair", "sub", "init", - "ghost", "drift", "kick", "send", "recv", "grav_pp", - "grav_mm", "grav_up", "grav_down", "psort", "split_cell", "rewait"}; + "none", "sort", "self", "pair", "sub", + "init", "ghost", "drift", "kick", "send", + "recv", "grav_pp", "grav_mm", "grav_up", "grav_down", + "part_sort", "gpart_sort", "split_cell", "rewait"}; const char *subtaskID_names[task_type_count] = {"none", "density", "force", "grav"}; @@ -78,9 +79,10 @@ float task_overlap(const struct task *ta, const struct task *tb) { /* First check if any of the two tasks are of a type that don't use cells. */ if (ta == NULL || tb == NULL || ta->type == task_type_none || - ta->type == task_type_psort || ta->type == task_type_split_cell || - ta->type == task_type_rewait || tb->type == task_type_none || - tb->type == task_type_psort || tb->type == task_type_split_cell || + ta->type == task_type_part_sort || ta->type == task_type_gpart_sort || + ta->type == task_type_split_cell || ta->type == task_type_rewait || + tb->type == task_type_none || tb->type == task_type_part_sort || + tb->type == task_type_gpart_sort || tb->type == task_type_split_cell || tb->type == task_type_rewait) return 0.0f; @@ -145,7 +147,7 @@ int task_lock(struct task *t) { #ifdef WITH_MPI /* Check the status of the MPI request. 
*/ - int res, err; + int res = 0, err = 0; MPI_Status stat; if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) { char buff[MPI_MAX_ERROR_STRING]; diff --git a/src/task.h b/src/task.h index b86631cc49bfad102302e3bab380bfb5eb8ed1e0..9c0ba6087d772d7362a98bc40a838c6fa3713166 100644 --- a/src/task.h +++ b/src/task.h @@ -45,7 +45,8 @@ enum task_types { task_type_grav_mm, task_type_grav_up, task_type_grav_down, - task_type_psort, + task_type_part_sort, + task_type_gpart_sort, task_type_split_cell, task_type_rewait, task_type_count diff --git a/src/tools.c b/src/tools.c index 5feba7759f730faea1f38ceb9835f2076bc37a56..d25b7401a1e0515c650333b41193d54b5e155d39 100644 --- a/src/tools.c +++ b/src/tools.c @@ -236,6 +236,53 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { } } +void self_all_density(struct runner *r, struct cell *ci) { + float r2, hi, hj, hig2, hjg2, dxi[3]; //, dxj[3]; + struct part *pi, *pj; + + /* Implements a double-for loop and checks every interaction */ + for (int i = 0; i < ci->count; ++i) { + + pi = &ci->parts[i]; + hi = pi->h; + hig2 = hi * hi * kernel_gamma2; + + for (int j = i + 1; j < ci->count; ++j) { + + pj = &ci->parts[j]; + hj = pj->h; + hjg2 = hj * hj * kernel_gamma2; + + if (pi == pj) continue; + + /* Pairwise distance */ + r2 = 0.0f; + for (int k = 0; k < 3; k++) { + dxi[k] = ci->parts[i].x[k] - ci->parts[j].x[k]; + r2 += dxi[k] * dxi[k]; + } + + /* Hit or miss? */ + if (r2 < hig2) { + + /* Interact */ + runner_iact_nonsym_density(r2, dxi, hi, hj, pi, pj); + } + + /* Hit or miss? 
*/ + if (r2 < hjg2) { + + dxi[0] = -dxi[0]; + dxi[1] = -dxi[1]; + dxi[2] = -dxi[2]; + + /* Interact */ + runner_iact_nonsym_density(r2, dxi, hj, hi, pj, pi); + } + } + } +} + void pairs_single_grav(double *dim, long long int pid, struct gpart *__restrict__ parts, int N, int periodic) { @@ -253,9 +300,9 @@ void pairs_single_grav(double *dim, long long int pid, break; if (k == N) error("Part not found."); pi = parts[k]; - pi.a[0] = 0.0f; - pi.a[1] = 0.0f; - pi.a[2] = 0.0f; + pi.a_grav[0] = 0.0f; + pi.a_grav[1] = 0.0f; + pi.a_grav[2] = 0.0f; /* Loop over all particle pairs. */ for (k = 0; k < N; k++) { @@ -273,15 +320,15 @@ void pairs_single_grav(double *dim, long long int pid, } r2 = fdx[0] * fdx[0] + fdx[1] * fdx[1] + fdx[2] * fdx[2]; runner_iact_grav(r2, fdx, &pi, &pj); - a[0] += pi.a[0]; - a[1] += pi.a[1]; - a[2] += pi.a[2]; - aabs[0] += fabsf(pi.a[0]); - aabs[1] += fabsf(pi.a[1]); - aabs[2] += fabsf(pi.a[2]); - pi.a[0] = 0.0f; - pi.a[1] = 0.0f; - pi.a[2] = 0.0f; + a[0] += pi.a_grav[0]; + a[1] += pi.a_grav[1]; + a[2] += pi.a_grav[2]; + aabs[0] += fabsf(pi.a_grav[0]); + aabs[1] += fabsf(pi.a_grav[1]); + aabs[2] += fabsf(pi.a_grav[2]); + pi.a_grav[0] = 0.0f; + pi.a_grav[1] = 0.0f; + pi.a_grav[2] = 0.0f; } /* Dump the result. 
*/ diff --git a/src/tools.h b/src/tools.h index 59646291bda46a7dd0f5a34e158e3e0a6f21d3ca..ccffc77ceb8a967fd40c3737651ba75d529eee0f 100644 --- a/src/tools.h +++ b/src/tools.h @@ -33,6 +33,7 @@ void pairs_single_density(double *dim, long long int pid, struct part *__restrict__ parts, int N, int periodic); void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj); +void self_all_density(struct runner *r, struct cell *ci); void pairs_n2(double *dim, struct part *__restrict__ parts, int N, int periodic); diff --git a/src/units.c b/src/units.c index 8c9fd14452e9e1fdfe029ac89d22d7cd43aa0ef7..184dbe8a0df000008dba1d7003558d83b1f08cad 100644 --- a/src/units.c +++ b/src/units.c @@ -43,17 +43,24 @@ /** * @brief Initialises the UnitSystem structure with the constants given in - * const.h - * @param us The UnitSystem to initialize + * rhe parameter file. + * + * @param us The UnitSystem to initialize. + * @param params The parsed parameter file. */ - -void initUnitSystem(struct UnitSystem* us) { - us->UnitMass_in_cgs = const_unit_mass_in_cgs; - us->UnitLength_in_cgs = const_unit_length_in_cgs; - us->UnitTime_in_cgs = 1. 
/ ((double)const_unit_velocity_in_cgs / - ((double)const_unit_length_in_cgs)); - us->UnitCurrent_in_cgs = 1.; - us->UnitTemperature_in_cgs = 1.; +void units_init(struct UnitSystem* us, const struct swift_params* params) { + + us->UnitMass_in_cgs = + parser_get_param_double(params, "UnitSystem:UnitMass_in_cgs"); + us->UnitLength_in_cgs = + parser_get_param_double(params, "UnitSystem:UnitLength_in_cgs"); + const double unitVelocity = + parser_get_param_double(params, "UnitSystem:UnitVelocity_in_cgs"); + us->UnitTime_in_cgs = us->UnitLength_in_cgs / unitVelocity; + us->UnitCurrent_in_cgs = + parser_get_param_double(params, "UnitSystem:UnitCurrent_in_cgs"); + us->UnitTemperature_in_cgs = + parser_get_param_double(params, "UnitSystem:UnitTemp_in_cgs"); } /** @@ -61,7 +68,8 @@ void initUnitSystem(struct UnitSystem* us) { * @param us The UnitSystem used * @param baseUnit The base unit */ -double getBaseUnit(struct UnitSystem* us, enum BaseUnits baseUnit) { +double units_get_base_unit(const struct UnitSystem* us, + enum BaseUnits baseUnit) { switch (baseUnit) { case UNIT_MASS: return us->UnitMass_in_cgs; @@ -83,7 +91,7 @@ double getBaseUnit(struct UnitSystem* us, enum BaseUnits baseUnit) { * @brief Returns the base unit symbol * @param baseUnit The base unit */ -const char* getBaseUnitSymbol(enum BaseUnits baseUnit) { +const char* units_get_base_unit_symbol(enum BaseUnits baseUnit) { switch (baseUnit) { case UNIT_MASS: return "U_M"; @@ -105,7 +113,7 @@ const char* getBaseUnitSymbol(enum BaseUnits baseUnit) { * @brief Returns the base unit symbol in the cgs system * @param baseUnit The base unit */ -const char* getBaseUnitCGSSymbol(enum BaseUnits baseUnit) { +const char* units_get_base_unit_CGS_symbol(enum BaseUnits baseUnit) { switch (baseUnit) { case UNIT_MASS: return "g"; @@ -123,8 +131,8 @@ const char* getBaseUnitCGSSymbol(enum BaseUnits baseUnit) { return ""; } -void getBaseUnitExponantsArray(float baseUnitsExp[5], - enum UnitConversionFactor unit) { +void 
units_get_base_unit_exponants_array(float baseUnitsExp[5], + enum UnitConversionFactor unit) { switch (unit) { case UNIT_CONV_NO_UNITS: break; @@ -265,12 +273,13 @@ void getBaseUnitExponantsArray(float baseUnitsExp[5], * @param us The system of units in use * @param unit The unit to convert */ -double conversionFactor(struct UnitSystem* us, enum UnitConversionFactor unit) { +double units_conversion_factor(const struct UnitSystem* us, + enum UnitConversionFactor unit) { float baseUnitsExp[5] = {0.f}; - getBaseUnitExponantsArray(baseUnitsExp, unit); + units_get_base_unit_exponants_array(baseUnitsExp, unit); - return generalConversionFactor(us, baseUnitsExp); + return units_general_conversion_factor(us, baseUnitsExp); } /** @@ -278,12 +287,13 @@ double conversionFactor(struct UnitSystem* us, enum UnitConversionFactor unit) { * @param us The system of units in use * @param unit The unit to convert */ -float hFactor(struct UnitSystem* us, enum UnitConversionFactor unit) { +float units_h_factor(const struct UnitSystem* us, + enum UnitConversionFactor unit) { float baseUnitsExp[5] = {0.f}; - getBaseUnitExponantsArray(baseUnitsExp, unit); + units_get_base_unit_exponants_array(baseUnitsExp, unit); - return generalhFactor(us, baseUnitsExp); + return units_general_h_factor(us, baseUnitsExp); } /** @@ -291,25 +301,26 @@ float hFactor(struct UnitSystem* us, enum UnitConversionFactor unit) { * @param us The system of units in use * @param unit The unit to convert */ -float aFactor(struct UnitSystem* us, enum UnitConversionFactor unit) { +float units_a_factor(const struct UnitSystem* us, + enum UnitConversionFactor unit) { float baseUnitsExp[5] = {0.f}; - getBaseUnitExponantsArray(baseUnitsExp, unit); + units_get_base_unit_exponants_array(baseUnitsExp, unit); - return generalaFactor(us, baseUnitsExp); + return units_general_a_factor(us, baseUnitsExp); } /** * @brief Returns a string containing the exponents of the base units making up * the conversion factors */ -void 
conversionString(char* buffer, struct UnitSystem* us, - enum UnitConversionFactor unit) { +void units_conversion_string(char* buffer, const struct UnitSystem* us, + enum UnitConversionFactor unit) { float baseUnitsExp[5] = {0.f}; - getBaseUnitExponantsArray(baseUnitsExp, unit); + units_get_base_unit_exponants_array(baseUnitsExp, unit); - generalConversionString(buffer, us, baseUnitsExp); + units_general_conversion_string(buffer, us, baseUnitsExp); } /** @@ -319,14 +330,14 @@ void conversionString(char* buffer, struct UnitSystem* us, * @param baseUnitsExponants The exponent of each base units required to form * the desired quantity. See conversionFactor() for a working example */ -double generalConversionFactor(struct UnitSystem* us, - float baseUnitsExponants[5]) { +double units_general_conversion_factor(const struct UnitSystem* us, + float baseUnitsExponants[5]) { double factor = 1.; int i; for (i = 0; i < 5; ++i) if (baseUnitsExponants[i] != 0) - factor *= pow(getBaseUnit(us, i), baseUnitsExponants[i]); + factor *= pow(units_get_base_unit(us, i), baseUnitsExponants[i]); return factor; } @@ -337,7 +348,8 @@ double generalConversionFactor(struct UnitSystem* us, * @param baseUnitsExponants The exponent of each base units required to form * the desired quantity. See conversionFactor() for a working example */ -float generalhFactor(struct UnitSystem* us, float baseUnitsExponants[5]) { +float units_general_h_factor(const struct UnitSystem* us, + float baseUnitsExponants[5]) { float factor_exp = 0.f; factor_exp += -baseUnitsExponants[UNIT_MASS]; @@ -354,7 +366,8 @@ float generalhFactor(struct UnitSystem* us, float baseUnitsExponants[5]) { * @param baseUnitsExponants The exponent of each base units required to form * the desired quantity. 
See conversionFactor() for a working example */ -float generalaFactor(struct UnitSystem* us, float baseUnitsExponants[5]) { +float units_general_a_factor(const struct UnitSystem* us, + float baseUnitsExponants[5]) { float factor_exp = 0.f; factor_exp += baseUnitsExponants[UNIT_LENGTH]; @@ -371,11 +384,11 @@ float generalaFactor(struct UnitSystem* us, float baseUnitsExponants[5]) { * @param baseUnitsExponants The exponent of each base units required to form * the desired quantity. See conversionFactor() for a working example */ -void generalConversionString(char* buffer, struct UnitSystem* us, - float baseUnitsExponants[5]) { +void units_general_conversion_string(char* buffer, const struct UnitSystem* us, + float baseUnitsExponants[5]) { char temp[14]; - double a_exp = generalaFactor(us, baseUnitsExponants); - double h_exp = generalhFactor(us, baseUnitsExponants); + double a_exp = units_general_a_factor(us, baseUnitsExponants); + double h_exp = units_general_h_factor(us, baseUnitsExponants); int i; /* Check whether we are unitless or not */ @@ -415,12 +428,13 @@ void generalConversionString(char* buffer, struct UnitSystem* us, if (baseUnitsExponants[i] == 0.) sprintf(temp, " "); else if (baseUnitsExponants[i] == 1.) - sprintf(temp, "%s ", getBaseUnitSymbol(i)); + sprintf(temp, "%s ", units_get_base_unit_symbol(i)); else if (remainder(baseUnitsExponants[i], 1.) == 0) - sprintf(temp, "%s^%d ", getBaseUnitSymbol(i), + sprintf(temp, "%s^%d ", units_get_base_unit_symbol(i), (int)baseUnitsExponants[i]); else - sprintf(temp, "%s^%7.4f ", getBaseUnitSymbol(i), baseUnitsExponants[i]); + sprintf(temp, "%s^%7.4f ", units_get_base_unit_symbol(i), + baseUnitsExponants[i]); strncat(buffer, temp, 12); } @@ -432,12 +446,12 @@ void generalConversionString(char* buffer, struct UnitSystem* us, if (baseUnitsExponants[i] == 0.) continue; else if (baseUnitsExponants[i] == 1.) 
- sprintf(temp, "%s ", getBaseUnitCGSSymbol(i)); + sprintf(temp, "%s ", units_get_base_unit_CGS_symbol(i)); else if (remainder(baseUnitsExponants[i], 1.) == 0) - sprintf(temp, "%s^%d ", getBaseUnitCGSSymbol(i), + sprintf(temp, "%s^%d ", units_get_base_unit_CGS_symbol(i), (int)baseUnitsExponants[i]); else - sprintf(temp, "%s^%7.4f ", getBaseUnitCGSSymbol(i), + sprintf(temp, "%s^%7.4f ", units_get_base_unit_CGS_symbol(i), baseUnitsExponants[i]); strncat(buffer, temp, 12); } diff --git a/src/units.h b/src/units.h index 1b977529784c1ef3069e1e932b16fd0b87073786..3e349dc16787cd4052a3e9205b21dce3c3732448 100644 --- a/src/units.h +++ b/src/units.h @@ -19,6 +19,12 @@ #ifndef SWIFT_UNITS_H #define SWIFT_UNITS_H +/* Config parameters. */ +#include "../config.h" + +/* Local includes. */ +#include "parser.h" + /** * @brief The unit system used internally. * @@ -86,74 +92,25 @@ enum UnitConversionFactor { UNIT_CONV_TEMPERATURE }; -/** - * @brief Initialises the UnitSystem structure with the constants given in - * const.h - */ -void initUnitSystem(struct UnitSystem*); - -/** - * @brief Returns the base unit conversion factor for a given unit system - */ -double getBaseUnit(struct UnitSystem*, enum BaseUnits); - -/** - * @brief Returns the base unit symbol in the cgs system - */ -const char* getBaseUnitSymbol(enum BaseUnits); - -/** - * @brief Returns the base unit symbol in the cgs system - */ -const char* getBaseUnitCGSSymbol(enum BaseUnits); - -/** - * @brief Returns the conversion factor for a given unit (expressed in terms of - * the 5 fundamental units) in the chosen unit system - */ -double generalConversionFactor(struct UnitSystem* us, - float baseUnitsExponants[5]); - -/** - * @brief Returns the conversion factor for a given unit in the chosen unit - * system - */ -double conversionFactor(struct UnitSystem* us, enum UnitConversionFactor unit); - -/** - * @brief Returns the h factor for a given unit (expressed in terms of the 5 - * fundamental units) in the chosen unit 
system - */ -float generalhFactor(struct UnitSystem* us, float baseUnitsExponants[5]); - -/** - * @brief Returns the h factor for a given unit in the chosen unit system - */ -float hFactor(struct UnitSystem* us, enum UnitConversionFactor unit); - -/** - * @brief Returns the scaling factor for a given unit (expressed in terms of the - * 5 fundamental units) in the chosen unit system - */ -float generalaFactor(struct UnitSystem* us, float baseUnitsExponants[5]); - -/** - * @brief Returns the scaling factor for a given unit in the chosen unit system - */ -float aFactor(struct UnitSystem* us, enum UnitConversionFactor unit); - -/** - * @brief Returns a string containing the exponents of the base units making up - * the conversion factors (expressed in terms of the 5 fundamental units) - */ -void generalConversionString(char* buffer, struct UnitSystem* us, +void units_init(struct UnitSystem*, const struct swift_params*); +double units_get_base_unit(const struct UnitSystem*, enum BaseUnits); +const char* units_get_base_unit_symbol(enum BaseUnits); +const char* units_get_base_unit_CGS_symbol(enum BaseUnits); +double units_general_conversion_factor(const struct UnitSystem* us, + float baseUnitsExponants[5]); +double units_conversion_factor(const struct UnitSystem* us, + enum UnitConversionFactor unit); +float units_general_h_factor(const struct UnitSystem* us, float baseUnitsExponants[5]); - -/** - * @brief Returns a string containing the exponents of the base units making up - * the conversion factors - */ -void conversionString(char* buffer, struct UnitSystem* us, - enum UnitConversionFactor unit); +float units_h_factor(const struct UnitSystem* us, + enum UnitConversionFactor unit); +float units_general_a_factor(const struct UnitSystem* us, + float baseUnitsExponants[5]); +float units_a_factor(const struct UnitSystem* us, + enum UnitConversionFactor unit); +void units_general_conversion_string(char* buffer, const struct UnitSystem* us, + float baseUnitsExponants[5]); 
+void units_conversion_string(char* buffer, const struct UnitSystem* us, + enum UnitConversionFactor unit); #endif /* SWIFT_UNITS_H */ diff --git a/src/version.c b/src/version.c index 6aeee2d8bcbc4652f679bbb786e9e512ebc4caa6..27841a16019a69442e66b21c327f4241e440fb12 100644 --- a/src/version.c +++ b/src/version.c @@ -241,7 +241,7 @@ const char *metis_version(void) { */ void greetings(void) { - printf(" Welcome to the cosmological code\n"); + printf(" Welcome to the cosmological hydrodynamical code\n"); printf(" ______ _________________\n"); printf(" / ___/ | / / _/ ___/_ __/\n"); printf(" \\__ \\| | /| / // // /_ / / \n"); diff --git a/tests/Makefile.am b/tests/Makefile.am index f0bfbefd3c7f4591134d1707c4ac9bf63278e855..b53a08615c5a8c7c2c31475bf7207522f8b9a58c 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -21,10 +21,12 @@ AM_CFLAGS = -I../src $(HDF5_CPPFLAGS) -DTIMER AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) # List of programs and scripts to run in the test suite -TESTS = testGreetings testReading.sh testSingle testTimeIntegration +TESTS = testGreetings testReading.sh testSingle testPair.sh testPairPerturbed.sh \ + test27cells.sh test27cellsPerturbed.sh testParser.sh # List of test programs to compile -check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration testSPHStep testVectorize +check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ + testSPHStep testPair test27cells testParser testKernel # Sources for the individual programs testGreetings_SOURCES = testGreetings.c @@ -37,7 +39,15 @@ testSPHStep_SOURCES = testSPHStep.c testSingle_SOURCES = testSingle.c -testVectorize_SOURCES = testVectorize.c +testPair_SOURCES = testPair.c + +test27cells_SOURCES = test27cells.c + +testParser_SOURCES = testParser.c + +testKernel_SOURCES = testKernel.c # Files necessary for distribution -EXTRA_DIST = testReading.sh makeInput.py +EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \ 
+ test27cells.sh test27cellsPerturbed.sh tolerance.dat testParser.sh \ + testParserInput.yaml diff --git a/tests/difffloat.py b/tests/difffloat.py new file mode 100644 index 0000000000000000000000000000000000000000..d4b48d54cbb9f292ed49b3cc142826cd1d71f87e --- /dev/null +++ b/tests/difffloat.py @@ -0,0 +1,118 @@ +############################################################################### + # This file is part of SWIFT. + # Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + # + # This program is free software: you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published + # by the Free Software Foundation, either version 3 of the License, or + # (at your option) any later version. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + # GNU General Public License for more details. + # + # You should have received a copy of the GNU Lesser General Public License + # along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ # + ############################################################################## + +from numpy import * +import sys + +abs_tol = 1e-7 +rel_tol = 1e-7 + +# Compares the content of two ASCII tables of floats line by line and +# reports all differences beyond the given tolerances +# Comparisons are done both in absolute and relative terms + +# Individual tolerances for each column can be provided in a file +# The (cube root of) the number of lines to check is provided as +# an optional 4th argument + +file1 = sys.argv[1] +file2 = sys.argv[2] +number_to_check = -1 + +if len(sys.argv) == 5: + number_to_check = int(sys.argv[4]) + +fileTol = "" +if len(sys.argv) >= 4: + fileTol = sys.argv[3] + +data1 = loadtxt(file1) +data2 = loadtxt(file2) +if fileTol != "": + dataTol = loadtxt(fileTol) + n_linesTol = shape(dataTol)[0] + n_columnsTol = shape(dataTol)[1] + + +if shape(data1) != shape(data2): + print "Non-matching array sizes in the files", file1, "and", file2, "." + sys.exit(1) + +n_lines = shape(data1)[0] +n_columns = shape(data1)[1] + +if fileTol != "": + if n_linesTol != 2: + print "Incorrect number of lines in tolerance file '%s'."%fileTol + if n_columnsTol != n_columns: + print "Incorrect number of columns in tolerance file '%s'."%fileTol + +if fileTol == "": + print "Absolute difference tolerance:", abs_tol + print "Relative difference tolerance:", rel_tol + absTol = ones(n_columns) * abs_tol + relTol = ones(n_columns) * rel_tol +else: + print "Tolerances read from file" + absTol = dataTol[0,:] + relTol = dataTol[1,:] + +n_lines_to_check = 0 +if number_to_check > 0: + n_lines_to_check = number_to_check**3 + n_lines_to_check = min(n_lines_to_check, n_lines) + print "Checking the first %d particles."%n_lines_to_check +else: + n_lines_to_check = n_lines + print "Checking all particles in the file." 
+ +error = False +for i in range(n_lines_to_check): + for j in range(n_columns): + + abs_diff = abs(data1[i,j] - data2[i,j]) + + sum = abs(data1[i,j] + data2[i,j]) + if sum > 0: + rel_diff = abs(data1[i,j] - data2[i,j]) / sum + else: + rel_diff = 0. + + if( abs_diff > absTol[j]): + print "Absolute difference larger than tolerance (%e) for particle %d, column %d:"%(absTol[j], i,j) + print "%10s: a = %e"%("File 1", data1[i,j]) + print "%10s: b = %e"%("File 2", data2[i,j]) + print "%10s: |a-b| = %e"%("Difference", abs_diff) + print "" + error = True + + if( rel_diff > relTol[j]): + print "Relative difference larger than tolerance (%e) for particle %d, column %d:"%(relTol[j], i,j) + print "%10s: a = %e"%("File 1", data1[i,j]) + print "%10s: b = %e"%("File 2", data2[i,j]) + print "%10s: |a-b|/|a+b| = %e"%("Difference", rel_diff) + print "" + error = True + + +if error: + exit(1) +else: + print "No differences found" + exit(0) diff --git a/tests/test27cells.c b/tests/test27cells.c new file mode 100644 index 0000000000000000000000000000000000000000..7915511eed50a229a94eda6bb338607099303421 --- /dev/null +++ b/tests/test27cells.c @@ -0,0 +1,413 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. 
If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#include <fenv.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include "swift.h" + +enum velocity_types { + velocity_zero, + velocity_random, + velocity_divergent, + velocity_rotating +}; + +/** + * @brief Returns a random number (uniformly distributed) in [a,b[ + */ +double random_uniform(double a, double b) { + return (rand() / (double)RAND_MAX) * (b - a) + a; +} + + +/** + * @brief Constructs a cell and all of its particle in a valid state prior to + * a DOPAIR or DOSELF calcuation. + * + * @param n The cube root of the number of particles. + * @param offset The position of the cell offset from (0,0,0). + * @param size The cell size. + * @param h The smoothing length of the particles in units of the inter-particle separation. + * @param density The density of the fluid. + * @param partId The running counter of IDs. + * @param pert The perturbation to apply to the particles in the cell in units of the inter-particle separation. + * @param vel The type of velocity field (0, random, divergent, rotating) + */ +struct cell *make_cell(size_t n, double *offset, double size, double h, + double density, long long *partId, double pert, + enum velocity_types vel) { + const size_t count = n * n * n; + const double volume = size * size * size; + struct cell *cell = malloc(sizeof(struct cell)); + bzero(cell, sizeof(struct cell)); + + if (posix_memalign((void **)&cell->parts, part_align, + count * sizeof(struct part)) != 0) { + error("couldn't allocate particles, no. 
of particles: %d", (int)count); + } + bzero(cell->parts, count * sizeof(struct part)); + + /* Construct the parts */ + struct part *part = cell->parts; + for (size_t x = 0; x < n; ++x) { + for (size_t y = 0; y < n; ++y) { + for (size_t z = 0; z < n; ++z) { + part->x[0] = + offset[0] + + size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + part->x[1] = + offset[1] + + size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + part->x[2] = + offset[2] + + size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + switch (vel) { + case velocity_zero: + part->v[0] = 0.f; + part->v[1] = 0.f; + part->v[2] = 0.f; + break; + case velocity_random: + part->v[0] = random_uniform(-0.05, 0.05); + part->v[1] = random_uniform(-0.05, 0.05); + part->v[2] = random_uniform(-0.05, 0.05); + break; + case velocity_divergent: + part->v[0] = part->x[0] - 1.5 * size; + part->v[1] = part->x[1] - 1.5 * size; + part->v[2] = part->x[2] - 1.5 * size; + break; + case velocity_rotating: + part->v[0] = part->x[1]; + part->v[1] = -part->x[0]; + part->v[2] = 0.f; + break; + } + part->h = size * h / (float)n; + part->id = ++(*partId); + part->mass = density * volume / count; + part->ti_begin = 0; + part->ti_end = 1; + ++part; + } + } + } + + /* Cell properties */ + cell->split = 0; + cell->h_max = h; + cell->count = count; + cell->dx_max = 0.; + cell->h[0] = size; + cell->h[1] = size; + cell->h[2] = size; + cell->loc[0] = offset[0]; + cell->loc[1] = offset[1]; + cell->loc[2] = offset[2]; + + cell->ti_end_min = 1; + cell->ti_end_max = 1; + + cell->sorted = 0; + cell->sort = NULL; + cell->sortsize = 0; + runner_dosort(NULL, cell, 0x1FFF, 0); + + return cell; +} + +void clean_up(struct cell *ci) { + free(ci->parts); + free(ci->sort); + free(ci); +} + +/** + * @brief Initializes all particles field to be ready for a density calculation + */ +void zero_particle_fields(struct cell *c) { + + for (size_t pid = 0; pid < c->count; pid++) { + c->parts[pid].rho = 0.f; + 
c->parts[pid].rho_dh = 0.f; + hydro_init_part(&c->parts[pid]); + } +} + +/** + * @brief Ends the loop by adding the appropriate coefficients + */ +void end_calculation(struct cell *c) { + + for (size_t pid = 0; pid < c->count; pid++) { + hydro_end_density(&c->parts[pid], 1); + } +} + +/** + * @brief Dump all the particles to a file + */ +void dump_particle_fields(char *fileName, struct cell *main_cell, + struct cell **cells) { + + FILE *file = fopen(fileName, "w"); + + /* Write header */ + fprintf(file, + "# %4s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s " + "%13s %13s %13s\n", + "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "rho", "rho_dh", + "wcount", "wcount_dh", "div_v", "curl_vx", "curl_vy", "curl_vz"); + + fprintf(file, "# Main cell --------------------------------------------\n"); + + /* Write main cell */ + for (size_t pid = 0; pid < main_cell->count; pid++) { + fprintf(file, + "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e " + "%13e %13e %13e\n", + main_cell->parts[pid].id, main_cell->parts[pid].x[0], + main_cell->parts[pid].x[1], main_cell->parts[pid].x[2], + main_cell->parts[pid].v[0], main_cell->parts[pid].v[1], + main_cell->parts[pid].v[2], main_cell->parts[pid].rho, + main_cell->parts[pid].rho_dh, main_cell->parts[pid].density.wcount, + main_cell->parts[pid].density.wcount_dh, +#ifdef GADGET2_SPH + main_cell->parts[pid].div_v, main_cell->parts[pid].density.rot_v[0], + main_cell->parts[pid].density.rot_v[1], + main_cell->parts[pid].density.rot_v[2] +#else + 0., 0., 0., 0. 
+#endif + ); + } + + /* Write all other cells */ + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 3; ++k) { + + struct cell *cj = cells[i * 9 + j * 3 + k]; + if (cj == main_cell) continue; + + fprintf(file, + "# Offset: [%2d %2d %2d] -----------------------------------\n", + i - 1, j - 1, k - 1); + + for (size_t pjd = 0; pjd < cj->count; pjd++) { + fprintf( + file, + "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e " + "%13e %13e %13e\n", + cj->parts[pjd].id, cj->parts[pjd].x[0], cj->parts[pjd].x[1], + cj->parts[pjd].x[2], cj->parts[pjd].v[0], cj->parts[pjd].v[1], + cj->parts[pjd].v[2], cj->parts[pjd].rho, cj->parts[pjd].rho_dh, + cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh, +#ifdef GADGET2_SPH + cj->parts[pjd].div_v, cj->parts[pjd].density.rot_v[0], + cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2] +#else + 0., 0., 0., 0. +#endif + ); + } + } + } + } + fclose(file); +} + +/* Just a forward declaration... */ +void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj); +void runner_doself1_density(struct runner *r, struct cell *ci); + +/* And go... */ +int main(int argc, char *argv[]) { + + size_t runs = 0, particles = 0; + double h = 1.2348, size = 1., rho = 1.; + double perturbation = 0.; + char outputFileNameExtension[200] = ""; + char outputFileName[200] = ""; + int vel = velocity_zero; + + /* Initialize CPU frequency, this also starts time. 
*/ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + + /* Get some randomness going */ + srand(0); + + char c; + while ((c = getopt(argc, argv, "m:s:h:p:r:t:d:f:v:")) != -1) { + switch (c) { + case 'h': + sscanf(optarg, "%lf", &h); + break; + case 's': + sscanf(optarg, "%lf", &size); + break; + case 'p': + sscanf(optarg, "%zu", &particles); + break; + case 'r': + sscanf(optarg, "%zu", &runs); + break; + case 'd': + sscanf(optarg, "%lf", &perturbation); + break; + case 'm': + sscanf(optarg, "%lf", &rho); + break; + case 'f': + strcpy(outputFileNameExtension, optarg); + break; + case 'v': + sscanf(optarg, "%d", &vel); + break; + case '?': + error("Unknown option."); + break; + } + } + + if (h < 0 || particles == 0 || runs == 0) { + printf( + "\nUsage: %s -p PARTICLES_PER_AXIS -r NUMBER_OF_RUNS [OPTIONS...]\n" + "\nGenerates a cell pair, filled with particles on a Cartesian grid." + "\nThese are then interacted using runner_dopair1_density." + "\n\nOptions:" + "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>" + "\n-m rho - Physical density in the cell" + "\n-s size - Physical size of the cell" + "\n-d pert - Perturbation to apply to the particles [0,1[" + "\n-v type (0,1,2,3) - Velocity field: (zero, random, divergent, " + "rotating)" + "\n-f fileName - Part of the file name used to save the dumps\n", + argv[0]); + exit(1); + } + + /* Help users... */ + message("Smoothing length: h = %f", h * size); + message("Kernel: %s", kernel_name); + message("Neighbour target: N = %f", h * h * h * kernel_nwneigh / 1.88273); + message("Density target: rho = %f", rho); + message("div_v target: div = %f", vel == 2 ? 3.f : 0.f); + message("curl_v target: curl = [0., 0., %f]", vel == 3 ? 
-2.f : 0.f); + printf("\n"); + + /* Build the infrastructure */ + struct space space; + space.periodic = 0; + space.h_max = h; + + struct engine engine; + engine.s = &space; + engine.time = 0.1f; + engine.ti_current = 1; + + struct runner runner; + runner.e = &engine; + + /* Construct some cells */ + struct cell *cells[27]; + struct cell *main_cell; + static long long partId = 0; + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 3; ++k) { + + double offset[3] = {i * size, j * size, k * size}; + cells[i * 9 + j * 3 + k] = make_cell(particles, offset, size, h, rho, + &partId, perturbation, vel); + } + } + } + + /* Store the main cell for future use */ + main_cell = cells[13]; + + ticks time = 0; + for (size_t i = 0; i < runs; ++i) { + + /* Zero the fields */ + for (int j = 0; j < 27; ++j) zero_particle_fields(cells[j]); + + const ticks tic = getticks(); + + /* Run all the pairs */ + for (int j = 0; j < 27; ++j) + if (cells[j] != main_cell) + runner_dopair1_density(&runner, main_cell, cells[j]); + + /* And now the self-interaction */ + runner_doself1_density(&runner, main_cell); + + const ticks toc = getticks(); + time += toc - tic; + + /* Let's get physical ! 
*/ + end_calculation(main_cell); + + /* Dump if necessary */ + if (i % 50 == 0) { + sprintf(outputFileName, "swift_dopair_27_%s.dat", + outputFileNameExtension); + dump_particle_fields(outputFileName, main_cell, cells); + } + } + + /* Output timing */ + message("SWIFT calculation took : %15lli ticks.", time / runs); + + /* Now perform a brute-force version for accuracy tests */ + + /* Zero the fields */ + for (int i = 0; i < 27; ++i) zero_particle_fields(cells[i]); + + const ticks tic = getticks(); + + /* Run all the brute-force pairs */ + for (int j = 0; j < 27; ++j) + if (cells[j] != main_cell) pairs_all_density(&runner, main_cell, cells[j]); + + /* And now the self-interaction */ + self_all_density(&runner, main_cell); + + const ticks toc = getticks(); + + /* Let's get physical ! */ + end_calculation(main_cell); + + /* Dump */ + sprintf(outputFileName, "brute_force_27_%s.dat", outputFileNameExtension); + dump_particle_fields(outputFileName, main_cell, cells); + + /* Output timing */ + message("Brute force calculation took : %15lli ticks.", toc - tic); + + /* Clean things to make the sanitizer happy ... */ + for (int i = 0; i < 27; ++i) clean_up(cells[i]); + + return 0; +} diff --git a/tests/test27cells.sh b/tests/test27cells.sh new file mode 100755 index 0000000000000000000000000000000000000000..0afdc32c95397ce76190e847bdcd04a0b079ef78 --- /dev/null +++ b/tests/test27cells.sh @@ -0,0 +1,8 @@ +#!/bin/bash +rm brute_force_27_standard.dat swift_dopair_27_standard.dat + +./test27cells -p 6 -r 1 -d 0 -f standard + +python difffloat.py brute_force_27_standard.dat swift_dopair_27_standard.dat tolerance.dat 6 + +exit $? 
diff --git a/tests/test27cellsPerturbed.sh b/tests/test27cellsPerturbed.sh new file mode 100755 index 0000000000000000000000000000000000000000..a553a2553e92cedee7c2c0679d231ec9d982fc28 --- /dev/null +++ b/tests/test27cellsPerturbed.sh @@ -0,0 +1,8 @@ +#!/bin/bash +rm brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat + +./test27cells -p 6 -r 1 -d 0.1 -f perturbed + +python difffloat.py brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat tolerance.dat 6 + +exit $? diff --git a/tests/testKernel.c b/tests/testKernel.c new file mode 100644 index 0000000000000000000000000000000000000000..5ad9cc81ea92e6ef9487489c5d560abf414e38df --- /dev/null +++ b/tests/testKernel.c @@ -0,0 +1,37 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ + +#include "swift.h" + +int main() { + + const float h = const_eta_kernel; + const int numPoints = 30; + + for (int i = 0; i < numPoints; ++i) { + + const float x = i * 3.f / numPoints; + float W, dW; + kernel_deval(x / h, &W, &dW); + + printf("h= %f H= %f x=%f W(x,h)=%f\n", h, h * kernel_gamma, x, W); + } + + return 0; +} diff --git a/tests/testPair.c b/tests/testPair.c new file mode 100644 index 0000000000000000000000000000000000000000..6e46b577ca63a8d3c2edce888a7485af0949813d --- /dev/null +++ b/tests/testPair.c @@ -0,0 +1,305 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ + +#include <fenv.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include "swift.h" + +/** + * Returns a random number (uniformly distributed) in [a,b[ + */ +double random_uniform(double a, double b) { + return (rand() / (double)RAND_MAX) * (b - a) + a; +} + +/* n is both particles per axis and box size: + * particles are generated on a mesh with unit spacing + */ +struct cell *make_cell(size_t n, double *offset, double size, double h, + double density, unsigned long long *partId, + double pert) { + const size_t count = n * n * n; + const double volume = size * size * size; + struct cell *cell = malloc(sizeof(struct cell)); + bzero(cell, sizeof(struct cell)); + + if (posix_memalign((void **)&cell->parts, part_align, + count * sizeof(struct part)) != 0) { + error("couldn't allocate particles, no. of particles: %d", (int)count); + } + bzero(cell->parts, count * sizeof(struct part)); + + /* Construct the parts */ + struct part *part = cell->parts; + for (size_t x = 0; x < n; ++x) { + for (size_t y = 0; y < n; ++y) { + for (size_t z = 0; z < n; ++z) { + part->x[0] = + offset[0] + + size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + part->x[1] = + offset[1] + + size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + part->x[2] = + offset[2] + + size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + // part->v[0] = part->x[0] - 1.5; + // part->v[1] = part->x[1] - 1.5; + // part->v[2] = part->x[2] - 1.5; + part->v[0] = random_uniform(-0.05, 0.05); + part->v[1] = random_uniform(-0.05, 0.05); + part->v[2] = random_uniform(-0.05, 0.05); + part->h = size * h / (float)n; + part->id = ++(*partId); + part->mass = density * volume / count; + part->ti_begin = 0; + part->ti_end = 1; + ++part; + } + } + } + + /* Cell properties */ + cell->split = 0; + cell->h_max = h; + cell->count = count; + cell->dx_max = 0.; + cell->h[0] 
= n; + cell->h[1] = n; + cell->h[2] = n; + cell->loc[0] = offset[0]; + cell->loc[1] = offset[1]; + cell->loc[2] = offset[2]; + + cell->ti_end_min = 1; + cell->ti_end_max = 1; + + cell->sorted = 0; + cell->sort = NULL; + cell->sortsize = 0; + runner_dosort(NULL, cell, 0x1FFF, 0); + + return cell; +} + +void clean_up(struct cell *ci) { + free(ci->parts); + free(ci->sort); + free(ci); +} + +/** + * @brief Initializes all particles field to be ready for a density calculation + */ +void zero_particle_fields(struct cell *c) { + + for (size_t pid = 0; pid < c->count; pid++) { + c->parts[pid].rho = 0.f; + c->parts[pid].rho_dh = 0.f; + hydro_init_part(&c->parts[pid]); + } +} + +/** + * @brief Dump all the particles to a file + */ +void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) { + + FILE *file = fopen(fileName, "w"); + + /* Write header */ + fprintf(file, + "# %4s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s " + "%13s %13s %13s\n", + "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "rho", "rho_dh", + "wcount", "wcount_dh", "div_v", "curl_vx", "curl_vy", "curl_vz"); + + fprintf(file, "# ci --------------------------------------------\n"); + + for (size_t pid = 0; pid < ci->count; pid++) { + fprintf(file, + "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e " + "%13e %13e %13e\n", + ci->parts[pid].id, ci->parts[pid].x[0], ci->parts[pid].x[1], + ci->parts[pid].x[2], ci->parts[pid].v[0], ci->parts[pid].v[1], + ci->parts[pid].v[2], ci->parts[pid].rho, ci->parts[pid].rho_dh, + ci->parts[pid].density.wcount, ci->parts[pid].density.wcount_dh, +#ifdef GADGET2_SPH + ci->parts[pid].div_v, ci->parts[pid].density.rot_v[0], + ci->parts[pid].density.rot_v[1], ci->parts[pid].density.rot_v[2] +#else + 0., 0., 0., 0. 
+#endif + ); + } + + fprintf(file, "# cj --------------------------------------------\n"); + + for (size_t pjd = 0; pjd < cj->count; pjd++) { + fprintf(file, + "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e " + "%13e %13e %13e\n", + cj->parts[pjd].id, cj->parts[pjd].x[0], cj->parts[pjd].x[1], + cj->parts[pjd].x[2], cj->parts[pjd].v[0], cj->parts[pjd].v[1], + cj->parts[pjd].v[2], cj->parts[pjd].rho, cj->parts[pjd].rho_dh, + cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh, +#ifdef GADGET2_SPH + cj->parts[pjd].div_v, cj->parts[pjd].density.rot_v[0], + cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2] +#else + 0., 0., 0., 0. +#endif + ); + } + + fclose(file); +} + +/* Just a forward declaration... */ +void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj); + +int main(int argc, char *argv[]) { + size_t particles = 0, runs = 0, volume, type = 0; + double offset[3] = {0, 0, 0}, h = 1.1255, size = 1., rho = 1.; + double perturbation = 0.; + struct cell *ci, *cj; + struct space space; + struct engine engine; + struct runner runner; + char c; + static unsigned long long partId = 0; + char outputFileNameExtension[200] = ""; + char outputFileName[200] = ""; + ticks tic, toc, time; + + /* Initialize CPU frequency, this also starts time. 
*/ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + + srand(0); + + while ((c = getopt(argc, argv, "h:p:r:t:d:f:")) != -1) { + switch (c) { + case 'h': + sscanf(optarg, "%lf", &h); + break; + case 'p': + sscanf(optarg, "%zu", &particles); + break; + case 'r': + sscanf(optarg, "%zu", &runs); + break; + case 't': + sscanf(optarg, "%zu", &type); + break; + case 'd': + sscanf(optarg, "%lf", &perturbation); + break; + case 'f': + strcpy(outputFileNameExtension, optarg); + break; + case '?': + error("Unknown option."); + break; + } + } + + if (h < 0 || particles == 0 || runs == 0 || type > 2) { + printf( + "\nUsage: %s -p PARTICLES_PER_AXIS -r NUMBER_OF_RUNS [OPTIONS...]\n" + "\nGenerates a cell pair, filled with particles on a Cartesian grid." + "\nThese are then interacted using runner_dopair1_density." + "\n\nOptions:" + "\n-t TYPE=0 - cells share face (0), edge (1) or corner (2)" + "\n-h DISTANCE=1.1255 - smoothing length" + "\n-d pert - perturbation to apply to the particles [0,1[" + "\n-f fileName - part of the file name used to save the dumps\n", + argv[0]); + exit(1); + } + + space.periodic = 0; + space.h_max = h; + + engine.s = &space; + engine.time = 0.1f; + engine.ti_current = 1; + runner.e = &engine; + + volume = particles * particles * particles; + message("particles: %zu B\npositions: 0 B", 2 * volume * sizeof(struct part)); + + ci = make_cell(particles, offset, size, h, rho, &partId, perturbation); + for (size_t i = 0; i < type + 1; ++i) offset[i] = 1.; + cj = make_cell(particles, offset, size, h, rho, &partId, perturbation); + + time = 0; + for (size_t i = 0; i < runs; ++i) { + + /* Zero the fields */ + zero_particle_fields(ci); + zero_particle_fields(cj); + + tic = getticks(); + + /* Run the test */ + runner_dopair1_density(&runner, ci, cj); + + toc = getticks(); + time += toc - tic; + + /* Dump if necessary */ + if (i % 50 == 0) { + sprintf(outputFileName, "swift_dopair_%s.dat", outputFileNameExtension); + 
dump_particle_fields(outputFileName, ci, cj); + } + } + + /* Output timing */ + message("SWIFT calculation took %lli ticks.", time / runs); + + /* Now perform a brute-force version for accuracy tests */ + + /* Zero the fields */ + zero_particle_fields(ci); + zero_particle_fields(cj); + + tic = getticks(); + + /* Run the brute-force test */ + pairs_all_density(&runner, ci, cj); + + toc = getticks(); + + /* Dump */ + sprintf(outputFileName, "brute_force_%s.dat", outputFileNameExtension); + dump_particle_fields(outputFileName, ci, cj); + + /* Output timing */ + message("Brute force calculation took %lli ticks.", toc - tic); + + /* Clean things to make the sanitizer happy ... */ + clean_up(ci); + clean_up(cj); + + return 0; +} diff --git a/tests/testPair.sh b/tests/testPair.sh new file mode 100755 index 0000000000000000000000000000000000000000..f6f505e56a2c7a5c3cff0ec04bd871278634193c --- /dev/null +++ b/tests/testPair.sh @@ -0,0 +1,8 @@ +#!/bin/bash +rm brute_force_standard.dat swift_dopair_standard.dat + +./testPair -p 6 -r 1 -d 0 -f standard + +python difffloat.py brute_force_standard.dat swift_dopair_standard.dat tolerance.dat + +exit $? diff --git a/tests/testPairPerturbed.sh b/tests/testPairPerturbed.sh new file mode 100755 index 0000000000000000000000000000000000000000..544ba1b032da8426c065dcfb2ce3ee554c5e76a1 --- /dev/null +++ b/tests/testPairPerturbed.sh @@ -0,0 +1,8 @@ +#!/bin/bash +rm brute_force_perturbed.dat swift_dopair_perturbed.dat + +./testPair -p 6 -r 1 -d 0.1 -f perturbed + +python difffloat.py brute_force_perturbed.dat swift_dopair_perturbed.dat tolerance.dat + +exit $? diff --git a/tests/testParser.c b/tests/testParser.c new file mode 100644 index 0000000000000000000000000000000000000000..0b08d20c9e2d48de1858877cf186eaa9d0ac84c0 --- /dev/null +++ b/tests/testParser.c @@ -0,0 +1,70 @@ +/******************************************************************************* + * This file is part of SWIFT. 
+ * Copyright (C) 2016 James Willis (james.s.willis@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#include "parser.h" +#include <assert.h> +#include <string.h> +#include <stdio.h> +#include <math.h> + +int main(int argc, char *argv[]) { + const char *input_file = argv[1]; + + /* Create a structure to read file into. */ + struct swift_params param_file; + + /* Read the parameter file. */ + parser_read_file(input_file, &param_file); + + /* Print the contents of the structure to stdout. */ + parser_print_params(&param_file); + + /* Print the contents of the structure to a file in YAML format. */ + parser_write_params_to_file(&param_file, "parser_output.yml"); + + /* Retrieve parameters and store them in variables defined above. 
+ * Have to specify the name of the parameter as it appears in the + * input file: testParserInput.yaml.*/ + const int no_of_threads = + parser_get_param_int(&param_file, "Scheduler:no_of_threads"); + const int no_of_time_steps = + parser_get_param_int(&param_file, "Simulation:no_of_time_steps"); + const float max_h = parser_get_param_float(&param_file, "Simulation:max_h"); + const double start_time = + parser_get_param_double(&param_file, "Simulation:start_time"); + const int kernel = parser_get_param_int(&param_file, "kernel"); + + char ic_file[PARSER_MAX_LINE_SIZE]; + parser_get_param_string(&param_file, "IO:ic_file", ic_file); + + /* Print the variables to check their values are correct. */ + printf( + "no_of_threads: %d, no_of_time_steps: %d, max_h: %f, start_time: %lf, " + "ic_file: %s, kernel: %d\n", + no_of_threads, no_of_time_steps, max_h, start_time, ic_file, kernel); + + assert(no_of_threads == 16); + assert(no_of_time_steps == 10); + assert(fabs(max_h - 1.1255) < 0.00001); + assert(fabs(start_time - 1.23456789) < 0.00001); + assert(strcmp(ic_file, "ic_file.ini") == 0); /*strcmp returns 0 if correct.*/ + assert(kernel == 4); + + return 0; +} diff --git a/tests/testParser.sh b/tests/testParser.sh new file mode 100755 index 0000000000000000000000000000000000000000..53d2bbe4e0230032666ace228449f913f03e0464 --- /dev/null +++ b/tests/testParser.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +rm parser_output.yml +./testParser testParserInput.yaml diff --git a/tests/testParserInput.yaml b/tests/testParserInput.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c7fefb3242ab4e140756789aad9979d024f83906 --- /dev/null +++ b/tests/testParserInput.yaml @@ -0,0 +1,19 @@ +--- +#section_1: +# var_a: 4.5e10 +# var_b: Hello World! + +Scheduler: + no_of_threads: 16 # The number of threads that will be used. + +kernel: 4 + +Simulation: + no_of_time_steps: 10 + max_h: 1.1255 + start_time: 1.23456789 + +IO: + #Input file + ic_file: ic_file.ini +... 
diff --git a/tests/testReading.c b/tests/testReading.c index d2a2a766171a85ace486914f0f39a987d9d8c3d3..33aeb5095ba499bc0fd18ba15b513e351692432e 100644 --- a/tests/testReading.c +++ b/tests/testReading.c @@ -22,7 +22,7 @@ int main() { - int Ngas = -1, Ngpart = -1; + size_t Ngas = 0, Ngpart = 0; int periodic = -1; int i, j, k, n; double dim[3]; @@ -35,7 +35,8 @@ int main() { const double rho = 2.; /* Read data */ - read_ic_single("input.hdf5", dim, &parts, &gparts, &Ngas, &Ngpart, &periodic); + read_ic_single("input.hdf5", dim, &parts, &gparts, &Ngas, &Ngpart, &periodic, + 0); /* Check global properties read are correct */ assert(dim[0] == boxSize); diff --git a/tests/testSPHStep.c b/tests/testSPHStep.c index 984b8ea867250d0bda1bc14d2600279a27321b2c..223078ecb637e64d94e37cdf8c0f60a86bdd5ff7 100644 --- a/tests/testSPHStep.c +++ b/tests/testSPHStep.c @@ -77,6 +77,10 @@ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) { #ifdef DEFAULT_SPH +/* Just a forward declaration... */ +void runner_doself1_density(struct runner *r, struct cell *ci); +void runner_doself2_force(struct runner *r, struct cell *ci); + /* Run a full time step integration for one cell */ int main() { @@ -132,7 +136,7 @@ int main() { /* Initialise the particles */ for (j = 0; j < 27; ++j) { - runner_doinit(&r, cells[j]); + runner_doinit(&r, cells[j], 0); } /* Compute density */ @@ -145,7 +149,7 @@ int main() { runner_doself2_force(&r, ci); runner_dokick(&r, ci, 1); - message("t_end=%f", p->t_end); + message("ti_end=%d", p->ti_end); free(ci->parts); free(ci->xparts); diff --git a/tests/testSingle.c b/tests/testSingle.c index c85b77ff1c5b2285c33fa7787bbd53deab463039..eb49a570b93b14734c9e6af37d3d8a2b90d04078 100644 --- a/tests/testSingle.c +++ b/tests/testSingle.c @@ -91,8 +91,8 @@ int main(int argc, char *argv[]) { p2.force.POrho2 = p2.u * (const_hydro_gamma - 1.0f) / p2.rho; /* Dump a header. 
*/ - printParticle_single(&p1); - printParticle_single(&p2); + // printParticle_single(&p1, NULL); + // printParticle_single(&p2, NULL); printf("# r a_1 udt_1 a_2 udt_2\n"); /* Loop over the different radii. */ @@ -103,9 +103,9 @@ int main(int argc, char *argv[]) { r2 = dx[0] * dx[0]; /* Clear the particle fields. */ - p1.a[0] = 0.0f; + p1.a_hydro[0] = 0.0f; p1.force.u_dt = 0.0f; - p2.a[0] = 0.0f; + p2.a_hydro[0] = 0.0f; p2.force.u_dt = 0.0f; /* Interact the particles. */ @@ -130,8 +130,8 @@ int main(int argc, char *argv[]) { /* Output the results. */ printf( - "%.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e\n", -dx[0], p1.a[0], - p1.a[1], p1.a[2], p1.force.u_dt, + "%.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e\n", -dx[0], + p1.a_hydro[0], p1.a_hydro[1], p1.a_hydro[2], p1.force.u_dt, /// -dx[0] , p1.rho , p1.density.wcount , p2.rho , p2.density.wcount , w, dwdx, gradw[0], gradw[1], gradw[2]); diff --git a/tests/testVectorize.c b/tests/testVectorize.c deleted file mode 100644 index a18b6e8af5ac3f7b94bd7be3bdf8fd21e49681ff..0000000000000000000000000000000000000000 --- a/tests/testVectorize.c +++ /dev/null @@ -1,212 +0,0 @@ -#include <fenv.h> -#include <stdlib.h> -#include <string.h> -#include <stdio.h> -#include <unistd.h> -#include "swift.h" - -/* n is both particles per axis and box size: - * particles are generated on a mesh with unit spacing - */ -struct cell *make_cell(size_t n, double *offset, double h, - unsigned long long *partId) { - size_t count = n * n * n; - struct cell *cell = malloc(sizeof *cell); - struct part *part; - size_t x, y, z, size; - - size = count * sizeof(struct part); - if (posix_memalign((void **)&cell->parts, part_align, size) != 0) { - error("couldn't allocate particles, no. of particles: %d", (int)count); - } - - part = cell->parts; - for (x = 0; x < n; ++x) { - for (y = 0; y < n; ++y) { - for (z = 0; z < n; ++z) { - // Add .5 for symmetry: 0.5, 1.5, 2.5 vs. 
0, 1, 2 - part->x[0] = x + offset[0] + 0.5; - part->x[1] = y + offset[1] + 0.5; - part->x[2] = z + offset[2] + 0.5; - part->v[0] = 1.0f; - part->v[1] = 1.0f; - part->v[2] = 1.0f; - part->h = h; - part->id = ++(*partId); - part->mass = 1.0f; - part->ti_begin = 0; - part->ti_end = 1; - ++part; - } - } - } - - cell->split = 0; - cell->h_max = h; - cell->count = count; - cell->dx_max = 1.; - cell->h[0] = n; - cell->h[1] = n; - cell->h[2] = n; - - cell->sort = malloc(13 * count * sizeof *cell->sort); - runner_dosort(NULL, cell, 0x1FFF, 0); - - return cell; -} - -void clean_up(struct cell *ci) { - free(ci->parts); - free(ci->sort); - free(ci); -} - -/** - * @brief Initializes all particles field to be ready for a density calculation - */ -void zero_particle_fields(struct cell *c) { - - for (size_t pid = 0; pid < c->count; pid++) { - c->parts[pid].rho = 0.f; - c->parts[pid].rho_dh = 0.f; - hydro_init_part(&c->parts[pid]); - } -} - -/** - * @brief Dump all the particles to a file - */ -void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) { - - FILE *file = fopen(fileName, "w"); - - fprintf(file, - "# ID rho rho_dh wcount wcount_dh div_v curl_v:[x y z]\n"); - - for (size_t pid = 0; pid < ci->count; pid++) { - fprintf(file, "%6llu %f %f %f %f %f %f %f %f\n", ci->parts[pid].id, - ci->parts[pid].rho, ci->parts[pid].rho_dh, - ci->parts[pid].density.wcount, ci->parts[pid].density.wcount_dh, - ci->parts[pid].div_v, ci->parts[pid].density.rot_v[0], - ci->parts[pid].density.rot_v[1], ci->parts[pid].density.rot_v[2]); - } - - fprintf(file, "# -----------------------------------\n"); - - for (size_t pjd = 0; pjd < cj->count; pjd++) { - fprintf(file, "%6llu %f %f %f %f %f %f %f %f\n", cj->parts[pjd].id, - cj->parts[pjd].rho, cj->parts[pjd].rho_dh, - cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh, - cj->parts[pjd].div_v, cj->parts[pjd].density.rot_v[0], - cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2]); - } - - fclose(file); -} 
- -/* Just a forward declaration... */ -void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj); - -int main(int argc, char *argv[]) { - size_t particles = 0, runs = 0, volume, type = 0; - double offset[3] = {0, 0, 0}, h = 1.1255; // * DIM/PARTS_PER_AXIS == * 1 - struct cell *ci, *cj; - struct space space; - struct engine engine; - struct runner runner; - char c; - static unsigned long long partId = 0; - ticks tic, toc, time; - - while ((c = getopt(argc, argv, "h:p:r:t:")) != -1) { - switch (c) { - case 'h': - sscanf(optarg, "%lf", &h); - break; - case 'p': - sscanf(optarg, "%zu", &particles); - break; - case 'r': - sscanf(optarg, "%zu", &runs); - break; - case 't': - sscanf(optarg, "%zu", &type); - break; - } - } - - if (h < 0 || particles == 0 || runs == 0 || type > 2) { - printf( - "\nUsage: %s -p PARTICLES_PER_AXIS -r NUMBER_OF_RUNS [OPTIONS...]\n" - "\nGenerates a cell pair, filled with particles on a Cartesian grid." - "\nThese are then interacted using runner_dopair1_density." 
- "\n\nOptions:" - "\n-t TYPE=0 - cells share face (0), edge (1) or corner (2)" - "\n-h DISTANCE=1.1255 - smoothing length\n", - argv[0]); - exit(1); - } - - volume = particles * particles * particles; - message("particles: %zu B\npositions: 0 B", 2 * volume * sizeof(struct part)); - - ci = make_cell(particles, offset, h, &partId); - for (size_t i = 0; i < type + 1; ++i) offset[i] = particles; - cj = make_cell(particles, offset, h, &partId); - - for (int i = 0; i < 3; ++i) { - space.h_max = h; - space.dt_step = 0.1; - } - - engine.s = &space; - engine.time = 0.1f; - runner.e = &engine; - - time = 0; - for (size_t i = 0; i < runs; ++i) { - - /* Zero the fields */ - zero_particle_fields(ci); - zero_particle_fields(cj); - - tic = getticks(); - - /* Run the test */ - runner_dopair1_density(&runner, ci, cj); - - toc = getticks(); - time += toc - tic; - - /* Dump if necessary */ - if (i % 50 == 0) dump_particle_fields("swift_dopair.dat", ci, cj); - } - - /* Output timing */ - message("SWIFT calculation took %lli ticks.", time / runs); - - /* Now perform a brute-force version for accuracy tests */ - - /* Zero the fields */ - zero_particle_fields(ci); - zero_particle_fields(cj); - - tic = getticks(); - - /* Run the test */ - pairs_all_density(&runner, ci, cj); - - toc = getticks(); - - /* Dump */ - dump_particle_fields("brute_force.dat", ci, cj); - - /* Output timing */ - message("Brute force calculation took %lli ticks.", toc - tic); - - /* Clean things to make the sanitizer happy ... 
*/ - clean_up(ci); - clean_up(cj); - - return 0; -} diff --git a/tests/tolerance.dat b/tests/tolerance.dat new file mode 100644 index 0000000000000000000000000000000000000000..f5031c5f47dfa203300ebcc9a47fbac42f854d26 --- /dev/null +++ b/tests/tolerance.dat @@ -0,0 +1,3 @@ +# ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-5 1e-5 2e-5 3e-2 1e-5 1e-5 1e-5 1e-5 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-5 1.2e-5 1e-5 1e-2 1e-4 1e-4 1e-4 1e-4 diff --git a/theory/kernel/kernel.pdf b/theory/kernel/kernel.pdf deleted file mode 100644 index b6e540dc61c36dd00e56f02e44ce33f9f91a7f01..0000000000000000000000000000000000000000 Binary files a/theory/kernel/kernel.pdf and /dev/null differ diff --git a/theory/kernel/kernel.tex b/theory/kernel/kernel.tex deleted file mode 100644 index 7087555d423afbe2745bb91010a17c52a32084f2..0000000000000000000000000000000000000000 --- a/theory/kernel/kernel.tex +++ /dev/null @@ -1,155 +0,0 @@ -\documentclass[a4paper,10pt]{article} -\usepackage[utf8]{inputenc} -\usepackage{amsmath} - -%opening -\title{SPH kernels in SWIFT} -\author{Matthieu Schaller} - -\begin{document} - -\maketitle - -\section{General Definitions} - -The smoothing kernels used in SPH are almost always isotropic and can hence be written in 3D as - -\begin{equation} - W(\vec{x},h) = \frac{1}{h^3}f\left(\frac{|\vec{x}|}{h}\right), -\end{equation} - -where $f(q)$ is a dimensionless function, usually a low-order polynomial, normalized to unity. For computational -reasons, this kernel -usually has a finite support of radius $H$. In other words, - -\begin{equation} - W(\vec{x},h) = 0\quad \forall\quad |\vec{x}| > H. -\end{equation} - One can then define the weighted number of neighbours within $H$ as - -\begin{equation} - N_{ngb} = \frac{4}{3}\pi H^3 \sum_j W(\vec{x}_i - \vec{x}_j,h). 
-\end{equation} - -The value of $N_{ngb}$ is often used in the codes to find the smoothing length of each particle via Newton iterations -or a bissection algorithm. $H$ is defined as \emph{the smoothing length} in the GADGET code. This definition is useful -for implementation reasons but does not really correspond to a true physical quantity. \\ -The main question is the definition of the smoothing length. The function $W(\vec{x},h)$ is invariant under the -rescaling $h\rightarrow \alpha h,~f(q)\rightarrow\alpha^{-3}f(\alpha q)$, which makes the definition of $h$ difficult. -This ambiguity is present in the litterature with authors using different definition of the \emph{physical} smoothing -length, $h=\frac{1}{2}H$ or $h=H$ for instance. \\ -A more physically motivated estimate is the standard deviation of the kernel: - -\begin{equation} - \sigma^2 = \frac{1}{3} \int \vec{x}^2~W(\vec{x},h)~d^3\vec{x} -\end{equation} - -which then allows us to set $h=2\sigma$. This definition of the smoothing length is more physical as one can -demonstrate that the reconstruction of any smooth field $A(\vec{x})$ using interpolation of particles at the point -$\vec{x}_i$ can be expanded as - -\begin{equation} -A_i \approx A(\vec{x}_i) + \frac{1}{2}\sigma^2 \nabla^2A(\vec{x}_i) + \mathcal{O}\left(\sigma^4\right). -\end{equation} - -The quantity $H/\sigma$ is independant of the choice of $h$ made and is purely a functional of $f(q)$. The number of -neighbours (used in the code to construct the neighborhood of a given particle) can then be expressed as a function of -this \emph{physical} $h$ (or $\sigma$). 
Or to relate it even more to the particle distribution, we can write -$h=\eta\Delta x$, with $\Delta x$ the mean inter-particle separation: - -\begin{equation} - N_{ngb} = \frac{4}{3}\pi \left(\frac{1}{2}\eta\frac{H}{\sigma}\right)^3 = \frac{4}{3}\pi -\left(\eta\zeta\right)^3 -\end{equation} - -This definition of the number of neighbours only depends on $f(q)$ (via $\zeta$) and on the mean inter-particle -separation. The problem is then fully specified by specifying a form for $f(q)$ and $\eta$. \\ -Experiments suggest that $\eta \approx 1.2 - 1.3$ is a reasonnable choice. The bigger $\eta$, the better the smoothing -and hence the better the reconstruction of the field. This, however, comes at a higher computational cost as more -interactions between neighbours will have to be computed. Also, spline kernels become instable when $\eta>1.5$. - -\section{Kernels available in SWIFT} - -The different kernels available are listed below. -\paragraph{Cubic Spline Kernel} -\begin{equation*} - f(q) = \frac{1}{\pi}\left\lbrace \begin{array}{rcl} - \frac{3}{4}q^3 - 15q^2 + 1 & \mbox{if} & 0 \leq q < 1 \\ - -\frac{1}{4}q^3 + \frac{3}{2}q^2-3q+2 & \mbox{if} & 1 \leq q < 2 \\ - 0 & \mbox{if} & q \geq 2 \\ - \end{array}\right. -\end{equation*} -with $\zeta = \frac{1}{2}\sqrt{\frac{40}{3}} \approx 1.825742$. Thus, for a resolution of $\eta = 1.235$, this kernel -uses $N_{ngb} \approx 48$. The code uses $h = \frac{1}{2}H = \zeta \sigma$ internally. - -\paragraph{Quartic Spline Kernel} -\begin{equation*} - f(q) = \frac{1}{20\pi}\left\lbrace \begin{array}{rcl} - 6q^4 - 15q^2 + \frac{115}{8} & \mbox{if} & 0 \leq q < \frac{1}{2} \\ - -4q^4 + 20q^3-30q^2 + 5q + \frac{55}{4} & \mbox{if} & \frac{1}{2} \leq q < \frac{3}{2} \\ - q^4-10q^3+\frac{75}{2}q^2-\frac{125}{2}q+\frac{625}{16} & \mbox{if} & \frac{3}{2} \leq q < -\frac{5}{2} \\ - 0 & \mbox{if} & q \geq \frac{5}{2} \\ - \end{array}\right. -\end{equation*} -with $\zeta = \frac{1}{2}\sqrt{\frac{375}{23}} \approx 2.018932$. 
Thus, for a resolution of $\eta = 1.235$, this kernel -uses $N_{ngb} \approx 64.9$. The code uses $h = \frac{2}{5}H =\frac{4}{5}\zeta \sigma$ internally. - -\paragraph{Quintic Spline Kernel} -\begin{equation*} - f(q) = \frac{1}{120\pi}\left\lbrace \begin{array}{rcl} - -10q^5 + 30q^4 - 60q^2 + 66 & \mbox{if} & 0 \leq q < 1 \\ - 5q^5 - 45q^4 + 150q^3 - 210q^2 + 75q + 51 & \mbox{if} & 1 \leq q < 2 \\ - -q^5 + 15q^4 - 90q^3 + 270q^2 - 405q + 243 & \mbox{if} & 2 \leq q < 3 \\ - 0 & \mbox{if} & q \geq 3 \\ - \end{array}\right. -\end{equation*} -with $\zeta = \frac{1}{2}\sqrt{\frac{135}{7}} \approx 2.195775$. Thus, for a resolution of $\eta = 1.235$, this kernel -uses $N_{ngb} \approx 83.5$. The code uses $h = \frac{1}{3}H = \frac{2}{3}\zeta \sigma$ internally. - -\paragraph{Wendland $C2$ Kernel} -\begin{equation*} - f(q) = \frac{21}{2\pi}\left\lbrace \begin{array}{rcl} - 4 q^5-15 q^4+20 q^3-10 q^2+1 & \mbox{if} & 0 \leq q < 1 \\ - 0 & \mbox{if} & q \geq 1 \\ - \end{array}\right. -\end{equation*} - with $\zeta = \frac{1}{2}\sqrt{15} \approx 1.93649$. Thus, for a resolution of $\eta = 1.235$, this kernel -uses $N_{ngb} \approx 57.3$. The code uses $h = H = 2\zeta \sigma$ internally. - - -\paragraph{Wendland $C4$ Kernel} -\begin{equation*} - f(q) = \frac{495}{32\pi}\left\lbrace \begin{array}{rcl} - \frac{35}{3} q^8-64 q^7+ 140 q^6-\frac{448}{3} q^5+70 q^4-\frac{28}{3} q^2+1 & \mbox{if} & 0 -\leq q < 1 \\ - 0 & \mbox{if} & q \geq 1 \\ - \end{array}\right. -\end{equation*} - with $\zeta = \frac{1}{2}\sqrt{\frac{39}{2}} \approx 2.20794$. Thus, for a resolution of $\eta = 1.235$, this kernel -uses $N_{ngb} \approx 84.9$. The code uses $h = H = 2\zeta \sigma$ internally. - -\paragraph{Wendland $C6$ Kernel} -\begin{equation*} - f(q) = \frac{1365}{64\pi}\left\lbrace \begin{array}{rcl} - 32 q^{11}-231 q^{10}+704 q^9-1155 q^8+1056 q^7-462 q^6+66 q^4-11 q^2+1 & \mbox{if} & 0 -\leq q < 1 \\ - 0 & \mbox{if} & q \geq 1 \\ - \end{array}\right. 
-\end{equation*} - with $\zeta = \frac{1}{2}\sqrt{24} \approx 2.44949$. Thus, for a resolution of $\eta = 1.235$, this kernel -uses $N_{ngb} \approx 116$. The code uses $h = H = 2\zeta \sigma$ internally. - -\section{Kernel Derivatives} - -The derivatives of the kernel function have relatively simple expressions: - -\begin{eqnarray*} - \vec\nabla_x W(\vec{x},h) &=& \frac{1}{h^4}f'\left(\frac{|\vec{x}|}{h}\right) \frac{\vec{x}}{|\vec{x}|} \\ - \frac{\partial W(\vec{x},h)}{\partial h} &=&- \frac{1}{h^4}\left[3f\left(\frac{|\vec{x}|}{h}\right) + -\frac{|\vec{x}|}{h}f'\left(\frac{|\vec{x}|}{h}\right)\right] -\end{eqnarray*} - -Note that for all the kernels listed above, $f'(0) = 0$. - -\end{document} diff --git a/theory/kernel/kernel_definitions.tex b/theory/kernel/kernel_definitions.tex new file mode 100644 index 0000000000000000000000000000000000000000..8999636109ffadcbf148ce3c1fbccdc44feafe65 --- /dev/null +++ b/theory/kernel/kernel_definitions.tex @@ -0,0 +1,242 @@ +\documentclass[a4paper]{mnras} +\usepackage[utf8]{inputenc} +\usepackage{amsmath} +\usepackage{graphicx} +\usepackage{xspace} + +\newcommand{\swift}{{\sc Swift}\xspace} + + + +%opening +\title{SPH kernels in SWIFT} +\author{Matthieu Schaller} + +\begin{document} + +\maketitle + +Here we follow the definitions of Dehnen \& Aly 2012. + +\section{General Definitions} + +The desirable properties of an SPH kernel $W(\vec{x},h)$ are: +\begin{enumerate} +\item $W(\vec{x},h)$ should be isotropic in $\vec{x}$. +\item $W(\vec{x},h)$ should be positive and decrease monotonically. +\item $W(\vec{x},h)$ should be twice differentiable. +\item $W(\vec{x},h)$ should have a finite support and be cheap to + compute.
+\end{enumerate} + +As a consequence, the smoothing kernels used in SPH can +be written (in 3D) as + +\begin{equation} + W(\vec{x},h) \equiv \frac{1}{H^3}f\left(\frac{|\vec{x}|}{H}\right), +\end{equation} + +where $H=\gamma h$ is defined below and $f(u)$ is a dimensionless +function, usually a low-order polynomial, such that $f(u \geq 1) = 0$ +and normalised such that + +\begin{equation} + \int f(|\vec{u}|){\rm d}^3u = 1. +\end{equation} + +$H$ is the kernel's support radius and is used as the ``smoothing +length'' in the Gadget code ({\it i.e.} $H=h$). This definition is, +however, not very physical and makes comparison of kernels at a +\emph{fixed resolution} difficult. A more sensible definition of the +smoothing length, related to the Taylor expansion of the +reconstructed density field, is given in terms of the kernel's +standard deviation + +\begin{equation} + \sigma^2 \equiv \frac{1}{3}\int \vec{u}^2 W(\vec{u},h) {\rm d}^3u. + \label{eq:sph:sigma} +\end{equation} + +The smoothing length is then: +\begin{equation} + h\equiv2\sigma. + \label{eq:sph:h} +\end{equation} + +Each kernel, {\it i.e.} definition of $f(u)$, will have a different +ratio $\gamma = H/h$. So for a \emph{fixed resolution} $h$, one will +have different kernel support sizes, $H$, and a different number of +neighbours, $N_{\rm ngb}$, to interact with. One would typically choose +$h$ for a simulation as a multiple $\eta$ of the mean inter-particle +separation: + +\begin{equation} + h = \eta \langle x \rangle = \eta \left(\frac{m}{\rho}\right)^{1/3}, +\end{equation} + +where $\rho$ is the local density of the fluid and $m$ the SPH +particle mass. + +The (weighted) number of neighbours within the kernel support is a +useful quantity to use in implementations of SPH. It is defined as (in +3D) + +\begin{equation} + N_{\rm ngb} \equiv \frac{4}{3}\pi \left(\frac{H}{h}\eta\right)^3.
+\end{equation} + +Once the fixed ratio $\gamma= H/h$ is known (via equations +\ref{eq:sph:sigma} and \ref{eq:sph:h}) for a given kernel, the number +of neighbours only depends on the resolution parameter $\eta$. For +the usual cubic spline kernel (see below), setting the simulation +resolution to $\eta=1.2348$ yields the commonly used value $N_{\rm + ngb} = 48$. + +\section{Kernels available in \swift} + +The \swift kernels are split into two categories, the B-splines +($M_{4,5,6}$) and the Wendland kernels ($C2$, $C4$ and $C6$). In all +cases we impose $f(u>1) = 0$.\\ + +The spline kernels are defined as: + +\begin{align} + f(u) &= C M_n(u), \\ + M_n(u) &\equiv \frac{1}{2\pi} + \int_{-\infty}^{\infty} + \left(\frac{\sin\left(k/n\right)}{k/n}\right)^n\cos\left(ku\right){\rm + d}k, +\end{align} + +whilst the Wendland kernels read + +\begin{align} + f(u) &= C \Psi_{i,j}(u), \\ + \Psi_{i,j}(u) &\equiv \mathcal{I}^j\left[\max\left(1-u,0\right)^i\right],\\ + \mathcal{I}[f](u) &\equiv \int_u^\infty f\left(k\right)k{\rm d}k. +\end{align} + +\subsubsection{Cubic spline ($M_4$) kernel} + +In 3D, we have $C=\frac{16}{\pi}$ and $\gamma=H/h = 1.825742$.\\ +The kernel function $f(u)$ reads: + +\begin{equation} + M_4(u) = \left\lbrace\begin{array}{rcl} + 3u^3 - 3u^2 + \frac{1}{2} & \mbox{if} & u<\frac{1}{2}\\ + -u^3 + 3u^2 - 3u + 1 & \mbox{if} & u \geq \frac{1}{2} + \end{array} + \right. + \nonumber +\end{equation} + + +\subsubsection{Quartic spline ($M_5$) kernel} + +In 3D, we have $C=\frac{15625}{512\pi}$ and $\gamma=H/h = 2.018932$.\\ +The kernel function $f(u)$ reads: + +\begin{align} + M_5(u) &= \nonumber\\ + &\left\lbrace\begin{array}{rcl} + 6u^4 - \frac{12}{5}u^2 + \frac{46}{125} & \mbox{if} & u < \frac{1}{5} \\ + -4u^4 + 8u^3 - \frac{24}{5}u^2 + \frac{8}{25}u + \frac{44}{125} & \mbox{if} & \frac{1}{5} \leq u < \frac{3}{5}\\ + u^4 - 4u^3 + 6u^2 - 4u + 1 & \mbox{if} & \frac{3}{5} \leq u \\ + \end{array} + \right.
+ \nonumber +\end{align} + + +\subsubsection{Quintic spline ($M_6$) kernel} + +In 3D, we have $C=\frac{2187}{40\pi}$ and $\gamma=H/h = 2.195775$.\\ +The kernel function $f(u)$ reads: + +\begin{align} + M_6(u) &= \nonumber\\ + &\left\lbrace\begin{array}{rcl} + -10u^5 + 10u^4 - \frac{20}{9}u^2 + \frac{22}{81} & \mbox{if} & u < \frac{1}{3} \\ + 5u^5 - 15u^4 + \frac{50}{3}u^3 - \frac{70}{9}u^2 + \frac{25}{27}u + \frac{17}{81} & \mbox{if} & \frac{1}{3} \leq u < \frac{2}{3}\\ + -u^5 + 5u^4 - 10u^3 + 10u^2 - 5u + 1 & \mbox{if} & u \geq \frac{2}{3} + \end{array} + \right. + \nonumber +\end{align} + + +\subsubsection{Wendland C2 kernel} + +In 3D, we have $C=\frac{21}{2\pi}$ and $\gamma=H/h = 1.936492$.\\ +The kernel function $f(u)$ reads: + +\begin{align} + \Psi_{i,j}(u) &= 4u^5 - 15u^4 + 20u^3 - 10u^2 + 1. + \nonumber +\end{align} + + +\subsubsection{Wendland C4 kernel} + +In 3D, we have $C=\frac{495}{32\pi}$ and $\gamma=H/h = 2.207940$.\\ +The kernel function $f(u)$ reads: + +\begin{align} + \Psi_{i,j}(u) &= \frac{35}{3}u^8 - 64u^7 + 140u^6 \nonumber\\ + & - \frac{448}{3}u^5 + 70u^4 - \frac{28}{3}u^2 + 1 + \nonumber +\end{align} + + +\subsubsection{Wendland C6 kernel} + +In 3D, we have $C=\frac{1365}{64\pi}$ and $\gamma=H/h = 2.449490$.\\ +The kernel function $f(u)$ reads: + +\begin{align} + \Psi_{i,j}(u) &= 32u^{11} - 231u^{10} + 704u^9 - 1155u^8 \nonumber\\ + & + 1056u^7 - 462u^6 + 66u^4 - 11u^2 + 1 + \nonumber +\end{align} + + +\subsubsection{Summary} + +All kernels available in \swift are shown in Fig.~\ref{fig:sph:kernels}. + +\begin{figure} +\includegraphics[width=\columnwidth]{kernels.pdf} +\caption{The kernel functions available in \swift for a mean + inter-particle separation $\langle x\rangle=1.5$ and a resolution + $\eta=1.2348$. The corresponding kernel support radii $H$ (shown by + arrows) and number of neighbours $N_{\rm ngb}$ are indicated on the + figure. A Gaussian kernel with the same smoothing length is shown + for comparison.
Note that all these kernels have the \emph{same + resolution} despite having vastly different numbers of neighbours.} +\label{fig:sph:kernels} +\end{figure} + +\begin{figure} +\includegraphics[width=\columnwidth]{kernel_derivatives.pdf} +\caption{The first and second derivatives of the kernel functions + available in \swift for a mean inter-particle separation $\langle + x\rangle=1.5$ and a resolution $\eta=1.2348$. A Gaussian kernel + with the same smoothing length is shown for comparison.} +\label{fig:sph:kernel_derivatives} +\end{figure} + + +\section{Kernel Derivatives} + +The derivatives of the kernel function have relatively simple +expressions and are shown in Fig.~\ref{fig:sph:kernel_derivatives}. + +\begin{eqnarray*} + \vec\nabla_x W(\vec{x},h) &=& \frac{1}{h^4}f'\left(\frac{|\vec{x}|}{h}\right) \frac{\vec{x}}{|\vec{x}|} \\ + \frac{\partial W(\vec{x},h)}{\partial h} &=&- \frac{1}{h^4}\left[3f\left(\frac{|\vec{x}|}{h}\right) + +\frac{|\vec{x}|}{h}f'\left(\frac{|\vec{x}|}{h}\right)\right] +\end{eqnarray*} + +Note that for all the kernels listed above, $f'(0) = 0$. + +\end{document} diff --git a/theory/kernel/kernels.py b/theory/kernel/kernels.py index d7bdbe2bf9ba49a30f4c8a2ae136c4843ce5c2cf..184379e5eafbcd12a1a47560ee88e02066da3942 100644 --- a/theory/kernel/kernels.py +++ b/theory/kernel/kernels.py @@ -11,24 +11,24 @@ from matplotlib.font_manager import FontProperties import numpy params = { - 'axes.labelsize': 8, + 'axes.labelsize': 10, 'axes.titlesize': 8, - 'font.size': 8, + 'font.size': 10, 'legend.fontsize': 9, - 'xtick.labelsize': 8, - 'ytick.labelsize': 8, + 'xtick.labelsize': 10, + 'ytick.labelsize': 10, 'xtick.major.pad': 2.5, 'ytick.major.pad': 2.5, 'text.usetex': True, -'figure.figsize' : (3.15,3.15), -'figure.subplot.left' : 0.12, +'figure.figsize' : (4.15,4.15), +'figure.subplot.left' : 0.14, 'figure.subplot.right' : 0.99 , 'figure.subplot.bottom' : 0.08 , 'figure.subplot.top' : 0.99 , 'figure.subplot.wspace' : 0. , 'figure.subplot.hspace' : 0.
, 'lines.markersize' : 6, -'lines.linewidth' : 2., +'lines.linewidth' : 1.5, 'text.latex.unicode': True } rcParams.update(params) @@ -36,147 +36,277 @@ rc('font',**{'family':'sans-serif','sans-serif':['Times']}) #Parameters -eta = 1.2349 -h = 2.1 +eta = 1.2348422195325 # Resolution (Gives 48 neighbours for a cubic spline kernel) +dx = 1.5#4 #2.7162 # Mean inter-particle separation #Constants PI = math.pi -#Cubic Spline -cubic_kernel_degree = 3 -cubic_kernel_ivals = 2 -cubic_kernel_gamma = 2. -cubic_kernel_ngb = 4.0 / 3.0 * PI * eta**3 * 6.0858 -cubic_kernel_coeffs = array([[3./(4.*PI) , -3./(2.*PI), 0., 1./PI], - [-1./(4.*PI), 3./(2.*PI), -3./PI, 2./PI], - [0., 0., 0., 0.]]) -def cubic_W(x): - if size(x) == 1: - x = array([x]) - ind = (minimum(x, cubic_kernel_ivals)).astype(int) - coeffs = cubic_kernel_coeffs[ind,:] - w = coeffs[:,0] * x + coeffs[:,1] - for k in range(2, cubic_kernel_degree+1): - w = x * w + coeffs[:,k] - return w - - -#Quartic Spline -quartic_kernel_degree = 4 -quartic_kernel_ivals = 3 -quartic_kernel_gamma = 2.5 -quartic_kernel_ngb = 4.0 / 3.0 * PI * eta**3 * 8.2293 -quartic_kernel_coeffs = array([[3./(10.*PI) , 0., -3./(4.*PI) , 0. , 23./(32.*PI)], - [-1./(5.*PI) , 1./PI , -3./(2.*PI) ,1./(4.*PI) , 11./(16.*PI)], - [1./(20.*PI) , -1./(2.*PI) , 15./(8.*PI) , -25./(8.*PI), 125./(64.*PI)], - [ 0. , 0., 0., 0., 0.]]) -def quartic_W(x): - if size(x) == 1: - x = array([x]) - ind = (minimum(x+0.5, quartic_kernel_ivals)).astype(int) - coeffs = quartic_kernel_coeffs[ind,:] - w = coeffs[:,0] * x + coeffs[:,1] - for k in range(2, quartic_kernel_degree+1): - w = x * w + coeffs[:,k] - return w - - -# Wendland kernel -wendland2_kernel_degree = 5 -wendland2_kernel_ivals = 1 -wendland2_kernel_gamma = 2 -wendland2_kernel_ngb = 4.0 / 3.0 * PI * eta**3 * 7.261825 -wendland2_kernel_coeffs = 3.342253804929802 * array([[1./8, -30./32, 80./32, -80./32., 0., 1.], - [ 0. , 0., 0., 0., 0., 0.]]) / 8. 
- -print wendland2_kernel_coeffs -def wendland2_W(x): - if size(x) == 1: - x = array([x]) - ind = (minimum(0.5*x, wendland2_kernel_ivals)).astype(int) - coeffs = wendland2_kernel_coeffs[ind,:] - w = coeffs[:,0] * x + coeffs[:,1] - for k in range(2, wendland2_kernel_degree+1): - w = x * w + coeffs[:,k] - return w - -#def wendland2_W(x): -# if size(x) == 1: -# x = array([x]) -# x /= 1.936492 -# x[x>1.] = 1. -# oneminusu = 1.-x -# oneminusu4 = oneminusu * oneminusu * oneminusu * oneminusu -# return 3.342253804929802 * oneminusu4 * (1. + 4.*x) / h**3 - - -#Find H -r = arange(0, 3.5*h, 1./1000.) -xi = r/h -cubic_Ws = cubic_W(xi) -quartic_Ws = quartic_W(xi) -wendland2_Ws = wendland2_W(xi) -for j in range(size(r)): - if cubic_Ws[j] == 0: - cubic_H = r[j] - break -for j in range(size(r)): - if quartic_Ws[j] == 0: - quartic_H = r[j] - break -for j in range(size(r)): - if wendland2_Ws[j] == 0: - wendland2_H = r[j] - break - - -print "H=", cubic_H -print "H=", quartic_H -print "H=", wendland2_H - - -# Compute sigma ----------------------------------------- -cubic_norm = 4.*PI*integrate.quad(lambda x: x**2*cubic_W(x), 0, cubic_H)[0] -quartic_norm = 4.*PI*integrate.quad(lambda x: x**2*quartic_W(x), 0, quartic_H)[0] -wendland2_norm = 4.*PI*integrate.quad(lambda x: x**2*wendland2_W(x), 0, wendland2_H)[0] - -print cubic_norm -print quartic_norm -print wendland2_norm - - -# Plot kernels ------------------------------------------ -r = arange(0, 3.5*h, 1./100.) -xi = r/h - -cubic_Ws = cubic_W(xi) -quartic_Ws = quartic_W(xi) -wendland2_Ws = wendland2_W(xi) - +# Compute expected moothing length +h = eta * dx + +# Get kernel support (Dehnen & Aly 2012, table 1) for 3D kernels +H_cubic = 1.825742 * h +H_quartic = 2.018932 * h +H_quintic = 2.195775 * h +H_WendlandC2 = 1.936492 * h +H_WendlandC4 = 2.207940 * h +H_WendlandC6 = 2.449490 * h + +# Get the number of neighbours within kernel support: +N_H_cubic = 4./3. * PI * H_cubic**3 / (dx)**3 +N_H_quartic = 4./3. 
* PI * H_quartic**3 / (dx)**3 +N_H_quintic = 4./3. * PI * H_quintic**3 / (dx)**3 +N_H_WendlandC2 = 4./3. * PI * H_WendlandC2**3 / (dx)**3 +N_H_WendlandC4 = 4./3. * PI * H_WendlandC4**3 / (dx)**3 +N_H_WendlandC6 = 4./3. * PI * H_WendlandC6**3 / (dx)**3 + + +print "Smoothing length: h =", h, "Cubic spline kernel support size: H =", H_cubic, "Number of neighbours N_H =", N_H_cubic +print "Smoothing length: h =", h, "Quartic spline kernel support size: H =", H_quartic, "Number of neighbours N_H =", N_H_quartic +print "Smoothing length: h =", h, "Quintic spline kernel support size: H =", H_quintic, "Number of neighbours N_H =", N_H_quintic +print "Smoothing length: h =", h, "Wendland C2 kernel support size: H =", H_WendlandC2, "Number of neighbours N_H =", N_H_WendlandC2 +print "Smoothing length: h =", h, "Wendland C4 kernel support size: H =", H_WendlandC4, "Number of neighbours N_H =", N_H_WendlandC4 +print "Smoothing length: h =", h, "Wendland C6 kernel support size: H =", H_WendlandC6, "Number of neighbours N_H =", N_H_WendlandC6 + +# Get kernel constants (Dehen & Aly 2012, table 1) for 3D kernel +C_cubic = 16. / PI +C_quartic = 5**6 / (512 * PI) +C_quintic = 3**7 / (40 * PI) +C_WendlandC2 = 21. / (2 * PI) +C_WendlandC4 = 495. / (32 * PI) +C_WendlandC6 = 1365. / (64 * PI) + +# Get the reduced kernel definitions (Dehen & Aly 2012, table 1) for 3D kernel +#def plus(u) : return maximum(0., u) +def cubic_spline(r): return where(r > 1., 0., where(r < 0.5, + 3.*r**3 - 3.*r**2 + 0.5, + -r**3 + 3.*r**2 - 3.*r + 1.) ) + +#return plus(1. - r)**3 - 4.*plus(1./2. - r)**3 +def quartic_spline(r): return where(r > 1., 0., where(r < 0.2, + 6.*r**4 - 2.4*r**2 + 46./125., + where(r < 0.6, + -4.*r**4 + 8.*r**3 - (24./5.)*r**2 + (8./25.)*r + 44./125., + 1.*r**4 - 4.*r**3 + 6.*r**2 - 4.*r + 1.))) + +#return plus(1. - r)**4 - 5.*plus(3./5. - r)**4 + 10.*plus(1./5. 
- r)**4 +def quintic_spline(r): return where(r > 1., 0., where(r < 1./3., + -10.*r**5 + 10.*r**4 - (20./9.)*r**2 + (22./81.), + where(r < 2./3., + 5.*r**5 - 15.*r**4 + (50./3.)*r**3 - (70./9.)*r**2 + (25./27.)*r + (17./81.), + -1.*r**5 + 5.*r**4 - 10.*r**3 + 10.*r**2 - 5.*r + 1.))) + +#return plus(1. - r)**5 - 6.*plus(2./3. - r)**5 + 15.*plus(1./3. - r)**5 +def wendlandC2(r): return where(r > 1., 0., 4.*r**5 - 15.*r**4 + 20.*r**3 - 10*r**2 + 1.) +def wendlandC4(r): return where(r > 1., 0., (35./3.)*r**8 - 64.*r**7 + 140.*r**6 - (448./3.)*r**5 + 70.*r**4 - (28. /3.)*r**2 + 1.) +def wendlandC6(r): return where(r > 1., 0., 32.*r**11 - 231.*r**10 + 704.*r**9 - 1155.*r**8 + 1056.*r**7 - 462.*r**6 + 66.*r**4 - 11.*r**2 + 1.) +def Gaussian(r,h): return (1./(0.5*pi*h**2)**(3./2.)) * exp(- 2.*r**2 / (h**2)) + + +# Kernel definitions (3D) +def W_cubic_spline(r): return C_cubic * cubic_spline(r / H_cubic) / H_cubic**3 +def W_quartic_spline(r): return C_quartic * quartic_spline(r / H_quartic) / H_quartic**3 +def W_quintic_spline(r): return C_quintic * quintic_spline(r / H_quintic) / H_quintic**3 +def W_WendlandC2(r): return C_WendlandC2 * wendlandC2(r / H_WendlandC2) / H_WendlandC2**3 +def W_WendlandC4(r): return C_WendlandC4 * wendlandC4(r / H_WendlandC4) / H_WendlandC4**3 +def W_WendlandC6(r): return C_WendlandC6 * wendlandC6(r / H_WendlandC6) / H_WendlandC6**3 + +# PLOT STUFF +figure() +subplot(211) +xx = linspace(0., 5*h, 1000) +maxY = 1.2*Gaussian(0, h) + +# Plot the kernels +plot(xx, Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm %14s\\quad H=\\infty}$"%("Gaussian~~~~~~")) +plot(xx, W_cubic_spline(xx), 'b-', label="${\\rm %14s\\quad H=%4.3f}$"%("Cubic~spline~~", H_cubic)) +plot(xx, W_quartic_spline(xx), 'c-', label="${\\rm %14s\\quad H=%4.3f}$"%("Quartic~spline", H_quartic)) +plot(xx, W_quintic_spline(xx), 'g-', label="${\\rm %14s\\quad H=%4.3f}$"%("Quintic~spline", H_quintic)) +plot(xx, W_WendlandC2(xx), 'r-', label="${\\rm %14s\\quad H=%4.3f}$"%("Wendland~C2~", 
H_WendlandC2)) +plot(xx, W_WendlandC4(xx), 'm-', label="${\\rm %14s\\quad H=%4.3f}$"%("Wendland~C4~", H_WendlandC4)) +plot(xx, W_WendlandC6(xx), 'y-', label="${\\rm %14s\\quad H=%4.3f}$"%("Wendland~C6~", H_WendlandC6)) + +# Indicate the position of H +arrow(H_cubic, 0.12*maxY , 0., -0.12*maxY*0.9, fc='b', ec='b', length_includes_head=True, head_width=0.03, head_length=0.12*maxY*0.3) +arrow(H_quartic, 0.12*maxY , 0., -0.12*maxY*0.9, fc='c', ec='c', length_includes_head=True, head_width=0.03, head_length=0.12*maxY*0.3) +arrow(H_quintic, 0.12*maxY , 0., -0.12*maxY*0.9, fc='g', ec='g', length_includes_head=True, head_width=0.03, head_length=0.12*maxY*0.3) +arrow(H_WendlandC2, 0.12*maxY , 0., -0.12*maxY*0.9, fc='r', ec='r', length_includes_head=True, head_width=0.03, head_length=0.12*maxY*0.3) +arrow(H_WendlandC4, 0.12*maxY , 0., -0.12*maxY*0.9, fc='m', ec='m', length_includes_head=True, head_width=0.03, head_length=0.12*maxY*0.3) +arrow(H_WendlandC6, 0.12*maxY , 0., -0.12*maxY*0.9, fc='y', ec='y', length_includes_head=True, head_width=0.03, head_length=0.12*maxY*0.3) + +# Show h +plot([h, h], [0., maxY], 'k:', linewidth=0.5) +text(h, maxY*0.35, "$h\\equiv\\eta\\langle x\\rangle = %.4f$"%h, rotation=90, backgroundcolor='w', ha='center', va='bottom') + +# Show <x> +plot([dx, dx], [0., maxY], 'k:', linewidth=0.5) +text(dx, maxY*0.35, "$\\langle x\\rangle = %.1f$"%dx, rotation=90, backgroundcolor='w', ha='center', va='bottom') + +xlim(0., 2.5*h) +ylim(0., maxY) +gca().xaxis.set_ticklabels([]) +ylabel("$W(r,h)$", labelpad=1.5) +legend(loc="upper right", handlelength=1.2, handletextpad=0.2) + + +# Same but now in log space +subplot(212, yscale="log") +plot(xx, Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$") +plot(xx, W_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$") +plot(xx, W_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$") +plot(xx, W_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$") +plot(xx, W_WendlandC2(xx), 'r-', 
label="${\\rm Wendland~C2}$") +plot(xx, W_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$") +plot(xx, W_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$") + +# Show h +plot([h, h], [0., 1.], 'k:', linewidth=0.5) + +# Show <x> +plot([dx, dx], [0., 1.], 'k:', linewidth=0.5) + + +# Show plot properties +text(h/5., 1e-3, "$\\langle x \\rangle = %3.1f$"%(dx), va="top", backgroundcolor='w') +text(h/5.+0.06, 3e-4, "$\\eta = %5.4f$"%(eta), va="top", backgroundcolor='w') + +# Show number of neighbours +text(1.9*h, 2e-1/2.9**0, "$N_{\\rm ngb}=\\infty$", fontsize=10) +text(1.9*h, 2e-1/2.9**1, "$N_{\\rm ngb}=%3.1f$"%(N_H_cubic), color='b', fontsize=9) +text(1.9*h, 2e-1/2.9**2, "$N_{\\rm ngb}=%3.1f$"%(N_H_quartic), color='c', fontsize=9) +text(1.9*h, 2e-1/2.9**3, "$N_{\\rm ngb}=%3.1f$"%(N_H_quintic), color='g', fontsize=9) +text(1.9*h, 2e-1/2.9**4, "$N_{\\rm ngb}=%3.1f$"%(N_H_WendlandC2), color='r', fontsize=9) +text(1.9*h, 2e-1/2.9**5, "$N_{\\rm ngb}=%3.1f$"%(N_H_WendlandC4), color='m', fontsize=9) +text(1.9*h, 2e-1/2.9**6, "$N_{\\rm ngb}=%3.0f$"%(N_H_WendlandC6), color='y', fontsize=9) + +xlim(0., 2.5*h) +ylim(1e-5, 0.7) +xlabel("$r$", labelpad=0) +ylabel("$W(r,h)$", labelpad=0.5) + +savefig("kernels.pdf") + + + + +################################ +# Now, let's work on derivatives +################################ + +# Get the derivative of the reduced kernel definitions for 3D kernels +def d_cubic_spline(r): return where(r > 1., 0., where(r < 0.5, + 9.*r**2 - 6.*r, + -3.*r**2 + 6.*r - 3.) 
) + +def d_quartic_spline(r): return where(r > 1., 0., where(r < 0.2, + 24.*r**3 - 4.8*r, + where(r < 0.6, + -16.*r**3 + 24.*r**2 - (48./5.)*r + (8./25.), + 4.*r**3 - 12.*r**2 + 12.*r - 4.))) + +def d_quintic_spline(r): return where(r > 1., 0., where(r < 1./3., + -50.*r**4 + 40.*r**3 - (40./9.)*r, + where(r < 2./3., + 25.*r**4 - 60.*r**3 + 50.*r**2 - (140./9.)*r + (25./27.), + -5.*r**4 + 20.*r**3 - 30.*r**2 + 20.*r - 5.))) + +def d_wendlandC2(r): return where(r > 1., 0., 20.*r**4 - 60.*r**3 + 60.*r**2 - 20.*r) +def d_wendlandC4(r): return where(r > 1., 0., 93.3333*r**7 - 448.*r**6 + 840.*r**5 - 746.667*r**4 + 280.*r**3 - 18.6667*r) +def d_wendlandC6(r): return where(r > 1., 0., 352.*r**10 - 2310.*r**9 + 6336.*r**8 - 9240.*r**7 + 7392.*r**6 - 2772.*r**5 + 264.*r**3 - 22.*r) +def d_Gaussian(r,h): return (-8.*sqrt(2.)/(PI**(3./2.) * h**5)) * r * exp(- 2.*r**2 / (h**2)) + +# Get the second derivative of the reduced kernel definitions for 3D kernels +def d2_cubic_spline(r): return where(r > 1., 0., where(r < 0.5, + 18.*r - 6., + -6.*r + 6.) ) + +def d2_quartic_spline(r): return where(r > 1., 0., where(r < 0.2, + 72.*r**2 - 4.8, + where(r < 0.6, + -48.*r**2 + 48.*r - (48./5.), + 12.*r**2 - 24.*r + 12.))) + +def d2_quintic_spline(r): return where(r > 1., 0., where(r < 1./3., + -200.*r**3 + 120.*r**2 - (40./9.), + where(r < 2./3., + 100.*r**3 - 180.*r**2 + 100.*r - (140./9.), + -20.*r**3 + 60.*r**2 - 60.*r + 20))) +def d2_wendlandC2(r): return where(r > 1., 0., 80.*r**3 - 180.*r**2 + 120.*r - 20.) +def d2_wendlandC4(r): return where(r > 1., 0., 653.3333*r**6 - 2688.*r**5 + 4200.*r**4 - 2986.667*r**3 + 840.*r**2 - 18.6667) +def d2_wendlandC6(r): return where(r > 1., 0., 3520.*r**9 - 20790.*r**8 + 50688.*r**7 - 64680.*r**6 + 44352.*r**5 - 13860.*r**4 + 792.*r**2 - 22) +def d2_Gaussian(r,h): return (32*sqrt(2)/(PI**(3./2.)*h**7)) * r**2 * exp(-2.*r**2 / (h**2)) - 8.*sqrt(2.)/(PI**(3./2.) 
* h**5) * exp(- 2.*r**2 / (h**2)) + + +# Derivative of kernel definitions (3D) +def dW_cubic_spline(r): return C_cubic * d_cubic_spline(r / H_cubic) / H_cubic**4 +def dW_quartic_spline(r): return C_quartic * d_quartic_spline(r / H_quartic) / H_quartic**4 +def dW_quintic_spline(r): return C_quintic * d_quintic_spline(r / H_quintic) / H_quintic**4 +def dW_WendlandC2(r): return C_WendlandC2 * d_wendlandC2(r / H_WendlandC2) / H_WendlandC2**4 +def dW_WendlandC4(r): return C_WendlandC4 * d_wendlandC4(r / H_WendlandC4) / H_WendlandC4**4 +def dW_WendlandC6(r): return C_WendlandC6 * d_wendlandC6(r / H_WendlandC6) / H_WendlandC6**4 + +# Second derivative of kernel definitions (3D) +def d2W_cubic_spline(r): return C_cubic * d2_cubic_spline(r / H_cubic) / H_cubic**5 +def d2W_quartic_spline(r): return C_quartic * d2_quartic_spline(r / H_quartic) / H_quartic**5 +def d2W_quintic_spline(r): return C_quintic * d2_quintic_spline(r / H_quintic) / H_quintic**5 +def d2W_WendlandC2(r): return C_WendlandC2 * d2_wendlandC2(r / H_WendlandC2) / H_WendlandC2**5 +def d2W_WendlandC4(r): return C_WendlandC4 * d2_wendlandC4(r / H_WendlandC4) / H_WendlandC4**5 +def d2W_WendlandC6(r): return C_WendlandC6 * d2_wendlandC6(r / H_WendlandC6) / H_WendlandC6**5 figure() +subplot(211) + +plot([0, 2.5*h], [0., 0.], 'k--', linewidth=0.7) +plot(xx, d_Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$") +plot(xx, dW_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$") +plot(xx, dW_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$") +plot(xx, dW_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$") +plot(xx, dW_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$") +plot(xx, dW_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$") +plot(xx, dW_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$") + +maxY = d_Gaussian(h/2, h) + +# Show h +plot([h, h], [2*maxY, 0.1], 'k:', linewidth=0.5) + +# Show <x> +plot([dx, dx], [2*maxY, 0.1], 'k:', linewidth=0.5) + + +xlim(0., 2.5*h) 
+gca().xaxis.set_ticklabels([]) +ylim(1.2*maxY, -0.1*maxY) +xlabel("$r$", labelpad=0) +ylabel("$\\partial W(r,h)/\\partial r$", labelpad=0.5) +legend(loc="lower right") -text(h-0.1, cubic_Ws[0]/20., "h", ha="right",va="center") -arrow(h, cubic_Ws[0]/10., 0., -cubic_Ws[0]/10., fc='k', ec='k', length_includes_head=True, head_length=cubic_Ws[0]/30., head_width=0.1) -plot(r,cubic_Ws, 'b-' ,label="Cubic") -plot(r, quartic_Ws, 'r-', label="Quartic") -plot(r, wendland2_Ws, 'g-', label="Wendland C2") +subplot(212) -text(cubic_H-0.1, cubic_Ws[0]/20., "H", ha="right",va="center", color='b') -arrow(cubic_H, cubic_Ws[0]/10., 0., -cubic_Ws[0]/10., fc='b', ec='b', length_includes_head=True, head_length=cubic_Ws[0]/30., head_width=0.1) +maxY = d2_Gaussian(h,h) +plot([h, h], [-4*maxY, 1.4*maxY], 'k:', linewidth=0.5) +text(h, -3.*maxY, "$h\\equiv\\eta\\langle x\\rangle = %.4f$"%h, rotation=90, backgroundcolor='w', ha='center', va='bottom') -text(quartic_H-0.1, cubic_Ws[0]/20., "H", ha="right",va="center", color='r') -arrow(quartic_H, cubic_Ws[0]/10., 0., -cubic_Ws[0]/10., fc='r', ec='r', length_includes_head=True, head_length=quartic_Ws[0]/30., head_width=0.1) +plot([dx, dx], [-4*maxY, 1.4*maxY], 'k:', linewidth=0.5) +text(dx, -3.*maxY, "$\\langle x\\rangle = %.1f$"%dx, rotation=90, backgroundcolor='w', ha='center', va='bottom') -text(wendland2_H-0.1, cubic_Ws[0]/20., "H", ha="right",va="center", color='r') -arrow(wendland2_H, cubic_Ws[0]/10., 0., -cubic_Ws[0]/10., fc='g', ec='g', length_includes_head=True, head_length=wendland2_Ws[0]/30., head_width=0.1) +plot([0, 2.5*h], [0., 0.], 'k--', linewidth=0.7) +plot(xx, d2_Gaussian(xx, h), 'k-', linewidth=0.7, label="${\\rm Gaussian}$") +plot(xx, d2W_cubic_spline(xx), 'b-', label="${\\rm Cubic~spline}$") +plot(xx, d2W_quartic_spline(xx), 'c-', label="${\\rm Quartic~spline}$") +plot(xx, d2W_quintic_spline(xx), 'g-', label="${\\rm Quintic~spline}$") +plot(xx, d2W_WendlandC2(xx), 'r-', label="${\\rm Wendland~C2}$") +plot(xx, 
d2W_WendlandC4(xx), 'm-', label="${\\rm Wendland~C4}$") +plot(xx, d2W_WendlandC6(xx), 'y-', label="${\\rm Wendland~C6}$") +xlim(0., 2.5*h) +ylim(-3.2*maxY, 1.4*maxY) +xlabel("$r$", labelpad=0) +ylabel("$\\partial^2 W(r,h)/\\partial r^2$", labelpad=0.5) -xlabel("r", labelpad=0) -ylabel("W(r,h)", labelpad=0) -legend(loc="upper right") -savefig("kernel.pdf") +savefig("kernel_derivatives.pdf") diff --git a/theory/kernel/spline_3.nb b/theory/kernel/spline_3.nb deleted file mode 100644 index d59c7f43846fd6217c2b98e193e410f6b6268cc5..0000000000000000000000000000000000000000 --- a/theory/kernel/spline_3.nb +++ /dev/null @@ -1,871 +0,0 @@ -(* Content-type: application/vnd.wolfram.mathematica *) - -(*** Wolfram Notebook File ***) -(* http://www.wolfram.com/nb *) - -(* CreatedBy='Mathematica 8.0' *) - -(*CacheID: 234*) -(* Internal cache information: -NotebookFileLineBreakTest -NotebookFileLineBreakTest -NotebookDataPosition[ 157, 7] -NotebookDataLength[ 36970, 862] -NotebookOptionsPosition[ 35595, 809] -NotebookOutlinePosition[ 35934, 824] -CellTagsIndexPosition[ 35891, 821] -WindowFrame->Normal*) - -(* Beginning of Notebook Content *) -Notebook[{ - -Cell[CellGroupData[{ -Cell[BoxData[ - RowBox[{"\[IndentingNewLine]", - RowBox[{ - RowBox[{ - RowBox[{"f", "[", "q_", "]"}], ":=", - RowBox[{ - RowBox[{"1", "/", "Pi"}], "*", - RowBox[{"If", "[", - RowBox[{ - RowBox[{"q", ">", "2"}], ",", "0", ",", - RowBox[{"If", "[", - RowBox[{ - RowBox[{"q", ">", "1"}], ",", - RowBox[{ - RowBox[{"1", "/", "4"}], "*", - RowBox[{ - RowBox[{"(", - RowBox[{"2", "-", "q"}], ")"}], "^", "3"}]}], ",", - RowBox[{ - RowBox[{ - RowBox[{"1", "/", "4"}], "*", - RowBox[{ - RowBox[{"(", - RowBox[{"2", "-", "q"}], ")"}], "^", "3"}]}], "-", - RowBox[{ - RowBox[{"(", - RowBox[{"1", "-", "q"}], ")"}], "^", "3"}]}]}], "]"}]}], - "]"}]}]}], "\[IndentingNewLine]", - RowBox[{ - RowBox[{"W", "[", - RowBox[{"r_", ",", "h_"}], "]"}], "=", - RowBox[{ - RowBox[{"1", "/", - RowBox[{"h", "^", "3"}]}], " ", "*", - 
RowBox[{"f", "[", - RowBox[{"r", "/", "h"}], "]"}]}]}]}]}]], "Input", - CellChangeTimes->{{3.560154174311659*^9, 3.5601543108245993`*^9}}], - -Cell[BoxData[ - FractionBox[ - RowBox[{"If", "[", - RowBox[{ - RowBox[{ - FractionBox["r", "h"], ">", "2"}], ",", "0", ",", - RowBox[{"If", "[", - RowBox[{ - RowBox[{ - FractionBox["r", "h"], ">", "1"}], ",", - RowBox[{ - FractionBox["1", "4"], " ", - SuperscriptBox[ - RowBox[{"(", - RowBox[{"2", "-", - FractionBox["r", "h"]}], ")"}], "3"]}], ",", - RowBox[{ - RowBox[{ - FractionBox["1", "4"], " ", - SuperscriptBox[ - RowBox[{"(", - RowBox[{"2", "-", - FractionBox["r", "h"]}], ")"}], "3"]}], "-", - SuperscriptBox[ - RowBox[{"(", - RowBox[{"1", "-", - FractionBox["r", "h"]}], ")"}], "3"]}]}], "]"}]}], "]"}], - RowBox[{ - SuperscriptBox["h", "3"], " ", "\[Pi]"}]]], "Output", - CellChangeTimes->{{3.560154211258333*^9, 3.560154216293594*^9}, { - 3.560154312540955*^9, 3.560154319804675*^9}}] -}, Open ]], - -Cell[CellGroupData[{ - -Cell[BoxData[ - RowBox[{"Plot", "[", - RowBox[{ - RowBox[{"W", "[", - RowBox[{"r", ",", "1"}], "]"}], ",", - RowBox[{"{", - RowBox[{"r", ",", "0", ",", "2.5"}], "}"}]}], "]"}]], "Input", - CellChangeTimes->{{3.560154325145775*^9, 3.560154343883732*^9}, { - 3.560154674704236*^9, 3.56015467532159*^9}}], - -Cell[BoxData[ - GraphicsBox[{{}, {}, - {Hue[0.67, 0.6, 0.6], LineBox[CompressedData[" -1:eJxF1nkwVW/8B3BLqWyFFspW2UopWZLqvLWoLGVLi2RfE0pJlvqKQvaQpULZ -okUIlSi7EMW9IS3Knsi+XPe653d/M7/53fPPmdc8M+d5f+Z5PzNnvY27sT0X -BweHPScHx/++k6/b8CjbuRCuH+pdt1tSUU2R1j8prYN1M1e0trFM1r4LlJY+ -jXKzZW8UWRaMKc7j7bBB/sUdHbIsJ5dHSC2Wdsb24vQUCZa3jNpFko7u8EsX -FBBh+YdgCpPR7oGRQhkZHpZlspapz9M9UWWf0DtjQcX5vZ6us1LeOKwsatDH -8kvqr4ypA354F3PKroXleRf97+OO/+HqYl7Ztyw/5Px+e1b7Bl4t+nknjeWD -TXdkZ9oDUMkX9DSY5cHEwxVTzjfx5+5Lj3Msh9stmE/Sb8FVUnRQl2UK3Slu -TOo2Ctbo/1rE8pU6ye2j+aFY9t91u59nqVgbS20cORCO7y3Pk4pYttmsxf3X -MQoy8vViZ1nmovt8mvKLhoK0qMNWlqm/TCWmtO/g8/gmB7o5FVef8b2ZaI+B -OL/5zWiWdWP6eSYexuK4Xlf6SZbFr1YcH3eOg7HhX29xlssPeI2N0u/ieaKt 
-fuoZKpZ865YfkUpCVwqRctuMio7yMs/hwSQIhFy8RbD8JCux6m/+PWgIvVEY -P03FMY9jlkMHHqBqTC/LgOX4ZSXxA46pcLAaih4/SUWhl67HJdGHiKxRPB/H -8oOhnWKTfg9BZggsU2PZ7bOw44T2I3x+fzXU/QQVwg8+cI23p4Hvt+n7+uNU -0AWLcy7sScfBuEVvzVjuvZFuOPYwHVwPuq79MaGi2PF6yqhzBrbE/zrDwbKZ -iqrmP3omkO8VLmJExaOG1It/pXJgaX3ozGd9KqYSizZ3hedgHZ9lsTLLhxwb -e1ppOVAxWRIQrUfFMPesaQnlCUKORbkd0qVCY+8xzdvBz1DSrVEUe5iKzy8Y -XPKjL5CwtPVd4T5WXiyzEFDKw0vfvvARLSp2y8VpBtvl4UXTk68bWY6cypn0 -acmDdQplIoSgQvXOF3ubp/kI7jm9R203Ff81bNFVtnyJrTdFJ1arUpGd/0ru -6d2XYH7QXaWgQkVr4n5u2Y+s9a1GNeo7WP11PFUquqsQ1B6bT/rbqWjgvqnE -IVIEKbppxpktVKze+034U20xmKfM1iyWoSL3xe1v55VKQOHrMIwQomJIeChc -17oE2985ljusoEL+ii6hEFeC6+srZYnlrH7v4XvUQyuB/9lX1H5+KmLqwx3M -at7ixdnadqmlrL72RI0fNi9DqXC4iDKTglS+xkAh7XJcHrU592WQAmKvr7m/ -RTmkg+dm3QYo+O6mqDbqVY5vvWdiePopEG0N6//4pBxbTc+Pbeuh4E6ivk7I -igq8eNtm7/KDgkDZZkHyWwW2qQ603WqhwBEtScMeVVh1Kb3z6GsKONJmSr6F -VUF2Ii6zoJiCxEXi3xsyqpDuRslaVUTBh3oHyZy2KpAHR4615VOgcJyRZq9Z -DfJJxJH9TykYdJJ/9pOzBlI+et41yRSci7n27nN0LTRW8Uddu0GBW9+mnsLc -euyuULjeeYCCXxZfl1bU14PvnDBP1j4KTDpClJp661HoG5DlDgo0Gge8+9Y2 -wCrKRXVBkwLu/MwVq4Mb0Hkh6QHnDlY+v/WEl2UjMn9vtMqToqBCRCxp14om -pGSOVzyltYJ3/U35Lo3P4FpuY6Wb3YrIFY2bH1lRIGfo5PeQ2QKbb8ffc39s -A6+L1pTTthZcrLlp4y7fCd3FtWGvDD4j/wK/iajOD3zvaFEe72rGI1+Xoe0n -f2GUNjNr/OsjAlIZRlPDv3F171fnqdEGcFB+aZX69KBgmmuF88IHyF12uHp8 -cx84pd7sdPpai97phBN8Zf2YLspPMPpaDd2+c9diLg5iv78ZbWlhJY71RitK -Sg5hTajZO7PMcgxV2fh/eP4XFrF1a5/Hl0FJqE3igOUI+N4r83dVvwEvpq2/ -T/5DX/vFigD3YhweMZvdYDqGGef27YXSL9FRacdptmUcG3hEZDnU81AUUm0h -PT2OGErPw+NhT+Gw2PNXc/MEGuqUHtq0ZcHWw7eI/94k5lYVBkhMp2Gbo2Kz -vvUUBtLyt9xXT8Un4S18nrumodSuoqTSm4i3uQH3t3DPQKnzmPOy1ljw6Juu -TeCZgWAkL29JWSyM/sgncfLOYOVKu6irObEY2NgU37ZiBs90BlWlbsRCJGlN -jL/kDJhadHth5Vi4BD4Loe6agW2Uj3p4bAzWnWrz9Lswg92yp/Y/s7kDX87N -hh9/zOA8b/EGeaNI9B/k65D7PQOevS3azdsiYRgybHmjdwZ+r2h9EYKRkFn+ -wl397wxGh1y9T3yMwEdx1aiHczPYI1E1xqcTAUkNovmyyCyWaEppcRwOR6Wb -sb6Eziz+5sSc/nQ8FLzffY64Fc4iiH/ZzuXFQVD3yhGIejWL2Uu7ch0fBMFa -uKP1Rcks6Ku9r1MCgvD6iNrZ8fJZ+K0d5u8zCIJD0ejFy02s79nyrRoduoWK 
-SLv7Pv2zaLS94ZoocwtX9x37FyQ6Bxvln2ICWYHoy1ofl+o3h6eUSdW+Ln8M -3/xN4/afg5aCuPbKen9M2DyydAqcw7P3d0dPFviDQ2q9onLoHMrC1P+J3/LH -2gTpysqEOXC6HIh4oOgPw2Cpsb6COWx4an2COncdpY4S+lv+zIGRL3GSftoP -Vdo/8qOH52Cxv2b0jLIfGjYmr5kenQNX1jOnnqV+aO8S7ymbmUNLdG6ew2tf -TJwU9zFYRMOBxOyoPWK+UDiyLttDigaZyErNVd3eiFMQW/TGlIYGodlPs1Fe -MLeQe9xzigbuoshIDzcvyMSp6Aqa07B+oldO4KgXCsmj0bY2NIybbsp+xOsF -aluAuKA7DaLB8QcPBV+ByK1hVdsQGs7lKDy6EOSJmN/v7QXe0nC/1Lyh6/4l -mK1pWqrxjoY2SXtrjeuXsOFo51ObChpWzvOP5FpdQsHrqfFXdTT470zInJa9 -hNaoTddtqDQY3Vk7OZDvASEiNv7VCA1vOrlKjT9dRPQ9hzpr6Xmsjw3tb5O6 -AK8rAZf8Ns7j/Podj0x4LsDCOEUqQW4ecl9mstq+umMLb5vXxy3zaOTNbGy7 -7I56b22FnbvmEXTb3Pb0czdwnZa5zW88jw6Df03HFFzhuaZb91XgPEq/GCbv -0D4H88mFmZageVz/4ZJeIHoOBz+JpQ/fnsfmzvc89sPOEA42oq+PnoezTYuH -VZwzcmfKn4Y/mIf7tZby3H4nDH5J5bcpmofeKYZadJwjzOLONvMPzOO3V15M -ykp7iL/kXecyNI9gJwbf2JAdulpeOdaPzOOx8e77VyvsYLdcmDNoijVfW4Kz -krsd3EJrVUhOOkzGJusUmmwR4L8taVycjlVCvg80Ym2Qc57Trs2YjswNnUo1 -h6xwPiw3T/UEHbTnBvkFG62g9OTMQsxpOnbq9r9p5bBCwUBRvIEVHeFjc/Em -XpYotTlX/8GVjrmFP72Wjhb4fIqi9DaEtV9RQcXcaXPMHcykpb6jw695aeyW -4FPwe5saWF1BR6i2cDjD8BQ4dtwT+FNNh+zJsUjxdaewRCpq/Y5GOv6GLKvR -yzuJ1bSrOtXtdNj9bLhi9fMEVJ7rJw2O0XFUyFJom44p3FZNaShvZCAo3ndT -5UljTIT9qzSVY8CwRfOGtKIxPLn+6PtsYiAy+YN3HdMIfqM/rKq2McC3aZvV -vywjhNXXhZjuYWBH9f6gpHlDZF+73+FtykBb/Ms8GwsD9PTtv1oZzICq5pBa -dIIu9qc2p20OY+Cqos7fYB1dPDxl1hQTyYDAWt6fogwdWDRe3GB7l4Hz1ACp -RbY66Mx7+JE7nYFsHp6I0Z1H0OrLlNZ+x0Dow8WDdxe0oawWqve8goEEQzKz -87U2ov+turKqhoG8uJTtLy5r45j11sa+RgZK3qUYLP53EA2HzD2DvjIg0X1m -m+nAAVQKldTXTTFwJ9NXj5zch/WN2lPb5hj474YOz/yrffC/2SKZSGdAL4RD -wtdvH4jZwUtOXAtQYsxzCi7Zh5LvaySXrVjAqpp2GYcNWih47Omhq7iAF2q1 -h19170HaXuV1TdYLyE7PdL7coY7KY7yBdLsFKE/Qv0bfUke3Zc/QJqcFxPLm -l15RUcfGgLslQW4LeD5uxXn/jhoyammntXwXsGsp7eHmk6rINKhMfBm3ALWG -VVxRPDtQa3WfozthAe3rlY9+eq+M/ouXnVbcX8C6x4ZP672VIRcnp+H6iJU3 -tubD4Oh2ZHWEtsvlLoB8b3jApX4bsq1NVt+rWwCXf4mCfZciPnhsuf6hYQH6 -r+Vv73NSxGDg4v6ZpgWIiAwaXprYDIWsV0Um1AWM/y50+MS7GTlD60wFfi/g -fUXFDxU9BTy51BvrP7+AxR7vw8ilsmi4WUbPXVhAZs7yRr0sGQzdjbf9wcHE 
-psaFguaDMlB8fURFcwkT96/xNF24tRHPGM9aJ0WYKLzGbz62cgOe3/IUctzK -BEcG95h5mySG95wNuLWdieK8g+/Fd0liy9TByXQVJtQLFN/MJEvgic3KL792 -MTHy/FrekKs4svEy0ewQEwYa2w+5yq3F4My9Zd46TMSmpHInJYlBPjfAJ0Gf -idYtXycqBMWQJW5sTjVmgvy6Z4jBXIMM2pjUMUsm0jfVfMmfWYXevI7o8zZM -HPW/WWLtvQobnco5w+yZqFHe0TG+sBJpbVE9dS5MdJcN7cvgX4nUl0qP911l -ImTrJ8dh1t/7PdfzSmp3mBCeKAotmxZAp8zxVJM4JoYH9x55ryIAse+7V3gk -MJFipRIn4MmPRF2+idxkJtwXhf304OTDXfknRQpPmCgNGHK7q7UU1J935A4/ -Z+JG97eW7YlLsDLeO8E+jwlThcCWnxM8iFmk451WzAT/vTW5pvmLEf17YI94 -FRN+XqaCvUbc+Jz46blmLROCrpMhOhVcWG74SvJ0PRNluVG5wSpciHwXxBH/ -iYmYIwOcF6U5EX5ftnr5dyZ2eErPnFZnEh+NBdSUupig9+8mxfYtEHy805n6 -3UwcKO31WWPEIEKvVgffHmSielquI9p/ngg2tdXjnmadr6tT5MC6WSLN4Lu5 -/RwTkRGOfzv3zhClOqZudXQmsgN2f3SznyYm9h6+E8ZFYvHNHXJKNZOEuZxi -u8gKEprCQncX14wRXtIZg54iJKJ5XM7NCo0RMWsl5ttXkzjDdaXkb90/ok5w -ucQDCRLSB9cZJm8dJnbMTtjIKJLo8rU9RXUZII6Ou1wOUiLxa/2viCvG/YTT -395bg8okGOphyrz7+4jkrrbsZxokBH4J/Li4u4d4/dWgRHAPidP8xeP0g91E -K+VD4wWQ0Pu6NbLnxG9i6YeSf6qHWHl5z4y5yXURGytVOBJ0SNjR+DJf+vwg -iNJnQjR9EvzhKQ/Cfn4jLuWlqJaZkFBlPgluonQQPxMDfLStSVjUKBcGnKcQ -tBh62GM7Ek3+5f26aq3EyojLycucSCRnFFWe4GshdG44lDe5kTi/QvH4xtIm -ws63q2WbB4ne8f0ppwsbieuep3rueJIod372qK66nih01uU54Uei7+ecJl2x -lvhkW7Xm9X8kjkJyUdKNamLo7J5NawNJpM+LjUiOVBJSxkr6P2+TiDwsdiuW -u5zQ1H98ViuCROCVd82FZmXE8UPS7mnRJHJSOB5e+lJChGoKxzgkkMjYma/W -t7uYyFQNS/9wj4SJW4bVEdlColxpUdHmFBJq7ift9yoXEJ0K12rDH5E4mczv -9cAij5jeMNP+L4OEfSF/jO+p58QKCfc/htms/EKCEpPKT4jNawbnC56SeCKa -Nm1w+DFhxdcp4VVAYo/K3U2+4mmEz2KTbV+LSKhn0tOPv0gl7pKNWrvfkCg2 -F63mCLtP5NEOGieXkggVDfU/U5dANEyW2ZLvSXyxNcuqsYwj+kbUPa2rWPvV -xnA9NLhDcAy+CKqqJbE0ybs6XiaCWNutkCjbwOoHWSVjdCiEUPv+KCe4iUQr -ZU35dnogsULJae7FZxKyysJHZj39ieH/lA53UEj4JkuN51f6EB9apu5ytJNY -tvvx4gwXTyJj49tehU4SDmv3bS9rv0D4e95QMfpBQki9P+vvqDNhXnc4wPsX -iUqNoPRwH1tCQ0yw5VEPifDZb+oPbMyJlS5UqYZ+Ekv8jB/vGzYhxkrvuU38 -YZ0PTVC77aku8VHQumztCKtvozL2U7VaRLaVPP+BMRIR2YpB+2xViJsFI2Yu -kyTWxe7/6N28kbBaVJgTO8O6X7836N7MXEHsOeEz95bG6nM5c/eB2Mm9otla -h3sZJLiHXIV3SVbtnffj1uPh4NDi+L9nMNPB4x4n223N9UlK3GwXSEcPnuBh 
-+1y1eNBjfra/8qlXHBFju1Y1afD7WrYLzzKWXxRnO/pFpUWSFNtHTAwYf2TZ -fn3PaWfYDrYTN91/3qTH9tTkoczAo2wbvJt4sMuAbR4T3fBMY7Y9/Wjnrp1m -2/jTSYWtjmzze65MDw9g+7/yiES9N2wfXWea2SDD+f/e51Z3Rug226niNjIf -p9mWqXocf+wMF3verZWbW9+x7TC5qKV+M/f/2yZ/Qvp2FNvnA0wYunfY9jQp -bOePZTtk2jMyOp7t3F00ekIy27QKsi3rKdvRrfwR1XVs30t3dQqqZzv9cvOB -I41sF6+Onm9sZvu7mYgT5Qvb8j1iB7p72N5e6COZ0ce25q1vNPsBtvXlk/P+ -DLHt4SItOT7Ott+eG7SCSbZvCXRTL0+znfgiPWxuju00/0WOJfNsPzWy3+/H -YLtwQ60EwWT73aQcjSTZ/h8gYzo2 - "]]}}, - AspectRatio->NCache[GoldenRatio^(-1), 0.6180339887498948], - Axes->True, - AxesOrigin->{0, 0}, - PlotRange->{{0, 2.5}, {0., 0.31830988618378947`}}, - PlotRangeClipping->True, - PlotRangePadding->{ - Scaled[0.02], - Scaled[0.02]}]], "Output", - CellChangeTimes->{3.5601543446066847`*^9, 3.5601546760449047`*^9}] -}, Open ]], - -Cell[CellGroupData[{ - -Cell[BoxData[ - RowBox[{"N", "[", - RowBox[{"Expand", "[", - RowBox[{ - RowBox[{"(", - RowBox[{ - RowBox[{ - RowBox[{"1", "/", "4"}], "*", - RowBox[{ - RowBox[{"(", - RowBox[{"2", "-", "q"}], ")"}], "^", "3"}]}], "-", - RowBox[{ - RowBox[{"(", - RowBox[{"1", "-", "q"}], ")"}], "^", "3"}]}], ")"}], "/", "Pi"}], - "]"}], "]"}]], "Input", - CellChangeTimes->{{3.560154431542004*^9, 3.560154500452031*^9}}], - -Cell[BoxData[ - RowBox[{"0.3183098861837907`", "\[VeryThinSpace]", "-", - RowBox[{"0.477464829275686`", " ", - SuperscriptBox["q", "2"]}], "+", - RowBox[{"0.238732414637843`", " ", - SuperscriptBox["q", "3"]}]}]], "Output", - CellChangeTimes->{{3.560154427870244*^9, 3.560154500989884*^9}}] -}, Open ]], - -Cell[CellGroupData[{ - -Cell[BoxData[ - RowBox[{"N", "[", - RowBox[{"Expand", "[", - RowBox[{ - RowBox[{"(", - RowBox[{ - RowBox[{"1", "/", "4"}], "*", - RowBox[{ - RowBox[{"(", - RowBox[{"2", "-", "q"}], ")"}], "^", "3"}]}], ")"}], "/", "Pi"}], - "]"}], "]"}]], "Input", - CellChangeTimes->{{3.560154530785256*^9, 3.56015454752137*^9}}], - -Cell[BoxData[ - RowBox[{"0.6366197723675814`", "\[VeryThinSpace]", "-", - RowBox[{"0.954929658551372`", " ", "q"}], 
"+", - RowBox[{"0.477464829275686`", " ", - SuperscriptBox["q", "2"]}], "-", - RowBox[{"0.07957747154594767`", " ", - SuperscriptBox["q", "3"]}]}]], "Output", - CellChangeTimes->{{3.560154539254085*^9, 3.560154548437131*^9}}] -}, Open ]], - -Cell[BoxData[{ - RowBox[{ - RowBox[{"DWr", "[", - RowBox[{"r_", ",", "h_"}], "]"}], ":=", - RowBox[{ - RowBox[{ - RowBox[{"Derivative", "[", - RowBox[{"1", ",", "0"}], "]"}], "[", "W", "]"}], "[", - RowBox[{"r", ",", "h"}], "]"}]}], "\[IndentingNewLine]", - RowBox[{ - RowBox[{"DWh", "[", - RowBox[{"r_", ",", "h_"}], "]"}], ":=", - RowBox[{ - RowBox[{ - RowBox[{"Derivative", "[", - RowBox[{"0", ",", "1"}], "]"}], "[", "W", "]"}], "[", - RowBox[{"r", ",", "h"}], "]"}]}]}], "Input", - CellChangeTimes->{{3.5601545811631327`*^9, 3.5601545907204247`*^9}, { - 3.5601546570572557`*^9, 3.56015471264272*^9}, {3.5601550735178423`*^9, - 3.560155113042481*^9}, {3.560155146451144*^9, 3.560155154786213*^9}, { - 3.5601552200011473`*^9, 3.56015522178111*^9}}], - -Cell[CellGroupData[{ - -Cell[BoxData[ - RowBox[{"Plot", "[", - RowBox[{ - RowBox[{"DWr", "[", - RowBox[{"r", ",", "1"}], "]"}], ",", - RowBox[{"{", - RowBox[{"r", ",", "0", ",", "2.5"}], "}"}]}], "]"}]], "Input", - CellChangeTimes->{{3.5601551158258877`*^9, 3.560155135295669*^9}}], - -Cell[BoxData[ - GraphicsBox[{{}, {}, - {Hue[0.67, 0.6, 0.6], LineBox[CompressedData[" -1:eJxF12k4VV/UAHBD+hdCpaRkLOlWQhMaFpEpSaZSSIYylFKUKSFkyFgiIRJp -FFFR3WXKTNx7K5m5gykqlPne97zP8z7vPV/O8/twzll7n73WXlvO4aKZMx8P -D08oLw/P/97TAx0Wqzq5H7j3xPWAopIbuYoqa3xc1hAWv5m6y5rVR041+aas -rDUY7NFaQZo6jullMTICsq5wTFA3jPPrHJ7f731hSsYXtvOHPWntu4bUOZe7 -v2UiwfaxQlTk51v4X0f/plGZ+/AOLkrzxyRjVv1DzxGZp3A9K0Z5bt0TfJUf -2XFeuRSuVhjOu616h+eg9f7Py5VwpemYZpFXGXowN9OLXtVBlLhJZPLXKhSU -C93Uo94C0y3Tx9Ml6jBWrIGUZU+F2QnV26WpjejQYYH8jd+AfvXY7JHBLyj4 -oO+C549vkBV4wGfv5BcsPOkh1cP6Bjkdg55iPC3I337Lt5T3O7yy/Vs+JNSC -j9tKd3ju+Q57bk1MuCu0IOurbF73o++w6VLow1fHWtCt5Wd8iW8bpNyfFt/2 -vAU9P4c6XNzUDreNSL4iZq0ozdBb/G9HOwifW9z00qoVG/iWPgvQagfTEyPp 
-hqdacaNWzJ9I63ZYVfLl7VWnVvxRmhT0OLodkq/wfc692orar3Mzfvxqh8RG -UXO71FZc/qC2Xed9B5iK1O/a0tOK5JKowPqqDqitKCOdpLeie5ux/LHWDuA3 -LksLG2jFz6taXe2GO6Co88dU669W9I3/Me0j1Qmrpq5GafFQsC9sePWroE6o -FbWW85alYMElYfM1hl3EfH5ctc+Ogo2pK31fWnVBwGl1+8MOFGRVrX140KkL -PMWfR1qdpeBayc0j5290Qekfb197DwqGlh+6WV7cBc99sxoPBlLQasWNN27y -3bANj2prp1Nw9s2flZ/muqFYdqnTxjYKindPa5gt7YHM664xrzsoqLyEx35g -dQ9weMqT9vRQ0NFG5MVytR6Q9LfU0WAR8SzaonvWpQcK94mvEJqkYIalk5fo -1x4I/Tt4fVCEiln+7sMqx3vh6f5fsVHaVFQpWzp9y7YXSo3vqtvpUrFsUZ5A -j2MvJLqwDZT1qdgbw5CNudQLYdVyGZXGVJTOtD0+FNkL0etyJCuPUzG16mhV -1sdeECRlWK66QMXEZTszVsj3Ab3Num9xEhXlzSjPXZX6QC8o0/FcMhUL7l0q -KVPug/M7XxhX3adii8xLmsfePmBXrDC/kkFFETVFoQaLPjjgK6Wa+4SKUVZr -fG7e6oM9RvJqp0uoGPJw/tjkzz7gD5zSl26noqnKe88tE33gVrl5ireTiK/8 -SoLDTB9kP12k2t9FxdL+4ZYWgX4IO1Gkm9JHxfGNP46+lO4HCa25kfZBKjq8 -KD5y1rQfHHZv0iz9R8zHfs8L6Vb98C2CfufsNBXZTVtjaDb9IB1g4CM6S4z3 -V3bjQdd+kDok3Ge1QEWKWuJhmZB+YNr7q33kp6F2iYdh25t+eIRTtF5RGooY -kVxFS/tBQebAD8vlNOxsZ0bolfWDr8w2l5oVNPSZt6ktauyHO3wCipmraPga -DusnMPvhrlmanNI6GspWbzpkJEGHMknXNYkbaThmRXcKXk8Hs3CNmi+KNPw4 -kBH6XoEOxXcKeQSVaHhi6eoqRRU6jMRnrPQi0TDeeJEOvyEdNq6yK5HeTkMe -aq/WRz86lOv6L2reQ8PQIXKWXzAdwofWhAyr03ApTzqfRgQdgtu9Rvk0abhi -m3VVcRIdlmWdi9m0j4Ybb7Xq57+mQ1qZ+jho0fBZen7ehXd04K2p7dqrTUPl -opilW8l0eCAp8W7HQRqq9xk25DXQwS17TE1Cl4ZGeytMHrHo4GcYefSlPg2b -j2Xm24/S4UaB0NMIAxqauwSKyUzSocTZ+PdpQxraJGlSHvAyoKFwiQHfYRp6 -/Cq0TJJigG3PeanVJjQcF0h4a67AAIGnffQWwtekLkqsIDGgiyx+/9ZRGgYZ -bvkRu4cBql+KGwdNaXgnO9smwowBh0LoJ/zMabimNPiTnjUDYl8axYpb0DCt -5bS0gD0DhDZeKnhGOHdhXW/wBQZkTP8tq7ekIUl8VkvLiwHFFlYvLaxomE9q -y2L7MYAz73WrnfD743cd/SMYsFttdFH7cRru87hcpRHHAGnUyzM/QcPyUNON -00kM2Jtip1FHuL5AeMArmwGi2+s25FnT0KR2WH/HMwbU/Fx3Y/lJGlK7a/P+ -vGZA4b+tNVcJdwqFuXmQGbD5V6TsrlM0tJd3bNj6mQEFXV9VYwkz1bW3jjQw -4NKaXhU64THnhdFzPxgQdnn7dJANDa8EdJgo9jKgM8GrrI7wdGJJPoPFAPeF -a1dFbGnIW+bteWaSAQolJc+jCYd/M6fIzDHg8NJpUhVhwVHVHd28THiaM5k8 -TTiOX+xu2hImaMTmjyvZ0VB87djkSVEm8FQq7bUknKLSaCm5mgmzOvae1wmv -13/29rsUE0jLrZKyCD+yjZC4p8CEKJLgk3LCil5nfSxITJCND8jtIrxNs8f8 
-oQoTepJOrJ0kvJPnxPbh3UzA6Cq+/07TcG91i+Cu/Uwo8yw7vZrwwduGrBs6 -TGDoG6vKEzY0qyivNyS+J+JyjUTYdM3e9FWmTIivWqm2nfDx7jc+9lZMkHA/ -ekaFsN3jrRbPbZhQwbtOQJmws1vO9n8OTIiLuLpeifB5FWkhbVcm+HHOPJMm -fOXfPVb0RSZEnO16vpyw30fRim/eTHj/aVCOl3BwSES6XAAThARChUeJ+CMM -eH3PhzAhcF+++1fCcSJ+Fu8imCDm5AalhO/RxrfzxTGhzr/41gPC6anuQkeS -mJB1M17Xl/BjewYr+QETUv3nvM0JP1e0rejPYkKx4791JMKFP7+mb8tjwvje -4N0LxP8pKTTx9XnFBPNFaZ8bCZf51FhUFjGh9aNeXQrh5kUlQtblTNjNPqys -SPhrvepAdg0TpG7lxg0Q66Mz/lnFWBMT5HgSnHMID0ul+Ya2MyGxYtXF1YT/ -9ItbfullAkfo9aM6Yr1N58WorB1gQuShdiNfwgK7bgzkTzBhaWT3dBOxfoXn -pipmZ4j1kPiefZnwivJLGYd4WLAsRt5XnLDsEQfLDmEWJB/LbTAm8mGf86HK -xYos0JK6MJBI5JPOFnLGsa0s2P+kOmEdYaM/u/3S1FhgJ//6dSaRfyeuK6mq -AQt4h9SMM4l89UoSemh7ggXd76ZmvIh89z8V6pdnxwJWZYtBnxkNQ+QWLCec -WLCqTEPYiHD8yzHhSE8WUCI+/BY7RsMXn1v9iqJY0HcqUMWLqC+Mv8lWQp9Y -YLiENzqeqFePtHdc6apkQWhNx5dCop6djmmOy69nwW4IHG7Ro2H7BoE68zYW -iJxYm8VziIat5pf3pk2wwGd7/kplol6SC4xlt5EGgOR9WKScqL8BCwP7OCoD -IPAxKz+TqM8ahjetW/cMgM0HVAzYTcOi3pJE70MD4EU3u6e4k6i/opsEyPYD -cObHVz9Dor4nX+AdNkkegPU2JtY1G2hoUZK2WC5jANJIssedFIj5FlBXmHg8 -AIkFJaQFORrGpnnYJBcOwNigjrm8DLEfNHQ09zQNwOmFoqeakjS8tPntm0uL -BkHx09Lz2cLEfDPdrid6DoJuynU9y99U1Luu8ELXZxDeqz6Y9B6j4sFVne3/ -AgfB5YHCw8SfVNQ8dET91O1B8O/xGkJivyXlbJ9QeDII10iS5sO9VBRynnQp -7hyEdw0ufoEtVGxkXLdo0x8C170bJB+/oGJtwO6bUSZD4Nz5wEL3GRWrxH8V -7LMcgrQjgbd7if7go669aJbDEJRl3vwjnE3FF48P1rteH4KOpfHxu4j+Isbp -P625wiGwGciXrw6logkjfou09DBEfRFKZpwgvreFQbPZQFgoYP1uKyoevrIn -8AFpGI75blUMNaeiAV93y5o9w6AxfIS8yoQYn+yWqytNh8FV7s5SUR0q7rL5 -XLYkZBiOMhWFT26lohRtxnKCOQxKlVfKj81TMHXdEY7qz2HYoj4ct2OGgpKO -mXmXxofhX3dm7/J/FFw9rjc3yh6GvvLmJVW/KCgmdjdzUGIEpLu2erH7Kchv -rDzSZTgCkq7jbxtqKThc6RBU+3IEZL8/+lmTQEGZhDa32KIR0FwW0P0zhoIW -diaWFh9GQDF5smhZFAXJ0xqk3toRcM+tFNUJoWDituW0KfoIlBT+CAi4TMG9 -ybhp09qfYBPC3Ekyo+BtN6kvYeE/oWnBX8pThOg3l39br3N6FHScH9e98GtF -VgwrJMl5FJw61hpJE/13uuDUwID7KDBVMnbf9mxF4UVrCm/7jIKAX4O5rUsr -Dv+z1vuWMAr1oWUZNMtWzOns8nCtGgW7APFrEiqtKJXHxLjNY5Az4ag61teC -gvD3TOfEGFTzZTX2aLag/ujJKXnL3+C8X3DHlHETtlU48Z7c+gdoAvpp9/bX 
-Y3FElZ3s3z+wdeOM+bvIGjwr4N3b3DwOt86mW4hUV6HjZf9i4dQJqCvbEqRp -XYHbz21pNj4zCXVROtfm15fhlxVbhbw1/gLVzfuAwfsP+OFVyIOt/P9gp6bW -gXMt79Cfl2Ta2PUPMrfBYvfhNyjY6WfgUTQFbbOsNrsLr5GZK3f3YcA0ZJjk -lhVOPMO7SpKLSixnoG6l6D2+3TkYn3q25ozsLNwczU/+6JOJJ+/aNgsPzMIy -Ez4X2ZYUlHojuM59eBY+Vz87Y5+fgj2t787Vjc7CvNSdxW9jU9BJdAVv+OQs -CDsomD06koIeUdU7OLxzEG9y8HdgYzKGBG2//0dqDhQLfA0Mmu/h0/O8Tt/M -5iAlrdllXf9dnNbNmXlInoOzwstdK40SMODDw5tV5XOgNnFB46BqAvKopS4b -qpoDqnY1iyqRgP/JxMmpNcxB2mrXtu3MeFw942NY9X0O5DOuSdQHxeOOl8b3 -B3/Pga7RpaVi1+LQY9WkuqrCPORsjN1jHngbx6PHKiwV58F3UE5iielt9OYb -MvbbPA9L/F+87pe7jQG/uuwrt8/DStHc5UurozG6ribCch/hCVL0WrFozLv+ -oM3Xch7mnvnbmD6PRDrzoE/FrXnY3K7u8x87HA8+bH5Eip4Hw1bR9H8t4Zh5 -4mRTYuw8qP4hS6x4HI52DZ7yjknzIP48R6TZKBzbX2c28mfPQ8p/SXwnU8OQ -4s+WPUSeh2THOb4K7VCsWF5aVzM5D25PWM6UgmCUazg0uX16HtqKKw4ZRwVj -UGirdMrcPPCMXvUfdwjGA1ODV1z4FiD467nBSvFgLO2UkF4qtgAQ517r5xeE -hU+8LxttWQCbw3+1PdMC8dF+1XVNZxag/qOHy9A2P6wwEbw557QADSX7FS6N -+WL/afrwZpcFkC54oqyb74sKIUml4R4L8CDWeA9N1RcfV89Ya/kvgJ1Au+hd -TR/MOVqR8ubuAtT08XcnnryKeWfMV6fWLECL5Z2x27TLWHt5a2Bt/QK8SSv/ -+CnjMg7eFGD9a1oAVldUx17Xy6iU+67YnLYA6kfHBGU5nvh0eJ3lsr4FCBqd -VolQ9sRnVxh3gmYXoFdVc2f+nov4Msx7+bltbFhldbCcae+KP/fZhoSpsIn+ -UELSUtAVt07qTmTvYEO7quNy6yIXfOYg/rVXgw3NTjZ0aUEXzIM3KSf12FD0 -5bvwo49n8fHMbxmT02ywKR33Dt/vhIzXbfHnHdiQ4rv5K/x2RAWXMt5oZza8 -MflGi8l2xEff4ug17myQ0kwsHhByxIdvlJ9o+7DhwOo784n0M5h64bzyrgQ2 -BFV7asfZnib2W4uH5nfZELuQJXLvqx1Kdu4Vu5zMBlN7slS1iR2mGAmNv0pn -g27eh7ksXVtM2vSsWOkZG6oO1lYZHDiF8X0D+6Qq2WCYtfmmmetxbEn58lKz -mk2cp6ROj05YoajpO2nrOjZovpjzL7hhhbHkcJ57X9hwzDnQpzjVEm8/2Fgl -2smG4b6kgBfd5njL0vEw/182TCxaVmCVYYqPjnbaOE+z4bDO84c7d5jiR0NL -j5o5NoRfTU3uTTyK4/v1E6L5OCCQr17CO3kEbRS3fF8pxoGq0OayZd+N8Jrs -40HvlRzIu693NWe1ESauXT/7fTUH7shV506fMMQaEdH1aes58Caqw6eVqY9q -U+MOG7ZwQLD8dJ+x7CE88sfdK1yZAwoeKy+WeeuiywgjbFCVA7qxzdd+Nelg -es+3vBfqHKjfdGr2RvhBXFJbOrZTjwPj5KZKtzVa2J0S4nfoDIc476i0NAfu -wZnEuegnTsT734cbbwzejeIxXulLXTiwwXqEPB6xCw2Dz5Y1eXCg5khf2Hje -DixyNVpsFcABB86NtDCSCn5xrJR4f4OI9+v9xE0Fyjhsu2/z2pscKPc51qa6 
-ZhvKmCkbd0dyIENhrZLYfySM0lyReDaZA1KCy39MnNuAOTujs2tTOcDQvn/n -xjoFLFNeVEzK4EDDIdCc+SGHf+X/fR97zIHrTnXJ539Jo9j6i0OmeRwY3Fl7 -YeKlFJIkBmcLn3NgWwaflFXIWrQXal9/rZADwuok3vzzq9FPwHz7j2IOdLlK -qOeEiGMSp0FrbwkHzHZo6V97tQLrJz45cpADH6QjO23jRJA5utv7TCUHWAe1 -hDe7CSHPYH54ZTUHgpM9fj6/tAR3dWY9vdXEgVqKCrmNhx/FlF2m81uI55HH -UuUND/68oazfRuWA3tsyJQ2JBXJt62QSz3cOBB5+OK0pPkN+rPCBodTOARlI -lzxr+5cc5B2841gXB0gJpdGl43/INjX6Ib69HNh3cyz9Ut4oWV1SpDWLzoHJ -zNxqMv8gWdydJlPPIuLxT084PdVP/v0x1WN8iAPkjbkLOYu7yI0iZz6tHeWA -dHOslNrRr+Q8+03COr85YNA7xKmObyKHFo6edJ/gQKpEwVEvRgXZflHR0zv/ -OHAhlGXaplZM3mflN/1hhgOeQeVKpeRH5DV5WvqMeQ68EL9l8u5KCHk2gP/w -Yh4eLZ7/uwZzzl5O5eX6W3PdfWV+rgtl4wetFnPtViUV/kSY6x9Cu8sNJLmu -3nl/sHMt10W286KeUlzH51fY3Zfh2sD86PzQRq7fp7rsiVbjOmXzg5dNh7me -nNDLuXmE66Pk8TSNo1wvNje6nWPGtXfAjNt1a67NvhxX2naOa2Fv8ezbIVzf -KItJOVzC9ZF1ljn1G3j/39oeNaeWR3L9UMphQ+NfrjdUPrlncoqPO95tFSQK -meuzE4ta60j8/2+HgnHZyDiuz4eYzxslcO1tXvRd+A7XEX+9Y+Pvcf1KY2Yu -OZ3rmXLOt9znXMdThGOqarhOzb7gEl7HdbZXs45BA9dvV8fPNjRz3XlypQv1 -K9eb6JI6/XSuVYr8pB8zudYM65hxHuDaeFP666Fhri+7y0r/+cN1wL7gmcIJ -rsOW9dO8/nKdkp8dPT3N9aOgRedKZ7l+fsz5YMA810Xy1esPsLkmTyjOcDhc -/w8yc+em - "]]}}, - AspectRatio->NCache[GoldenRatio^(-1), 0.6180339887498948], - Axes->True, - AxesOrigin->{0, 0}, - PlotRange->{{0, 2.5}, {-0.3183098745627588, 0.}}, - PlotRangeClipping->True, - PlotRangePadding->{ - Scaled[0.02], - Scaled[0.02]}]], "Output", - CellChangeTimes->{{3.560155132339073*^9, 3.560155136020277*^9}}] -}, Open ]], - -Cell[CellGroupData[{ - -Cell[BoxData[ - RowBox[{"Plot", "[", - RowBox[{ - RowBox[{"DWh", "[", - RowBox[{"r", ",", "1"}], "]"}], ",", - RowBox[{"{", - RowBox[{"r", ",", "0", ",", "2.5"}], "}"}], ",", - RowBox[{"PlotRange", "\[Rule]", "Full"}]}], "]"}]], "Input", - CellChangeTimes->{{3.560154655600813*^9, 3.560154655644041*^9}, { - 3.560155161815674*^9, 3.5601551791128883`*^9}, {3.560158982837762*^9, - 3.560159022468958*^9}}], - -Cell[BoxData[ - GraphicsBox[{{}, {}, - {Hue[0.67, 0.6, 0.6], LineBox[CompressedData[" 
-1:eJxF13k0FX/4B3BLqaRSStJCpUilTbK/lRZJpdAiUSJrtihfKSLJGiJLQiFb -4V5UlowsIWtIksi+3mu71879ze+c3/nd+WfO65zPPDOf5/M8n5nZamh90ZiL -g4Mji5OD43/Prx8Z8hwwslD2e58aV+ZPJ4rrRTUui55GqZlVRglp1rd8d1HR -q3g5rNqeR3pl0Md03iZD9Kap7k8l/brAT2SxqBm8N7m9DSe9Z9jIn2VijTMJ -AvIupP+ujFqY+2UHoUK+aX3SYu+WyczMOiC/9XG7LGlLJYc7kyL/YZ21J5OP -dEbDvziGqjMYXQeO/PGjEzMWGi2jJi4ANSQxlnQMZ4vX5InHKFIrPXOb9PGq -wB0Tv9xwRfjPtu2k+8JOfWWYPUFnWPe+Zl864Ws0rzc+64H9MrP2PqTrZ02D -R0S88Cg4PK3Fh07cK92yf5jiDVm5M0nOpIVfNFTQVH2xrvhqpyBpQ0kV7kGT -5/ig1SYq700nuGadahjOAViv6CJR5EUnGv7pbGacCIRAXZXjSdKO75dnj/0K -Amd12azCMzqhHtTDMxbzAinrow6metKJTY5ftUfNgnGq8W6OMOkC1fsjw7Mh -iP/PrqTDg04s+dMhThMJR07lFNc+dzrRVPDFYagvHE8PrJy760Ynkt+FFQ1S -IjAXXnop4zGdOGd3zmBANRKWSo5yW13pxMtlOS97TaLRcXQFPduZTmTeV7e7 -KxSDgSMuUnUP6ETkwJEN484xuFMg297lRCesateYjJ14A3nNhvhpRzqxJrKM -a/TXW7xs7e/+Z08nZld+TLJRjAVfS8NY5V060fU4VnMkJha5M8luGXZ04qPJ -o6hhsziI3zI/ZGNDJ3QPScvTZ+MhoG0nkGhBJ958j7YdFEmCs5Z3tMBNOsEI -y5Js803C3OTSNd4GdOKkSUVn3XQSzErsZGeu04kh7kmdnPpkXPB5k1KqSydk -lc7Je3m+x66X0Q7rtelEbdocl/hwGnqU8vp9T5DPi2X6K6TSYbVppClalU4o -7AyW9zQi3e3w7v1ROuHPSBp3+pEO6V7+3gwlOiEd+NPYMIUCB+fPav8dphMu -3/eoHzDIQIa7ZVv7djqRSPm0MyUkAzJZX0dfbKUTdWHHuHdUZiAzqIZPRYSs -X5MreUJymdjOlWbyVJhOfOd+IsUhkAUTj/SZCn46Iaj0Z03Nt48wz9DrVZyj -EalpXn8spXJgf+2caVk1jRhYM+CrfjMHds5GaowKGiF+T11ZIjgHX5WoyhvL -aUSM4vI3ndM52FEeck+3iEYElfve1i3JRbRw9/moTzTiXufz0VN6X2Bo1O2j -FE0jopdXuK8+UYAuOzvVfnMaoaz0QM9VvwDnGT5CziY0osVq9+Hh+wWw0ulR -4zWiEUJ1Pj2VyQUoa10oErpOIwLDNE4/4/+KMzvEbyycoxHuO6pXsv58hZ9S -iAD9AI0wwY/wIbsitDnRXK0YQwTH24mcPz5FOLuRj9NjZIgIW7Sp5XtcEU41 -92uEDg0RZeW3tyQ1FkHk1iLNtK4hQkJ77q2xfDFyLd4ofmgYIvpMxd+3cpYg -lfr7XkfGEGEe9DC/NuAbHPsOJ6tYDxFW3bs6M1PL0e90qEupeZD4p/976dfy -crhvyPw51DBIaDU9k6rqKkdvjYBVaM0gIVvR+1+38Hfcj//xuq14kOCmxPML -en5H3V/6UcW0QSLMeavyfYMKPKint0u4DxJfBTaEy/FX4a0WK7ldfJDg3fpE -vE22FpxpMhRD4wHCn79C8s2NeqyukGhOrO8jDP9oE9yVjfhyWWJH9nAPYVvy -xNBavBkWUjvPBdG6CIoNn5bQ6b8IWvGSbwujg3jzwGJg/+V/UJY/9C36yz/C -LXruAmOoHdPdhy6vPP2X4Kj/p5Ln1Al78TUvrPf+Jnba33bUluxGV3R+SF9l 
-A9HFDL20/EsP3jZaK6u21BLq3eYPg2z7MHshT1jgXTlxritg95YtA5B//O71 -gWuFxECRoWvZh0HoLalMePvkMyES2GTunzmIgNxHo1u0PhPa+ud0tHMHwWdm -UJO99TORPyUn+a9sEHlTm1+dIT4RQXtXN0x2DsJx4GNO++xHQiGUEBcXHkIt -f8C1Aacswtd8U43H0yHoyUTvZQVQCanVjZtVDWhQC9Uv9FdNJnr8etxCjGlw -5awVezmRRLzmnezttaCBo4Xe8ScpieBbJET1daRhf+fg9LbVScTAxNWTjYE0 -jBzvUovtSCDiW/5amRXToJjQKOwdGE9sSuwmnu+i45ndolWtIm+IBrEJsY59 -dDDCy2LHmmMInzc83tIydGgrjf04/TKGmIkQ1/59jPT0HreCFTFEk59Z3zY9 -Olxj3GrjWa+JF3dp/B/96QgL/cO82B5O8IJ5s2WcDtnlBxwE+YIIx6MSSz/O -0KFSk8Wj6xVI9KheS33OOYwbCR1TqksCiUK1rzPHVg0jMfrZFb7FAYTTRb8X -SZLDKOCLGxrh9yUGjXeU3Ls5jJEPTpYC+k+ISl8didU1w1AZ1W0wGjQhTtF0 -J7fpjMC1ulN280N3RK11r3e5MoL9PkzLB3JPwFBISWu5NgKbXB/7g8wniPGe -NXlpOIICf7GmFPOnmBGPbFpmM4L0qK09P8564b3h3+wR7xGI6gp+E2b6g7/J -wDm/YAT/ZiWs/LuCcZvleWVj8QjCwlyfuMiGIG9nurRj6QiEvgtaivqGwMyB -g3agmhx/j77R7uBLFK55cz3+zwhufPs1dflBKOzPdij7TpDxs6cenFgIR1Oh -EafunlHwf09Qv10ehZGjzcIP941C6FrXg+cro7Hk63npmIOjUKMb/HbQioYM -IW/SIzuKMPOntk4t0QjO5a+yOzGKIZumAqpDDM5n5oX56I/imbuyksWjN/gW -v3b/l4BRjCxbUDe4H4tWMe/T/16Mgs9PQaAlJBbMWJYhd+go7Ctv1stmxkLs -7UDI6dejKE54YxI0Egu3qILZn0nk+K5H60pN4qAcallKLxxF4ur9h1Q045H1 -rFhflDkKxiG75vxlCUhUrxDynBqFYlXmNZ+dCYjgq6ujzY7Ckfqh8ZBqAlwD -2k7mco1hz2aRMSHnBGiEzUhd4h/D/lCv04eHEtCRsJ/ls3sMIyrfqgtLErGy -NDJ68uYYIt3Ff77QTwanV+xVfeMxVGZsaZ65lwyGerJAiekY7Pmpr/A8Gb+r -P3kGWY9BaDmvnyGRjLjGOuu9D8n4kkszVm1JgXzPUpVbYWOoPfj0hsDPFNxe -7PCvunoMjh51ndskP0BYpPzX7A8yfkP/eR6lD6iW3Vwj8XMMZbQZ46rzHyBj -WfLF7c8YRFcOfOFx+ACe+nWvZPrG0Me5QV88/wPiYz7qRHGOg8/9jankmVR0 -KUx9vyM9Dh6dVzRPzTSE6WgURhwZh+lxmR8TBmnQsI7JLpUfh+Oz0ip16zRk -vlVL3Hp0HGtj3uOtXxqeLAv3+Hl2HJE8Uaf4y9Mg9ktORclkHIqEm+N/m9Nx -y+5BFl/EOOS/BjkGeqRD5mBr1q7X4+BeZLHT0ycdvGMqH0/GjEOqpO+lTWA6 -0m15Prm+G8dqA9ufW16nY84m8DODSsaTZRn0Z6TjpXVCbkvFODw/nzdWaEuH -2T7evOnqcbjINJdYd6VDcdgyT7BuHGFxzgjrT0en1cEvmk3jiKbFvygZT8d+ -qy/5xV3jYP1XdyloCQXllvVf38+Pwz3zJ9f0Hgoi98gUlnMwEOUl99D2AAU2 -Q2GFPdwMRBaIm7QepkDQUr9IlJeBmBFEhyhTYGjRXxwsyMBkdMIRp/MUzJmx -Sp33MaBk/7JI1JqCml2GZeEHGbiTN5PfYUfB2/7iso+HGVBouuYafo+C02be 
-5SMKDIiLU8y7H1Lw0nRdBfllAEWzEftJHwr2meyu1rjJQOVHbuFl7yhoNROx -/mDEQFtKrKBeEgW+lgL8K00Z4DPcqPr2PQX9trMXaqwYODa069TSDApinSt+ -XnBmoNh2IE+BoOCiC3GP6sKAuZn1tEQhBRxuGesF3Bk49Gs2clkJBdc9X11t -8GKgZrhQOv47BeuDLP5eCmWgbuUB05M/KfgWbPDoUwQD9ylCUTW/KLAP1RIR -imJA2+WInUYzBT8iFW7+jmPg6H0q/9Y2CnwSlndfozLg96O4V6aPAvlklkde -FgPJpb611wco6Hs/vnNzNgO8Ubn+jkMUnKD+MW0lGNj9rfud1wgFrLyUoRtV -DATOBy3ZOEVBKhHt97WWgbDhz3rt0xToFb6Q2tbAAOOeQ/6rWQqySx/YdDYz -cFBUvWhogQLT79arj7cycGWflL07BxWCVbeoce0M6KWqnF3JRcXdujMM4z4G -HjU+e8GxmIptPxHybZCBLkvvlaY8VNT+OiQjPszA+7MXq4qWULH370bHPiYD -w2WVB67wUtHbOzhrzs3EWPSLy8yVVLwcaIus4GFilzudwlpFxXFavdIeXiZ+ -GMNzgZ+KN2O5LjR+JjgWXcv/vYYKTWaa6Lm1TFS8H937UYCKhcnYr6nrmXi+ -T1Dacy0V1+Z9uG22MOH5OkZkiSAVvByucbVbmVhyr27NR9KfuexPHNjBBCe3 -5Oer66nkfmHaEyjBhKiCn+AY6XVL9TzHdjOxeF2HtIsQFcW8mhJa+5g457lS -mHMDFXYrjpdnHGQi6tlU5X3Sovyy5mtlmIhY9VS7k3TNmj3LHeSYaF/z+pOq -MBXKd+70v1Jkwubb7dgXpFNLU0sLwYRa1Oee36S3bBuJ7z/GxNOivZ6CG6nw -dz7whP8kEycMbnqfJr3QaGd45DQTMRF0ph3pOwcyVfQ1mKBE2dQEkW7xYW7x -OE/eP9htUxJpjR6Z+ZSL5P3iiY4s0nkqjn/qdJjQnikUzya951V29vQVJgzy -xEeppCOZM6Giekykr/U6Hkt6uabivVMGTFzf4ybuTdop+aG2lSETXntSo01I -DywiDoYYM6GhnEhVIK1rwLE6z5SJIu/lt3lIf88+OtxhQeZP4S5RSs5Pfq17 -1TJrJqQdnYpdSCdbFafst2PCwiLPSYq0cPli78sOTJQoNfTWk/nz2n7K9JEj -E3pC11fbkJ56+Oxk/AMy/2Jjc1ykTZvKxSofMZHlLZXuS66Pmp9G+wYPJnSX -8z55Qq7f514/QuUZE5X329LGyPUWP1bz2sSHCbHOsuzLpHkmL+hmBTLB0D3+ -iGcdFfcuvJBtCWbiXTbfcU2yfnpSGgS5w5hIzdVZCCDrq+TG5XrNKCb2nmu8 -PbmaCuncMMr9N0w07z8uupl03Lrm51FxZD6uRXfJkfX65LuexlAyE8O7veOu -kPXNEIuSFEhl4pKYZoruCipuubQtlacwMb7Upl6Lj4pj0oYlnp+YkFOQjNpN -9gfVPzY2NYcJ4T8zZ3iXkf3V3/X45xey3pYHHfhH9hNnlIny9mImuIpcc2zJ -/itYcudTfi0TKrxXbuwh+3W/YWpIdz1ZX6k6ZiVkP8fkDd/l+8XEz93jjTrz -FLjY2u3X/ctE32WnCb0ZCpT/3E9i9jMxOUbpXMSgIDfV7dUe7gls2+utX9BN -AY+GjnAozwRm3r7xSuyk4EK/eDgn7wTO3LTm8mynoHd71ctG/gnESOZM7/xL -gUD4+iDXLRM4ub7r23g9BRbu7581yE0gIvCYcCm5n34UdVkCpQlsXCP93/ov -FHDmX3iapDKB84krwvRzKAidmnB/dGoCa70WH/yRSUHRnaMuEjpkPNW2KDVy -/954pdHB2WYC5vXjEU8DKLjNTGT03J2A2MEWzZN+FFCCHty9cH8ClXdbZee9 
-KDhVJWq789EEtgt8ydV0p+DuMQvLWp8JSPRnaz93oKBiD8ctsYQJJO62WGN7 -hYIHnJKalX8nYFXvQJ1aT0HP8eVNO9snECsUlFAgQIHmsyGDx10TEFTTmnNd -RYHYqjRrmcEJGDWVtg3wUFC5Sfp5zNQEqO2BZ4yY6dgiq1xtLzCJuVTz6PHa -dBRaXdTYfHoSvowjjzLcyPd5i5OaVeYkwq6sDDSKS4PM/aQVzz9N4m3d807d -8DTcXNNUl5YziRkFrX8n/dPwWe3w9dGCSXBW3lvE4ZiG21nDtvZVk9hkvVpy -Sj0NX/2NXjn1TCJgT/jjZcOpcDx6jv5UaAp2f9cJz+9LRfe7rcHRzlPIaWul -+IW+R7DEhkXZOtOQbxbTXF6QiICI26U3RWeQOnSfK+x3HHSDr1fz9c5AeCvH -qFFfDKaOx09H589Cv2JBuNEwHJ3dxxwLPeegPLzJKpIzCMeiq99K+sxh+4Yx -0b2dgYi5olsV5D+Hc3ybraqKA8nrbLfdCpkDtn9RU3oWiOb0mEru2Dlc7dku -9HdVIOoeLIieyJ/DIx2n3sPbAlC4Oqe8lDEHYuTRqh2JvnirdGBj1c15CKdG -B2t1e6DwHK/7rNE8DJ31Z4yyPNBh0Dmwy3Qe211ODr728MB2t5Ccp1bzZL3y -invu9EDct+mrKg/m0aCVslrR/AnizxeGZQTPo60iesWVaTck3tQSjCidR83v -r3avjriizG7Po7Lv87Ds7A6d5HNFn/vinomqecjKDqznKnGBxLtPWVoN8xDI -jDgot8kFSQMbdVa0z+P90+uJE9UPkXy364XrzDwea/Cos048wAcPh9UmexeQ -tfCVt8r5HoYUr7t57F9AoNoukxnpe9jDOD4ee2gBOzcsGfGiOSDZcO3Pf3IL -cIms7ag1cEAiMsJ0Ty7gjrpq9ZLT9oibHhE5Z7AAe+W+cmMpO3SlNwVYGi4g -YMWHlNYhW2w3LeD0MV7A0ohACSLFlvxPft5ZarGA87JG5z9L2iI6QyrhqOMC -ppN+1QXvtUHEHUupw4ELqPlk4+zhb4lmMe1oreAFZO9rEckTssSGFgV+u9AF -HH3+u7Ey1gJh6svHUl8v4E90q4xLnjlCxJOzJJIX4HbSsFB+yhQB7b2Km4oW -sC7qx9b4cPKlrnPrDDdzAZEoeL9qjQHenm/RM55agOaP49mRmvrIO61jVTq7 -gFMd9tL6gdcxpnQq0IeLhfixWGOPDXrQ27n7lwA/C6sGG+RDjl/FfdG4PgcB -FpYHahyfCL6CIOHNM78EWXAMXxoR3nsZpStXbY7czIKrqvCG1BeXcHByzFBs -NwuK6xVoyku0cXbUwv6pFAt+Q5b/Osy0YDrY5dF3gIX0Jc7zjdUX8bqtMfG9 -LAvqjxdiP0ZfwNKyHLr0SRbuyF3WXpp7Dq1hbk4nbrKQn5Cf9jnkJKaDZn0S -jFjgoGaVx787gbV+9q+XmZLjf37/YJ53HKcf3y6osmLB+EwfxWviGDLN1Hku -ObNwUVvRblYdqLlVtP6zCwt55qa31+YqYeC64i5hdxY4j0obrz2sCJGLUhqt -XiyoMlqUuk/KwVt+TdDtUBaqO6r/8+04hHhpn9iyCBbOm+i0R789iAKpRVmS -USzsk2sr/2F5AMxtE7/oceT1QtepVyEF/s3W/ZqJLDz6tyG3lH8PJNf3zVBT -WAj/oaKYPLELN5Y3b75PZeH35/1bFnHshNNirX2/s1ig39stH7pdDCGsChWF -bDKfA+PHG69tw/fxL7dYBAtGHLHyIuu2oJsm43CziAXluv0aZ7g2gqMv7WnR -NxayJLdKKG8QwuGWN0meVSxo+Izs7i1eA34p06m0WhYkvCSsqx/zY8hF6lRT -PQtWl2lrRr/xoewHI4TjFwuVOv58U/VLEbc9t0uimYXaB3wdrZKL4erw+NCF 
-vyw0HVFJrqvghF7pKbf//rEQNMI9xWE1ryy7YeWPN50scF3WbpH8Pam81qJB -5HsPC6Uu2/Y9/D2mPJIXYTXWz4Lac2XFuNYh5cqVN78I01j4JDj5YYlMt3Li -DXE+1REWKqJiNlPv/lV+QqXpWoyz8L77sn34zzrlG4syk15MsHBiIHWv3s4S -ZcVLTlO50yz0cfgMG36iKgslqpzqmiPrN6Y+dqtNkPKMM/cZHg4OFY7/O/ri -b9tFcLLdWF0eLsXNNlU0oO8SD9vmxZueJvCx/Xu5zFe1DWx/kw7vaxFmO/P6 -3CrbTWwHpBXqh4uwraZ1fq5/B9ufI0yP+BxkO2zXqw9VZ9hmjJ+Mdz/L9vn8 -sUi582zzaKn7xl9k28F52vzhVbYv1lyW2GvCNp/D2lhfN7ZdCvzCzmSzfXaj -Tvx3Mc7/91Gr0murvdiO3mQoVslkW6wo4eW5a1zs+e4tlKzLZ/v2+KIf5ZLc -/29Dypio13O2Ld205tQD2XbQyvzF94LtZ0wH/4CXbKfKTc+GvmZ7+iur8V0K -2wF1fH7FpWxHxN4xfVrOdqx9tapaBdsfBQNmKqrZbtEVMK3/ybZ45wbVjk62 -92c6bYnrZlve48+0cS/bGuKv0/sH2LazEN0yOsq2s+Ljaeo42x4rOhrsmWyH -pcX6TE2x/dZ1kUnODNspF4yPOc+xnbnt22blBbbzx3dOs1hs/w+pgDiQ - "]]}}, - AspectRatio->NCache[GoldenRatio^(-1), 0.6180339887498948], - Axes->True, - AxesOrigin->{0, 0}, - PlotRange->{{0, 2.5}, {-0.9549296585513659, 0.07073552342169737}}, - PlotRangeClipping->True, - PlotRangePadding->{ - Scaled[0.02], - Scaled[0.02]}]], "Output", - CellChangeTimes->{{3.560155172170507*^9, 3.5601551796044407`*^9}, { - 3.560158983450157*^9, 3.5601590230207157`*^9}}] -}, Open ]], - -Cell[CellGroupData[{ - -Cell[BoxData[ - RowBox[{"DWr", "[", - RowBox[{"r", ",", "h"}], "]"}]], "Input", - CellChangeTimes->{{3.5601552083271513`*^9, 3.560155253227319*^9}, { - 3.560160694674526*^9, 3.560160694745482*^9}, {3.560161180197549*^9, - 3.5601611810400257`*^9}}], - -Cell[BoxData[ - FractionBox[ - RowBox[{"If", "[", - RowBox[{ - RowBox[{ - FractionBox["r", "h"], ">", "2"}], ",", "0", ",", - RowBox[{"If", "[", - RowBox[{ - RowBox[{ - FractionBox["r", "h"], ">", "1"}], ",", - RowBox[{"-", - FractionBox[ - RowBox[{"3", " ", - SuperscriptBox[ - RowBox[{"(", - RowBox[{"2", "-", - FractionBox["r", "h"]}], ")"}], "2"]}], - RowBox[{"4", " ", "h"}]]}], ",", - RowBox[{ - FractionBox[ - RowBox[{"3", " ", - SuperscriptBox[ - RowBox[{"(", - RowBox[{"1", "-", - FractionBox["r", "h"]}], ")"}], "2"]}], "h"], "-", - FractionBox[ - RowBox[{"3", " ", - 
SuperscriptBox[ - RowBox[{"(", - RowBox[{"2", "-", - FractionBox["r", "h"]}], ")"}], "2"]}], - RowBox[{"4", " ", "h"}]]}]}], "]"}]}], "]"}], - RowBox[{ - SuperscriptBox["h", "3"], " ", "\[Pi]"}]]], "Output", - CellChangeTimes->{{3.560155230596974*^9, 3.560155253885023*^9}, - 3.5601606952114277`*^9, 3.56016118252979*^9}] -}, Open ]], - -Cell[CellGroupData[{ - -Cell[BoxData[ - RowBox[{"N", "[", - RowBox[{"Expand", "[", - RowBox[{ - RowBox[{"-", - FractionBox["3", "4"]}], " ", - RowBox[{ - SuperscriptBox[ - RowBox[{"(", - RowBox[{"2", "-", "q"}], ")"}], "2"], "/", "Pi"}]}], "]"}], - "]"}]], "Input", - CellChangeTimes->{{3.560160709698295*^9, 3.560160723505558*^9}, { - 3.560161185019305*^9, 3.560161189166279*^9}, 3.560237508328278*^9}], - -Cell[BoxData[ - RowBox[{ - RowBox[{"-", "0.954929658551372`"}], "+", - RowBox[{"0.954929658551372`", " ", "q"}], "-", - RowBox[{"0.238732414637843`", " ", - SuperscriptBox["q", "2"]}]}]], "Output", - CellChangeTimes->{{3.560160720336149*^9, 3.560160724559553*^9}, - 3.5601612038241377`*^9, 3.5602375092839746`*^9}] -}, Open ]], - -Cell[CellGroupData[{ - -Cell[BoxData[ - RowBox[{"N", "[", - RowBox[{"Expand", "[", - RowBox[{ - RowBox[{"(", - RowBox[{ - RowBox[{"3", " ", - SuperscriptBox[ - RowBox[{"(", - RowBox[{"1", "-", "q"}], ")"}], "2"]}], "-", - RowBox[{ - FractionBox["3", "4"], " ", - SuperscriptBox[ - RowBox[{"(", - RowBox[{"2", "-", "q"}], ")"}], "2"]}]}], ")"}], "/", "Pi"}], "]"}], - "]"}]], "Input", - CellChangeTimes->{{3.5601608470246563`*^9, 3.560160853545632*^9}, { - 3.560161190598509*^9, 3.5601612011456413`*^9}}], - -Cell[BoxData[ - RowBox[{ - RowBox[{ - RowBox[{"-", "0.954929658551372`"}], " ", "q"}], "+", - RowBox[{"0.716197243913529`", " ", - SuperscriptBox["q", "2"]}]}]], "Output", - CellChangeTimes->{3.560160854392119*^9, 3.560161202029501*^9}] -}, Open ]], - -Cell[CellGroupData[{ - -Cell[BoxData[ - RowBox[{"DWh", "[", - RowBox[{"r", ",", "h"}], "]"}]], "Input"], - -Cell[BoxData[ - RowBox[{ - FractionBox[ - 
RowBox[{"If", "[", - RowBox[{ - RowBox[{ - FractionBox["r", "h"], ">", "2"}], ",", "0", ",", - RowBox[{"If", "[", - RowBox[{ - RowBox[{ - FractionBox["r", "h"], ">", "1"}], ",", - FractionBox[ - RowBox[{"3", " ", "r", " ", - SuperscriptBox[ - RowBox[{"(", - RowBox[{"2", "-", - FractionBox["r", "h"]}], ")"}], "2"]}], - RowBox[{"4", " ", - SuperscriptBox["h", "2"]}]], ",", - RowBox[{ - RowBox[{"-", - FractionBox[ - RowBox[{"3", " ", "r", " ", - SuperscriptBox[ - RowBox[{"(", - RowBox[{"1", "-", - FractionBox["r", "h"]}], ")"}], "2"]}], - SuperscriptBox["h", "2"]]}], "+", - FractionBox[ - RowBox[{"3", " ", "r", " ", - SuperscriptBox[ - RowBox[{"(", - RowBox[{"2", "-", - FractionBox["r", "h"]}], ")"}], "2"]}], - RowBox[{"4", " ", - SuperscriptBox["h", "2"]}]]}]}], "]"}]}], "]"}], - RowBox[{ - SuperscriptBox["h", "3"], " ", "\[Pi]"}]], "-", - FractionBox[ - RowBox[{"3", " ", - RowBox[{"If", "[", - RowBox[{ - RowBox[{ - FractionBox["r", "h"], ">", "2"}], ",", "0", ",", - RowBox[{"If", "[", - RowBox[{ - RowBox[{ - FractionBox["r", "h"], ">", "1"}], ",", - RowBox[{ - FractionBox["1", "4"], " ", - SuperscriptBox[ - RowBox[{"(", - RowBox[{"2", "-", - FractionBox["r", "h"]}], ")"}], "3"]}], ",", - RowBox[{ - RowBox[{ - FractionBox["1", "4"], " ", - SuperscriptBox[ - RowBox[{"(", - RowBox[{"2", "-", - FractionBox["r", "h"]}], ")"}], "3"]}], "-", - SuperscriptBox[ - RowBox[{"(", - RowBox[{"1", "-", - FractionBox["r", "h"]}], ")"}], "3"]}]}], "]"}]}], "]"}]}], - RowBox[{ - SuperscriptBox["h", "4"], " ", "\[Pi]"}]]}]], "Output", - CellChangeTimes->{3.560161212213023*^9}] -}, Open ]] -}, -WindowSize->{740, 867}, -WindowMargins->{{Automatic, -1324}, {Automatic, 61}}, -FrontEndVersion->"8.0 for Linux x86 (64-bit) (November 7, 2010)", -StyleDefinitions->"Default.nb" -] -(* End of Notebook Content *) - -(* Internal cache information *) -(*CellTagsOutline -CellTagsIndex->{} -*) -(*CellTagsIndex -CellTagsIndex->{} -*) -(*NotebookFileOutline -Notebook[{ -Cell[CellGroupData[{ -Cell[579, 
22, 1154, 36, 88, "Input"], -Cell[1736, 60, 937, 30, 57, "Output"] -}, Open ]], -Cell[CellGroupData[{ -Cell[2710, 95, 309, 8, 30, "Input"], -Cell[3022, 105, 7339, 126, 236, "Output"] -}, Open ]], -Cell[CellGroupData[{ -Cell[10398, 236, 459, 15, 30, "Input"], -Cell[10860, 253, 294, 6, 30, "Output"] -}, Open ]], -Cell[CellGroupData[{ -Cell[11191, 264, 343, 11, 30, "Input"], -Cell[11537, 277, 346, 7, 30, "Output"] -}, Open ]], -Cell[11898, 287, 774, 20, 50, "Input"], -Cell[CellGroupData[{ -Cell[12697, 311, 265, 7, 30, "Input"], -Cell[12965, 320, 7987, 137, 221, "Output"] -}, Open ]], -Cell[CellGroupData[{ -Cell[20989, 462, 414, 10, 30, "Input"], -Cell[21406, 474, 9020, 153, 223, "Output"] -}, Open ]], -Cell[CellGroupData[{ -Cell[30463, 632, 247, 5, 30, "Input"], -Cell[30713, 639, 1094, 35, 65, "Output"] -}, Open ]], -Cell[CellGroupData[{ -Cell[31844, 679, 404, 12, 54, "Input"], -Cell[32251, 693, 318, 7, 30, "Output"] -}, Open ]], -Cell[CellGroupData[{ -Cell[32606, 705, 541, 17, 54, "Input"], -Cell[33150, 724, 238, 6, 30, "Output"] -}, Open ]], -Cell[CellGroupData[{ -Cell[33425, 735, 79, 2, 30, "Input"], -Cell[33507, 739, 2072, 67, 118, "Output"] -}, Open ]] -} -] -*) - -(* End of internal cache information *)