diff --git a/.gitignore b/.gitignore index 5a986acbd59a818b151540fb9303eadb4f926f77..92a16483158de70ccdac93cb13797824cafd1337 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,7 @@ config.sub ltmain.sh libtool -src/version.h +src/version_string.h swift*.tar.gz doc/doxyfile.stamp doc/html/ @@ -49,6 +49,7 @@ theory/latex/swift.pdf theory/kernel/kernels.pdf theory/kernel/kernel_derivatives.pdf theory/kernel/kernel_definitions.pdf +theory/paper_pasc/pasc_paper.pdf m4/libtool.m4 m4/ltoptions.m4 diff --git a/README b/README index 0c57e3f5656268c71bb7732af933302cbde9547b..59c362415a9199d08fb6dfbb1ed044c66e647254 100644 --- a/README +++ b/README @@ -23,6 +23,7 @@ Valid options are: -g Run with an external gravitational potential -G Run with self-gravity -s Run with SPH + -t {int} The number of threads to use on each MPI rank. Defaults to 1 if not specified. -v [12] Increase the level of verbosity 1: MPI-rank 0 writes 2: All MPI-ranks write -y {int} Time-step frequency at which task graphs are dumped diff --git a/configure.ac b/configure.ac index 75f6b02d2b238365730a49c2743ccc5eee0cddf6..497107121753f212dd8b07f5a8e8eed7acdf82b5 100644 --- a/configure.ac +++ b/configure.ac @@ -16,7 +16,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # Init the project. -AC_INIT([SWIFT],[0.2.0]) +AC_INIT([SWIFT],[0.3.0]) AC_CONFIG_SRCDIR([src/space.c]) AC_CONFIG_AUX_DIR([.]) AM_INIT_AUTOMAKE diff --git a/examples/CosmoVolume/cosmoVolume.yml b/examples/CosmoVolume/cosmoVolume.yml index 20d5febb280748a208633f75351d523b79286035..b857ce4a1e27cb7484148346dafcef2001e67561 100644 --- a/examples/CosmoVolume/cosmoVolume.yml +++ b/examples/CosmoVolume/cosmoVolume.yml @@ -1,6 +1,6 @@ # Define the system of units to use internally. -UnitSystem: +InternalUnitSystem: UnitMass_in_cgs: 1 # Grams UnitLength_in_cgs: 1 # Centimeters UnitVelocity_in_cgs: 1 # Centimeters per second @@ -9,7 +9,6 @@ UnitSystem: # Parameters for the task scheduling Scheduler: - nr_threads: 16 # The number of threads per MPI rank to use. nr_queues: 0 # The number of task queues to use. Use 0 to let the system decide. cell_max_size: 8000000 # Maximal number of interactions per task (this is the default value). cell_sub_size: 5000 # Maximal number of interactions per sub-task (this is the default value). @@ -22,13 +21,25 @@ TimeIntegration: dt_min: 1e-6 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). +# Parameters governing the snapshots +Snapshots: + basename: cosmo # Common part of the name of output files + time_first: 0. # Time of the first output (in internal units) + delta_time: 0.05 # Time difference between consecutive outputs (in internal units) + UnitMass_in_cgs: 1 # Grams + UnitLength_in_cgs: 1 # Centimeters + UnitVelocity_in_cgs: 1 # Centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + # Parameters for the hydrodynamics scheme SPH: - resolution_eta: 1.2349 # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel). - delta_neighbours: 1. # The tolerance for the targetted number of neighbours. - CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + delta_neighbours: 0.1 # The tolerance for the targetted number of neighbours. max_ghost_iterations: 30 # Maximal number of iterations allowed to converge towards the smoothing length. max_smoothing_length: 0.6 # Maximal smoothing length allowed (in internal units). + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + max_volume_change: 2. # Maximal allowed change of volume over one time-step # Parameters related to the initial conditions InitialConditions: diff --git a/examples/CosmoVolume/run.sh b/examples/CosmoVolume/run.sh index a788a35c76a7c0b205297a7de922a9a7e833243a..412456c3cb1ae9b869afa52b1046747e32c2eefe 100755 --- a/examples/CosmoVolume/run.sh +++ b/examples/CosmoVolume/run.sh @@ -7,4 +7,4 @@ then ./getIC.sh fi -../swift -s cosmoVolume.yml +../swift -s -t 16 cosmoVolume.yml diff --git a/examples/ExternalPointMass/externalPointMass.yml b/examples/ExternalPointMass/externalPointMass.yml new file mode 100644 index 0000000000000000000000000000000000000000..6d05cb5b43c541490432c8f283b662c9d7c8d184 --- /dev/null +++ b/examples/ExternalPointMass/externalPointMass.yml @@ -0,0 +1,67 @@ + +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.9885e33 # Grams + UnitLength_in_cgs: 3.0856776e21 # Centimeters + UnitVelocity_in_cgs: 1e5 # Centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Parameters for the task scheduling +Scheduler: + nr_queues: 0 # The number of task queues to use. Use 0 to let the system decide. + cell_max_size: 8000000 # Maximal number of interactions per task (this is the default value). + cell_sub_size: 8000000 # Maximal number of interactions per sub-task (this is the default value). + cell_split_size: 400 # Maximal number of particles per cell (this is the default value). + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 1. # The end time of the simulation (in internal units). + dt_min: 1e-6 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). + +# Parameters governing the snapshots +Snapshots: + basename: pointMass # Common part of the name of output files + time_first: 0. # Time of the first output (in internal units) + delta_time: 0.02 # Time difference between consecutive outputs (in internal units) + UnitMass_in_cgs: 1.9885e33 # Grams + UnitLength_in_cgs: 3.0856776e21 # Centimeters + UnitVelocity_in_cgs: 1e5 # Centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + delta_neighbours: 0.1 # The tolerance for the targetted number of neighbours. + max_ghost_iterations: 30 # Maximal number of iterations allowed to converge towards the smoothing length. + max_smoothing_length: 10. # Maximal smoothing length allowed (in internal units). + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + max_volume_change: 2. # Maximal allowed change of volume over one time-step + +# Parameters related to the initial conditions +InitialConditions: + file_name: Sphere.hdf5 # The file to read + h_scaling: 1. # A scaling factor to apply to all smoothing lengths in the ICs. + shift_x: 50. # A shift to apply to all particles read from the ICs (in internal units). + shift_y: 50. + shift_z: 50. + +# Parameters govering domain decomposition +DomainDecomposition: + initial_type: m # The initial strategy ("g", "m", "w", or "v"). See documentation for details. + initial_grid_x: 10 # Grid size if the 'g' strategy is chosen. + initial_grid_y: 10 + initial_grid_z: 10 + repartition_type: b # The re-decomposition strategy ("n", "b", "v", "e" or "x"). See documentation for details. + + +# External potential parameters +PointMass: + position_x: 50. # location of external point mass in internal units + position_y: 50. + position_z: 50. + mass: 1e10 # mass of external point mass in internal units + diff --git a/examples/ExternalPointMass/makeIC.py b/examples/ExternalPointMass/makeIC.py new file mode 100644 index 0000000000000000000000000000000000000000..b1b032e47383d6e757306f09063a8931635ea8e9 --- /dev/null +++ b/examples/ExternalPointMass/makeIC.py @@ -0,0 +1,142 @@ +############################################################################### + # This file is part of SWIFT. + # Copyright (c) 2016 John A. Regan (john.a.regan@durham.ac.uk) + # Tom Theuns (tom.theuns@durham.ac.uk) + # + # This program is free software: you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published + # by the Free Software Foundation, either version 3 of the License, or + # (at your option) any later version. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + # GNU General Public License for more details. + # + # You should have received a copy of the GNU Lesser General Public License + # along with this program. If not, see <http://www.gnu.org/licenses/>. + # + ############################################################################## + +import h5py +import sys +import numpy +import math +import random + +# Generates a random distriution of particles, for motion in an external potnetial centred at (0,0,0) + +# physical constants in cgs +NEWTON_GRAVITY_CGS = 6.672e-8 +SOLAR_MASS_IN_CGS = 1.9885e33 +PARSEC_IN_CGS = 3.0856776e18 + +# choice of units +const_unit_length_in_cgs = (1000*PARSEC_IN_CGS) +const_unit_mass_in_cgs = (SOLAR_MASS_IN_CGS) +const_unit_velocity_in_cgs = (1e5) + +print "UnitMass_in_cgs: ", const_unit_mass_in_cgs +print "UnitLength_in_cgs: ", const_unit_length_in_cgs +print "UnitVelocity_in_cgs: ", const_unit_velocity_in_cgs + +# derived units +const_unit_time_in_cgs = (const_unit_length_in_cgs / const_unit_velocity_in_cgs) +const_G = ((NEWTON_GRAVITY_CGS*const_unit_mass_in_cgs*const_unit_time_in_cgs*const_unit_time_in_cgs/(const_unit_length_in_cgs*const_unit_length_in_cgs*const_unit_length_in_cgs))) +print 'G=', const_G + + +# Parameters +periodic= 1 # 1 For periodic box +boxSize = 100. # +Radius = boxSize / 4. # maximum radius of particles +G = const_G +Mass = 1e10 + +N = int(sys.argv[1]) # Number of particles +L = N**(1./3.) + +# these are not used but necessary for I/O +rho = 2. # Density +P = 1. # Pressure +gamma = 5./3. # Gas adiabatic index +fileName = "Sphere.hdf5" + + +#--------------------------------------------------- +numPart = N +mass = 1 +internalEnergy = P / ((gamma - 1.)*rho) + +#-------------------------------------------------- + +#File +file = h5py.File(fileName, 'w') + +# Header +grp = file.create_group("/Header") +grp.attrs["BoxSize"] = boxSize +grp.attrs["NumPart_Total"] = [0, numPart, 0, 0, 0, 0] +grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0] +grp.attrs["NumPart_ThisFile"] = [0, numPart, 0, 0, 0, 0] +grp.attrs["Time"] = 0.0 +grp.attrs["NumFilesPerSnapshot"] = 1 +grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +grp.attrs["Flag_Entropy_ICs"] = [0, 0, 0, 0, 0, 0] + + +#Runtime parameters +grp = file.create_group("/RuntimePars") +grp.attrs["PeriodicBoundariesOn"] = periodic + +#Particle group +#grp0 = file.create_group("/PartType0") +grp1 = file.create_group("/PartType1") +#generate particle positions +radius = Radius * (numpy.random.rand(N))**(1./3.) +ctheta = -1. + 2 * numpy.random.rand(N) +stheta = numpy.sqrt(1.-ctheta**2) +phi = 2 * math.pi * numpy.random.rand(N) +r = numpy.zeros((numPart, 3)) +# r[:,0] = radius * stheta * numpy.cos(phi) +# r[:,1] = radius * stheta * numpy.sin(phi) +# r[:,2] = radius * ctheta +r[:,0] = radius +# +speed = numpy.sqrt(G * Mass / radius) +v = numpy.zeros((numPart, 3)) +omega = speed / radius +period = 2.*math.pi/omega +print 'period = minimum = ',min(period), ' maximum = ',max(period) + +v[:,0] = -omega * r[:,1] +v[:,1] = omega * r[:,0] + +ds = grp1.create_dataset('Velocities', (numPart, 3), 'f') +ds[()] = v +v = numpy.zeros(1) + +m = numpy.full((numPart, ), mass) +ds = grp1.create_dataset('Masses', (numPart,), 'f') +ds[()] = m +m = numpy.zeros(1) + +h = numpy.full((numPart, ), 1.1255 * boxSize / L) +ds = grp1.create_dataset('SmoothingLength', (numPart,), 'f') +ds[()] = h +h = numpy.zeros(1) + +u = numpy.full((numPart, ), internalEnergy) +ds = grp1.create_dataset('InternalEnergy', (numPart,), 'f') +ds[()] = u +u = numpy.zeros(1) + + +ids = 1 + numpy.linspace(0, numPart, numPart, endpoint=False) +ds = grp1.create_dataset('ParticleIDs', (numPart, ), 'L') +ds[()] = ids + +ds = grp1.create_dataset('Coordinates', (numPart, 3), 'd') +ds[()] = r + +file.close() diff --git a/examples/ExternalPointMass/run.sh b/examples/ExternalPointMass/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..9bbe03738b472f48715ae1875dfd462ef577b3d9 --- /dev/null +++ b/examples/ExternalPointMass/run.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Generate the initial conditions if they are not present. +if [ ! -e Sphere.hdf5 ] +then + echo "Generating initial conditions for the point mass potential box example..." + python makeIC.py 10000 +fi + +../swift -g -t 2 externalPointMass.yml diff --git a/examples/ExternalPointMass/test.pro b/examples/ExternalPointMass/test.pro new file mode 100644 index 0000000000000000000000000000000000000000..21c10e9d27daa45b085c6a659ba3cf7260f017fb --- /dev/null +++ b/examples/ExternalPointMass/test.pro @@ -0,0 +1,65 @@ +; +; test energy / angular momentum conservation of test problem +; +@physunits + +indir = '/gpfs/data/tt/Codes/Swift-git/swiftsim/examples/' +basefile = 'output_' +nfiles = 657 +nfollow = 100 ; number of particles to follow +eout = fltarr(nfollow, nfiles) +ekin = fltarr(nfollow, nfiles) +epot = fltarr(nfollow, nfiles) +tout = fltarr(nfiles) +; set properties of potential +uL = 1e3 * phys.pc ; unit of length +uM = phys.msun ; unit of mass +uV = 1d5 ; unit of velocity + +; derived units +constG = 10.^(alog10(phys.g)+alog10(uM)-2d0*alog10(uV)-alog10(uL)) ; +pcentre = [50.,50.,50.] * 1d3 * pc / uL +mextern = 1d10 * msun / uM +; +; +; +ifile = 0 +for ifile=0,nfiles-1 do begin +;for ifile=0,3 do begin + inf = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5' + time = h5ra(inf, 'Header','Time') + p = h5rd(inf,'PartType1/Coordinates') + v = h5rd(inf,'PartType1/Velocities') + id = h5rd(inf,'PartType1/ParticleIDs') + indx = sort(id) +; + id = id[indx] + for ic=0,2 do begin + tmp = reform(p[ic,*]) & p[ic,*] = tmp[indx] + tmp = reform(v[ic,*]) & v[ic,*] = tmp[indx] + endfor +; calculate energy + dd = size(p,/dimen) & npart = dd[1] + ener = fltarr(npart) + dr = fltarr(npart) & dv = dr + for ic=0,2 do dr[*] = dr[*] + (p[ic,*]-pcentre[ic])^2 + for ic=0,2 do dv[*] = dv[*] + v[ic,*]^2 + dr = sqrt(dr) +; print,'time = ',time,p[0,0],v[0,0],id[0] + ek = 0.5 * dv + ep = - constG * mextern / dr + ener = ek + ep + tout(ifile) = time + eout(*,ifile) = ener[0:nfollow-1] + ekin(*,ifile) = ek[0:nfollow-1] + epot(*,ifile) = ep[0:nfollow-1] +endfor + +; calculate relative energy change +de = 0.0 * eout +for ifile=1, nfiles -1 do de[*,ifile] = (eout[*,ifile]-eout[*,0])/eout[*,0] + + +end + + diff --git a/examples/Makefile.am b/examples/Makefile.am index 200d340be168651e6dfc91f4b89a0a343d34d9da..15fceb236057d21ba878cb09da39aca5bb70b692 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -62,21 +62,19 @@ swift_fixdt_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="e swift_fixdt_mpi_LDADD = ../src/.libs/libswiftsim_mpi.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(MPI_LIBS) # Scripts to generate ICs -EXTRA_DIST = UniformBox/makeIC.py \ +EXTRA_DIST = UniformBox/makeIC.py UniformBox/run.sh UniformBox/uniformBox.yml \ UniformDMBox/makeIC.py \ PerturbedBox/makeIC.py \ - SedovBlast/makeIC.py SedovBlast/makeIC_fcc.py SedovBlast/solution.py \ - SodShock/makeIC.py SodShock/solution.py SodShock/glass_001.hdf5 SodShock/glass_002.hdf5 SodShock/rhox.py \ - CosmoVolume/getIC.sh \ + SedovBlast/makeIC.py SedovBlast/makeIC_fcc.py SedovBlast/solution.py SedovBlast/run.sh SedovBlast/sedov.yml \ + SodShock/makeIC.py SodShock/solution.py SodShock/glass_001.hdf5 SodShock/glass_002.hdf5 SodShock/rhox.py SodShock/run.sh SodShock/sodShock.yml \ + CosmoVolume/getIC.sh CosmoVolume/run.sh CosmoVolume/cosmoVolume.yml \ BigCosmoVolume/makeIC.py \ BigPerturbedBox/makeIC_fcc.py \ GreshoVortex/makeIC.py GreshoVortex/solution.py \ - MultiTypes/makeIC.py - + MultiTypes/makeIC.py \ + parameter_example.yml # Scripts to plot task graphs EXTRA_DIST += plot_tasks_MPI.py plot_tasks.py \ process_plot_tasks_MPI process_plot_tasks -# Simple run scripts -EXTRA_DIST += runs.sh diff --git a/examples/SedovBlast/run.sh b/examples/SedovBlast/run.sh index 58646cf42eecc3f31fdb8a63ca2108c02d9580ba..f71830eb6a66a9ea84e93fd1bed1261b1cb42b7b 100755 --- a/examples/SedovBlast/run.sh +++ b/examples/SedovBlast/run.sh @@ -7,4 +7,4 @@ then python makeIC_fcc.py fi -../swift -s sedov.yml +../swift -s -t 16 sedov.yml diff --git a/examples/SedovBlast/sedov.yml b/examples/SedovBlast/sedov.yml index f354ef5679eb5b6176ab90298bb307c6c2b27f0e..b5ad8d855535e70133464fa6a117db37a2c9fb8b 100644 --- a/examples/SedovBlast/sedov.yml +++ b/examples/SedovBlast/sedov.yml @@ -1,6 +1,6 @@ # Define the system of units to use internally. -UnitSystem: +InternalUnitSystem: UnitMass_in_cgs: 1 # Grams UnitLength_in_cgs: 1 # Centimeters UnitVelocity_in_cgs: 1 # Centimeters per second @@ -9,7 +9,6 @@ UnitSystem: # Parameters for the task scheduling Scheduler: - nr_threads: 16 # The number of threads per MPI rank to use. nr_queues: 0 # The number of task queues to use. Use 0 to let the system decide. cell_max_size: 8000000 # Maximal number of interactions per task (this is the default value). cell_sub_size: 5000 # Maximal number of interactions per sub-task (this is the default value). @@ -22,13 +21,25 @@ TimeIntegration: dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). +# Parameters governing the snapshots +Snapshots: + basename: sedov # Common part of the name of output files + time_first: 0. # Time of the first output (in internal units) + delta_time: 0.1 # Time difference between consecutive outputs (in internal units) + UnitMass_in_cgs: 1 # Grams + UnitLength_in_cgs: 1 # Centimeters + UnitVelocity_in_cgs: 1 # Centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + # Parameters for the hydrodynamics scheme SPH: - resolution_eta: 1.2349 # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel). - delta_neighbours: 1. # The tolerance for the targetted number of neighbours. - CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + delta_neighbours: 0.1 # The tolerance for the targetted number of neighbours. max_ghost_iterations: 30 # Maximal number of iterations allowed to converge towards the smoothing length. max_smoothing_length: 1. # Maximal smoothing length allowed (in internal units). + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + max_volume_change: 2. # Maximal allowed change of volume over one time-step # Parameters related to the initial conditions InitialConditions: diff --git a/examples/SodShock/run.sh b/examples/SodShock/run.sh index 646f1e3a337170e2e406c24e7505e42b81de364b..b8141e51543f348d6ec6be505d136aed7d803b2e 100755 --- a/examples/SodShock/run.sh +++ b/examples/SodShock/run.sh @@ -7,4 +7,4 @@ then python makeIC.py fi -../swift -s sodShock.yml +../swift -s -t 16 sodShock.yml diff --git a/examples/SodShock/sodShock.yml b/examples/SodShock/sodShock.yml index 5fe7be7b9fc13bb5bc67556d79d8ff9d9eff81d9..7bb1895c15c80c6a4e54248d1d2bf85dea278ce1 100644 --- a/examples/SodShock/sodShock.yml +++ b/examples/SodShock/sodShock.yml @@ -1,6 +1,6 @@ # Define the system of units to use internally. -UnitSystem: +InternalUnitSystem: UnitMass_in_cgs: 1 # Grams UnitLength_in_cgs: 1 # Centimeters UnitVelocity_in_cgs: 1 # Centimeters per second @@ -9,7 +9,6 @@ UnitSystem: # Parameters for the task scheduling Scheduler: - nr_threads: 16 # The number of threads per MPI rank to use. nr_queues: 0 # The number of task queues to use. Use 0 to let the system decide. cell_max_size: 8000000 # Maximal number of interactions per task (this is the default value). cell_sub_size: 5000 # Maximal number of interactions per sub-task. @@ -22,13 +21,25 @@ TimeIntegration: dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). +# Parameters governing the snapshots +Snapshots: + basename: sod # Common part of the name of output files + time_first: 0. # Time of the first output (in internal units) + delta_time: 0.1 # Time difference between consecutive outputs (in internal units) + UnitMass_in_cgs: 1 # Grams + UnitLength_in_cgs: 1 # Centimeters + UnitVelocity_in_cgs: 1 # Centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + # Parameters for the hydrodynamics scheme SPH: - resolution_eta: 1.2349 # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel). - delta_neighbours: 1. # The tolerance for the targetted number of neighbours. - CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + delta_neighbours: 0.1 # The tolerance for the targetted number of neighbours. max_ghost_iterations: 30 # Maximal number of iterations allowed to converge towards the smoothing length. max_smoothing_length: 0.01 # Maximal smoothing length allowed (in internal units). + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + max_volume_change: 2. # Maximal allowed change of volume over one time-step # Parameters related to the initial conditions InitialConditions: diff --git a/examples/UniformBox/run.sh b/examples/UniformBox/run.sh index ca78b0ac0425bf1b3f6dd9d30bfc95d35083739f..0cb0a505915be47bafb99ed7531685bfeb3dc829 100755 --- a/examples/UniformBox/run.sh +++ b/examples/UniformBox/run.sh @@ -7,4 +7,4 @@ then python makeIC.py 100 fi -../swift -s uniformBox.yml +../swift -s -t 16 uniformBox.yml diff --git a/examples/UniformBox/uniformBox.yml b/examples/UniformBox/uniformBox.yml index 2d5512815b60511b5dbc373df43fae4658272093..ce989d22ad8a2310ef86303e1b7b6788fdcc427f 100644 --- a/examples/UniformBox/uniformBox.yml +++ b/examples/UniformBox/uniformBox.yml @@ -1,6 +1,6 @@ # Define the system of units to use internally. -UnitSystem: +InternalUnitSystem: UnitMass_in_cgs: 1 # Grams UnitLength_in_cgs: 1 # Centimeters UnitVelocity_in_cgs: 1 # Centimeters per second @@ -9,7 +9,6 @@ UnitSystem: # Parameters for the task scheduling Scheduler: - nr_threads: 16 # The number of threads per MPI rank to use. nr_queues: 0 # The number of task queues to use. Use 0 to let the system decide. cell_max_size: 8000000 # Maximal number of interactions per task (this is the default value). cell_sub_size: 5000 # Maximal number of interactions per sub-task (this is the default value). @@ -22,14 +21,26 @@ TimeIntegration: dt_min: 1e-6 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). +# Parameters governing the snapshots +Snapshots: + basename: uniformBox # Common part of the name of output files + time_first: 0. # Time of the first output (in internal units) + delta_time: 0.01 # Time difference between consecutive outputs (in internal units) + UnitMass_in_cgs: 1 # Grams + UnitLength_in_cgs: 1 # Centimeters + UnitVelocity_in_cgs: 1 # Centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + # Parameters for the hydrodynamics scheme SPH: - resolution_eta: 1.2349 # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel). - delta_neighbours: 1. # The tolerance for the targetted number of neighbours. - CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + delta_neighbours: 0.1 # The tolerance for the targetted number of neighbours. max_ghost_iterations: 30 # Maximal number of iterations allowed to converge towards the smoothing length. max_smoothing_length: 0.1 # Maximal smoothing length allowed (in internal units). - + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + max_volume_change: 2. # Maximal allowed change of volume over one time-step + # Parameters related to the initial conditions InitialConditions: file_name: ./uniformBox.hdf5 # The file to read diff --git a/examples/main.c b/examples/main.c index bd94d79ad2530c51bd1dc82401c5e3d60cc5e589..b607791729ac0d419a227edae72814542898d931 100644 --- a/examples/main.c +++ b/examples/main.c @@ -3,6 +3,9 @@ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk), * Matthieu Schaller (matthieu.schaller@durham.ac.uk) * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * Angus Lepper (angus.lepper@ed.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -67,6 +70,9 @@ void print_help_message() { "Run with an external gravitational potential"); printf(" %2s %8s %s\n", "-G", "", "Run with self-gravity"); printf(" %2s %8s %s\n", "-s", "", "Run with SPH"); + printf(" %2s %8s %s\n", "-t", "{int}", + "The number of threads to use on each MPI rank. Defaults to 1 if not " + "specified."); printf(" %2s %8s %s\n", "-v", "[12]", "Increase the level of verbosity 1: MPI-rank 0 writes "); printf(" %2s %8s %s\n", "", "", "2: All MPI-ranks write"); @@ -134,12 +140,13 @@ int main(int argc, char *argv[]) { int with_hydro = 0; int with_fp_exceptions = 0; int verbose = 0; + int nr_threads = 1; char paramFileName[200] = ""; unsigned long long cpufreq = 0; /* Parse the parameters */ int c; - while ((c = getopt(argc, argv, "cdef:gGhsv:y")) != -1) switch (c) { + while ((c = getopt(argc, argv, "cdef:gGhst:v:y:")) != -1) switch (c) { case 'c': with_cosmology = 1; break; @@ -168,6 +175,14 @@ int main(int argc, char *argv[]) { case 's': with_hydro = 1; break; + case 't': + if (sscanf(optarg, "%d", &nr_threads) != 1) { + if (myrank == 0) + printf("Error parsing the number of threads (-t).\n"); + if (myrank == 0) print_help_message(); + return 1; + } + break; case 'v': if (sscanf(optarg, "%d", &verbose) != 1) { if (myrank == 0) printf("Error parsing verbosity level (-v).\n"); @@ -249,17 +264,6 @@ int main(int argc, char *argv[]) { MPI_Bcast(params, sizeof(struct swift_params), MPI_BYTE, 0, MPI_COMM_WORLD); #endif - /* Initialize unit system */ - struct UnitSystem us; - units_init(&us, params); - if (myrank == 0) { - message("Unit system: U_M = %e g.", us.UnitMass_in_cgs); - message("Unit system: U_L = %e cm.", us.UnitLength_in_cgs); - message("Unit system: U_t = %e s.", us.UnitTime_in_cgs); - message("Unit system: U_I = %e A.", us.UnitCurrent_in_cgs); - message("Unit system: U_T = %e K.", us.UnitTemperature_in_cgs); - } - /* Prepare the domain decomposition scheme */ #ifdef WITH_MPI struct partition initial_partition; @@ -277,6 +281,29 @@ int main(int argc, char *argv[]) { } #endif + /* Initialize unit system and constants */ + struct UnitSystem us; + struct phys_const prog_const; + units_init(&us, params, "InternalUnitSystem"); + phys_const_init(&us, &prog_const); + if (myrank == 0 && verbose > 0) { + message("Unit system: U_M = %e g.", us.UnitMass_in_cgs); + message("Unit system: U_L = %e cm.", us.UnitLength_in_cgs); + message("Unit system: U_t = %e s.", us.UnitTime_in_cgs); + message("Unit system: U_I = %e A.", us.UnitCurrent_in_cgs); + message("Unit system: U_T = %e K.", us.UnitTemperature_in_cgs); + phys_const_print(&prog_const); + } + + /* Initialise the hydro properties */ + struct hydro_props hydro_properties; + hydro_props_init(&hydro_properties, params); + + /* Initialise the external potential properties */ + struct external_potential potential; + if (with_external_gravity) potential_init(params, &us, &potential); + if (with_external_gravity && myrank == 0) potential_print(&potential); + /* Read particles and space information from (GADGET) ICs */ char ICfileName[200] = ""; parser_get_param_string(params, "InitialConditions:file_name", ICfileName); @@ -314,6 +341,13 @@ int main(int argc, char *argv[]) { for (size_t k = 0; k < Ngas; ++k) parts[k].gpart = NULL; Ngpart = 0; } + if (!with_hydro) { + free(parts); + parts = NULL; + for (size_t k = 0; k < Ngpart; ++k) + if (gparts[k].id > 0) error("Linking problem"); + Ngas = 0; + } /* Get the total number of particles across all nodes. */ long long N_total[2] = {0, 0}; @@ -377,7 +411,8 @@ int main(int argc, char *argv[]) { /* Initialize the engine with the space and policies. */ if (myrank == 0) clocks_gettime(&tic); struct engine e; - engine_init(&e, &s, params, nr_nodes, myrank, engine_policies, talking); + engine_init(&e, &s, params, nr_nodes, myrank, nr_threads, engine_policies, + talking, &prog_const, &hydro_properties, &potential); if (myrank == 0) { clocks_gettime(&toc); message("engine_init took %.3f %s.", clocks_diff(&tic, &toc), @@ -385,33 +420,13 @@ int main(int argc, char *argv[]) { fflush(stdout); } + /* Write the state of the system before starting time integration. */ + if (!dry_run) engine_dump_snapshot(&e); + /* Now that everything is ready, no need for the parameters any more */ free(params); params = NULL; - int with_outputs = 1; - if (with_outputs && !dry_run) { - /* Write the state of the system before starting time integration. */ - if (myrank == 0) clocks_gettime(&tic); -#if defined(WITH_MPI) -#if defined(HAVE_PARALLEL_HDF5) - write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, - MPI_INFO_NULL); -#else - write_output_serial(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, - MPI_INFO_NULL); -#endif -#else - write_output_single(&e, &us); -#endif - if (myrank == 0 && verbose) { - clocks_gettime(&toc); - message("writing particle properties took %.3f %s.", - clocks_diff(&tic, &toc), clocks_getunit()); - fflush(stdout); - } - } - /* Init the runner history. */ #ifdef HIST for (k = 0; k < runner_hist_N; k++) runner_hist_bins[k] = 0; @@ -420,10 +435,10 @@ int main(int argc, char *argv[]) { /* Get some info to the user. */ if (myrank == 0) { message( - "Running on %lld gas particles and %lld DM particles until t=%.3e with " - "%i threads and %i queues (dt_min=%.3e, dt_max=%.3e)...", - N_total[0], N_total[1], e.timeEnd, e.nr_threads, e.sched.nr_queues, - e.dt_min, e.dt_max); + "Running on %lld gas particles and %lld DM particles from t=%.3e until " + "t=%.3e with %d threads and %d queues (dt_min=%.3e, dt_max=%.3e)...", + N_total[0], N_total[1], e.timeBegin, e.timeEnd, e.nr_threads, + e.sched.nr_queues, e.dt_min, e.dt_max); fflush(stdout); } @@ -468,28 +483,6 @@ int main(int argc, char *argv[]) { /* Take a step. */ engine_step(&e); - if (with_outputs && j % 100 == 0) { - - if (myrank == 0) clocks_gettime(&tic); -#if defined(WITH_MPI) -#if defined(HAVE_PARALLEL_HDF5) - write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, - MPI_INFO_NULL); -#else - write_output_serial(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, - MPI_INFO_NULL); -#endif -#else - write_output_single(&e, &us); -#endif - if (myrank == 0 && verbose) { - clocks_gettime(&toc); - message("writing particle properties took %.3f %s.", - clocks_diff(&tic, &toc), clocks_getunit()); - fflush(stdout); - } - } - /* Dump the task data using the given frequency. */ if (dump_tasks && (dump_tasks == 1 || j % dump_tasks == 1)) { #ifdef WITH_MPI @@ -571,28 +564,8 @@ int main(int argc, char *argv[]) { (double)runner_hist_bins[k]); #endif - if (with_outputs) { - - if (myrank == 0) clocks_gettime(&tic); -/* Write final output. */ -#if defined(WITH_MPI) -#if defined(HAVE_PARALLEL_HDF5) - write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, - MPI_INFO_NULL); -#else - write_output_serial(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD, - MPI_INFO_NULL); -#endif -#else - write_output_single(&e, &us); -#endif - if (myrank == 0 && verbose) { - clocks_gettime(&toc); - message("writing particle properties took %.3f %s.", - clocks_diff(&tic, &toc), clocks_getunit()); - fflush(stdout); - } - } + /* Write final output. */ + engine_dump_snapshot(&e); #ifdef WITH_MPI if ((res = MPI_Finalize()) != MPI_SUCCESS) diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index b91e99baf383a399b72bfb73f1791ab7ac6f3d91..761e2f67c16766bc341b2bcb47f40ebbc4ea30ad 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -1,6 +1,6 @@ # Define the system of units to use internally. -UnitSystem: +InternalUnitSystem: UnitMass_in_cgs: 1 # Grams UnitLength_in_cgs: 1 # Centimeters UnitVelocity_in_cgs: 1 # Centimeters per second @@ -9,7 +9,6 @@ UnitSystem: # Parameters for the task scheduling Scheduler: - nr_threads: 2 # The number of threads per MPI rank to use. nr_queues: 0 # The number of task queues to use. Use 0 to let the system decide. cell_max_size: 8000000 # Maximal number of interactions per task (this is the default value). cell_sub_size: 8000000 # Maximal number of interactions per sub-task (this is the default value). @@ -22,13 +21,25 @@ TimeIntegration: dt_min: 1e-6 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). +# Parameters governing the snapshots +Snapshots: + basename: output # Common part of the name of output files + time_first: 0. # Time of the first output (in internal units) + delta_time: 0.01 # Time difference between consecutive outputs (in internal units) + UnitMass_in_cgs: 1 # Unit system for the outputs (Grams) + UnitLength_in_cgs: 1 # Unit system for the outputs (Centimeters) + UnitVelocity_in_cgs: 1 # Unit system for the outputs (Centimeters per second) + UnitCurrent_in_cgs: 1 # Unit system for the outputs (Amperes) + UnitTemp_in_cgs: 1 # Unit system for the outputs (Kelvin) + # Parameters for the hydrodynamics scheme SPH: - resolution_eta: 1.2349 # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel). - delta_neighbours: 1. # The tolerance for the targetted number of neighbours. - CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + delta_neighbours: 0.1 # The tolerance for the targetted number of neighbours. max_ghost_iterations: 30 # Maximal number of iterations allowed to converge towards the smoothing length. - max_smoothing_length: 3. # Maximal smoothing length allowed (in internal units). + max_smoothing_length: 0.1 # Maximal smoothing length allowed (in internal units). + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + max_volume_change: 2. # Maximal allowed change of volume over one time-step # Parameters related to the initial conditions InitialConditions: @@ -46,3 +57,11 @@ DomainDecomposition: initial_grid_z: 10 repartition_type: b # The re-decomposition strategy ("n", "b", "v", "e" or "x"). See documentation for details. +# Parameters related to external potentials + +# Point mass external potential +PointMass: + position_x: 50. # location of external point mass in internal units + position_y: 50. + position_z: 50. + mass: 1e10 # mass of external point mass in internal units diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py index 895c32ef9c3d1490e6d30b7dc79e40171a228ee9..c96d063e0bf1adf614a447f0dd524302a070e9dd 100755 --- a/examples/plot_tasks.py +++ b/examples/plot_tasks.py @@ -58,9 +58,10 @@ PLOT_PARAMS = {"axes.labelsize": 10, pl.rcParams.update(PLOT_PARAMS) # Tasks and subtypes. Indexed as in tasks.h. -TASKTYPES = ["none", "sort", "self", "pair", "sub", "init", "ghost", "drift", "kick", - "send", "recv", "grav_pp", "grav_mm", "grav_up", "grav_down", - "part_sort", "gpart_sort", "split_cell", "rewait", "count"] +TASKTYPES = ["none", "sort", "self", "pair", "sub", "init", "ghost", "drift", + "kick", "send", "recv", "grav_pp", "grav_mm", "grav_up", + "grav_down", "grav_external", "part_sort", "gpart_sort", + "split_cell", "rewait", "count"] TASKCOLOURS = {"none": "black", "sort": "lightblue", @@ -77,6 +78,7 @@ TASKCOLOURS = {"none": "black", "grav_mm": "mediumturquoise", "grav_up": "mediumvioletred", "grav_down": "mediumnightblue", + "grav_external": "darkred", "part_sort": "steelblue", "gpart_sort": "teal" , "split_cell": "seagreen", diff --git a/examples/plot_tasks_MPI.py b/examples/plot_tasks_MPI.py index d59fe6417b524b8cb3cf8f6117fca3b8b3f3c780..ae84b0177bfa01d4bb4c2c9c3e44c088b8ae7776 100755 --- a/examples/plot_tasks_MPI.py +++ b/examples/plot_tasks_MPI.py @@ -64,9 +64,10 @@ PLOT_PARAMS = {"axes.labelsize": 10, pl.rcParams.update(PLOT_PARAMS) # Tasks and subtypes. Indexed as in tasks.h. -TASKTYPES = ["none", "sort", "self", "pair", "sub", "init", "ghost", "drift", "kick", - "send", "recv", "grav_pp", "grav_mm", "grav_up", "grav_down", - "part_sort", "gpart_sort", "split_cell", "rewait", "count"] +TASKTYPES = ["none", "sort", "self", "pair", "sub", "init", "ghost", "drift", + "kick", "send", "recv", "grav_pp", "grav_mm", "grav_up", + "grav_down", "grav_external", "part_sort", "gpart_sort", + "split_cell", "rewait", "count"] TASKCOLOURS = {"none": "black", "sort": "lightblue", @@ -83,8 +84,9 @@ TASKCOLOURS = {"none": "black", "grav_mm": "mediumturquoise", "grav_up": "mediumvioletred", "grav_down": "mediumnightblue", + "grav_external": "darkred", "part_sort": "steelblue", - "gpart_sort": "teal", + "gpart_sort": "teal" , "split_cell": "seagreen", "rewait": "olive", "count": "powerblue"} diff --git a/src/Makefile.am b/src/Makefile.am index a96f35b3cf0d8a23aec4f8c0f8d16bec8638cbcd..f50f5574d16227cc925866596ad1766ee4f5a171 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -35,13 +35,15 @@ endif # List required headers include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ engine.h swift.h serial_io.h timers.h debug.h scheduler.h proxy.h parallel_io.h \ - common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h + common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h \ + physical_constants.h physical_constants_cgs.h potentials.h version.h hydro_properties.h # Common source files AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \ units.c common_io.c single_io.c multipole.c version.c map.c \ - kernel_hydro.c kernel_gravity.c tools.c part.c partition.c clocks.c parser.c + kernel_hydro.c kernel_gravity.c tools.c part.c partition.c clocks.c parser.c \ + physical_constants.c potentials.c hydro_properties.c # Include files for distribution, not installation. nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ @@ -70,27 +72,27 @@ libswiftsim_mpi_la_CFLAGS = $(AM_CFLAGS) -DWITH_MPI $(METIS_INCS) libswiftsim_mpi_la_SHORTNAME = mpi -# Versioning. If any sources change then update the version.h file with +# Versioning. If any sources change then update the version_string.h file with # the current git revision and package version. -# May have a checkout without a version.h file and no git command (tar/zip +# May have a checkout without a version_string.h file and no git command (tar/zip # download), allow that, but make sure we know it. -version.h: version.h.in $(AM_SOURCES) $(include_HEADERS) $(noinst_HEADERS) +version_string.h: version_string.h.in $(AM_SOURCES) $(include_HEADERS) $(noinst_HEADERS) if test "X$(GIT_CMD)" != "X"; then \ GIT_REVISION=`$(GIT_CMD) describe --abbrev=8 --always --tags --dirty`; \ GIT_BRANCH=`$(GIT_CMD) branch | sed -n 's/^\* \(.*\)/\1/p'`; \ sed -e "s,@PACKAGE_VERSION\@,$(PACKAGE_VERSION)," \ -e "s,@GIT_REVISION\@,$${GIT_REVISION}," \ - -e "s|@GIT_BRANCH\@|$${GIT_BRANCH}|" version.h.in > version.h; \ + -e "s|@GIT_BRANCH\@|$${GIT_BRANCH}|" version_string.h.in > version_string.h; \ else \ - if test ! -f version.h; then \ + if test ! -f version_string.h; then \ sed -e "s,@PACKAGE_VERSION\@,$(PACKAGE_VERSION)," \ -e "s,@GIT_REVISION\@,unknown," \ - -e "s,@GIT_BRANCH\@,unknown," version.h.in > version.h; \ + -e "s,@GIT_BRANCH\@,unknown," version_string.h.in > version_string.h; \ fi; \ fi -# Make sure version.h is built first. -BUILT_SOURCES = version.h +# Make sure version_string.h is built first. +BUILT_SOURCES = version_string.h # And distribute the built files. -EXTRA_DIST = version.h version.h.in +EXTRA_DIST = version_string.h version_string.h.in diff --git a/src/cell.c b/src/cell.c index 61acfaaea7a0af01a78ab773541564e9a2723f4e..29072173b060a936fe6f4f43c091581d522985b3 100644 --- a/src/cell.c +++ b/src/cell.c @@ -1,6 +1,10 @@ /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -46,6 +50,7 @@ #include "atomic.h" #include "error.h" #include "gravity.h" +#include "hydro_properties.h" #include "hydro.h" #include "space.h" #include "timers.h" @@ -114,7 +119,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { if (k & 1) temp->loc[2] += temp->h[2]; temp->depth = c->depth + 1; temp->split = 0; - temp->dx_max = 0.0; + temp->dx_max = 0.f; temp->nodeID = c->nodeID; temp->parent = c; c->progeny[k] = temp; diff --git a/src/cell.h b/src/cell.h index a471eac44bfd3533c4220ab8c5ff2ddec724e87f..fbdaa928bda51aa76c149df1249d982fbeef64c3 100644 --- a/src/cell.h +++ b/src/cell.h @@ -1,6 +1,10 @@ /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -122,6 +126,9 @@ struct cell { /* Tasks for gravity tree. */ struct task *grav_up, *grav_down; + /* Task for external gravity */ + struct task *grav_external; + /* Number of tasks that are associated with this cell. */ int nr_tasks; diff --git a/src/common_io.c b/src/common_io.c index 2a635723d5bd4db7bce0a0172e8c083bf479ac32..143c1ca94160f2a43c40aa44384803ce721e4dbf 100644 --- a/src/common_io.c +++ b/src/common_io.c @@ -335,11 +335,15 @@ void writeCodeDescription(hid_t h_file) { * * @todo Use a proper XML library to avoid stupid copies. */ -FILE* prepareXMFfile() { +FILE* prepareXMFfile(const char* baseName) { char buffer[1024]; - FILE* xmfFile = fopen("output.xmf", "r"); - FILE* tempFile = fopen("output_temp.xmf", "w"); + char fileName[FILENAME_BUFFER_SIZE]; + char tempFileName[FILENAME_BUFFER_SIZE]; + snprintf(fileName, FILENAME_BUFFER_SIZE, "%s.xmf", baseName); + snprintf(tempFileName, FILENAME_BUFFER_SIZE, "%s_temp.xmf", baseName); + FILE* xmfFile = fopen(fileName, "r"); + FILE* tempFile = fopen(tempFileName, "w"); if (xmfFile == NULL) error("Unable to open current XMF file."); @@ -355,8 +359,8 @@ FILE* prepareXMFfile() { fclose(xmfFile); /* We then copy the XMF file back up to the closing lines */ - xmfFile = fopen("output.xmf", "w"); - tempFile = fopen("output_temp.xmf", "r"); + xmfFile = fopen(fileName, "w"); + tempFile = fopen(tempFileName, "r"); if (xmfFile == NULL) error("Unable to open current XMF file."); @@ -369,7 +373,7 @@ FILE* prepareXMFfile() { } fprintf(xmfFile, "\n"); fclose(tempFile); - remove("output_temp.xmf"); + remove(tempFileName); return xmfFile; } @@ -380,8 +384,11 @@ FILE* prepareXMFfile() { * @todo Exploit the XML nature of the XMF format to write a proper XML writer *and simplify all the XMF-related stuff. */ -void createXMFfile() { - FILE* xmfFile = fopen("output.xmf", "w"); +void createXMFfile(const char* baseName) { + + char fileName[FILENAME_BUFFER_SIZE]; + snprintf(fileName, FILENAME_BUFFER_SIZE, "%s.xmf", baseName); + FILE* xmfFile = fopen(fileName, "w"); fprintf(xmfFile, "<?xml version=\"1.0\" ?> \n"); fprintf(xmfFile, "<!DOCTYPE Xdmf SYSTEM \"Xdmf.dtd\" []> \n"); diff --git a/src/common_io.h b/src/common_io.h index b7f3a1a317d69937dde8692eead8f00c75649477..70ed25993cb110a1faf42a41c08d316c81b54271 100644 --- a/src/common_io.h +++ b/src/common_io.h @@ -97,8 +97,8 @@ void writeAttribute_i(hid_t grp, char* name, int data); void writeAttribute_l(hid_t grp, char* name, long data); void writeAttribute_s(hid_t grp, char* name, const char* str); -void createXMFfile(); -FILE* prepareXMFfile(); +void createXMFfile(const char* baseName); +FILE* prepareXMFfile(const char* baseName); void writeXMFoutputheader(FILE* xmfFile, char* hdfFileName, float time); void writeXMFoutputfooter(FILE* xmfFile, int outputCount, float time); void writeXMFgroupheader(FILE* xmfFile, char* hdfFileName, size_t N, diff --git a/src/const.h b/src/const.h index 6a52ec4796a4904629a57ffa8b32a3107bde263e..6432ef6a9e8107d94c93a32967e291d7e1e4d24d 100644 --- a/src/const.h +++ b/src/const.h @@ -24,8 +24,7 @@ #define const_hydro_gamma (5.0f / 3.0f) /* SPH Viscosity constants. */ -#define const_viscosity_alpha \ - 0.8f /* Used in the legacy gadget-2 SPH mode only */ +#define const_viscosity_alpha 0.8f #define const_viscosity_alpha_min \ 0.1f /* Values taken from (Price,2004), not used in legacy gadget mode */ #define const_viscosity_alpha_max \ @@ -38,36 +37,27 @@ 1.f /* Value taken from (Price,2008), not used in legacy gadget mode */ /* Time integration constants. */ -#define const_cfl 0.1f -#define const_ln_max_h_change \ - 0.231111721f /* Particle can't change volume by more than a factor of \ - 2=1.26^3 over one time step */ #define const_max_u_change 0.1f -/* Neighbour search constants. */ -#define const_eta_kernel \ - 1.2349f /* Corresponds to 48 ngbs with the cubic spline kernel */ -#define const_delta_nwneigh 0.1f -#define const_smoothing_max_iter 30 -#define CUBIC_SPLINE_KERNEL - /* Gravity stuff. */ -#define const_theta_max \ - 0.57735f /* Opening criteria, which is the ratio of the \ - cell distance over the cell width. */ +#define const_theta_max 0.57735f +#define const_G 6.672e-8f /* Gravitational constant. */ +#define const_epsilon 0.0014f /* Gravity blending distance. */ -#define const_G 6.672e-8f /* Gravitational constant. */ -#define const_epsilon 0.0014f /* Gravity blending distance. */ -#define const_iepsilon 714.285714286f /* Inverse gravity blending distance. */ -#define const_iepsilon2 (const_iepsilon* const_iepsilon) -#define const_iepsilon3 (const_iepsilon2* const_iepsilon) -#define const_iepsilon4 (const_iepsilon2* const_iepsilon2) -#define const_iepsilon5 (const_iepsilon3* const_iepsilon2) -#define const_iepsilon6 (const_iepsilon3* const_iepsilon3) +/* Kernel function to use */ +#define CUBIC_SPLINE_KERNEL +//#define QUARTIC_SPLINE_KERNEL +//#define QUINTIC_SPLINE_KERNEL +//#define WENDLAND_C2_KERNEL +//#define WENDLAND_C4_KERNEL +//#define WENDLAND_C6_KERNEL /* SPH variant to use */ //#define MINIMAL_SPH #define GADGET2_SPH //#define DEFAULT_SPH +/* Gravity properties */ +#define EXTERNAL_POTENTIAL_POINTMASS + #endif /* SWIFT_CONST_H */ diff --git a/src/engine.c b/src/engine.c index 625d43aaef3d62b6bc993f1659a96ed0a431cd33..188386078fc6e06612c924adf8aede77798c5c60 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1,7 +1,11 @@ /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * Angus Lepper (angus.lepper@ed.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -54,6 +58,9 @@ #include "minmax.h" #include "part.h" #include "partition.h" +#include "parallel_io.h" +#include "serial_io.h" +#include "single_io.h" #include "timers.h" const char *engine_policy_names[13] = { @@ -106,9 +113,12 @@ void engine_make_ghost_tasks(struct engine *e, struct cell *c, struct cell *super) { struct scheduler *s = &e->sched; + const int is_with_external_gravity = + (e->policy & engine_policy_external_gravity) == + engine_policy_external_gravity; /* Am I the super-cell? */ - if (super == NULL && c->nr_tasks > 0) { + if (super == NULL && (c->count > 0 || c->gcount > 0)) { /* Remember me. */ super = c; @@ -116,18 +126,32 @@ void engine_make_ghost_tasks(struct engine *e, struct cell *c, /* Local tasks only... */ if (c->nodeID == e->nodeID) { - /* Generate the ghost task. */ - c->ghost = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, 0, - c, NULL, 0); - /* Add the drift task. */ - c->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0, 0, - c, NULL, 0); /* Add the init task. */ c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0, c, NULL, 0); + + /* Add the drift task. */ + c->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0, 0, + c, NULL, 0); + /* Add the kick task. */ c->kick = scheduler_addtask(s, task_type_kick, task_subtype_none, 0, 0, c, NULL, 0); + + if (c->count > 0) { + + /* Generate the ghost task. */ + c->ghost = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, + 0, c, NULL, 0); + } + + if (c->gcount > 0) { + + /* Add the external gravity tasks */ + if (is_with_external_gravity) + c->grav_external = scheduler_addtask( + s, task_type_grav_external, task_subtype_none, 0, 0, c, NULL, 0); + } } } @@ -216,27 +240,29 @@ void engine_redistribute(struct engine *e) { space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1, e->verbose); /* We need to re-link the gpart partners of parts. */ - int current_dest = dest[0]; - size_t count_this_dest = 0; - for (size_t k = 0; k < s->nr_parts; ++k) { - if (s->parts[k].gpart != NULL) { - - /* As the addresses will be invalidated by the communications, we will */ - /* instead store the absolute index from the start of the sub-array */ - /* of particles to be sent to a given node. */ - /* Recall that gparts without partners have a negative id. */ - /* We will restore the pointers on the receiving node later on. */ - if (dest[k] != current_dest) { - current_dest = dest[k]; - count_this_dest = 0; - } + if (s->nr_parts > 0) { + int current_dest = dest[0]; + size_t count_this_dest = 0; + for (size_t k = 0; k < s->nr_parts; ++k) { + if (s->parts[k].gpart != NULL) { + + /* As the addresses will be invalidated by the communications, we will + * instead store the absolute index from the start of the sub-array of + * particles to be sent to a given node. + * Recall that gparts without partners have a negative id. + * We will restore the pointers on the receiving node later on. */ + if (dest[k] != current_dest) { + current_dest = dest[k]; + count_this_dest = 0; + } - /* Debug */ - /* if(s->parts[k].gpart->id < 0) */ - /* error("Trying to link a partnerless gpart !"); */ + /* Debug */ + /* if(s->parts[k].gpart->id < 0) */ + /* error("Trying to link a partnerless gpart !"); */ - s->parts[k].gpart->id = count_this_dest; - count_this_dest++; + s->parts[k].gpart->id = count_this_dest; + count_this_dest++; + } } } @@ -397,7 +423,6 @@ void engine_redistribute(struct engine *e) { if ((res = MPI_Waitall(6 * nr_nodes, reqs, stats)) != MPI_SUCCESS) { for (int k = 0; k < 6 * nr_nodes; k++) { char buff[MPI_MAX_ERROR_STRING]; - int res; MPI_Error_string(stats[k].MPI_ERROR, buff, &res); message("request %i has error '%s'.", k, buff); } @@ -825,7 +850,10 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, ticks tic = getticks(); /* Re-set the proxies. */ - for (int k = 0; k < e->nr_proxies; k++) e->proxies[k].nr_parts_out = 0; + for (int k = 0; k < e->nr_proxies; k++) { + e->proxies[k].nr_parts_out = 0; + e->proxies[k].nr_gparts_out = 0; + } /* Put the parts and gparts into the corresponding proxies. */ for (size_t k = 0; k < *Npart; k++) { @@ -845,7 +873,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, /* Re-link the associated gpart with the buffer offset of the part. */ if (s->parts[offset_parts + k].gpart != NULL) { - s->parts[offset_parts + k].gpart->id = e->proxies[pid].nr_parts_in; + s->parts[offset_parts + k].gpart->id = e->proxies[pid].nr_parts_out; } /* Load the part and xpart into the proxy. */ @@ -904,6 +932,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, count_parts_in, count_gparts_in); } if (offset_parts + count_parts_in > s->size_parts) { + message("re-allocating parts array."); s->size_parts = (offset_parts + count_parts_in) * engine_parts_size_grow; struct part *parts_new = NULL; struct xpart *xparts_new = NULL; @@ -918,8 +947,14 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, free(s->xparts); s->parts = parts_new; s->xparts = xparts_new; + for (size_t k = 0; k < offset_parts; k++) { + if (s->parts[k].gpart != NULL) { + s->parts[k].gpart->part = &s->parts[k]; + } + } } if (offset_gparts + count_gparts_in > s->size_gparts) { + message("re-allocating gparts array."); s->size_gparts = (offset_gparts + count_gparts_in) * engine_parts_size_grow; struct gpart *gparts_new = NULL; if (posix_memalign((void **)&gparts_new, gpart_align, @@ -928,6 +963,11 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, memcpy(gparts_new, s->gparts, sizeof(struct gpart) * offset_gparts); free(s->gparts); s->gparts = gparts_new; + for (size_t k = 0; k < offset_gparts; k++) { + if (s->gparts[k].id > 0) { + s->gparts[k].part->gpart = &s->gparts[k]; + } + } } /* Collect the requests for the particle data from the proxies. */ @@ -982,7 +1022,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, reqs_in[pid + 1] == MPI_REQUEST_NULL && reqs_in[pid + 2] == MPI_REQUEST_NULL) { /* Copy the particle data to the part/xpart/gpart arrays. */ - struct proxy *p = &e->proxies[pid >> 1]; + struct proxy *p = &e->proxies[pid / 3]; memcpy(&s->parts[offset_parts + count_parts], p->parts_in, sizeof(struct part) * p->nr_parts_in); memcpy(&s->xparts[offset_parts + count_parts], p->xparts_in, @@ -999,7 +1039,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, for (int k = 0; k < p->nr_gparts_in; k++) { struct gpart *gp = &s->gparts[offset_gparts + count_gparts + k]; if (gp->id >= 0) { - struct part *p = &s->parts[offset_gparts + count_parts + gp->id]; + struct part *p = &s->parts[offset_parts + count_parts + gp->id]; gp->part = p; p->gpart = gp; } @@ -1172,6 +1212,26 @@ void engine_count_and_link_tasks(struct engine *e) { } } +/** + * @brief Creates the dependency network for the hydro tasks of a given cell. + * + * @param sched The #scheduler. + * @param density The density task to link. + * @param force The force task to link. + * @param c The cell. + */ +static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched, + struct task *density, + struct task *force, + struct cell *c) { + + /* init --> density loop --> ghost --> force loop --> kick */ + scheduler_addunlock(sched, c->super->init, density); + scheduler_addunlock(sched, density, c->super->ghost); + scheduler_addunlock(sched, c->super->ghost, force); + scheduler_addunlock(sched, force, c->super->kick); +} + /** * @brief Duplicates the first hydro loop and construct all the * dependencies for the hydro part @@ -1210,11 +1270,7 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { atomic_inc(&t->ci->nr_force); /* Now, build all the dependencies for the hydro */ - /* init --> t (density loop) --> ghost --> t2 (force loop) --> kick */ - scheduler_addunlock(sched, t->ci->super->init, t); - scheduler_addunlock(sched, t, t->ci->super->ghost); - scheduler_addunlock(sched, t->ci->super->ghost, t2); - scheduler_addunlock(sched, t2, t->ci->super->kick); + engine_make_hydro_loops_dependencies(sched, t, t2, t->ci); } /* Otherwise, pair interaction? */ @@ -1232,18 +1288,11 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Now, build all the dependencies for the hydro for the cells */ /* that are local and are not descendant of the same super-cells */ - /* init --> t (density loop) --> ghost --> t2 (force loop) --> kick */ if (t->ci->nodeID == nodeID) { - scheduler_addunlock(sched, t->ci->super->init, t); - scheduler_addunlock(sched, t, t->ci->super->ghost); - scheduler_addunlock(sched, t->ci->super->ghost, t2); - scheduler_addunlock(sched, t2, t->ci->super->kick); + engine_make_hydro_loops_dependencies(sched, t, t2, t->ci); } if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { - scheduler_addunlock(sched, t->cj->super->init, t); - scheduler_addunlock(sched, t, t->cj->super->ghost); - scheduler_addunlock(sched, t->cj->super->ghost, t2); - scheduler_addunlock(sched, t2, t->cj->super->kick); + engine_make_hydro_loops_dependencies(sched, t, t2, t->cj); } } @@ -1265,23 +1314,24 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Now, build all the dependencies for the hydro for the cells */ /* that are local and are not descendant of the same super-cells */ - /* init --> t (density loop) --> ghost --> t2 (force loop) --> kick */ if (t->ci->nodeID == nodeID) { - scheduler_addunlock(sched, t, t->ci->super->ghost); - scheduler_addunlock(sched, t->ci->super->ghost, t2); - scheduler_addunlock(sched, t2, t->ci->super->kick); + engine_make_hydro_loops_dependencies(sched, t, t2, t->ci); } if (t->cj != NULL && t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { - scheduler_addunlock(sched, t, t->cj->super->ghost); - scheduler_addunlock(sched, t->cj->super->ghost, t2); - scheduler_addunlock(sched, t2, t->cj->super->kick); + engine_make_hydro_loops_dependencies(sched, t, t2, t->cj); } } /* /\* Kick tasks should rely on the grav_down tasks of their cell. *\/ */ /* else if (t->type == task_type_kick && t->ci->grav_down != NULL) */ /* scheduler_addunlock(sched, t->ci->grav_down, t); */ + + /* External gravity tasks should depend on init and unlock the kick */ + else if (t->type == task_type_grav_external) { + scheduler_addunlock(sched, t->ci->init, t); + scheduler_addunlock(sched, t, t->ci->kick); + } } } @@ -1383,7 +1433,7 @@ void engine_maketasks(struct engine *e) { engine_make_hydroloop_tasks(e); /* Add the gravity mm tasks. */ - if ((e->policy & engine_policy_self_gravity) == engine_policy_self_gravity) + if (e->policy & engine_policy_self_gravity) engine_make_gravityinteraction_tasks(e); /* Split the tasks. */ @@ -1399,7 +1449,7 @@ void engine_maketasks(struct engine *e) { e->nr_links = 0; /* Add the gravity up/down tasks at the top-level cells and push them down. */ - if ((e->policy & engine_policy_self_gravity) == engine_policy_self_gravity) + if (e->policy & engine_policy_self_gravity) engine_make_gravityrecursive_tasks(e); /* Count the number of tasks associated with each cell and @@ -1418,7 +1468,7 @@ void engine_maketasks(struct engine *e) { engine_make_extra_hydroloop_tasks(e); /* Add the communication tasks if MPI is being used. */ - if ((e->policy & engine_policy_mpi) == engine_policy_mpi) { + if (e->policy & engine_policy_mpi) { /* Loop over the proxies. */ for (int pid = 0; pid < e->nr_proxies; pid++) { @@ -1471,7 +1521,7 @@ int engine_marktasks(struct engine *e) { const ticks tic = getticks(); /* Much less to do here if we're on a fixed time-step. */ - if ((e->policy & engine_policy_fixdt) == engine_policy_fixdt) { + if (e->policy & engine_policy_fixdt) { /* Run through the tasks and mark as skip or not. */ for (int k = 0; k < nr_tasks; k++) { @@ -1760,7 +1810,7 @@ void engine_collect_kick(struct cell *c) { int ti_end_min = max_nr_timesteps, ti_end_max = 0; /* Only do something is the cell is non-empty */ - if (c->count != 0) { + if (c->count != 0 || c->gcount != 0) { /* If this cell is not split, I'm in trouble. */ if (!c->split) error("Cell has no super-cell."); @@ -1855,17 +1905,18 @@ void engine_init_particles(struct engine *e) { struct space *s = e->s; + struct clocks_time time1, time2; + clocks_gettime(&time1); + if (e->nodeID == 0) message("Initialising particles"); /* Make sure all particles are ready to go */ /* i.e. clean-up any stupid state in the ICs */ - if ((e->policy & engine_policy_hydro) == engine_policy_hydro) { + if (e->policy & engine_policy_hydro) { space_map_cells_pre(s, 1, cell_init_parts, NULL); } - if (((e->policy & engine_policy_self_gravity) == - engine_policy_self_gravity) || - ((e->policy & engine_policy_external_gravity) == - engine_policy_external_gravity)) { + if ((e->policy & engine_policy_self_gravity) || + (e->policy & engine_policy_external_gravity)) { space_map_cells_pre(s, 1, cell_init_gparts, NULL); } @@ -1873,22 +1924,17 @@ void engine_init_particles(struct engine *e) { engine_marktasks(e); - // printParticle(e->s->parts, 1000, e->s->nr_parts); - // printParticle(e->s->parts, 515050, e->s->nr_parts); - - // message("\n0th DENSITY CALC\n"); - /* Build the masks corresponding to the policy */ unsigned int mask = 0; unsigned int submask = 0; /* We always have sort tasks */ mask |= 1 << task_type_sort; + mask |= 1 << task_type_init; /* Add the tasks corresponding to hydro operations to the masks */ - if ((e->policy & engine_policy_hydro) == engine_policy_hydro) { + if (e->policy & engine_policy_hydro) { - mask |= 1 << task_type_init; mask |= 1 << task_type_self; mask |= 1 << task_type_pair; mask |= 1 << task_type_sub; @@ -1898,20 +1944,19 @@ void engine_init_particles(struct engine *e) { } /* Add the tasks corresponding to self-gravity to the masks */ - if ((e->policy & engine_policy_self_gravity) == engine_policy_self_gravity) { + if (e->policy & engine_policy_self_gravity) { /* Nothing here for now */ } - /* Add the tasks corresponding to self-gravity to the masks */ - if ((e->policy & engine_policy_external_gravity) == - engine_policy_external_gravity) { + /* Add the tasks corresponding to external gravity to the masks */ + if (e->policy & engine_policy_external_gravity) { - /* Nothing here for now */ + mask |= 1 << task_type_grav_external; } /* Add MPI tasks if need be */ - if ((e->policy & engine_policy_mpi) == engine_policy_mpi) { + if (e->policy & engine_policy_mpi) { mask |= 1 << task_type_send; mask |= 1 << task_type_recv; @@ -1922,16 +1967,14 @@ void engine_init_particles(struct engine *e) { engine_launch(e, e->nr_threads, mask, submask); TIMER_TOC(timer_runners); - // message("\n0th ENTROPY CONVERSION\n") - /* Apply some conversions (e.g. internal energy -> entropy) */ - space_map_cells_pre(s, 1, cell_convert_hydro, NULL); + space_map_cells_pre(s, 0, cell_convert_hydro, NULL); - // printParticle(e->s->parts, e->s->xparts,1000, e->s->nr_parts); - // printParticle(e->s->parts, e->s->xparts,515050, e->s->nr_parts); + clocks_gettime(&time2); /* Ready to go */ e->step = -1; + e->wallclock_time = (float)clocks_diff(&time1, &time2); } /** @@ -2011,7 +2054,24 @@ void engine_step(struct engine *e) { } #endif - // message("\nDRIFT\n"); + /* Check for output */ + while (ti_end_min >= e->ti_nextSnapshot && e->ti_nextSnapshot > 0) { + + e->ti_old = e->ti_current; + e->ti_current = e->ti_nextSnapshot; + e->time = e->ti_current * e->timeBase + e->timeBegin; + e->timeOld = e->ti_old * e->timeBase + e->timeBegin; + e->timeStep = (e->ti_current - e->ti_old) * e->timeBase; + + /* Drift everybody to the snapshot position */ + engine_launch(e, e->nr_threads, 1 << task_type_drift, 0); + + /* Dump... */ + engine_dump_snapshot(e); + + /* ... and find the next output time */ + engine_compute_next_snapshot_time(e); + } /* Move forward in time */ e->ti_old = e->ti_current; @@ -2024,11 +2084,6 @@ void engine_step(struct engine *e) { /* Drift everybody */ engine_launch(e, e->nr_threads, 1 << task_type_drift, 0); - // printParticle(e->s->parts, e->s->xparts, 1000, e->s->nr_parts); - // printParticle(e->s->parts, e->s->xparts, 515050, e->s->nr_parts); - - // if(e->step == 2) exit(0); - if (e->nodeID == 0) { /* Print some information to the screen */ @@ -2043,8 +2098,6 @@ void engine_step(struct engine *e) { fflush(e->file_stats); } - // message("\nACCELERATION AND KICK\n"); - /* Re-distribute the particles amongst the nodes? */ if (e->forcerepart != REPART_NONE) engine_repartition(e); @@ -2057,11 +2110,11 @@ void engine_step(struct engine *e) { /* We always have sort tasks and kick tasks */ mask |= 1 << task_type_sort; mask |= 1 << task_type_kick; + mask |= 1 << task_type_init; /* Add the tasks corresponding to hydro operations to the masks */ - if ((e->policy & engine_policy_hydro) == engine_policy_hydro) { + if (e->policy & engine_policy_hydro) { - mask |= 1 << task_type_init; mask |= 1 << task_type_self; mask |= 1 << task_type_pair; mask |= 1 << task_type_sub; @@ -2072,20 +2125,18 @@ void engine_step(struct engine *e) { } /* Add the tasks corresponding to self-gravity to the masks */ - if ((e->policy & engine_policy_self_gravity) == engine_policy_self_gravity) { + if (e->policy & engine_policy_self_gravity) { /* Nothing here for now */ } - /* Add the tasks corresponding to self-gravity to the masks */ - if ((e->policy & engine_policy_external_gravity) == - engine_policy_external_gravity) { - - /* Nothing here for now */ + /* Add the tasks corresponding to external gravity to the masks */ + if (e->policy & engine_policy_external_gravity) { + mask |= 1 << task_type_grav_external; } /* Add MPI tasks if need be */ - if ((e->policy & engine_policy_mpi) == engine_policy_mpi) { + if (e->policy & engine_policy_mpi) { mask |= 1 << task_type_send; mask |= 1 << task_type_recv; @@ -2101,8 +2152,6 @@ void engine_step(struct engine *e) { clocks_gettime(&time2); e->wallclock_time = (float)clocks_diff(&time1, &time2); - // printParticle(e->s->parts, e->s->xparts,1000, e->s->nr_parts); - // printParticle(e->s->parts, e->s->xparts,515050, e->s->nr_parts); } /** @@ -2302,6 +2351,37 @@ void engine_split(struct engine *e, struct partition *initial_partition) { #endif } +/** + * @brief Writes a snapshot with the current state of the engine + * + * @param e The #engine. + */ +void engine_dump_snapshot(struct engine *e) { + + struct clocks_time time1, time2; + clocks_gettime(&time1); + + if (e->verbose) message("writing snapshot at t=%f.", e->time); + +/* Dump... */ +#if defined(WITH_MPI) +#if defined(HAVE_PARALLEL_HDF5) + write_output_parallel(e, e->snapshotBaseName, e->snapshotUnits, e->nodeID, + e->nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL); +#else + write_output_serial(e, e->snapshotBaseName, e->snapshotUnits, e->nodeID, + e->nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL); +#endif +#else + write_output_single(e, e->snapshotBaseName, e->snapshotUnits); +#endif + + clocks_gettime(&time2); + if (e->verbose) + message("writing particle properties took %.3f %s.", + (float)clocks_diff(&time1, &time2), clocks_getunit()); +} + #ifdef HAVE_SETAFFINITY /** * @brief Returns the initial affinity the main thread is using. @@ -2328,7 +2408,6 @@ void engine_pin() { #ifdef HAVE_SETAFFINITY cpu_set_t *entry_affinity = engine_entry_affinity(); - int pin; for (pin = 0; pin < CPU_SETSIZE && !CPU_ISSET(pin, entry_affinity); ++pin) ; @@ -2365,20 +2444,27 @@ void engine_unpin() { * @param params The parsed parameter file. * @param nr_nodes The number of MPI ranks. * @param nodeID The MPI rank of this node. + * @param nr_threads The number of threads per MPI rank. * @param policy The queuing policy to use. * @param verbose Is this #engine talkative ? + * @param physical_constants The #phys_const used for this run. + * @param hydro The #hydro_props used for this run. + * @param potential The properties of the external potential. */ void engine_init(struct engine *e, struct space *s, const struct swift_params *params, int nr_nodes, int nodeID, - int policy, int verbose) { + int nr_threads, int policy, int verbose, + const struct phys_const *physical_constants, + const struct hydro_props *hydro, + const struct external_potential *potential) { /* Clean-up everything */ bzero(e, sizeof(struct engine)); /* Store the values. */ e->s = s; - e->nr_threads = parser_get_param_int(params, "Scheduler:nr_threads"); + e->nr_threads = nr_threads; e->policy = policy; e->step = 0; e->nr_nodes = nr_nodes; @@ -2396,12 +2482,24 @@ void engine_init(struct engine *e, struct space *s, e->ti_old = 0; e->ti_current = 0; e->timeStep = 0.; + e->timeBase = 0.; + e->timeFirstSnapshot = + parser_get_param_double(params, "Snapshots:time_first"); + e->deltaTimeSnapshot = + parser_get_param_double(params, "Snapshots:delta_time"); + e->ti_nextSnapshot = 0; + parser_get_param_string(params, "Snapshots:basename", e->snapshotBaseName); + e->snapshotUnits = malloc(sizeof(struct UnitSystem)); + units_init(e->snapshotUnits, params, "Snapshots"); e->dt_min = parser_get_param_double(params, "TimeIntegration:dt_min"); e->dt_max = parser_get_param_double(params, "TimeIntegration:dt_max"); e->file_stats = NULL; e->verbose = verbose; e->count_step = 0; e->wallclock_time = 0.f; + e->physical_constants = physical_constants; + e->hydro_properties = hydro; + e->external_potential = potential; engine_rank = nodeID; /* Make the space link back to the engine. */ @@ -2515,12 +2613,8 @@ void engine_init(struct engine *e, struct space *s, engine_print_policy(e); /* Print information about the hydro scheme */ - if ((e->policy & engine_policy_hydro) == engine_policy_hydro) { - if (e->nodeID == 0) message("Hydrodynamic scheme: %s.", SPH_IMPLEMENTATION); - if (e->nodeID == 0) - message("Hydrodynamic kernel: %s with %.2f +/- %.2f neighbours.", - kernel_name, kernel_nwneigh, const_delta_nwneigh); - } + if (e->policy & engine_policy_hydro) + if (e->nodeID == 0) hydro_props_print(e->hydro_properties); /* Check we have sensible time bounds */ if (e->timeBegin >= e->timeEnd) @@ -2541,7 +2635,7 @@ void engine_init(struct engine *e, struct space *s, e->ti_current = 0; /* Fixed time-step case */ - if ((e->policy & engine_policy_fixdt) == engine_policy_fixdt) { + if (e->policy & engine_policy_fixdt) { e->dt_min = e->dt_max; /* Find timestep on the timeline */ @@ -2578,6 +2672,19 @@ void engine_init(struct engine *e, struct space *s, error("Maximal time-step size larger than the simulation run time t=%e", e->timeEnd - e->timeBegin); + /* Deal with outputs */ + if (e->deltaTimeSnapshot < 0.) + error("Time between snapshots (%e) must be positive.", + e->deltaTimeSnapshot); + + if (e->timeFirstSnapshot < e->timeBegin) + error( + "Time of first snapshot (%e) must be after the simulation start t=%e.", + e->timeFirstSnapshot, e->timeBegin); + + /* Find the time of the first output */ + engine_compute_next_snapshot_time(e); + /* Construct types for MPI communications */ #ifdef WITH_MPI part_create_mpi_types(); @@ -2650,8 +2757,8 @@ void engine_init(struct engine *e, struct space *s, e->runners[k].cpuid = k; e->runners[k].qid = k * nr_queues / e->nr_threads; } - // message( "runner %i on cpuid=%i with qid=%i." , e->runners[k].id , - // e->runners[k].cpuid , e->runners[k].qid ); + /* message( "runner %i on cpuid=%i with qid=%i." , e->runners[k].id , */ + /* e->runners[k].cpuid , e->runners[k].qid ); */ } /* Free the affinity stuff */ @@ -2691,3 +2798,33 @@ void engine_print_policy(struct engine *e) { fflush(stdout); #endif } + +/** + * @brief Computes the next time (on the time line) for a dump + * + * @param e The #engine. + */ +void engine_compute_next_snapshot_time(struct engine *e) { + + for (double time = e->timeFirstSnapshot; + time < e->timeEnd + e->deltaTimeSnapshot; time += e->deltaTimeSnapshot) { + + /* Output time on the integer timeline */ + e->ti_nextSnapshot = (time - e->timeBegin) / e->timeBase; + + if (e->ti_nextSnapshot > e->ti_current) break; + } + + /* Deal with last snapshot */ + if (e->ti_nextSnapshot >= max_nr_timesteps) { + e->ti_nextSnapshot = -1; + if (e->verbose) message("No further output time."); + } else { + + /* Be nice, talk... */ + const float next_snapshot_time = + e->ti_nextSnapshot * e->timeBase + e->timeBegin; + if (e->verbose) + message("Next output time set to t=%f.", next_snapshot_time); + } +} diff --git a/src/engine.h b/src/engine.h index 31527ad1246a4a40f675c1237384ab44dbd09bb2..15abc4b6393fec97582e2ec3c7551f5f64f9a0e4 100644 --- a/src/engine.h +++ b/src/engine.h @@ -1,6 +1,11 @@ /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * Angus Lepper (angus.lepper@ed.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -32,6 +37,7 @@ #include <stdio.h> /* Includes. */ +#include "hydro_properties.h" #include "lock.h" #include "proxy.h" #include "runner.h" @@ -40,6 +46,9 @@ #include "task.h" #include "parser.h" #include "partition.h" +#include "physical_constants.h" +#include "potentials.h" +#include "units.h" /* Some constants. */ enum engine_policy { @@ -124,6 +133,13 @@ struct engine { /* Time base */ double timeBase; + /* Snapshot information */ + double timeFirstSnapshot; + double deltaTimeSnapshot; + int ti_nextSnapshot; + char snapshotBaseName[200]; + struct UnitSystem *snapshotUnits; + /* File for statistics */ FILE *file_stats; @@ -164,13 +180,27 @@ struct engine { /* Are we talkative ? */ int verbose; + + /* Physical constants definition */ + const struct phys_const *physical_constants; + + /* Properties of the hydro scheme */ + const struct hydro_props *hydro_properties; + + /* Properties of external gravitational potential */ + const struct external_potential *external_potential; }; /* Function prototypes. */ void engine_barrier(struct engine *e, int tid); +void engine_compute_next_snapshot_time(struct engine *e); +void engine_dump_snapshot(struct engine *e); void engine_init(struct engine *e, struct space *s, const struct swift_params *params, int nr_nodes, int nodeID, - int policy, int verbose); + int nr_threads, int policy, int verbose, + const struct phys_const *physical_constants, + const struct hydro_props *hydro, + const struct external_potential *potential); void engine_launch(struct engine *e, int nr_runners, unsigned int mask, unsigned int submask); void engine_prepare(struct engine *e); diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h index 92a9f64c1f84a9e949f4c0e9485f892b5c808cdc..4dd8d5b7c7194ae072f90e7703907500c208b9b9 100644 --- a/src/gravity/Default/gravity.h +++ b/src/gravity/Default/gravity.h @@ -1,6 +1,7 @@ /******************************************************************************* * This file is part of SWIFT. * Coypright (c) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2016 Tom Theuns (tom.theuns@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -18,19 +19,50 @@ ******************************************************************************/ #include <float.h> +#include "potentials.h" /** - * @brief Computes the gravity time-step of a given particle + * @brief Computes the gravity time-step of a given particle due to an external + *potential. * - * @param gp Pointer to the g-particle data + * This function only branches towards the potential chosen by the user. * + * @param potential The properties of the external potential. + * @param phys_const The physical constants in internal units. + * @param gp Pointer to the g-particle data. */ +__attribute__((always_inline)) + INLINE static float gravity_compute_timestep_external( + const struct external_potential* potential, + const struct phys_const* const phys_const, + const struct gpart* const gp) { + + float dt = FLT_MAX; + +#ifdef EXTERNAL_POTENTIAL_POINTMASS + dt = + fminf(dt, external_gravity_pointmass_timestep(potential, phys_const, gp)); +#endif + + return dt; +} +/** + * @brief Computes the gravity time-step of a given particle due to self-gravity + * + * This function only branches towards the potential chosen by the user. + * + * @param phys_const The physical constants in internal units. + * @param gp Pointer to the g-particle data. + */ __attribute__((always_inline)) - INLINE static float gravity_compute_timestep(struct gpart* gp) { + INLINE static float gravity_compute_timestep_self( + const struct phys_const* const phys_const, + const struct gpart* const gp) { - /* Currently no limit is imposed */ - return FLT_MAX; + float dt = FLT_MAX; + + return dt; } /** @@ -71,6 +103,24 @@ __attribute__((always_inline)) __attribute__((always_inline)) INLINE static void gravity_end_force(struct gpart* gp) {} +/** + * @brief Computes the gravitational acceleration induced by external potentials + * + * This function only branches towards the potential chosen by the user. + * + * @param potential The properties of the external potential. + * @param phys_const The physical constants in internal units. + * @param gp The particle to act upon. + */ +__attribute__((always_inline)) INLINE static void external_gravity( + const struct external_potential* potential, + const struct phys_const* const phys_const, struct gpart* gp) { + +#ifdef EXTERNAL_POTENTIAL_POINTMASS + external_gravity_pointmass(potential, phys_const, gp); +#endif +} + /** * @brief Kick the additional variables * diff --git a/src/gravity/Default/gravity_debug.h b/src/gravity/Default/gravity_debug.h index 654745bfeb70dddba772af9e23797713376377a7..72ae5fd55f399448d78c6f2d1533c7f007f16dd1 100644 --- a/src/gravity/Default/gravity_debug.h +++ b/src/gravity/Default/gravity_debug.h @@ -24,5 +24,6 @@ __attribute__((always_inline)) "v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " "mass=%.3e t_begin=%d, t_end=%d\n", p->x[0], p->x[1], p->x[2], p->v_full[0], p->v_full[1], p->v_full[2], - p->a_grav[0], p->a_grav[1], p->a_grav[2], p->mass, p->ti_begin, p->ti_end); + p->a_grav[0], p->a_grav[1], p->a_grav[2], p->mass, p->ti_begin, + p->ti_end); } diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h index 0dfdb82e4ec11c9153f77439027d7e4451ded7f4..a54fc1d18e7e84a1ca7a7e7913117ed207549271 100644 --- a/src/gravity/Default/gravity_part.h +++ b/src/gravity/Default/gravity_part.h @@ -25,6 +25,9 @@ struct gpart { /* Particle position. */ double x[3]; + /* Offset between current position and position at last tree rebuild. */ + float x_diff[3]; + /* Particle velocity. */ float v_full[3]; @@ -40,6 +43,10 @@ struct gpart { /* Particle time of end of time-step. */ int ti_end; + /* /\* current time of x, and of v_full *\/ */ + /* float tx; */ + /* float tv; */ + /* Anonymous union for id/part. */ union { diff --git a/src/hydro/Default/hydro.h b/src/hydro/Default/hydro.h index 03953b07ad4e172d96b6e3382814e036a538e2bd..b4ed2daf385691958d5849b0ac2d651ac6f98d34 100644 --- a/src/hydro/Default/hydro.h +++ b/src/hydro/Default/hydro.h @@ -27,10 +27,14 @@ * */ __attribute__((always_inline)) INLINE static float hydro_compute_timestep( - struct part* p, struct xpart* xp) { + struct part* p, struct xpart* xp, + const struct hydro_props* hydro_properties) { + + const float CFL_condition = hydro_properties->CFL_condition; /* CFL condition */ - const float dt_cfl = 2.f * const_cfl * kernel_gamma * p->h / p->force.v_sig; + const float dt_cfl = + 2.f * kernel_gamma * CFL_condition * p->h / p->force.v_sig; /* Limit change in u */ const float dt_u_change = @@ -93,7 +97,7 @@ __attribute__((always_inline)) /* Final operation on the density (add self-contribution). */ p->rho += p->mass * kernel_root; - p->rho_dh -= 3.0f * p->mass * kernel_root * kernel_igamma; + p->rho_dh -= 3.0f * p->mass * kernel_root; p->density.wcount += kernel_root; /* Finish the calculation by inserting the missing h-factors */ @@ -101,6 +105,11 @@ __attribute__((always_inline)) p->rho_dh *= ih4; p->density.wcount *= (4.0f / 3.0f * M_PI * kernel_gamma3); p->density.wcount_dh *= ih * (4.0f / 3.0f * M_PI * kernel_gamma4); + + const float irho = 1.f / p->rho; + + /* Compute the derivative term */ + p->rho_dh = 1.f / (1.f + 0.33333333f * p->h * p->rho_dh * irho); } /** diff --git a/src/hydro/Default/hydro_io.h b/src/hydro/Default/hydro_io.h index 0e9ad46ddc1d4e8c8d3ffdbf3e81262ec49a7092..09a7f4c7caa6a2a9fcf77c8d83b0ffcab2ef8b54 100644 --- a/src/hydro/Default/hydro_io.h +++ b/src/hydro/Default/hydro_io.h @@ -104,10 +104,10 @@ void writeSPHflavour(hid_t h_grpsph) { /* Kernel function description */ writeAttribute_s(h_grpsph, "Kernel", kernel_name); - writeAttribute_f(h_grpsph, "Kernel eta", const_eta_kernel); - writeAttribute_f(h_grpsph, "Weighted N_ngb", kernel_nwneigh); - writeAttribute_f(h_grpsph, "Delta N_ngb", const_delta_nwneigh); - writeAttribute_f(h_grpsph, "Hydro gamma", const_hydro_gamma); + // writeAttribute_f(h_grpsph, "Kernel eta", const_eta_kernel); + // writeAttribute_f(h_grpsph, "Weighted N_ngb", kernel_nwneigh); + // writeAttribute_f(h_grpsph, "Delta N_ngb", const_delta_nwneigh); + // writeAttribute_f(h_grpsph, "Hydro gamma", const_hydro_gamma); /* Viscosity and thermal conduction */ writeAttribute_s(h_grpsph, "Thermal Conductivity Model", @@ -123,11 +123,11 @@ void writeSPHflavour(hid_t h_grpsph) { writeAttribute_f(h_grpsph, "Viscosity decay length", const_viscosity_length); /* Time integration properties */ - writeAttribute_f(h_grpsph, "CFL parameter", const_cfl); - writeAttribute_f(h_grpsph, "Maximal ln(Delta h) change over dt", - const_ln_max_h_change); - writeAttribute_f(h_grpsph, "Maximal Delta h change over dt", - exp(const_ln_max_h_change)); + // writeAttribute_f(h_grpsph, "CFL parameter", const_cfl); + // writeAttribute_f(h_grpsph, "Maximal ln(Delta h) change over dt", + // const_ln_max_h_change); + // writeAttribute_f(h_grpsph, "Maximal Delta h change over dt", + // exp(const_ln_max_h_change)); writeAttribute_f(h_grpsph, "Maximal Delta u change over dt", const_max_u_change); } diff --git a/src/hydro/Default/hydro_part.h b/src/hydro/Default/hydro_part.h index 60453d0c7995f7af2a3166502a24aa590873a043..a4096d5c30d525307d5327559ef6b007c6931486 100644 --- a/src/hydro/Default/hydro_part.h +++ b/src/hydro/Default/hydro_part.h @@ -20,8 +20,8 @@ /* Extra particle data not needed during the SPH loops over neighbours. */ struct xpart { - /* Old position, at last tree rebuild. */ - double x_old[3]; + /* Offset between current position and position at last tree rebuild. */ + float x_diff[3]; /* Velocity at the last full step. */ float v_full[3]; diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h index 22c5734ed5762400285521b30f9aa60795c45325..9fa1aae9dbb13ac3aa7e2271f30ef57e91ae85ae 100644 --- a/src/hydro/Gadget2/hydro.h +++ b/src/hydro/Gadget2/hydro.h @@ -25,20 +25,16 @@ * */ __attribute__((always_inline)) INLINE static float hydro_compute_timestep( - struct part* p, struct xpart* xp) { + struct part* p, struct xpart* xp, + const struct hydro_props* hydro_properties) { - /* Acceleration */ - float ac = - sqrtf(p->a_hydro[0] * p->a_hydro[0] + p->a_hydro[1] * p->a_hydro[1] + - p->a_hydro[2] * p->a_hydro[2]); - ac = fmaxf(ac, 1e-30); - - const float dt_accel = sqrtf(2.f); // MATTHIEU + const float CFL_condition = hydro_properties->CFL_condition; /* CFL condition */ - const float dt_cfl = 2.f * const_cfl * kernel_gamma * p->h / p->force.v_sig; + const float dt_cfl = + 2.f * kernel_gamma * CFL_condition * p->h / p->force.v_sig; - return fminf(dt_cfl, dt_accel); + return dt_cfl; } /** @@ -94,7 +90,7 @@ __attribute__((always_inline)) /* Final operation on the density (add self-contribution). */ p->rho += p->mass * kernel_root; - p->rho_dh -= 3.0f * p->mass * kernel_root * kernel_igamma; + p->rho_dh -= 3.0f * p->mass * kernel_root; p->density.wcount += kernel_root; /* Finish the calculation by inserting the missing h-factors */ diff --git a/src/hydro/Gadget2/hydro_debug.h b/src/hydro/Gadget2/hydro_debug.h index a4d1f7dd4397ebfc850b582e1ca81fc0d4edb76a..4f00d8e020f9eaac30fe7f272ca1248d7e4eec58 100644 --- a/src/hydro/Gadget2/hydro_debug.h +++ b/src/hydro/Gadget2/hydro_debug.h @@ -23,14 +23,15 @@ __attribute__((always_inline)) "x=[%.3e,%.3e,%.3e], " "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " "h=%.3e, " - "wcount=%d, wcount_dh=%.3e, m=%.3e, dh_drho=%.3e, rho=%.3e, P=%.3e, S=%.3e, " + "wcount=%d, wcount_dh=%.3e, m=%.3e, dh_drho=%.3e, rho=%.3e, P=%.3e, " + "S=%.3e, " "dS/dt=%.3e, c=%.3e\n" "divV=%.3e, curlV=%.3e, rotV=[%.3e,%.3e,%.3e] \n " "v_sig=%e dh/dt=%.3e t_begin=%d, t_end=%d\n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], - p->h, (int)p->density.wcount, p->density.wcount_dh, p->mass, p->rho_dh, p->rho, - p->force.pressure, p->entropy, p->entropy_dt, p->force.soundspeed, + p->h, (int)p->density.wcount, p->density.wcount_dh, p->mass, p->rho_dh, + p->rho, p->force.pressure, p->entropy, p->entropy_dt, p->force.soundspeed, p->div_v, p->force.curl_v, p->density.rot_v[0], p->density.rot_v[1], p->density.rot_v[2], p->force.v_sig, p->h_dt, p->ti_begin, p->ti_end); } diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index d988c678affcf4ca722a965a7e52a7c120b4a924..8fcae293280e55bb838edf3c243f4e322914216f 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -65,7 +65,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( /* Compute contribution to the density */ pi->rho += mj * wi; - pi->rho_dh -= mj * kernel_igamma * (3.f * wi + ui * wi_dx); + pi->rho_dh -= mj * (3.f * wi + ui * wi_dx); /* Compute contribution to the number of neighbours */ pi->density.wcount += wi; @@ -78,7 +78,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( /* Compute contribution to the density */ pj->rho += mi * wj; - pj->rho_dh -= mi * kernel_igamma * (3.f * wj + uj * wj_dx); + pj->rho_dh -= mi * (3.f * wj + uj * wj_dx); /* Compute contribution to the number of neighbours */ pj->density.wcount += wj; @@ -134,7 +134,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( /* Compute contribution to the density */ pi->rho += mj * wi; - pi->rho_dh -= mj * kernel_igamma * (3.f * wi + u * wi_dx); + pi->rho_dh -= mj * (3.f * wi + u * wi_dx); /* Compute contribution to the number of neighbours */ pi->density.wcount += wi; diff --git a/src/hydro/Gadget2/hydro_io.h b/src/hydro/Gadget2/hydro_io.h index c1c59dfa4980a2843e7e13bee4c964c9b254cae6..41f3348db10172c1d0d5813debb50365252a413b 100644 --- a/src/hydro/Gadget2/hydro_io.h +++ b/src/hydro/Gadget2/hydro_io.h @@ -83,7 +83,7 @@ __attribute__((always_inline)) INLINE static void hydro_write_particles( writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "SmoothingLength", FLOAT, N, 1, parts, N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "InternalEnergy", + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Entropy", FLOAT, N, 1, parts, N_total, mpi_rank, offset, entropy, us, UNIT_CONV_ENTROPY_PER_UNIT_MASS); writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", @@ -104,10 +104,10 @@ void writeSPHflavour(hid_t h_grpsph) { /* Kernel function description */ writeAttribute_s(h_grpsph, "Kernel", kernel_name); - writeAttribute_f(h_grpsph, "Kernel eta", const_eta_kernel); - writeAttribute_f(h_grpsph, "Weighted N_ngb", kernel_nwneigh); - writeAttribute_f(h_grpsph, "Delta N_ngb", const_delta_nwneigh); - writeAttribute_f(h_grpsph, "Hydro gamma", const_hydro_gamma); + // writeAttribute_f(h_grpsph, "Kernel eta", const_eta_kernel); + // writeAttribute_f(h_grpsph, "Weighted N_ngb", kernel_nwneigh); + // writeAttribute_f(h_grpsph, "Delta N_ngb", const_delta_nwneigh); + // writeAttribute_f(h_grpsph, "Hydro gamma", const_hydro_gamma); /* Viscosity and thermal conduction */ writeAttribute_s(h_grpsph, "Thermal Conductivity Model", @@ -118,9 +118,9 @@ void writeSPHflavour(hid_t h_grpsph) { writeAttribute_f(h_grpsph, "Viscosity beta", 3.f); /* Time integration properties */ - writeAttribute_f(h_grpsph, "CFL parameter", const_cfl); - writeAttribute_f(h_grpsph, "Maximal ln(Delta h) change over dt", - const_ln_max_h_change); - writeAttribute_f(h_grpsph, "Maximal Delta h change over dt", - exp(const_ln_max_h_change)); + // writeAttribute_f(h_grpsph, "CFL parameter", const_cfl); + // writeAttribute_f(h_grpsph, "Maximal ln(Delta h) change over dt", + // const_ln_max_h_change); + // writeAttribute_f(h_grpsph, "Maximal Delta h change over dt", + // exp(const_ln_max_h_change)); } diff --git a/src/hydro/Gadget2/hydro_part.h b/src/hydro/Gadget2/hydro_part.h index 05754d07dd70bed071e99c86b95eb17eb2194012..863bdbefde4543c1f1f6b0415dc6c229f3d58012 100644 --- a/src/hydro/Gadget2/hydro_part.h +++ b/src/hydro/Gadget2/hydro_part.h @@ -20,8 +20,8 @@ /* Extra particle data not needed during the SPH loops over neighbours. */ struct xpart { - /* Old position, at last tree rebuild. */ - double x_old[3]; + /* Offset between current position and position at last tree rebuild. */ + float x_diff[3]; /* Velocity at the last full step. */ float v_full[3]; diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h index 7db3c275ce7e3389610e8297c287cbd5301c6c64..01957065cf5b1d89bb839449ac60b9ec965c2c47 100644 --- a/src/hydro/Minimal/hydro.h +++ b/src/hydro/Minimal/hydro.h @@ -27,13 +27,18 @@ * * @param p Pointer to the particle data * @param xp Pointer to the extended particle data + * @param hydro_properties The SPH parameters * */ __attribute__((always_inline)) INLINE static float hydro_compute_timestep( - struct part* p, struct xpart* xp) { + struct part* p, struct xpart* xp, + const struct hydro_props* hydro_properties) { + + const float CFL_condition = hydro_properties->CFL_condition; /* CFL condition */ - const float dt_cfl = 2.f * const_cfl * kernel_gamma * p->h / p->force.v_sig; + const float dt_cfl = + 2.f * kernel_gamma * CFL_condition * p->h / p->force.v_sig; return dt_cfl; } @@ -94,7 +99,7 @@ __attribute__((always_inline)) /* Final operation on the density (add self-contribution). */ p->rho += p->mass * kernel_root; - p->rho_dh -= 3.0f * p->mass * kernel_root * kernel_igamma; + p->rho_dh -= 3.0f * p->mass * kernel_root; p->density.wcount += kernel_root; /* Finish the calculation by inserting the missing h-factors */ diff --git a/src/hydro/Minimal/hydro_io.h b/src/hydro/Minimal/hydro_io.h index afe5de83f423e43b4d2480cca1ac3e84d6c549de..43926d526b47fa8f1b92bb862edabefb7417de3c 100644 --- a/src/hydro/Minimal/hydro_io.h +++ b/src/hydro/Minimal/hydro_io.h @@ -104,9 +104,9 @@ void writeSPHflavour(hid_t h_grpsph) { /* Kernel function description */ writeAttribute_s(h_grpsph, "Kernel", kernel_name); - writeAttribute_f(h_grpsph, "Kernel eta", const_eta_kernel); - writeAttribute_f(h_grpsph, "Weighted N_ngb", kernel_nwneigh); - writeAttribute_f(h_grpsph, "Delta N_ngb", const_delta_nwneigh); + // writeAttribute_f(h_grpsph, "Kernel eta", const_eta_kernel); + // writeAttribute_f(h_grpsph, "Weighted N_ngb", kernel_nwneigh); + // writeAttribute_f(h_grpsph, "Delta N_ngb", const_delta_nwneigh); writeAttribute_f(h_grpsph, "Hydro gamma", const_hydro_gamma); /* Viscosity and thermal conduction */ @@ -115,11 +115,11 @@ void writeSPHflavour(hid_t h_grpsph) { writeAttribute_s(h_grpsph, "Viscosity Model", "No model"); /* Time integration properties */ - writeAttribute_f(h_grpsph, "CFL parameter", const_cfl); - writeAttribute_f(h_grpsph, "Maximal ln(Delta h) change over dt", - const_ln_max_h_change); - writeAttribute_f(h_grpsph, "Maximal Delta h change over dt", - exp(const_ln_max_h_change)); + // writeAttribute_f(h_grpsph, "CFL parameter", const_cfl); + // writeAttribute_f(h_grpsph, "Maximal ln(Delta h) change over dt", + // const_ln_max_h_change); + // writeAttribute_f(h_grpsph, "Maximal Delta h change over dt", + // exp(const_ln_max_h_change)); writeAttribute_f(h_grpsph, "Maximal Delta u change over dt", const_max_u_change); } diff --git a/src/hydro/Minimal/hydro_part.h b/src/hydro/Minimal/hydro_part.h index 173397ef2c72ee99f4d10742f3645afd1e706218..2580ef2a94eabda5a34de9d4e4b48227bc0e5146 100644 --- a/src/hydro/Minimal/hydro_part.h +++ b/src/hydro/Minimal/hydro_part.h @@ -26,7 +26,8 @@ */ struct xpart { - double x_old[3]; /*!< Old position, at last tree rebuild. */ + float x_diff[3]; /*!< Offset between current position and position at last + tree rebuild. */ float v_full[3]; /*!< Velocity at the last full step. */ diff --git a/src/hydro_properties.c b/src/hydro_properties.c new file mode 100644 index 0000000000000000000000000000000000000000..3c270b2702edb503c60cdb3cf1029a014d437124 --- /dev/null +++ b/src/hydro_properties.c @@ -0,0 +1,63 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* This object's header. */ +#include "hydro_properties.h" + +/* Standard headers */ +#include <float.h> +#include <math.h> + +/* Local headers. */ +#include "error.h" +#include "kernel_hydro.h" +#include "hydro.h" + +void hydro_props_init(struct hydro_props *p, + const struct swift_params *params) { + + /* Kernel properties */ + p->eta_neighbours = parser_get_param_float(params, "SPH:resolution_eta"); + const float eta3 = p->eta_neighbours * p->eta_neighbours * p->eta_neighbours; + p->target_neighbours = 4.0 * M_PI * kernel_gamma3 * eta3 / 3.0; + p->delta_neighbours = parser_get_param_float(params, "SPH:delta_neighbours"); + + /* Ghost stuff */ + p->max_smoothing_iterations = + parser_get_param_int(params, "SPH:max_ghost_iterations"); + + /* Time integration properties */ + p->CFL_condition = parser_get_param_float(params, "SPH:CFL_condition"); + const float max_volume_change = + parser_get_param_float(params, "SPH:max_volume_change"); + p->log_max_h_change = logf(powf(max_volume_change, 0.33333333333f)); +} + +void hydro_props_print(const struct hydro_props *p) { + + message("Hydrodynamic scheme: %s.", SPH_IMPLEMENTATION); + message("Hydrodynamic kernel: %s with %.2f +/- %.2f neighbours (eta=%f).", + kernel_name, p->target_neighbours, p->delta_neighbours, + p->eta_neighbours); + message("Hydrodynamic integration: CFL parmeter: %.4f.", p->CFL_condition); + message( + "Hydrodynamic integration: Max change of volume: %.2f" + "(max |dlog(h)/dt|=%f).", + powf(expf(p->log_max_h_change), 3.f), p->log_max_h_change); +} diff --git a/src/hydro_properties.h b/src/hydro_properties.h new file mode 100644 index 0000000000000000000000000000000000000000..c84252a1dc12f0e5591a7e512fdf4e246f4ab048 --- /dev/null +++ b/src/hydro_properties.h @@ -0,0 +1,56 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#ifndef SWIFT_HYDRO_PROPERTIES +#define SWIFT_HYDRO_PROPERTIES + +/* Config parameters. */ +#include "../config.h" + +/* Local includes. */ +#include "const.h" +#include "parser.h" + +/** + * @brief Contains all the constants and parameters of the hydro scheme + */ +struct hydro_props { + + /* Kernel properties */ + float eta_neighbours; + float target_neighbours; + float delta_neighbours; + + /* Kernel properties */ + int max_smoothing_iterations; + + /* Time integration properties */ + float CFL_condition; + float log_max_h_change; + +/* Viscosity parameters */ +#ifdef GADGET_SPH + float const_viscosity_alpha; +#endif +}; + +void hydro_props_print(const struct hydro_props *p); +void hydro_props_init(struct hydro_props *p, const struct swift_params *params); + +#endif /* SWIFT_HYDRO_PROPERTIES */ diff --git a/src/kernel_gravity.h b/src/kernel_gravity.h index 7fd4b061a7e94be01a11b06ad23d9113f579ebb8..cca449efa6dde73eac428d1e20fa5c28156360fa 100644 --- a/src/kernel_gravity.h +++ b/src/kernel_gravity.h @@ -25,9 +25,12 @@ #include "inline.h" #include "vector.h" -/* Gravity kernel stuff - * ----------------------------------------------------------------------------------------------- - */ +#define const_iepsilon (1. / const_epsilon) +#define const_iepsilon2 (const_iepsilon *const_iepsilon) +#define const_iepsilon3 (const_iepsilon2 *const_iepsilon) +#define const_iepsilon4 (const_iepsilon2 *const_iepsilon2) +#define const_iepsilon5 (const_iepsilon3 *const_iepsilon2) +#define const_iepsilon6 (const_iepsilon3 *const_iepsilon3) /* The gravity kernel is defined as a degree 6 polynomial in the distance r. The resulting value should be post-multiplied with r^-3, resulting diff --git a/src/kernel_hydro.h b/src/kernel_hydro.h index 66f51391fb9504ba30363b1980aaad1fcc9174b7..a9e979151fd466ce4538895f52da24af1e48c81b 100644 --- a/src/kernel_hydro.h +++ b/src/kernel_hydro.h @@ -20,6 +20,8 @@ #ifndef SWIFT_KERNEL_HYDRO_H #define SWIFT_KERNEL_HYDRO_H +#include <math.h> + /* Includes. */ #include "const.h" #include "error.h" @@ -143,12 +145,6 @@ static const float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] #define kernel_igamma3 kernel_igamma2 *kernel_igamma #define kernel_igamma4 kernel_igamma3 *kernel_igamma -/* Some powers of eta */ -#define kernel_eta3 const_eta_kernel *const_eta_kernel *const_eta_kernel - -/* The number of neighbours (i.e. N_ngb) */ -#define kernel_nwneigh 4.0 * M_PI *kernel_gamma3 *kernel_eta3 / 3.0 - /* Kernel self contribution (i.e. W(0,h)) */ #define kernel_root \ (kernel_coeffs[kernel_degree]) * kernel_constant *kernel_igamma3 @@ -212,6 +208,61 @@ __attribute__((always_inline)) INLINE static void kernel_eval(float u, *W = w * (float)kernel_constant * (float)kernel_igamma3; } +#ifdef VECTORIZE + +static const vector kernel_igamma_vec = FILL_VEC((float)kernel_igamma); + +static const vector kernel_ivals_vec = FILL_VEC((float)kernel_ivals); + +static const vector kernel_constant_vec = FILL_VEC((float)kernel_constant); + +static const vector kernel_igamma3_vec = FILL_VEC((float)kernel_igamma3); + +static const vector kernel_igamma4_vec = FILL_VEC((float)kernel_igamma4); + +/** + * @brief Computes the kernel function and its derivative (Vectorised version). + * + * Return 0 if $u > \\gamma = H/h$ + * + * @param u The ratio of the distance to the smoothing length $u = x/h$. + * @param w (return) The value of the kernel function $W(x,h)$. + * @param dw_dx (return) The norm of the gradient of $|\\nabla W(x,h)|$. + */ +__attribute__((always_inline)) + INLINE static void kernel_deval_vec(vector *u, vector *w, vector *dw_dx) { + + /* Go to the range [0,1[ from [0,H[ */ + vector x; + x.v = u->v * kernel_igamma_vec.v; + + /* Load x and get the interval id. */ + vector ind; + ind.m = vec_ftoi(vec_fmin(x.v * kernel_ivals_vec.v, kernel_ivals_vec.v)); + + /* load the coefficients. */ + vector c[kernel_degree + 1]; + for (int k = 0; k < VEC_SIZE; k++) + for (int j = 0; j < kernel_degree + 1; j++) + c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j]; + + /* Init the iteration for Horner's scheme. */ + w->v = (c[0].v * x.v) + c[1].v; + dw_dx->v = c[0].v; + + /* And we're off! */ + for (int k = 2; k <= kernel_degree; k++) { + dw_dx->v = (dw_dx->v * x.v) + w->v; + w->v = (x.v * w->v) + c[k].v; + } + + /* Return everything */ + w->v = w->v * kernel_constant_vec.v * kernel_igamma3_vec.v; + dw_dx->v = dw_dx->v * kernel_constant_vec.v * kernel_igamma4_vec.v; +} + +#endif + /* Some cross-check functions */ void hydro_kernel_dump(int N); diff --git a/src/parallel_io.c b/src/parallel_io.c index d1c739b59021f38b2259f82dd06c547e0e7c147d..4579be8f04ae687140413383e061988c9783b570 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -509,8 +509,12 @@ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, *its XMF descriptor * * @param e The engine containing all the system. - * @param us The UnitSystem used for the conversion of units - *in the output + * @param baseName The common part of the snapshot file name. + * @param us The UnitSystem used for the conversion of units in the output. + * @param mpi_rank The MPI rank of this node. + * @param mpi_size The number of MPI ranks. + * @param comm The MPI communicator. + * @param info The MPI information object * * Creates an HDF5 output file and writes the particles *contained @@ -522,9 +526,9 @@ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, * Calls #error() if an error occurs. * */ -void write_output_parallel(struct engine* e, struct UnitSystem* us, - int mpi_rank, int mpi_size, MPI_Comm comm, - MPI_Info info) { +void write_output_parallel(struct engine* e, const char* baseName, + struct UnitSystem* us, int mpi_rank, int mpi_size, + MPI_Comm comm, MPI_Info info) { hid_t h_file = 0, h_grp = 0, h_grpsph = 0; const size_t Ngas = e->s->nr_parts; const size_t Ntot = e->s->nr_gparts; @@ -536,22 +540,19 @@ void write_output_parallel(struct engine* e, struct UnitSystem* us, static int outputCount = 0; FILE* xmfFile = 0; - /* Number of particles of each type */ - // const size_t Ndm = Ntot - Ngas; - - /* MATTHIEU: Temporary fix to preserve master */ + /* Number of unassociated gparts */ const size_t Ndm = Ntot > 0 ? Ntot - Ngas : 0; - /* MATTHIEU: End temporary fix */ /* File name */ char fileName[FILENAME_BUFFER_SIZE]; - snprintf(fileName, FILENAME_BUFFER_SIZE, "output_%03i.hdf5", outputCount); + snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%03i.hdf5", baseName, + outputCount); /* First time, we need to create the XMF file */ - if (outputCount == 0 && mpi_rank == 0) createXMFfile(); + if (outputCount == 0 && mpi_rank == 0) createXMFfile(baseName); /* Prepare the XMF file for the new entry */ - if (mpi_rank == 0) xmfFile = prepareXMFfile(); + if (mpi_rank == 0) xmfFile = prepareXMFfile(baseName); /* Open HDF5 file */ hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS); diff --git a/src/parallel_io.h b/src/parallel_io.h index f3691cb29b8d5e7f17382f1f81ba230c3898a929..970ad8c41dcbc2df3a85b178f836e16926147788 100644 --- a/src/parallel_io.h +++ b/src/parallel_io.h @@ -36,9 +36,9 @@ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info, int dry_run); -void write_output_parallel(struct engine* e, struct UnitSystem* us, - int mpi_rank, int mpi_size, MPI_Comm comm, - MPI_Info info); +void write_output_parallel(struct engine* e, const char* baseName, + struct UnitSystem* us, int mpi_rank, int mpi_size, + MPI_Comm comm, MPI_Info info); #endif diff --git a/src/parser.c b/src/parser.c index 0f767bc434ef596df403fb12d3ae0f77ea546df3..7329d15e872ec766556e493063bb4f410e99dcdf 100644 --- a/src/parser.c +++ b/src/parser.c @@ -47,7 +47,10 @@ static void parse_line(char *line, struct swift_params *params); static void parse_value(char *line, struct swift_params *params); static void parse_section_param(char *line, int *isFirstParam, char *sectionName, struct swift_params *params); - +static void find_duplicate_params(const struct swift_params *params, + const char *param_name); +static void find_duplicate_section(const struct swift_params *params, + const char *section_name); static int lineNumber = 0; /** @@ -65,7 +68,8 @@ void parser_read_file(const char *file_name, struct swift_params *params) { char line[PARSER_MAX_LINE_SIZE]; /* Initialise parameter count. */ - params->count = 0; + params->paramCount = 0; + params->sectionCount = 0; /* Check if parameter file exits. */ if (file == NULL) { @@ -142,12 +146,48 @@ static int is_empty(const char *str) { return retParam; } +/** + * @brief Look for duplicate parameters. + * + * @param params Structure that holds the parameters + * @param param_name Name of parameter to be searched for + */ + +static void find_duplicate_params(const struct swift_params *params, + const char *param_name) { + for (int i = 0; i < params->paramCount; i++) { + /*strcmp returns 0 if both strings are the same.*/ + if (!strcmp(param_name, params->data[i].name)) { + error("Invalid line:%d '%s', parameter is a duplicate.", lineNumber, + param_name); + } + } +} + +/** + * @brief Look for duplicate sections. + * + * @param params Structure that holds the parameters + * @param section_name Name of section to be searched for + */ + +static void find_duplicate_section(const struct swift_params *params, + const char *section_name) { + for (int i = 0; i < params->sectionCount; i++) { + /*strcmp returns 0 if both strings are the same.*/ + if (!strcmp(section_name, params->section[i].name)) { + error("Invalid line:%d '%s', section is a duplicate.", lineNumber, + section_name); + } + } +} /** * @brief Parses a line from a file and stores any parameters in a structure. * * @param line Line to be parsed. * @param params Structure to be populated from file. */ + static void parse_line(char *line, struct swift_params *params) { /* Parse line if it doesn't begin with a comment. */ if (*line != PARSER_COMMENT_CHAR) { @@ -193,6 +233,7 @@ static void parse_value(char *line, struct swift_params *params) { name. */ static int isFirstParam = 1; char tmpStr[PARSER_MAX_LINE_SIZE]; + char tmpSectionName[PARSER_MAX_LINE_SIZE]; char *token; @@ -223,15 +264,36 @@ static void parse_value(char *line, struct swift_params *params) { /* If second token is NULL then the line must be a section heading. */ if (token == NULL) { - strcat(tmpStr, PARSER_VALUE_STRING); - strcpy(section, tmpStr); + strcpy(tmpSectionName, tmpStr); + strcat(tmpSectionName, PARSER_VALUE_STRING); + + /* Check for duplicate section name. */ + find_duplicate_section(params, tmpSectionName); + + /* Check for duplicate standalone parameter name used as a section name. + */ + find_duplicate_params(params, tmpStr); + + strcpy(section, tmpSectionName); + strcpy(params->section[params->sectionCount++].name, tmpSectionName); inSection = 1; isFirstParam = 1; } else { + /* Create string with standalone parameter name appended with ":" to aid + * duplicate search as section names are stored with ":" at the end.*/ + strcpy(tmpSectionName, tmpStr); + strcat(tmpSectionName, PARSER_VALUE_STRING); + + /* Check for duplicate parameter name. */ + find_duplicate_params(params, tmpStr); + + /* Check for duplicate section name used as standalone parameter name. */ + find_duplicate_section(params, tmpSectionName); + /* Must be a standalone parameter so no need to prefix name with a * section. */ - strcpy(params->data[params->count].name, tmpStr); - strcpy(params->data[params->count++].value, token); + strcpy(params->data[params->paramCount].name, tmpStr); + strcpy(params->data[params->paramCount++].value, token); inSection = 0; isFirstParam = 1; } @@ -278,8 +340,12 @@ static void parse_section_param(char *line, int *isFirstParam, * copy it into the parameter structure. */ strcpy(paramName, sectionName); strcat(paramName, tmpStr); - strcpy(params->data[params->count].name, paramName); - strcpy(params->data[params->count++].value, token); + + /* Check for duplicate parameter name. */ + find_duplicate_params(params, paramName); + + strcpy(params->data[params->paramCount].name, paramName); + strcpy(params->data[params->paramCount++].value, token); } /** @@ -294,7 +360,7 @@ int parser_get_param_int(const struct swift_params *params, const char *name) { char str[PARSER_MAX_LINE_SIZE]; int retParam = 0; - for (int i = 0; i < params->count; i++) { + for (int i = 0; i < params->paramCount; i++) { /*strcmp returns 0 if both strings are the same.*/ if (!strcmp(name, params->data[i].name)) { /* Check that exactly one number is parsed. */ @@ -326,7 +392,7 @@ char parser_get_param_char(const struct swift_params *params, char str[PARSER_MAX_LINE_SIZE]; char retParam = 0; - for (int i = 0; i < params->count; i++) { + for (int i = 0; i < params->paramCount; i++) { /*strcmp returns 0 if both strings are the same.*/ if (!strcmp(name, params->data[i].name)) { /* Check that exactly one number is parsed. */ @@ -358,7 +424,7 @@ float parser_get_param_float(const struct swift_params *params, char str[PARSER_MAX_LINE_SIZE]; float retParam = 0.f; - for (int i = 0; i < params->count; i++) { + for (int i = 0; i < params->paramCount; i++) { /*strcmp returns 0 if both strings are the same.*/ if (!strcmp(name, params->data[i].name)) { /* Check that exactly one number is parsed. */ @@ -390,7 +456,7 @@ double parser_get_param_double(const struct swift_params *params, char str[PARSER_MAX_LINE_SIZE]; double retParam = 0.; - for (int i = 0; i < params->count; i++) { + for (int i = 0; i < params->paramCount; i++) { /*strcmp returns 0 if both strings are the same.*/ if (!strcmp(name, params->data[i].name)) { /* Check that exactly one number is parsed. */ @@ -417,7 +483,7 @@ double parser_get_param_double(const struct swift_params *params, */ void parser_get_param_string(const struct swift_params *params, const char *name, char *retParam) { - for (int i = 0; i < params->count; i++) { + for (int i = 0; i < params->paramCount; i++) { /*strcmp returns 0 if both strings are the same.*/ if (!strcmp(name, params->data[i].name)) { strcpy(retParam, params->data[i].value); @@ -438,7 +504,7 @@ void parser_print_params(const struct swift_params *params) { printf("| SWIFT Parameter File |\n"); printf("--------------------------\n"); - for (int i = 0; i < params->count; i++) { + for (int i = 0; i < params->paramCount; i++) { printf("Parameter name: %s\n", params->data[i].name); printf("Parameter value: %s\n", params->data[i].value); } @@ -461,7 +527,7 @@ void parser_write_params_to_file(const struct swift_params *params, /* Start of file identifier in YAML. */ fprintf(file, "%s\n", PARSER_START_OF_FILE); - for (int i = 0; i < params->count; i++) { + for (int i = 0; i < params->paramCount; i++) { /* Check that the parameter name contains a section name. */ if (strchr(params->data[i].name, PARSER_VALUE_CHAR)) { /* Copy the parameter name into a temporary string and find the section @@ -478,7 +544,7 @@ void parser_write_params_to_file(const struct swift_params *params, /* Remove white space from parameter name and write it to the file. */ token = strtok(NULL, " #\n"); - fprintf(file, "\t%s%c %s\n", token, PARSER_VALUE_CHAR, + fprintf(file, " %s%c %s\n", token, PARSER_VALUE_CHAR, params->data[i].value); } else { fprintf(file, "\n%s%c %s\n", params->data[i].name, PARSER_VALUE_CHAR, diff --git a/src/parser.h b/src/parser.h index 7b2088ae12cdd5136a96baeabd01dd80255c8a3b..cdbdc477ae8c90a504919618c84d03622099df01 100644 --- a/src/parser.h +++ b/src/parser.h @@ -32,10 +32,16 @@ struct parameter { char value[PARSER_MAX_LINE_SIZE]; }; +struct section { + char name[PARSER_MAX_LINE_SIZE]; +}; + /* The array of parameters read from a file */ struct swift_params { + struct section section[PARSER_MAX_NO_OF_PARAMS]; struct parameter data[PARSER_MAX_NO_OF_PARAMS]; - int count; + int sectionCount; + int paramCount; }; /* Public API. */ diff --git a/src/partition.c b/src/partition.c index e559273fac96c9ecb860126d2a0a05601861c343..70249a679edfe72bda97eddfc918a8526659d995 100644 --- a/src/partition.c +++ b/src/partition.c @@ -242,17 +242,12 @@ static void accumulate_counts(struct space *s, int *counts) { struct part *parts = s->parts; int *cdim = s->cdim; - double ih[3], dim[3]; - ih[0] = s->ih[0]; - ih[1] = s->ih[1]; - ih[2] = s->ih[2]; - dim[0] = s->dim[0]; - dim[1] = s->dim[1]; - dim[2] = s->dim[2]; + double ih[3] = {s->ih[0], s->ih[1], s->ih[2]}; + double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; bzero(counts, sizeof(int) * s->nr_cells); - for (int k = 0; k < s->nr_parts; k++) { + for (size_t k = 0; k < s->nr_parts; k++) { for (int j = 0; j < 3; j++) { if (parts[k].x[j] < 0.0) parts[k].x[j] += dim[j]; @@ -837,7 +832,7 @@ void partition_initial_partition(struct partition *initial_partition, dim[0] = s->dim[0]; dim[1] = s->dim[1]; dim[2] = s->dim[2]; - for (int k = 0; k < s->nr_parts; k++) { + for (size_t k = 0; k < s->nr_parts; k++) { for (int j = 0; j < 3; j++) { if (parts[k].x[j] < 0.0) parts[k].x[j] += dim[j]; @@ -1034,7 +1029,10 @@ static int check_complete(struct space *s, int verbose, int nregions) { int failed = 0; for (int i = 0; i < nregions; i++) present[i] = 0; for (int i = 0; i < s->nr_cells; i++) { - present[s->cells[i].nodeID]++; + if (s->cells[i].nodeID <= nregions) + present[s->cells[i].nodeID]++; + else + message("Bad nodeID: %d", s->cells[i].nodeID); } for (int i = 0; i < nregions; i++) { if (!present[i]) { @@ -1045,3 +1043,53 @@ static int check_complete(struct space *s, int verbose, int nregions) { free(present); return (!failed); } + +/** + * @brief Partition a space of cells based on another space of cells. + * + * The two spaces are expected to be at different cell sizes, so what we'd + * like to do is assign the second space to geometrically closest nodes + * of the first, with the effect of minimizing particle movement when + * rebuilding the second space from the first. + * + * Since two spaces cannot exist simultaneously the old space is actually + * required in a decomposed state. These are the old cells sizes and counts + * per dimension, along with a list of the old nodeIDs. The old nodeIDs are + * indexed by the cellid (see cell_getid()), so should be stored that way. + * + * On exit the new space cells will have their nodeIDs assigned. + * + * @param oldh the cell dimensions of old space. + * @param oldcdim number of cells per dimension in old space. + * @param oldnodeIDs the nodeIDs of cells in the old space, indexed by old + *cellid. + * @param s the space to be partitioned. + * + * @return 1 if the new space contains nodeIDs from all nodes, 0 otherwise. + */ +int partition_space_to_space(double *oldh, double *oldcdim, int *oldnodeIDs, + struct space *s) { + + /* Loop over all the new cells. */ + int nr_nodes = 0; + for (int i = 0; i < s->cdim[0]; i++) { + for (int j = 0; j < s->cdim[1]; j++) { + for (int k = 0; k < s->cdim[2]; k++) { + + /* Scale indices to old cell space. */ + int ii = rint(i * s->ih[0] * oldh[0]); + int jj = rint(j * s->ih[1] * oldh[1]); + int kk = rint(k * s->ih[2] * oldh[2]); + + int cid = cell_getid(s->cdim, i, j, k); + int oldcid = cell_getid(oldcdim, ii, jj, kk); + s->cells[cid].nodeID = oldnodeIDs[oldcid]; + + if (oldnodeIDs[oldcid] > nr_nodes) nr_nodes = oldnodeIDs[oldcid]; + } + } + } + + /* Check we have all nodeIDs present in the resample. */ + return check_complete(s, 1, nr_nodes + 1); +} diff --git a/src/partition.h b/src/partition.h index af3d0be9242d75ce2c319de034428dc934e3a925..b2a132ed48e48573949d16291f72218990589158 100644 --- a/src/partition.h +++ b/src/partition.h @@ -57,6 +57,8 @@ void partition_repartition(enum repartition_type reparttype, int nodeID, void partition_initial_partition(struct partition *initial_partition, int nodeID, int nr_nodes, struct space *s); +int partition_space_to_space(double *oldh, double *oldcdim, int *oldnodeID, + struct space *s); void partition_init(struct partition *partition, enum repartition_type *reparttypestruct, const struct swift_params *params, int nr_nodes); diff --git a/src/physical_constants.c b/src/physical_constants.c new file mode 100644 index 0000000000000000000000000000000000000000..d00d63df1c1e6a4821ce4ba50dfef9c4e0def9d2 --- /dev/null +++ b/src/physical_constants.c @@ -0,0 +1,119 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "physical_constants.h" + +/* Local headers. */ +#include "error.h" +#include "physical_constants_cgs.h" + +/** + * @brief Converts physical constants to the internal unit system + * + * @param us The current internal system of units. + * @param internal_const The physical constants to initialize. + */ +void phys_const_init(struct UnitSystem* us, struct phys_const* internal_const) { + + /* Units are declared as {U_M, U_L, U_t, U_I, U_T} */ + + const float dimension_G[5] = {-1, 3, -2, 0, 0}; + internal_const->const_newton_G = + const_newton_G_cgs / units_general_conversion_factor(us, dimension_G); + + const float dimension_c[5] = {0, 1, -1, 0, 0}; + internal_const->const_speed_light_c = + const_speed_light_c_cgs / + units_general_conversion_factor(us, dimension_c); + + const float dimension_h[5] = {1, -2, -1, 0, 0}; + internal_const->const_planck_h = + const_planck_h_cgs / units_general_conversion_factor(us, dimension_h); + internal_const->const_planck_hbar = + const_planck_hbar_cgs / units_general_conversion_factor(us, dimension_h); + + const float dimension_k[5] = {1, 2, -2, 0, -1}; + internal_const->const_boltzmann_k = + const_boltzmann_k_cgs / units_general_conversion_factor(us, dimension_k); + + const float dimension_thomson[5] = {0, 2, 0, 0, 0}; + internal_const->const_thomson_cross_section = + const_thomson_cross_section_cgs / + units_general_conversion_factor(us, dimension_thomson); + + const float dimension_ev[5] = {1, 2, -2, 0, 0}; + internal_const->const_electron_volt = + const_electron_volt_cgs / + units_general_conversion_factor(us, dimension_ev); + + const float dimension_charge[5] = {0, 0, -1, 1, 0}; + internal_const->const_electron_charge = + const_electron_charge_cgs / + units_general_conversion_factor(us, dimension_charge); + + const float dimension_mass[5] = {1, 0, 0, 0, 0}; + internal_const->const_electron_mass = + const_electron_mass_cgs / + units_general_conversion_factor(us, dimension_mass); + internal_const->const_proton_mass = + const_proton_mass_cgs / + units_general_conversion_factor(us, dimension_mass); + internal_const->const_solar_mass = + const_solar_mass_cgs / + units_general_conversion_factor(us, dimension_mass); + internal_const->const_earth_mass = + const_earth_mass_cgs / + units_general_conversion_factor(us, dimension_mass); + + const float dimension_time[5] = {0, 0, 1, 0, 0}; + internal_const->const_year = + const_year_cgs / units_general_conversion_factor(us, dimension_time); + + const float dimension_length[5] = {0, 1, 0, 0, 0}; + internal_const->const_astronomical_unit = + const_astronomical_unit_cgs / + units_general_conversion_factor(us, dimension_length); + internal_const->const_parsec = + const_parsec_cgs / units_general_conversion_factor(us, dimension_length); + internal_const->const_light_year = + const_light_year_cgs / + units_general_conversion_factor(us, dimension_length); +} + +void phys_const_print(struct phys_const* internal_const) { + + message("%25s = %e", "Gravitational constant", + internal_const->const_newton_G); + message("%25s = %e", "Speed of light", internal_const->const_speed_light_c); + message("%25s = %e", "Planck constant", internal_const->const_planck_h); + message("%25s = %e", "Boltzmann constant", internal_const->const_boltzmann_k); + message("%25s = %e", "Thomson cross-section", + internal_const->const_thomson_cross_section); + message("%25s = %e", "Electron-Volt", internal_const->const_electron_volt); + message("%25s = %e", "Year", internal_const->const_year); + message("%25s = %e", "Astronomical Unit", + internal_const->const_astronomical_unit); + message("%25s = %e", "Parsec", internal_const->const_parsec); + message("%25s = %e", "Solar mass", internal_const->const_solar_mass); +} diff --git a/src/physical_constants.h b/src/physical_constants.h new file mode 100644 index 0000000000000000000000000000000000000000..aac0fa0840e08a1140bbab67e944f7f134f4222b --- /dev/null +++ b/src/physical_constants.h @@ -0,0 +1,85 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_PHYSICAL_CONSTANTS_H +#define SWIFT_PHYSICAL_CONSTANTS_H + +/* Config parameters. */ +#include "../config.h" + +/* Local includes. */ +#include "units.h" + +/* physical constants in in defined programme units */ +struct phys_const { + + /* Newton's gravitationl constant */ + double const_newton_G; + + /* Speed of light in vacuum */ + double const_speed_light_c; + + /* Planck's constant */ + double const_planck_h; + + /* Planck's reduced constant */ + double const_planck_hbar; + + /* Boltzmann's constant */ + double const_boltzmann_k; + + /* Thomson cross-section */ + double const_thomson_cross_section; + + /* Charge of the electron */ + double const_electron_charge; + + /* Electron-Volt */ + double const_electron_volt; + + /* Mass of the electron */ + double const_electron_mass; + + /* Mass of the proton */ + double const_proton_mass; + + /* (Tropical) Year */ + double const_year; + + /* Astronomical unit */ + double const_astronomical_unit; + + /* Parsec */ + double const_parsec; + + /* Light-year */ + double const_light_year; + + /* Mass of the Sun */ + double const_solar_mass; + + /* Mass of the Earth */ + double const_earth_mass; +}; + +void phys_const_init(struct UnitSystem* us, struct phys_const* internal_const); + +void phys_const_print(struct phys_const* internal_const); + +#endif /* SWIFT_PHYSICAL_CONSTANTS_H */ diff --git a/src/physical_constants_cgs.h b/src/physical_constants_cgs.h new file mode 100644 index 0000000000000000000000000000000000000000..9bd435bde9f0f80c06f0de5fee980f080c2f57d3 --- /dev/null +++ b/src/physical_constants_cgs.h @@ -0,0 +1,81 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_PHYSICAL_CONSTANTS_CGS_H +#define SWIFT_PHYSICAL_CONSTANTS_CGS_H + +/* The constants declared in this file should _NOT_ be used directly */ +/* Users should use the converted values in the phys_const structure */ +/* where all the constants are defined in the system of units specified */ +/* in the parameter file. */ + +/* All values are taken from K.A. Olive et al. (Particle Data Group), Chin. */ +/* Phys. C, 38, 090001 (2014) and 2015 update. */ +/* http://pdg.lbl.gov/2015/reviews/rpp2015-rev-phys-constants.pdf */ +/* http://pdg.lbl.gov/2015/reviews/rpp2015-rev-astrophysical-constants.pdf */ + +/* Newton's gravitation constant */ +const double const_newton_G_cgs = 6.67408e-8; /* g^-1 cm^3 s^-2 */ + +/* Speed of light in vacuum */ +const double const_speed_light_c_cgs = 2.99792458e10; /* cm s^-1 */ + +/* Planck's constant */ +const double const_planck_h_cgs = 6.626070040e-27; /* g cm^-2 s^-1 */ + +/* Planck's reduced constant */ +const double const_planck_hbar_cgs = 1.054571800e-27; /* g cm^-2 s^-1 */ + +/* Boltzmann's constant */ +const double const_boltzmann_k_cgs = 1.38064852e-16; /* g cm^2 s^-2 K^-1 */ + +/* Thomson cross-section */ +const double const_thomson_cross_section_cgs = 6.6524587158e-25; /* cm^2 */ + +/* Elementary charge */ +const double const_electron_charge_cgs = 1.6021766208e-19; /* A s^-1 */ + +/* Electron-Volt */ +const double const_electron_volt_cgs = 1.6021766208e-12; /* g cm^2 s^-2 */ + +/* Mass of the electron */ +const double const_electron_mass_cgs = 9.10938356e-28; /* g */ + +/* Mass of the proton */ +const double const_proton_mass_cgs = 1.672621898e-24; /* g */ + +/* Tropical year */ +const double const_year_cgs = 3.15569252e7; /* s */ + +/* Astronomical unit */ +const double const_astronomical_unit_cgs = 1.49597870700e13; /* cm */ + +/* Parsec */ +const double const_parsec_cgs = 3.08567758149e18; /* cm */ + +/* Light-year */ +const double const_light_year_cgs = 9.46053e17; /* cm */ + +/* Mass of the Sun */ +const double const_solar_mass_cgs = 1.9885e33; /* g */ + +/* Mass of the Earth */ +const double const_earth_mass_cgs = 5.9726e27; /* g */ + +#endif /* SWIFT_PHYSICAL_CONSTANTS_CGS_H */ diff --git a/src/potentials.c b/src/potentials.c new file mode 100644 index 0000000000000000000000000000000000000000..98e57d7959963e07cfbbd2fdb60a0df504e6a487 --- /dev/null +++ b/src/potentials.c @@ -0,0 +1,65 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* This object's header. */ +#include "potentials.h" + +/** + * @brief Initialises the external potential properties in the internal system + * of units. + * + * @param parameter_file The parsed parameter file + * @param us The current internal system of units + * @param potential The external potential properties to initialize + */ +void potential_init(const struct swift_params* parameter_file, + struct UnitSystem* us, + struct external_potential* potential) { + +#ifdef EXTERNAL_POTENTIAL_POINTMASS + + potential->point_mass.x = + parser_get_param_double(parameter_file, "PointMass:position_x"); + potential->point_mass.y = + parser_get_param_double(parameter_file, "PointMass:position_y"); + potential->point_mass.z = + parser_get_param_double(parameter_file, "PointMass:position_z"); + potential->point_mass.mass = + parser_get_param_double(parameter_file, "PointMass:mass"); + +#endif /* EXTERNAL_POTENTIAL_POINTMASS */ +} + +/** + * @brief Prints the properties of the external potential to stdout. + * + * @param potential The external potential properties. + */ +void potential_print(const struct external_potential* potential) { + +#ifdef EXTERNAL_POTENTIAL_POINTMASS + message("Point mass properties are (x,y,z) = (%e, %e, %e), M = %e", + potential->point_mass.x, potential->point_mass.y, + potential->point_mass.z, potential->point_mass.mass); +#endif /* EXTERNAL_POTENTIAL_POINTMASS */ +} diff --git a/src/potentials.h b/src/potentials.h new file mode 100644 index 0000000000000000000000000000000000000000..50a8903095a8a8218e8d02e3f42af07e40be5b43 --- /dev/null +++ b/src/potentials.h @@ -0,0 +1,123 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#ifndef SWIFT_POTENTIALS_H +#define SWIFT_POTENTIALS_H + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <math.h> + +/* Local includes. */ +#include "const.h" +#include "error.h" +#include "part.h" +#include "parser.h" +#include "physical_constants.h" +#include "units.h" + +/* External Potential Properties */ +struct external_potential { + +#ifdef EXTERNAL_POTENTIAL_POINTMASS + struct { + double x, y, z; + double mass; + } point_mass; +#endif +}; + +/* Include exteral pointmass potential */ +#ifdef EXTERNAL_POTENTIAL_POINTMASS + +#define EXTERNAL_GRAVITY_TIMESTEP_PREFACTOR 0.03f + +/** + * @brief Computes the time-step due to the acceleration from a point mass + * + * @param potential The properties of the externa potential. + * @param phys_const The physical constants in internal units. + * @param g Pointer to the g-particle data. + */ +__attribute__((always_inline)) + INLINE static float external_gravity_pointmass_timestep( + const struct external_potential* potential, + const struct phys_const* const phys_const, + const struct gpart* const g) { + + const float G_newton = phys_const->const_newton_G; + const float dx = g->x[0] - potential->point_mass.x; + const float dy = g->x[1] - potential->point_mass.y; + const float dz = g->x[2] - potential->point_mass.z; + const float rinv = 1.f / sqrtf(dx * dx + dy * dy + dz * dz); + const float drdv = (g->x[0] - potential->point_mass.x) * (g->v_full[0]) + + (g->x[1] - potential->point_mass.y) * (g->v_full[1]) + + (g->x[2] - potential->point_mass.z) * (g->v_full[2]); + const float dota_x = G_newton * potential->point_mass.mass * rinv * rinv * + rinv * (-g->v_full[0] + 3.f * rinv * rinv * drdv * dx); + const float dota_y = G_newton * potential->point_mass.mass * rinv * rinv * + rinv * (-g->v_full[1] + 3.f * rinv * rinv * drdv * dy); + const float dota_z = G_newton * potential->point_mass.mass * rinv * rinv * + rinv * (-g->v_full[2] + 3.f * rinv * rinv * drdv * dz); + const float dota_2 = dota_x * dota_x + dota_y * dota_y + dota_z * dota_z; + const float a_2 = g->a_grav[0] * g->a_grav[0] + g->a_grav[1] * g->a_grav[1] + + g->a_grav[2] * g->a_grav[2]; + + return EXTERNAL_GRAVITY_TIMESTEP_PREFACTOR * sqrtf(a_2 / dota_2); +} + +/** + * @brief Computes the gravitational acceleration of a particle due to a point + * mass + * + * @param potential The proerties of the external potential. + * @param phys_const The physical constants in internal units. + * @param g Pointer to the g-particle data. + */ +__attribute__((always_inline)) INLINE static void external_gravity_pointmass( + const struct external_potential* potential, + const struct phys_const* const phys_const, struct gpart* g) { + + const float G_newton = phys_const->const_newton_G; + const float dx = g->x[0] - potential->point_mass.x; + const float dy = g->x[1] - potential->point_mass.y; + const float dz = g->x[2] - potential->point_mass.z; + const float rinv = 1.f / sqrtf(dx * dx + dy * dy + dz * dz); + + g->a_grav[0] += + -G_newton * potential->point_mass.mass * dx * rinv * rinv * rinv; + g->a_grav[1] += + -G_newton * potential->point_mass.mass * dy * rinv * rinv * rinv; + g->a_grav[2] += + -G_newton * potential->point_mass.mass * dz * rinv * rinv * rinv; +} +#endif /* EXTERNAL_POTENTIAL_POINTMASS */ + +/* Now, some generic functions, defined in the source file */ + +void potential_init(const struct swift_params* parameter_file, + struct UnitSystem* us, + struct external_potential* potential); + +void potential_print(const struct external_potential* potential); + +#endif /* SWIFT_POTENTIALS_H */ diff --git a/src/runner.c b/src/runner.c index e86a2129b013398647db416df2095a55fdb7417e..77153d0608ef086fe592a261e08dd4c701eb3b13 100644 --- a/src/runner.c +++ b/src/runner.c @@ -1,6 +1,10 @@ /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -41,6 +45,7 @@ #include "engine.h" #include "error.h" #include "gravity.h" +#include "hydro_properties.h" #include "hydro.h" #include "minmax.h" #include "scheduler.h" @@ -68,6 +73,21 @@ const float runner_shift[13 * 3] = { const char runner_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +/* #define MY_CELL 428428428 */ +/* #define DX 0.1 */ +/* #define NX 1000 */ +/* #define CELL_ID \ */ +/* ((int)(c->loc[0] / DX) * NX *NX + (int)(c->loc[1] / DX) * NX + \ */ +/* (int)(c->loc[2] / DX)) */ +/* #define OUT \ */ +/* if (CELL_ID == MY_CELL) { \ */ +/* message(" cell= %d gcount=%d time=%f \n", CELL_ID, c->gcount, + * r->e->time); \ */ +/* } */ +/* #define OUT message(" cell %d %d %f \n",CELL_ID, c->count, r->e->time); */ +/* #define OUT if(CELL_ID == MY_CELL) message("\n cell %f %f %f %d %d */ +/* %f\n",c->loc[0],c->loc[1],c->loc[2], CELL_ID, c->count, r->e->time); */ + /* Import the density loop functions. */ #define FUNCTION density #include "runner_doiact.h" @@ -80,6 +100,95 @@ const char runner_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* Import the gravity loop functions. */ #include "runner_doiact_grav.h" +/** + * @brief Calculate gravity acceleration from external potential + * + * @param r runner task + * @param c cell + * @param timer 1 if the time is to be recorded. + */ +void runner_dograv_external(struct runner *r, struct cell *c, int timer) { + + struct gpart *g, *gparts = c->gparts; + int i, k, gcount = c->gcount; + const int ti_current = r->e->ti_current; + const struct external_potential *potential = r->e->external_potential; + const struct phys_const *constants = r->e->physical_constants; + + TIMER_TIC; + + /* Recurse? */ + if (c->split) { + for (k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_dograv_external(r, c->progeny[k], 0); + return; + } + +#ifdef TASK_VERBOSE + OUT; +#endif + + /* Loop over the parts in this cell. */ + for (i = 0; i < gcount; i++) { + + /* Get a direct pointer on the part. */ + g = &gparts[i]; + + /* Is this part within the time step? */ + if (g->ti_end <= ti_current) { + + external_gravity(potential, constants, g); + + /* /\* check for energy and angular momentum conservation - begin by */ + /* * synchronizing velocity*\/ */ + /* const float dx = g->x[0] - r->e->potential->point_mass.x; */ + /* const float dy = g->x[1] - r->e->potential->point_mass.y; */ + /* const float dz = g->x[2] - r->e->potential->point_mass.z; */ + /* const float dr = sqrtf((dx * dx) + (dy * dy) + (dz * dz)); */ + /* const float rinv = 1.f / sqrtf(dx * dx + dy * dy + dz * dz); */ + + /* const int current_dti = g->ti_end - g->ti_begin; */ + /* const float dt = 0.5f * current_dti * r->e->timeBase; */ + /* const float vx = g->v_full[0] + dt * g->a_grav[0]; */ + /* const float vy = g->v_full[1] + dt * g->a_grav[1]; */ + /* const float vz = g->v_full[2] + dt * g->a_grav[2]; */ + + /* /\* E/L *\/ */ + /* float L[3], E; */ + /* E = 0.5 * ((vx * vx) + (vy * vy) + (vz * vz)) - */ + /* r->e->physical_constants->newton_gravity * */ + /* r->e->potential->point_mass.mass * rinv; */ + /* L[0] = dy * vz - dz * vy; */ + /* L[1] = dz * vx - dx * vz; */ + /* L[2] = dx * vy - dy * vx; */ + /* if (abs(g->id) == 1) { */ + /* float v2 = vx * vx + vy * vy + vz * vz; */ + /* float fg = r->e->physical_constants->newton_gravity * */ + /* r->e->potential->point_mass.mass * rinv; */ + /* float fga = sqrtf((g->a_grav[0] * g->a_grav[0]) + */ + /* (g->a_grav[1] * g->a_grav[1]) + */ + /* (g->a_grav[2] * g->a_grav[2])) * */ + /* dr; */ + /* // message("grav_external time= %f\t V_c^2= %f GM/r= %f E= %f L[2]= + * %f */ + /* // x= %f y= %f vx= %f vy= %f\n", r->e->time, v2, fg, E, L[2], + * g->x[0], */ + /* // g->x[1], vx, vy); */ + /* message("%f\t %f %f %f %f %f %f %f %f %f %f %f %f %f\n", r->e->time, + */ + /* g->tx, g->tv, dt, v2, fg, fga, dr, E, L[2], g->x[0], g->x[1], + */ + /* vx, vy); */ + /* /\* message(" G=%e M=%e\n", r->e->physical_constants->newton_gravity, + */ + /* * r->e->potential->point_mass.mass); *\/ */ + /* /\* exit(-1); *\/ */ + /* } */ + } + } + if (timer) TIMER_TOC(timer_dograv_external); +} + /** * @brief Sort the entries in ascending order using QuickSort. * @@ -466,7 +575,6 @@ void runner_dogsort(struct runner *r, struct cell *c, int flags, int clock) { * @param c The cell. * @param timer 1 if the time is to be recorded. */ - void runner_doinit(struct runner *r, struct cell *c, int timer) { struct part *const parts = c->parts; @@ -531,6 +639,13 @@ void runner_doghost(struct runner *r, struct cell *c) { float h_corr; const int ti_current = r->e->ti_current; const double timeBase = r->e->timeBase; + const float target_wcount = r->e->hydro_properties->target_neighbours; + const float max_wcount = + target_wcount + r->e->hydro_properties->delta_neighbours; + const float min_wcount = + target_wcount - r->e->hydro_properties->delta_neighbours; + const int max_smoothing_iter = + r->e->hydro_properties->max_smoothing_iterations; TIMER_TIC; @@ -547,7 +662,7 @@ void runner_doghost(struct runner *r, struct cell *c) { for (int k = 0; k < count; k++) pid[k] = k; /* While there are particles that need to be updated... */ - for (int num_reruns = 0; count > 0 && num_reruns < const_smoothing_max_iter; + for (int num_reruns = 0; count > 0 && num_reruns < max_smoothing_iter; num_reruns++) { /* Reset the redo-count. */ @@ -571,7 +686,7 @@ void runner_doghost(struct runner *r, struct cell *c) { /* Otherwise, compute the smoothing length update (Newton step). */ else { - h_corr = (kernel_nwneigh - p->density.wcount) / p->density.wcount_dh; + h_corr = (target_wcount - p->density.wcount) / p->density.wcount_dh; /* Truncate to the range [ -p->h/2 , p->h ]. */ h_corr = fminf(h_corr, p->h); @@ -579,8 +694,7 @@ void runner_doghost(struct runner *r, struct cell *c) { } /* Did we get the right number density? */ - if (p->density.wcount > kernel_nwneigh + const_delta_nwneigh || - p->density.wcount < kernel_nwneigh - const_delta_nwneigh) { + if (p->density.wcount > max_wcount || p->density.wcount < min_wcount) { /* Ok, correct then */ p->h += h_corr; @@ -672,8 +786,6 @@ void runner_doghost(struct runner *r, struct cell *c) { */ void runner_dodrift(struct runner *r, struct cell *c, int timer) { - const int nr_parts = c->count; - const int nr_gparts = c->gcount; const double timeBase = r->e->timeBase; const double dt = (r->e->ti_current - r->e->ti_old) * timeBase; const int ti_old = r->e->ti_old; @@ -684,12 +796,16 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f; TIMER_TIC +#ifdef TASK_VERBOSE + OUT; +#endif /* No children? */ if (!c->split) { /* Loop over all the g-particles in the cell */ - for (int k = 0; k < nr_gparts; ++k) { + const int nr_gparts = c->gcount; + for (size_t k = 0; k < nr_gparts; k++) { /* Get a handle on the gpart. */ struct gpart *const gp = &gparts[k]; @@ -698,10 +814,22 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { gp->x[0] += gp->v_full[0] * dt; gp->x[1] += gp->v_full[1] * dt; gp->x[2] += gp->v_full[2] * dt; + + /* Compute offset since last cell construction */ + gp->x_diff[0] -= gp->v_full[0] * dt; + gp->x_diff[1] -= gp->v_full[1] * dt; + gp->x_diff[2] -= gp->v_full[2] * dt; + + /* Compute (square of) motion since last cell construction */ + const float dx2 = gp->x_diff[0] * gp->x_diff[0] + + gp->x_diff[1] * gp->x_diff[1] + + gp->x_diff[2] * gp->x_diff[2]; + dx2_max = fmaxf(dx2_max, dx2); } /* Loop over all the particles in the cell (more work for these !) */ - for (int k = 0; k < nr_parts; k++) { + const size_t nr_parts = c->count; + for (size_t k = 0; k < nr_parts; k++) { /* Get a handle on the part. */ struct part *const p = &parts[k]; @@ -737,10 +865,15 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { /* Predict the values of the extra fields */ hydro_predict_extra(p, xp, ti_old, ti_current, timeBase); + /* Compute offset since last cell construction */ + xp->x_diff[0] -= xp->v_full[0] * dt; + xp->x_diff[1] -= xp->v_full[1] * dt; + xp->x_diff[2] -= xp->v_full[2] * dt; + /* Compute (square of) motion since last cell construction */ - const float dx2 = (p->x[0] - xp->x_old[0]) * (p->x[0] - xp->x_old[0]) + - (p->x[1] - xp->x_old[1]) * (p->x[1] - xp->x_old[1]) + - (p->x[2] - xp->x_old[2]) * (p->x[2] - xp->x_old[2]); + const float dx2 = xp->x_diff[0] * xp->x_diff[0] + + xp->x_diff[1] * xp->x_diff[1] + + xp->x_diff[2] * xp->x_diff[2]; dx2_max = fmaxf(dx2_max, dx2); /* Maximal smoothing length */ @@ -773,13 +906,12 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { } /** - * @brief Combined second and first kick for fixed dt. + * @brief Kick particles in momentum space and collect statistics * * @param r The runner thread. * @param c The cell. - * @param timer The timer + * @param timer Are we timing this ? */ - void runner_dokick(struct runner *r, struct cell *c, int timer) { const float global_dt_min = r->e->dt_min; @@ -792,6 +924,10 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { struct part *const parts = c->parts; struct xpart *const xparts = c->xparts; struct gpart *const gparts = c->gparts; + const struct external_potential *potential = r->e->external_potential; + const struct hydro_props *hydro_properties = r->e->hydro_properties; + const struct phys_const *constants = r->e->physical_constants; + const float ln_max_h_change = hydro_properties->log_max_h_change; const int is_fixdt = (r->e->policy & engine_policy_fixdt) == engine_policy_fixdt; @@ -803,6 +939,10 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { TIMER_TIC +#ifdef TASK_VERBOSE + OUT; +#endif + /* No children? */ if (!c->split) { @@ -813,70 +953,78 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { struct gpart *const gp = &gparts[k]; /* If the g-particle has no counterpart and needs to be kicked */ - if (gp->id < 0 && (is_fixdt || gp->ti_end <= ti_current)) { + if (gp->id < 0) { - /* First, finish the force calculation */ - gravity_end_force(gp); + if (is_fixdt || gp->ti_end <= ti_current) { - /* Now we are ready to compute the next time-step size */ - int new_dti; + /* First, finish the force calculation */ + gravity_end_force(gp); - if (is_fixdt) { + /* Now we are ready to compute the next time-step size */ + int new_dti; - /* Now we have a time step, proceed with the kick */ - new_dti = global_dt_max * timeBase_inv; + if (is_fixdt) { - } else { + /* Now we have a time step, proceed with the kick */ + new_dti = global_dt_max * timeBase_inv; - /* Compute the next timestep (gravity condition) */ - float new_dt = gravity_compute_timestep(gp); + } else { - /* Limit timestep within the allowed range */ - new_dt = fminf(new_dt, global_dt_max); - new_dt = fmaxf(new_dt, global_dt_min); + /* Compute the next timestep (gravity condition) */ + const float new_dt_external = + gravity_compute_timestep_external(potential, constants, gp); + const float new_dt_self = + gravity_compute_timestep_self(constants, gp); - /* Convert to integer time */ - new_dti = new_dt * timeBase_inv; + float new_dt = fminf(new_dt_external, new_dt_self); - /* Recover the current timestep */ - const int current_dti = gp->ti_end - gp->ti_begin; + /* Limit timestep within the allowed range */ + new_dt = fminf(new_dt, global_dt_max); + new_dt = fmaxf(new_dt, global_dt_min); - /* Limit timestep increase */ - if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti); + /* Convert to integer time */ + new_dti = new_dt * timeBase_inv; - /* Put this timestep on the time line */ - int dti_timeline = max_nr_timesteps; - while (new_dti < dti_timeline) dti_timeline /= 2; + /* Recover the current timestep */ + const int current_dti = gp->ti_end - gp->ti_begin; - /* Now we have a time step, proceed with the kick */ - new_dti = dti_timeline; - } + /* Limit timestep increase */ + if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti); - /* Compute the time step for this kick */ - const int ti_start = (gp->ti_begin + gp->ti_end) / 2; - const int ti_end = gp->ti_end + new_dti / 2; - const double dt = (ti_end - ti_start) * timeBase; - const double half_dt = (ti_end - gp->ti_end) * timeBase; + /* Put this timestep on the time line */ + int dti_timeline = max_nr_timesteps; + while (new_dti < dti_timeline) dti_timeline /= 2; - /* Move particle forward in time */ - gp->ti_begin = gp->ti_end; - gp->ti_end = gp->ti_begin + new_dti; + /* Now we have a time step, proceed with the kick */ + new_dti = dti_timeline; + } - /* Kick particles in momentum space */ - gp->v_full[0] += gp->a_grav[0] * dt; - gp->v_full[1] += gp->a_grav[1] * dt; - gp->v_full[2] += gp->a_grav[2] * dt; + /* Compute the time step for this kick */ + const int ti_start = (gp->ti_begin + gp->ti_end) / 2; + const int ti_end = gp->ti_end + new_dti / 2; + const double dt = (ti_end - ti_start) * timeBase; + const double half_dt = (ti_end - gp->ti_end) * timeBase; - /* Extra kick work */ - gravity_kick_extra(gp, dt, half_dt); + /* Move particle forward in time */ + gp->ti_begin = gp->ti_end; + gp->ti_end = gp->ti_begin + new_dti; - /* Number of updated g-particles */ - g_updated++; - } + /* Kick particles in momentum space */ + gp->v_full[0] += gp->a_grav[0] * dt; + gp->v_full[1] += gp->a_grav[1] * dt; + gp->v_full[2] += gp->a_grav[2] * dt; - /* Minimal time for next end of time-step */ - ti_end_min = min(gp->ti_end, ti_end_min); - ti_end_max = max(gp->ti_end, ti_end_max); + /* Extra kick work */ + gravity_kick_extra(gp, dt, half_dt); + + /* Number of updated g-particles */ + g_updated++; + } + + /* Minimal time for next end of time-step */ + ti_end_min = min(gp->ti_end, ti_end_min); + ti_end_max = max(gp->ti_end, ti_end_max); + } } /* Now do the hydro ones... */ @@ -909,18 +1057,27 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { } else { /* Compute the next timestep (hydro condition) */ - const float new_dt_hydro = hydro_compute_timestep(p, xp); + const float new_dt_hydro = + hydro_compute_timestep(p, xp, hydro_properties); /* Compute the next timestep (gravity condition) */ float new_dt_grav = FLT_MAX; - if (p->gpart != NULL) - new_dt_grav = gravity_compute_timestep(p->gpart); + if (p->gpart != NULL) { + + const float new_dt_external = gravity_compute_timestep_external( + potential, constants, p->gpart); + const float new_dt_self = + gravity_compute_timestep_self(constants, p->gpart); + + new_dt_grav = fminf(new_dt_external, new_dt_self); + } + /* Final time-step is minimum of hydro and gravity */ float new_dt = fminf(new_dt_hydro, new_dt_grav); /* Limit change in h */ const float dt_h_change = - (p->h_dt != 0.0f) ? fabsf(const_ln_max_h_change * p->h / p->h_dt) + (p->h_dt != 0.0f) ? fabsf(ln_max_h_change * p->h / p->h_dt) : FLT_MAX; new_dt = fminf(new_dt, dt_h_change); @@ -1022,7 +1179,6 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { ti_end_min = min(p->ti_end, ti_end_min); ti_end_max = max(p->ti_end, ti_end_max); } - } /* Otherwise, aggregate data from children. */ @@ -1073,21 +1229,60 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { if (timer) TIMER_TOC(timer_kick); } +/** + * @brief Construct the cell properties from the received particles + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_dorecv_cell(struct runner *r, struct cell *c, int timer) { + + const struct part *const parts = c->parts; + const struct gpart *const gparts = c->gparts; + const size_t nr_parts = c->count; + const size_t nr_gparts = c->gcount; + // const int ti_current = r->e->ti_current; + + TIMER_TIC; + + int ti_end_min = max_nr_timesteps; + int ti_end_max = 0; + float h_max = 0.f; + + /* Collect everything... */ + for (size_t k = 0; k < nr_parts; k++) { + const int ti_end = parts[k].ti_end; + // if(ti_end < ti_current) error("Received invalid particle !"); + ti_end_min = min(ti_end_min, ti_end); + ti_end_max = max(ti_end_max, ti_end); + h_max = fmaxf(h_max, parts[k].h); + } + for (size_t k = 0; k < nr_gparts; k++) { + const int ti_end = gparts[k].ti_end; + // if(ti_end < ti_current) error("Received invalid particle !"); + ti_end_min = min(ti_end_min, ti_end); + ti_end_max = max(ti_end_max, ti_end); + } + + /* ... and store. */ + c->ti_end_min = ti_end_min; + c->ti_end_max = ti_end_max; + c->h_max = h_max; + + if (timer) TIMER_TOC(timer_dorecv_cell); +} + /** * @brief The #runner main thread routine. * * @param data A pointer to this thread's data. */ - void *runner_main(void *data) { struct runner *r = (struct runner *)data; struct engine *e = r->e; struct scheduler *sched = &e->sched; - struct task *t = NULL; - struct cell *ci, *cj; - struct part *parts; - int k, nr_parts; /* Main loop. */ while (1) { @@ -1096,6 +1291,7 @@ void *runner_main(void *data) { engine_barrier(e, r->id); /* Re-set the pointer to the previous task, as there is none. */ + struct task *t = NULL; struct task *prev = NULL; /* Loop while there are tasks... */ @@ -1114,8 +1310,8 @@ void *runner_main(void *data) { } /* Get the cells. */ - ci = t->ci; - cj = t->cj; + struct cell *ci = t->ci; + struct cell *cj = t->cj; t->rid = r->cpuid; /* Different types of tasks... */ @@ -1164,10 +1360,7 @@ void *runner_main(void *data) { case task_type_send: break; case task_type_recv: - parts = ci->parts; - nr_parts = ci->count; - ci->ti_end_min = ci->ti_end_max = max_nr_timesteps; - for (k = 0; k < nr_parts; k++) parts[k].ti_end = max_nr_timesteps; + runner_dorecv_cell(r, ci, 1); break; case task_type_grav_pp: if (t->cj == NULL) @@ -1184,6 +1377,9 @@ void *runner_main(void *data) { case task_type_grav_down: runner_dograv_down(r, t->ci); break; + case task_type_grav_external: + runner_dograv_external(r, t->ci, 1); + break; case task_type_part_sort: space_do_parts_sort(); break; diff --git a/src/runner.h b/src/runner.h index ff2f93db6eae9d9e85cd9c3de6f398ed0f64c681..7953b33361ca59e51e6e5ea07dde59db016239b0 100644 --- a/src/runner.h +++ b/src/runner.h @@ -1,6 +1,10 @@ /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -44,10 +48,6 @@ struct runner { /* Function prototypes. */ void runner_doghost(struct runner *r, struct cell *c); -void runner_dopair_density(struct runner *r, struct cell *ci, struct cell *cj); -void runner_doself_density(struct runner *r, struct cell *c); -void runner_dosub_density(struct runner *r, struct cell *ci, struct cell *cj, - int flags); void runner_dosort(struct runner *r, struct cell *c, int flag, int clock); void runner_dogsort(struct runner *r, struct cell *c, int flag, int clock); void runner_dokick(struct runner *r, struct cell *c, int timer); diff --git a/src/serial_io.c b/src/serial_io.c index 10eab97f1bf118a842e274b521056d0d81b32db1..f2c0dcce1cc258e7c9a43027fbdd45e299b47ae6 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -590,6 +590,7 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, * @brief Writes an HDF5 output file (GADGET-3 type) with its XMF descriptor * * @param e The engine containing all the system. + * @param baseName The common part of the snapshot file name. * @param us The UnitSystem used for the conversion of units in the output. * @param mpi_rank The MPI rank of this node. * @param mpi_size The number of MPI ranks. @@ -604,8 +605,9 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, * Calls #error() if an error occurs. * */ -void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, - int mpi_size, MPI_Comm comm, MPI_Info info) { +void write_output_serial(struct engine* e, const char* baseName, + struct UnitSystem* us, int mpi_rank, int mpi_size, + MPI_Comm comm, MPI_Info info) { hid_t h_file = 0, h_grp = 0, h_grpsph = 0; const size_t Ngas = e->s->nr_parts; const size_t Ntot = e->s->nr_gparts; @@ -617,16 +619,13 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, static int outputCount = 0; FILE* xmfFile = 0; - /* Number of particles of each type */ - // const size_t Ndm = Ntot - Ngas; - - /* MATTHIEU: Temporary fix to preserve master */ + /* Number of unassociated gparts */ const size_t Ndm = Ntot > 0 ? Ntot - Ngas : 0; - /* MATTHIEU: End temporary fix */ /* File name */ char fileName[FILENAME_BUFFER_SIZE]; - snprintf(fileName, FILENAME_BUFFER_SIZE, "output_%03i.hdf5", outputCount); + snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%03i.hdf5", baseName, + outputCount); /* Compute offset in the file and total number of particles */ size_t N[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0}; @@ -646,10 +645,10 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, if (mpi_rank == 0) { /* First time, we need to create the XMF file */ - if (outputCount == 0) createXMFfile(); + if (outputCount == 0) createXMFfile(baseName); /* Prepare the XMF file for the new entry */ - xmfFile = prepareXMFfile(); + xmfFile = prepareXMFfile(baseName); /* Write the part corresponding to this specific output */ writeXMFoutputheader(xmfFile, fileName, e->time); diff --git a/src/serial_io.h b/src/serial_io.h index 74ab8326dbeeb955e354687059cdd595657285f0..b7ed6eb62d823829a473f828696c291e552effa3 100644 --- a/src/serial_io.h +++ b/src/serial_io.h @@ -36,8 +36,9 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info, int dry_run); -void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, - int mpi_size, MPI_Comm comm, MPI_Info info); +void write_output_serial(struct engine* e, const char* baseName, + struct UnitSystem* us, int mpi_rank, int mpi_size, + MPI_Comm comm, MPI_Info info); #endif diff --git a/src/single_io.c b/src/single_io.c index 1dc71087e102ff884dba7b7d4b6dcd6339335cac..d545fb086fa4bc63fe58dee2bfe85d9586997850 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -456,7 +456,8 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, * @brief Writes an HDF5 output file (GADGET-3 type) with its XMF descriptor * * @param e The engine containing all the system. - * @param us The UnitSystem used for the conversion of units in the output + * @param baseName The common part of the snapshot file name. + * @param us The UnitSystem used for the conversion of units in the output. * * Creates an HDF5 output file and writes the particles contained * in the engine. If such a file already exists, it is erased and replaced @@ -466,7 +467,8 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, * Calls #error() if an error occurs. * */ -void write_output_single(struct engine* e, struct UnitSystem* us) { +void write_output_single(struct engine* e, const char* baseName, + struct UnitSystem* us) { hid_t h_file = 0, h_grp = 0; const size_t Ngas = e->s->nr_parts; @@ -478,25 +480,22 @@ void write_output_single(struct engine* e, struct UnitSystem* us) { struct gpart* dmparts = NULL; static int outputCount = 0; - /* Number of particles of each type */ - // const size_t Ndm = Ntot - Ngas; - - /* MATTHIEU: Temporary fix to preserve master */ + /* Number of unassociated gparts */ const size_t Ndm = Ntot > 0 ? Ntot - Ngas : 0; - /* MATTHIEU: End temporary fix */ long long N_total[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0}; /* File name */ char fileName[FILENAME_BUFFER_SIZE]; - snprintf(fileName, FILENAME_BUFFER_SIZE, "output_%03i.hdf5", outputCount); + snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%03i.hdf5", baseName, + outputCount); /* First time, we need to create the XMF file */ - if (outputCount == 0) createXMFfile(); + if (outputCount == 0) createXMFfile(baseName); /* Prepare the XMF file for the new entry */ FILE* xmfFile = 0; - xmfFile = prepareXMFfile(); + xmfFile = prepareXMFfile(baseName); /* Write the part corresponding to this specific output */ writeXMFoutputheader(xmfFile, fileName, e->time); diff --git a/src/single_io.h b/src/single_io.h index 587ebe07b6fa2b984b964baf282e7ceb1003ad29..adfc5b43941b2c0d69d9ce0924164aff56864a23 100644 --- a/src/single_io.h +++ b/src/single_io.h @@ -30,7 +30,8 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, struct gpart** gparts, size_t* Ngas, size_t* Ndm, int* periodic, int dry_run); -void write_output_single(struct engine* e, struct UnitSystem* us); +void write_output_single(struct engine* e, const char* baseName, + struct UnitSystem* us); #endif diff --git a/src/space.c b/src/space.c index ca93490cb45b50ad0a74d7170813613f972cdef2..267eca1989a43caa53827695df24aa8c8f81db12 100644 --- a/src/space.c +++ b/src/space.c @@ -1,21 +1,25 @@ /******************************************************************************* -* This file is part of SWIFT. -* Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Lesser General Public License as published -* by the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU Lesser General Public License -* along with this program. If not, see <http://www.gnu.org/licenses/>. -* -******************************************************************************/ + * This file is part of SWIFT. + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ /* Config parameters. */ #include "../config.h" @@ -43,6 +47,7 @@ #include "lock.h" #include "minmax.h" #include "runner.h" +#include "tools.h" /* Shared sort structure. */ struct parallel_sort space_sort_struct; @@ -155,22 +160,24 @@ void space_rebuild_recycle(struct space *s, struct cell *c) { void space_regrid(struct space *s, double cell_max, int verbose) { - float h_max = s->cell_min / kernel_gamma / space_stretch; const size_t nr_parts = s->nr_parts; struct cell *restrict c; ticks tic = getticks(); /* Run through the parts and get the current h_max. */ // tic = getticks(); - if (s->cells != NULL) { - for (int k = 0; k < s->nr_cells; k++) { - if (s->cells[k].h_max > h_max) h_max = s->cells[k].h_max; - } - } else { - for (int k = 0; k < nr_parts; k++) { - if (s->parts[k].h > h_max) h_max = s->parts[k].h; + float h_max = s->cell_min / kernel_gamma / space_stretch; + if (nr_parts > 0) { + if (s->cells != NULL) { + for (int k = 0; k < s->nr_cells; k++) { + if (s->cells[k].h_max > h_max) h_max = s->cells[k].h_max; + } + } else { + for (size_t k = 0; k < nr_parts; k++) { + if (s->parts[k].h > h_max) h_max = s->parts[k].h; + } + s->h_max = h_max; } - s->h_max = h_max; } /* If we are running in parallel, make sure everybody agrees on @@ -198,10 +205,37 @@ void space_regrid(struct space *s, double cell_max, int verbose) { "Must have at least 3 cells in each spatial dimension when periodicity " "is switched on."); -/* In MPI-Land, we're not allowed to change the top-level cell size. */ +/* In MPI-Land, changing the top-level cell size requires that the + * global partition is recomputed and the particles redistributed. + * Be prepared to do that. */ #ifdef WITH_MPI - if (cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2]) - error("Root-level change of cell size not allowed."); + double oldh[3]; + double oldcdim[3]; + int *oldnodeIDs = NULL; + if (cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2]) { + + /* Capture state of current space. */ + oldcdim[0] = s->cdim[0]; + oldcdim[1] = s->cdim[1]; + oldcdim[2] = s->cdim[2]; + oldh[0] = s->h[0]; + oldh[1] = s->h[1]; + oldh[2] = s->h[2]; + + if ((oldnodeIDs = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL) + error("Failed to allocate temporary nodeIDs."); + + int cid = 0; + for (int i = 0; i < s->cdim[0]; i++) { + for (int j = 0; j < s->cdim[1]; j++) { + for (int k = 0; k < s->cdim[2]; k++) { + cid = cell_getid(oldcdim, i, j, k); + oldnodeIDs[cid] = s->cells[cid].nodeID; + } + } + } + } + #endif /* Do we need to re-build the upper-level cells? */ @@ -261,6 +295,40 @@ void space_regrid(struct space *s, double cell_max, int verbose) { cdim[2]); fflush(stdout); +#ifdef WITH_MPI + if (oldnodeIDs != NULL) { + /* We have changed the top-level cell dimension, so need to redistribute + * cells around the nodes. We repartition using the old space node + * positions as a grid to resample. */ + if (s->e->nodeID == 0) + message( + "basic cell dimensions have increased - recalculating the " + "global partition."); + + if (!partition_space_to_space(oldh, oldcdim, oldnodeIDs, s)) { + + /* Failed, try another technique that requires no settings. */ + message("Failed to get a new partition, trying less optimal method"); + struct partition initial_partition; +#ifdef HAVE_METIS + initial_partition.type = INITPART_METIS_NOWEIGHT; +#else + initial_partition.type = INITPART_VECTORIZE; +#endif + partition_initial_partition(&initial_partition, s->e->nodeID, + s->e->nr_nodes, s); + } + + /* Re-distribute the particles to their new nodes. */ + engine_redistribute(s->e); + + /* Make the proxies. */ + engine_makeproxies(s->e); + + /* Finished with these. */ + free(oldnodeIDs); + } +#endif } /* re-build upper-level cells? */ // message( "rebuilding upper-level cells took %.3f %s." , // clocks_from_ticks(double)(getticks() - tic), clocks_getunit()); @@ -309,13 +377,13 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { const ticks tic = getticks(); /* Be verbose about this. */ - // message( "re)building space..." ); fflush(stdout); + // message("re)building space..."); fflush(stdout); /* Re-grid if necessary, or just re-set the cell data. */ space_regrid(s, cell_max, verbose); - int nr_parts = s->nr_parts; - int nr_gparts = s->nr_gparts; + size_t nr_parts = s->nr_parts; + size_t nr_gparts = s->nr_gparts; struct cell *restrict cells = s->cells; const double ih[3] = {s->ih[0], s->ih[1], s->ih[2]}; @@ -328,7 +396,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { int *ind; if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL) error("Failed to allocate temporary particle indices."); - for (int k = 0; k < nr_parts; k++) { + for (size_t k = 0; k < nr_parts; k++) { struct part *restrict p = &s->parts[k]; for (int j = 0; j < 3; j++) if (p->x[j] < 0.0) @@ -365,7 +433,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { #ifdef WITH_MPI /* Move non-local parts to the end of the list. */ const int local_nodeID = s->e->nodeID; - for (int k = 0; k < nr_parts; k++) + for (size_t k = 0; k < nr_parts;) { if (cells[ind[k]].nodeID != local_nodeID) { cells[ind[k]].count -= 1; nr_parts -= 1; @@ -385,9 +453,26 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { ind[k] = ind[nr_parts]; ind[nr_parts] = t; } + else { + /* Increment when not exchanging otherwise we need to retest "k".*/ + k++; + } + } + + /* Check that all parts are in the correct places. */ + /* for (size_t k = 0; k < nr_parts; k++) { + if (cells[ind[k]].nodeID != local_nodeID) { + error("Failed to move all non-local parts to send list"); + } + } + for (size_t k = nr_parts; k < s->nr_parts; k++) { + if (cells[ind[k]].nodeID == local_nodeID) { + error("Failed to remove local parts from send list"); + } + }*/ /* Move non-local gparts to the end of the list. */ - for (int k = 0; k < nr_gparts; k++) + for (int k = 0; k < nr_gparts;) { if (cells[gind[k]].nodeID != local_nodeID) { cells[gind[k]].gcount -= 1; nr_gparts -= 1; @@ -404,18 +489,33 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { gind[k] = gind[nr_gparts]; gind[nr_gparts] = t; } + else { + /* Increment when not exchanging otherwise we need to retest "k".*/ + k++; + } + } + + /* Check that all gparts are in the correct place (untested). */ + /* + for (size_t k = 0; k < nr_gparts; k++) { + if (cells[gind[k]].nodeID != local_nodeID) { + error("Failed to move all non-local gparts to send list"); + } + } + for (size_t k = nr_gparts; k < s->nr_gparts; k++) { + if (cells[gind[k]].nodeID == local_nodeID) { + error("Failed to remove local gparts from send list"); + } + }*/ + /* Exchange the strays, note that this potentially re-allocates the parts arrays. */ - /* TODO: This function also exchanges gparts, but this is shorted-out - until they are fully implemented. */ size_t nr_parts_exchanged = s->nr_parts - nr_parts; size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts; engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged, nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged); - /* Add post-processing, i.e. re-linking/creating of gparts here. */ - /* Set the new particle counts. */ s->nr_parts = nr_parts + nr_parts_exchanged; s->nr_gparts = nr_gparts + nr_gparts_exchanged; @@ -425,13 +525,13 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { int *ind_new; if ((ind_new = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL) error("Failed to allocate temporary particle indices."); - memcpy(ind_new, ind, sizeof(size_t) * nr_parts); + memcpy(ind_new, ind, sizeof(int) * nr_parts); free(ind); ind = ind_new; } /* Assign each particle to its cell. */ - for (int k = nr_parts; k < s->nr_parts; k++) { + for (size_t k = nr_parts; k < s->nr_parts; k++) { const struct part *const p = &s->parts[k]; ind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); @@ -447,7 +547,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose); /* Re-link the gparts. */ - for (int k = 0; k < nr_parts; k++) + for (size_t k = 0; k < nr_parts; k++) if (s->parts[k].gpart != NULL) s->parts[k].gpart->part = &s->parts[k]; /* Verify space_sort_struct. */ @@ -1147,7 +1247,7 @@ void space_do_split(struct space *s, struct cell *c) { temp->depth = c->depth + 1; temp->split = 0; temp->h_max = 0.0; - temp->dx_max = 0.0; + temp->dx_max = 0.f; temp->nodeID = c->nodeID; temp->parent = c; c->progeny[k] = temp; @@ -1192,16 +1292,19 @@ void space_do_split(struct space *s, struct cell *c) { struct xpart *xp = &xparts[k]; const float h = p->h; const int ti_end = p->ti_end; - xp->x_old[0] = p->x[0]; - xp->x_old[1] = p->x[1]; - xp->x_old[2] = p->x[2]; + xp->x_diff[0] = 0.f; + xp->x_diff[1] = 0.f; + xp->x_diff[2] = 0.f; if (h > h_max) h_max = h; if (ti_end < ti_end_min) ti_end_min = ti_end; if (ti_end > ti_end_max) ti_end_max = ti_end; } for (int k = 0; k < gcount; k++) { - struct gpart *p = &gparts[k]; - const int ti_end = p->ti_end; + struct gpart *gp = &gparts[k]; + const int ti_end = gp->ti_end; + gp->x_diff[0] = 0.f; + gp->x_diff[1] = 0.f; + gp->x_diff[2] = 0.f; if (ti_end < ti_end_min) ti_end_min = ti_end; if (ti_end > ti_end_max) ti_end_max = ti_end; } @@ -1214,9 +1317,11 @@ void space_do_split(struct space *s, struct cell *c) { if (s->nr_parts > 0) c->owner = ((c->parts - s->parts) % s->nr_parts) * s->nr_queues / s->nr_parts; - else + else if (s->nr_gparts > 0) c->owner = ((c->gparts - s->gparts) % s->nr_gparts) * s->nr_queues / s->nr_gparts; + else + c->owner = 0; /* Ok, there is really nothing on this rank... */ } /** @@ -1338,9 +1443,9 @@ void space_init(struct space *s, const struct swift_params *params, space_maxsize = parser_get_param_int(params, "Scheduler:cell_max_size"); space_subsize = parser_get_param_int(params, "Scheduler:cell_sub_size"); space_splitsize = parser_get_param_int(params, "Scheduler:cell_split_size"); - if(verbose) + if (verbose) message("max_size set to %d, sub_size set to %d, split_size set to %d", - space_maxsize, space_subsize, space_splitsize); + space_maxsize, space_subsize, space_splitsize); /* Check that we have enough cells */ if (s->cell_min * 3 > dim[0] || s->cell_min * 3 > dim[1] || @@ -1409,10 +1514,12 @@ void space_init(struct space *s, const struct swift_params *params, } /* Allocate the extra parts array. */ - if (posix_memalign((void *)&s->xparts, xpart_align, - Npart * sizeof(struct xpart)) != 0) - error("Failed to allocate xparts."); - bzero(s->xparts, Npart * sizeof(struct xpart)); + if (Npart > 0) { + if (posix_memalign((void *)&s->xparts, xpart_align, + Npart * sizeof(struct xpart)) != 0) + error("Failed to allocate xparts."); + bzero(s->xparts, Npart * sizeof(struct xpart)); + } /* Init the space lock. */ if (lock_init(&s->lock) != 0) error("Failed to create space spin-lock."); diff --git a/src/space.h b/src/space.h index 88e2f6f52774651217c4ff24e25f549d8ae1e347..ad7b9e77d484c997527d32c934bf1b85d7d54e41 100644 --- a/src/space.h +++ b/src/space.h @@ -1,6 +1,10 @@ /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published diff --git a/src/swift.h b/src/swift.h index e568a28c888295affc9ec45b6d059d34f5b4bf04..7e3116c1de8bc8e6cc2f89d0d5cbe9771ffbf33a 100644 --- a/src/swift.h +++ b/src/swift.h @@ -33,6 +33,7 @@ #include "error.h" #include "gravity.h" #include "hydro.h" +#include "hydro_properties.h" #include "lock.h" #include "map.h" #include "multipole.h" @@ -40,6 +41,8 @@ #include "parser.h" #include "part.h" #include "partition.h" +#include "physical_constants.h" +#include "potentials.h" #include "queue.h" #include "runner.h" #include "scheduler.h" diff --git a/src/task.c b/src/task.c index 5f1475a46e4626e1f51db673d73fd84f86e6edb6..31c2bfcb3462b9f91547ebd631f645e190c07143 100644 --- a/src/task.c +++ b/src/task.c @@ -1,6 +1,10 @@ /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -43,10 +47,10 @@ /* Task type names. */ const char *taskID_names[task_type_count] = { - "none", "sort", "self", "pair", "sub", - "init", "ghost", "drift", "kick", "send", - "recv", "grav_pp", "grav_mm", "grav_up", "grav_down", - "part_sort", "gpart_sort", "split_cell", "rewait"}; + "none", "sort", "self", "pair", "sub", + "init", "ghost", "drift", "kick", "send", + "recv", "grav_pp", "grav_mm", "grav_up", "grav_down", + "grav_external", "part_sort", "gpart_sort", "split_cell", "rewait"}; const char *subtaskID_names[task_type_count] = {"none", "density", "force", "grav"}; diff --git a/src/task.h b/src/task.h index 9c0ba6087d772d7362a98bc40a838c6fa3713166..25cc886f4b38456a0431fb6c7d0b7b1864053dd9 100644 --- a/src/task.h +++ b/src/task.h @@ -2,6 +2,9 @@ * This file is part of SWIFT. * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -45,6 +48,7 @@ enum task_types { task_type_grav_mm, task_type_grav_up, task_type_grav_down, + task_type_grav_external, task_type_part_sort, task_type_gpart_sort, task_type_split_cell, diff --git a/src/timers.c b/src/timers.c index 2501d347c8cea608650ece4c2883dab85ceee058..b621d27c90902f06c3760cbef6a88237a2b3b95b 100644 --- a/src/timers.c +++ b/src/timers.c @@ -1,6 +1,9 @@ /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published diff --git a/src/timers.h b/src/timers.h index de2d9edb9ed54717472e5ae1222dfb33235c3e95..81e5a674eddc3662e8db567ff7fc12302320320f 100644 --- a/src/timers.h +++ b/src/timers.h @@ -1,6 +1,9 @@ /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2016 John A. Regan (john.a.regan@durham.ac.uk) + * Tom Theuns (tom.theuns@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -37,11 +40,13 @@ enum { timer_dopair_density, timer_dopair_force, timer_dopair_grav, + timer_dograv_external, timer_dosub_density, timer_dosub_force, timer_dosub_grav, timer_dopair_subset, timer_doghost, + timer_dorecv_cell, timer_gettask, timer_qget, timer_qsteal, diff --git a/src/tools.c b/src/tools.c index d25b7401a1e0515c650333b41193d54b5e155d39..7b468426b894044dc8e215f5150d9536fa2d4cdd 100644 --- a/src/tools.c +++ b/src/tools.c @@ -59,7 +59,6 @@ void factor(int value, int *f1, int *f2) { void pairs_n2(double *dim, struct part *__restrict__ parts, int N, int periodic) { - int i, j, k, count = 0; // int mj, mk; // double maxratio = 1.0; @@ -124,7 +123,6 @@ void pairs_n2(double *dim, struct part *__restrict__ parts, int N, void pairs_single_density(double *dim, long long int pid, struct part *__restrict__ parts, int N, int periodic) { - int i, k; // int mj, mk; // double maxratio = 1.0; @@ -177,19 +175,16 @@ void pairs_single_density(double *dim, long long int pid, } void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { - float r2, hi, hj, hig2, hjg2, dx[3]; struct part *pi, *pj; /* Implements a double-for loop and checks every interaction */ for (int i = 0; i < ci->count; ++i) { - pi = &ci->parts[i]; hi = pi->h; hig2 = hi * hi * kernel_gamma2; for (int j = 0; j < cj->count; ++j) { - pj = &cj->parts[j]; /* Pairwise distance */ @@ -201,7 +196,6 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hig2) { - /* Interact */ runner_iact_nonsym_density(r2, dx, hi, pj->h, pi, pj); } @@ -210,13 +204,11 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { /* Reverse double-for loop and checks every interaction */ for (int j = 0; j < cj->count; ++j) { - pj = &cj->parts[j]; hj = pj->h; hjg2 = hj * hj * kernel_gamma2; for (int i = 0; i < ci->count; ++i) { - pi = &ci->parts[i]; /* Pairwise distance */ @@ -228,7 +220,6 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hjg2) { - /* Interact */ runner_iact_nonsym_density(r2, dx, hj, pi->h, pj, pi); } @@ -242,13 +233,11 @@ void self_all_density(struct runner *r, struct cell *ci) { /* Implements a double-for loop and checks every interaction */ for (int i = 0; i < ci->count; ++i) { - pi = &ci->parts[i]; hi = pi->h; hig2 = hi * hi * kernel_gamma2; for (int j = i + 1; j < ci->count; ++j) { - pj = &ci->parts[j]; hj = pj->h; hjg2 = hj * hj * kernel_gamma2; @@ -264,14 +253,12 @@ void self_all_density(struct runner *r, struct cell *ci) { /* Hit or miss? */ if (r2 < hig2) { - /* Interact */ runner_iact_nonsym_density(r2, dxi, hi, hj, pi, pj); } /* Hit or miss? */ if (r2 < hjg2) { - dxi[0] = -dxi[0]; dxi[1] = -dxi[1]; dxi[2] = -dxi[2]; @@ -285,7 +272,6 @@ void self_all_density(struct runner *r, struct cell *ci) { void pairs_single_grav(double *dim, long long int pid, struct gpart *__restrict__ parts, int N, int periodic) { - int i, k; // int mj, mk; // double maxratio = 1.0; @@ -344,7 +330,6 @@ void pairs_single_grav(double *dim, long long int pid, */ void density_dump(int N) { - int k; float r2[4] = {0.0f, 0.0f, 0.0f, 0.0f}, hi[4], hj[4]; struct part /**pi[4], *pj[4],*/ Pi[4], Pj[4]; @@ -382,7 +367,6 @@ void density_dump(int N) { void engine_single_density(double *dim, long long int pid, struct part *__restrict__ parts, int N, int periodic) { - int i, k; double r2, dx[3]; float fdx[3], ih; @@ -429,7 +413,6 @@ void engine_single_density(double *dim, long long int pid, void engine_single_force(double *dim, long long int pid, struct part *__restrict__ parts, int N, int periodic) { - int i, k; double r2, dx[3]; float fdx[3]; @@ -471,3 +454,29 @@ void engine_single_force(double *dim, long long int pid, p.a_hydro[1], p.a_hydro[2]); fflush(stdout); } + +/** + * Returns a random number (uniformly distributed) in [a,b[ + */ +double random_uniform(double a, double b) { + return (rand() / (double)RAND_MAX) * (b - a) + a; +} + +/** + * @brief Randomly shuffle an array of particles. + */ +void shuffle_particles(struct part *parts, const int count) { + if (count > 1) { + for (int i = 0; i < count - 1; i++) { + int j = i + random_uniform(0., (double)(count - 1 - i)); + + struct part particle = parts[j]; + + parts[j] = parts[i]; + + parts[i] = particle; + } + + } else + error("Array not big enough to shuffle!"); +} diff --git a/src/tools.h b/src/tools.h index ccffc77ceb8a967fd40c3737651ba75d529eee0f..8e0212652922fb4afd1ac89a64d4a01c71ecd4a9 100644 --- a/src/tools.h +++ b/src/tools.h @@ -38,4 +38,7 @@ void self_all_density(struct runner *r, struct cell *ci); void pairs_n2(double *dim, struct part *__restrict__ parts, int N, int periodic); +double random_uniform(double a, double b); +void shuffle_particles(struct part *parts, const int count); + #endif /* SWIFT_TOOL_H */ diff --git a/src/units.c b/src/units.c index 184dbe8a0df000008dba1d7003558d83b1f08cad..8e0ff08f1d92f47afbf44366acef7038fc8675c5 100644 --- a/src/units.c +++ b/src/units.c @@ -39,7 +39,6 @@ /* Includes. */ #include "const.h" #include "error.h" -#include "units.h" /** * @brief Initialises the UnitSystem structure with the constants given in @@ -47,20 +46,23 @@ * * @param us The UnitSystem to initialize. * @param params The parsed parameter file. + * @param category The section of the parameter file to read from. */ -void units_init(struct UnitSystem* us, const struct swift_params* params) { - - us->UnitMass_in_cgs = - parser_get_param_double(params, "UnitSystem:UnitMass_in_cgs"); - us->UnitLength_in_cgs = - parser_get_param_double(params, "UnitSystem:UnitLength_in_cgs"); - const double unitVelocity = - parser_get_param_double(params, "UnitSystem:UnitVelocity_in_cgs"); +void units_init(struct UnitSystem* us, const struct swift_params* params, + const char* category) { + + char buffer[200]; + sprintf(buffer, "%s:UnitMass_in_cgs", category); + us->UnitMass_in_cgs = parser_get_param_double(params, buffer); + sprintf(buffer, "%s:UnitLength_in_cgs", category); + us->UnitLength_in_cgs = parser_get_param_double(params, buffer); + sprintf(buffer, "%s:UnitVelocity_in_cgs", category); + const double unitVelocity = parser_get_param_double(params, buffer); us->UnitTime_in_cgs = us->UnitLength_in_cgs / unitVelocity; - us->UnitCurrent_in_cgs = - parser_get_param_double(params, "UnitSystem:UnitCurrent_in_cgs"); - us->UnitTemperature_in_cgs = - parser_get_param_double(params, "UnitSystem:UnitTemp_in_cgs"); + sprintf(buffer, "%s:UnitCurrent_in_cgs", category); + us->UnitCurrent_in_cgs = parser_get_param_double(params, buffer); + sprintf(buffer, "%s:UnitTemp_in_cgs", category); + us->UnitTemperature_in_cgs = parser_get_param_double(params, buffer); } /** @@ -331,7 +333,7 @@ void units_conversion_string(char* buffer, const struct UnitSystem* us, * the desired quantity. See conversionFactor() for a working example */ double units_general_conversion_factor(const struct UnitSystem* us, - float baseUnitsExponants[5]) { + const float baseUnitsExponants[5]) { double factor = 1.; int i; @@ -349,7 +351,7 @@ double units_general_conversion_factor(const struct UnitSystem* us, * the desired quantity. See conversionFactor() for a working example */ float units_general_h_factor(const struct UnitSystem* us, - float baseUnitsExponants[5]) { + const float baseUnitsExponants[5]) { float factor_exp = 0.f; factor_exp += -baseUnitsExponants[UNIT_MASS]; @@ -367,7 +369,7 @@ float units_general_h_factor(const struct UnitSystem* us, * the desired quantity. See conversionFactor() for a working example */ float units_general_a_factor(const struct UnitSystem* us, - float baseUnitsExponants[5]) { + const float baseUnitsExponants[5]) { float factor_exp = 0.f; factor_exp += baseUnitsExponants[UNIT_LENGTH]; @@ -385,7 +387,7 @@ float units_general_a_factor(const struct UnitSystem* us, * the desired quantity. See conversionFactor() for a working example */ void units_general_conversion_string(char* buffer, const struct UnitSystem* us, - float baseUnitsExponants[5]) { + const float baseUnitsExponants[5]) { char temp[14]; double a_exp = units_general_a_factor(us, baseUnitsExponants); double h_exp = units_general_h_factor(us, baseUnitsExponants); diff --git a/src/units.h b/src/units.h index 3e349dc16787cd4052a3e9205b21dce3c3732448..24e37e177480d7f84e41df1b73e2036aa00b7220 100644 --- a/src/units.h +++ b/src/units.h @@ -92,24 +92,25 @@ enum UnitConversionFactor { UNIT_CONV_TEMPERATURE }; -void units_init(struct UnitSystem*, const struct swift_params*); +void units_init(struct UnitSystem*, const struct swift_params*, + const char* category); double units_get_base_unit(const struct UnitSystem*, enum BaseUnits); const char* units_get_base_unit_symbol(enum BaseUnits); const char* units_get_base_unit_CGS_symbol(enum BaseUnits); double units_general_conversion_factor(const struct UnitSystem* us, - float baseUnitsExponants[5]); + const float baseUnitsExponants[5]); double units_conversion_factor(const struct UnitSystem* us, enum UnitConversionFactor unit); float units_general_h_factor(const struct UnitSystem* us, - float baseUnitsExponants[5]); + const float baseUnitsExponants[5]); float units_h_factor(const struct UnitSystem* us, enum UnitConversionFactor unit); float units_general_a_factor(const struct UnitSystem* us, - float baseUnitsExponants[5]); + const float baseUnitsExponants[5]); float units_a_factor(const struct UnitSystem* us, enum UnitConversionFactor unit); void units_general_conversion_string(char* buffer, const struct UnitSystem* us, - float baseUnitsExponants[5]); + const float baseUnitsExponants[5]); void units_conversion_string(char* buffer, const struct UnitSystem* us, enum UnitConversionFactor unit); diff --git a/src/vector.h b/src/vector.h index ef2b7c4b9e42ceb61dc38c3196c1819be652926f..fa311f121f7b702f2288be0d561e520b52330457 100644 --- a/src/vector.h +++ b/src/vector.h @@ -79,6 +79,12 @@ _mm512_set1_epi64(ptrs[0])), \ 1) #define vec_gather(base, offsets) _mm512_i32gather_ps(offsets.m, base, 1) +#define FILL_VEC(a) \ + { \ + .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a, .f[4] = a, .f[5] = a, \ + .f[6] = a, .f[7] = a, .f[8] = a, .f[9] = a, .f[10] = a, .f[11] = a, \ + .f[12] = a, .f[13] = a, .f[14] = a, .f[15] = a \ + } #elif defined(NO__AVX__) #define VECTORIZE #define VEC_SIZE 8 @@ -107,6 +113,11 @@ #define vec_dbl_ftoi(a) _mm256_cvttpd_epi32(a) #define vec_dbl_fmin(a, b) _mm256_min_pd(a, b) #define vec_dbl_fmax(a, b) _mm256_max_pd(a, b) +#define FILL_VEC(a) \ + { \ + .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a, .f[4] = a, .f[5] = a, \ + .f[6] = a, .f[7] = a \ + } #ifdef __AVX2__ #define VEC_HAVE_GATHER #define vec_gather(base, offsets) _mm256_i32gather_ps(base, offsets.m, 1) @@ -139,6 +150,8 @@ #define vec_dbl_ftoi(a) _mm_cvttpd_epi32(a) #define vec_dbl_fmin(a, b) _mm_min_pd(a, b) #define vec_dbl_fmax(a, b) _mm_max_pd(a, b) +#define FILL_VEC(a) \ + { .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a } #else #define VEC_SIZE 4 #endif diff --git a/src/version.c b/src/version.c index 27841a16019a69442e66b21c327f4241e440fb12..ab22c0d9ab8841dedd09aa83aa8803e468e69ce9 100644 --- a/src/version.c +++ b/src/version.c @@ -40,6 +40,9 @@ /* This object's header. */ #include "version.h" +/* Local headers. */ +#include "version_string.h" + /** * @brief Return the source code git revision * diff --git a/src/version.h.in b/src/version.h similarity index 82% rename from src/version.h.in rename to src/version.h index 7824e06b996dfbb4178b57f1b72365a1e2d24484..4b6b38d220b36e5cb340571cd89e6f52a7b21a8e 100644 --- a/src/version.h.in +++ b/src/version.h @@ -2,33 +2,24 @@ * This file is part of SWIFT. * Copyright (c) 2012 Matthieu Schaller (matthieu.schaller@durham.ac.uk). * Copyright (c) 2015 Peter W. Draper (p.w.draper@durham.ac.uk). - * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. - * + * ******************************************************************************/ #ifndef SWIFT_VERSION_H #define SWIFT_VERSION_H -/** - * @file version.h - * @brief Package version, git revision sha and compiler info. - */ - -#define PACKAGE_VERSION "@PACKAGE_VERSION@" -#define GIT_REVISION "@GIT_REVISION@" -#define GIT_BRANCH "@GIT_BRANCH@" - const char* package_description(void); const char* package_version(void); const char* git_revision(void); @@ -36,8 +27,8 @@ const char* git_branch(void); const char* compiler_name(void); const char* compiler_version(void); const char* mpi_version(void); -const char *hdf5_version(void); -const char *metis_version(void); +const char* hdf5_version(void); +const char* metis_version(void); void greetings(void); #endif /* SWIFT_VERSION_H */ diff --git a/src/version_string.h.in b/src/version_string.h.in new file mode 100644 index 0000000000000000000000000000000000000000..0bc282e0a3b6ba6fe5bab773c10c12e6d9277c2c --- /dev/null +++ b/src/version_string.h.in @@ -0,0 +1,32 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2012 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * Copyright (c) 2015 Peter W. Draper (p.w.draper@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_VERSION_STRING_H +#define SWIFT_VERSION_STRING_H + +/** + * @file version_string.h + * @brief Package version, git revision sha and compiler info. + */ + +#define PACKAGE_VERSION "@PACKAGE_VERSION@" +#define GIT_REVISION "@GIT_REVISION@" +#define GIT_BRANCH "@GIT_BRANCH@" + +#endif /* SWIFT_VERSION_STRING_H */ diff --git a/tests/Makefile.am b/tests/Makefile.am index b53a08615c5a8c7c2c31475bf7207522f8b9a58c..d0c132ad1b6dadd749a389fb71b873120b48139a 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -22,7 +22,7 @@ AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) # List of programs and scripts to run in the test suite TESTS = testGreetings testReading.sh testSingle testPair.sh testPairPerturbed.sh \ - test27cells.sh test27cellsPerturbed.sh testParser.sh + test27cells.sh test27cellsPerturbed.sh testParser.sh testKernel # List of test programs to compile check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ diff --git a/tests/test27cells.c b/tests/test27cells.c index 7915511eed50a229a94eda6bb338607099303421..9b18dd19231fc4925d8e95ad197ff36b0ea5a105 100644 --- a/tests/test27cells.c +++ b/tests/test27cells.c @@ -31,14 +31,6 @@ enum velocity_types { velocity_rotating }; -/** - * @brief Returns a random number (uniformly distributed) in [a,b[ - */ -double random_uniform(double a, double b) { - return (rand() / (double)RAND_MAX) * (b - a) + a; -} - - /** * @brief Constructs a cell and all of its particle in a valid state prior to * a DOPAIR or DOSELF calcuation. @@ -46,10 +38,12 @@ double random_uniform(double a, double b) { * @param n The cube root of the number of particles. * @param offset The position of the cell offset from (0,0,0). * @param size The cell size. - * @param h The smoothing length of the particles in units of the inter-particle separation. + * @param h The smoothing length of the particles in units of the inter-particle + *separation. * @param density The density of the fluid. * @param partId The running counter of IDs. - * @param pert The perturbation to apply to the particles in the cell in units of the inter-particle separation. + * @param pert The perturbation to apply to the particles in the cell in units + *of the inter-particle separation. * @param vel The type of velocity field (0, random, divergent, rotating) */ struct cell *make_cell(size_t n, double *offset, double size, double h, @@ -127,6 +121,8 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->ti_end_min = 1; cell->ti_end_max = 1; + shuffle_particles(cell->parts, cell->count); + cell->sorted = 0; cell->sort = NULL; cell->sortsize = 0; @@ -145,7 +141,6 @@ void clean_up(struct cell *ci) { * @brief Initializes all particles field to be ready for a density calculation */ void zero_particle_fields(struct cell *c) { - for (size_t pid = 0; pid < c->count; pid++) { c->parts[pid].rho = 0.f; c->parts[pid].rho_dh = 0.f; @@ -157,7 +152,6 @@ void zero_particle_fields(struct cell *c) { * @brief Ends the loop by adding the appropriate coefficients */ void end_calculation(struct cell *c) { - for (size_t pid = 0; pid < c->count; pid++) { hydro_end_density(&c->parts[pid], 1); } @@ -168,7 +162,6 @@ void end_calculation(struct cell *c) { */ void dump_particle_fields(char *fileName, struct cell *main_cell, struct cell **cells) { - FILE *file = fopen(fileName, "w"); /* Write header */ @@ -205,7 +198,6 @@ void dump_particle_fields(char *fileName, struct cell *main_cell, for (int i = 0; i < 3; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 3; ++k) { - struct cell *cj = cells[i * 9 + j * 3 + k]; if (cj == main_cell) continue; @@ -242,7 +234,6 @@ void runner_doself1_density(struct runner *r, struct cell *ci); /* And go... */ int main(int argc, char *argv[]) { - size_t runs = 0, particles = 0; double h = 1.2348, size = 1., rho = 1.; double perturbation = 0.; @@ -336,7 +327,6 @@ int main(int argc, char *argv[]) { for (int i = 0; i < 3; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 3; ++k) { - double offset[3] = {i * size, j * size, k * size}; cells[i * 9 + j * 3 + k] = make_cell(particles, offset, size, h, rho, &partId, perturbation, vel); @@ -349,7 +339,6 @@ int main(int argc, char *argv[]) { ticks time = 0; for (size_t i = 0; i < runs; ++i) { - /* Zero the fields */ for (int j = 0; j < 27; ++j) zero_particle_fields(cells[j]); diff --git a/tests/testKernel.c b/tests/testKernel.c index 5ad9cc81ea92e6ef9487489c5d560abf414e38df..5a5df57f293348cf865eb4c9fd88f22e906f089b 100644 --- a/tests/testKernel.c +++ b/tests/testKernel.c @@ -1,6 +1,7 @@ /******************************************************************************* * This file is part of SWIFT. - * Copyright (C) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * Copyright (C) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * James Willis (james.s.willis@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -17,21 +18,64 @@ * ******************************************************************************/ +#define NO__AVX__ +#include "vector.h" + #include "swift.h" +#include "kernel_hydro.h" + +#define numPoints 64 int main() { const float h = const_eta_kernel; - const int numPoints = 30; + float W[numPoints] = {0.f}; + float dW[numPoints] = {0.f}; + + printf("\nSerial Output\n"); + printf("-------------\n"); for (int i = 0; i < numPoints; ++i) { - const float x = i * 3.f / numPoints; - float W, dW; - kernel_deval(x / h, &W, &dW); + const float x = i * 2.5f / numPoints; + kernel_deval(x / h, &W[i], &dW[i]); + + printf("%2d: h= %f H= %f x=%f W(x,h)=%f dW(x,h)=%f\n", i, h, + h * kernel_gamma, x, W[i], dW[i]); + } + + printf("\nVector Output for VEC_SIZE=%d\n", VEC_SIZE); + printf("-------------\n"); + for (int i = 0; i < numPoints; i += VEC_SIZE) { + + vector vx, vx_h; + vector W_vec, dW_vec; + + for (int j = 0; j < VEC_SIZE; j++) { + vx.f[j] = (i + j) * 2.5f / numPoints; + } + + vx_h.v = vx.v / vec_set1(h); + + kernel_deval_vec(&vx_h, &W_vec, &dW_vec); + + for (int j = 0; j < VEC_SIZE; j++) { + printf("%2d: h= %f H= %f x=%f W(x,h)=%f dW(x,h)=%f\n", i + j, h, + h * kernel_gamma, vx.f[j], W_vec.f[j], dW_vec.f[j]); - printf("h= %f H= %f x=%f W(x,h)=%f\n", h, h * kernel_gamma, x, W); + if (fabsf(W_vec.f[j] - W[i + j]) > 2e-7) { + printf("Invalid value ! scalar= %e, vector= %e\n", W[i + j], + W_vec.f[j]); + return 1; + } + if (fabsf(dW_vec.f[j] - dW[i + j]) > 2e-7) { + printf("Invalid value ! scalar= %e, vector= %e\n", dW[i + j], + dW_vec.f[j]); + return 1; + } + } } + printf("\nAll values are consistent\n"); return 0; } diff --git a/tests/testPair.c b/tests/testPair.c index 6e46b577ca63a8d3c2edce888a7485af0949813d..07e576332f81b0471f35f09dfcda21ad535adc8b 100644 --- a/tests/testPair.c +++ b/tests/testPair.c @@ -24,13 +24,6 @@ #include <unistd.h> #include "swift.h" -/** - * Returns a random number (uniformly distributed) in [a,b[ - */ -double random_uniform(double a, double b) { - return (rand() / (double)RAND_MAX) * (b - a) + a; -} - /* n is both particles per axis and box size: * particles are generated on a mesh with unit spacing */ @@ -93,6 +86,8 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->ti_end_min = 1; cell->ti_end_max = 1; + shuffle_particles(cell->parts, cell->count); + cell->sorted = 0; cell->sort = NULL; cell->sortsize = 0; @@ -111,7 +106,6 @@ void clean_up(struct cell *ci) { * @brief Initializes all particles field to be ready for a density calculation */ void zero_particle_fields(struct cell *c) { - for (size_t pid = 0; pid < c->count; pid++) { c->parts[pid].rho = 0.f; c->parts[pid].rho_dh = 0.f; @@ -123,7 +117,6 @@ void zero_particle_fields(struct cell *c) { * @brief Dump all the particles to a file */ void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) { - FILE *file = fopen(fileName, "w"); /* Write header */ @@ -254,7 +247,6 @@ int main(int argc, char *argv[]) { time = 0; for (size_t i = 0; i < runs; ++i) { - /* Zero the fields */ zero_particle_fields(ci); zero_particle_fields(cj); diff --git a/theory/paper_pasc/pasc_paper.tex b/theory/paper_pasc/pasc_paper.tex index 87868a74d69023ee48d793cb2d0e29e71af777d6..a247192c9446475e86e0d24fff78a7a5fa557717 100644 --- a/theory/paper_pasc/pasc_paper.tex +++ b/theory/paper_pasc/pasc_paper.tex @@ -36,58 +36,48 @@ \begin{document} -%Conference -\conferenceinfo{PASC '16}{June 8--10, 2016, Lausanne, Switzerland} +\CopyrightYear{2016} +\setcopyright{acmlicensed} +\conferenceinfo{PASC '16,}{June 08 - 10, 2016, Lausanne, Switzerland} +\isbn{978-1-4503-4126-4/16/06}\acmPrice{\$15.00} +\doi{http://dx.doi.org/10.1145/2929908.2929916} + \title{{\ttlit SWIFT}: Using task-based parallelism, fully asynchronous communication, and graph partition-based domain decomposition for strong scaling on more than 100\,000 cores.} -% \title{{\ttlit SWIFT}: A task-based hybrid-parallel strongly scalable code for -% particle-based cosmological simulations} -\numberofauthors{6} +\numberofauthors{4} \author{ \alignauthor - Main~Author\\ - \affaddr{Institute}\\ - \affaddr{Department}\\ - \affaddr{University}\\ - \affaddr{City Postal code, Country}\\ - \email{\footnotesize \url{main.author@university.country}} -% \alignauthor -% Matthieu~Schaller\\ -% \affaddr{Institute for Computational Cosmology (ICC)}\\ -% \affaddr{Department of Physics}\\ -% \affaddr{Durham University}\\ -% \affaddr{Durham DH1 3LE, UK}\\ -% \email{\footnotesize \url{matthieu.schaller@durham.ac.uk}} -% \alignauthor -% Pedro~Gonnet\\ -% \affaddr{School of Engineering and Computing Sciences}\\ -% \affaddr{Durham University}\\ -% \affaddr{Durham DH1 3LE, UK}\\ -% \alignauthor -% Aidan~B.~G.~Chalk\\ -% \affaddr{School of Engineering and Computing Sciences}\\ -% \affaddr{Durham University}\\ -% \affaddr{Durham DH1 3LE, UK}\\ -% \and -% \alignauthor -% Peter~W.~Draper\\ -% \affaddr{Institute for Computational Cosmology (ICC)}\\ -% \affaddr{Department of Physics}\\ -% \affaddr{Durham University}\\ -% \affaddr{Durham DH1 3LE, UK}\\ -% %% \alignauthor -% %% Tom Theuns\\ -% %% \affaddr{Institute for Computational Cosmology}\\ -% %% \affaddr{Department of Physics}\\ -% %% \affaddr{Durham University}\\ -% %% \affaddr{Durham DH1 3LE, UK} + Matthieu~Schaller\\ + \affaddr{Institute for Computational Cosmology (ICC)}\\ + \affaddr{Department of Physics}\\ + \affaddr{Durham University}\\ + \affaddr{Durham DH1 3LE, UK}\\ + \email{\footnotesize \url{matthieu.schaller@durham.ac.uk}} + \alignauthor + Pedro~Gonnet\\ + \affaddr{School of Engineering and Computing Sciences}\\ + \affaddr{Durham University}\\ + \affaddr{Durham DH1 3LE, UK}\\ \vspace{1ex} + \affaddr{Google Switzerland GmbH}\\ + \affaddr{8002 Z\"urich, Switzerland}\\ +\and +\alignauthor + Aidan~B.~G.~Chalk\\ + \affaddr{School of Engineering and Computing Sciences}\\ + \affaddr{Durham University}\\ + \affaddr{Durham DH1 3LE, UK}\\ +\alignauthor + Peter~W.~Draper\\ + \affaddr{Institute for Computational Cosmology (ICC)}\\ + \affaddr{Department of Physics}\\ + \affaddr{Durham University}\\ + \affaddr{Durham DH1 3LE, UK}\\ } - \date{\today} \maketitle @@ -124,28 +114,12 @@ strong scaling on more than 100\,000 cores.} \end{itemize} - %% These three main aspects alongside improved cache-efficient - %% algorithms for neighbour finding allow the code to be 40x faster on - %% the same architecture than the standard code Gadget-2 widely used by - %% researchers. - In order to use these approaches, the code had to be re-written from scratch, and the algorithms therein adapted to the task-based paradigm. As a result, we can show upwards of 60\% parallel efficiency for moderate-sized problems when increasing the number of cores 512-fold, on both x86-based and Power8-based architectures. - - %% As a result, our code present excellent \emph{strong} - %% scaling on a variety of architectures, ranging from x86 Tier-2 systems to the - %% largest Tier-0 machines currently available. It displays, for instance, a - %% \emph{strong} scaling parallel efficiency of more than 60\% when going from - %% 512 to 131072 cores on a Blue Gene architecture. Similar results are obtained - %% on standard clusters of x86 CPUs. - - %% The task-based library, \qs, used as the backbone of the code is - %% itself also freely available and can be used in a wide variety of - %% other numerical problems. - + \end{abstract} @@ -356,12 +330,20 @@ SMP~Superscalar \cite{ref:SMPSuperscalar}, OpenMP~3.0 \cite{ref:Duran2009}, Intel's TBB \cite{ref:Reinders2007}, and, to some extent, Charm++ \cite{ref:Kale1993}. -For convenience, and to make experimenting with different scheduling -techniques easier, we chose to implement our own task scheduler +Since none of these existing taks-based libraries provided the flexibility +required to experiment with different scheduling and communication +techniques, (\swift is an interdisciplinary effort between +Computer Science and Astrophysics to study not only cosmological +phenomena, but also novel simulation algorithms and parallel computing techniques) +we chose to implement our own task scheduler in \swift, which has since been back-ported as the general-purpose \qs task scheduler \cite{gonnet2013quicksched}. +In \qs and \swift, task dependencies are specified explicitly, +as opposed to being implicitly derived from data dependencies, +allowing us to more easily build complex task hierarchies. This also allowed us to extend the scheduler with the concept of -task conflicts. +task conflicts and integrate the asynchronous communication +scheme described further on. Despite its advantages, and the variety of implementations, task-based parallelism is rarely used in @@ -374,9 +356,19 @@ which is usually not an option for large and complex codebases. Since we were re-implementing \swift from scratch, this was not an issue. The tree-based neighbour-finding described above was replaced with a more -task-friendly approach as described in \cite{ref:Gonnet2015}. -Particle interactions are computed within, and between pairs, of -hierarchical {\em cells} containing one or more particles. +task-friendly approach as described in \cite{ref:Gonnet2015}, in which +the domain is first decomposed into a grid of {\em cells} of edge length +larger or equal to the largest particle radius. +An initial set of interaction tasks is then defined over all cells and +pairs of neighbouring cells, such that if two particles are close enough to interact, +they are either in the same cell or they span a pair of neighbouring cells. +These initial interaction tasks are then refined by recursively +splitting cells that contain more than a certain number of particles +and replacing tasks that span a pair of split cells with tasks +spanning the neighboring sub-cells. +The resulting refined set of tasks contains all the cells and pairs of cells +over which particle interactions must be computed. + The dependencies between the tasks are set following equations \eqn{rho}, \eqn{dvdt}, and \eqn{dudt}, i.e.~such that for any cell, all the tasks computing the particle densities therein must have @@ -416,7 +408,10 @@ cores of a shared-memory machine \cite{ref:Gonnet2015}. and purple. Although the data for the yellow cell resides on Node~2, it is required for some tasks on Node~1, and thus needs to be copied over during - the computation using {\tt send}/{\tt recv} tasks (diamond-shaped).} + the computation using {\tt send}/{\tt recv} tasks + (diamond-shaped). \newline + Figure adapted from \cite{ref:Gonnet2015}. + } \label{tasks} \end{figure} @@ -871,7 +866,6 @@ Union's ERC Grant agreements 267291 ``Cosmiway'', and by {\sc intel} through establishment of the ICC as an {\sc intel} parallel computing centre (IPCC). -\nocite{*} \bibliographystyle{abbrv} \bibliography{biblio} diff --git a/theory/paper_pasc/run.sh b/theory/paper_pasc/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..b24e1b76a60858520ec2bc03b926ced642dc29e0 --- /dev/null +++ b/theory/paper_pasc/run.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +pdflatex pasc_paper.tex +bibtex pasc_paper.aux +pdflatex pasc_paper.tex +pdflatex pasc_paper.tex