diff --git a/.gitignore b/.gitignore index 5a860ed8d811a2a90785db4180a1ed9ea112e272..ba0028d163dae64143b0c43b465152e357d042b0 100644 --- a/.gitignore +++ b/.gitignore @@ -76,6 +76,9 @@ tests/testRiemannExact tests/testRiemannTRRS tests/testRiemannHLLC tests/testMatrixInversion +tests/testDump +tests/testLogger +tests/benchmarkInteractions theory/latex/swift.pdf theory/SPH/Kernels/kernels.pdf diff --git a/README b/README index 9ef773cd85b408ff822b3652c3fd5507e6d95d01..0d658c333f1328b423851031c5b5d202f43df3c2 100644 --- a/README +++ b/README @@ -28,6 +28,7 @@ Valid options are: -G Run with self-gravity -n {int} Execute a fixed number of time steps. When unset use the time_end parameter to stop. -s Run with SPH + -S Run with stars -t {int} The number of threads to use on each MPI rank. Defaults to 1 if not specified. -v [12] Increase the level of verbosity 1: MPI-rank 0 writes diff --git a/configure.ac b/configure.ac index 9fa9a1de591d63794dde5db6a8dd733cfcaada09..4b6308e96b81bbfd0a9256bb4914f1356fbfa6f8 100644 --- a/configure.ac +++ b/configure.ac @@ -351,7 +351,7 @@ AC_ARG_WITH([tcmalloc], [with_tcmalloc="no"] ) if test "x$with_tcmalloc" != "xno"; then - if test "x$with_tcmalloc" != "xyes" && test "x$with_tcmalloc" != "x"; then + if test "x$with_tcmalloc" != "xyes" -a "x$with_tcmalloc" != "x"; then tclibs="-L$with_tcmalloc -ltcmalloc" else tclibs="-ltcmalloc" @@ -361,7 +361,7 @@ if test "x$with_tcmalloc" != "xno"; then # Could just have the minimal version. 
if test "$have_tcmalloc" = "no"; then - if test "x$with_tcmalloc" != "xyes" && test "x$with_tcmalloc" != "x"; then + if test "x$with_tcmalloc" != "xyes" -a "x$with_tcmalloc" != "x"; then tclibs="-L$with_tcmalloc -ltcmalloc_minimal" else tclibs="-ltcmalloc_minimal" @@ -394,7 +394,7 @@ AC_ARG_WITH([profiler], [with_profiler="yes"] ) if test "x$with_profiler" != "xno"; then - if test "x$with_profiler" != "xyes" && test "x$with_profiler" != "x"; then + if test "x$with_profiler" != "xyes" -a "x$with_profiler" != "x"; then proflibs="-L$with_profiler -lprofiler" else proflibs="-lprofiler" @@ -411,6 +411,38 @@ fi AC_SUBST([PROFILER_LIBS]) AM_CONDITIONAL([HAVEPROFILER],[test -n "$PROFILER_LIBS"]) +# Check for jemalloc, another fast malloc that is good with contention. +have_jemalloc="no" +AC_ARG_WITH([jemalloc], + [AS_HELP_STRING([--with-jemalloc], + [use jemalloc library or specify the directory with lib @<:@yes/no@:>@] + )], + [with_jemalloc="$withval"], + [with_jemalloc="no"] +) +if test "x$with_jemalloc" != "xno"; then + if test "x$with_jemalloc" != "xyes" -a "x$with_jemalloc" != "x"; then + jelibs="-L$with_jemalloc -ljemalloc" + else + jelibs="-ljemalloc" + fi + AC_CHECK_LIB([jemalloc],[malloc_usable_size],[have_jemalloc="yes"],[have_jemalloc="no"], + $jelibs) + + if test "$have_jemalloc" = "yes"; then + JEMALLOC_LIBS="$jelibs" + else + JEMALLOC_LIBS="" + fi +fi +AC_SUBST([JEMALLOC_LIBS]) +AM_CONDITIONAL([HAVEJEMALLOC],[test -n "$JEMALLOC_LIBS"]) + +# Don't allow both tcmalloc and jemalloc. +if test "x$have_tcmalloc" != "xno" -a "x$have_jemalloc" != "xno"; then + AC_MSG_ERROR([Cannot use tcmalloc at same time as jemalloc]) +fi + # Check for HDF5. This is required. 
AX_LIB_HDF5 @@ -734,9 +766,6 @@ case "$with_potential" in isothermal) AC_DEFINE([EXTERNAL_POTENTIAL_ISOTHERMAL], [1], [Isothermal external potential]) ;; - softened-isothermal) - AC_DEFINE([EXTERNAL_POTENTIAL_SOFTENED_ISOTHERMAL], [1], [Softened isothermal external potential]) - ;; disc-patch) AC_DEFINE([EXTERNAL_POTENTIAL_DISC_PATCH], [1], [Disc-patch external potential]) ;; @@ -781,6 +810,7 @@ AC_MSG_RESULT([ FFTW3 enabled : $have_fftw3 libNUMA enabled : $have_numa Using tcmalloc : $have_tcmalloc + Using jemalloc : $have_jemalloc CPU profiler : $have_profiler Hydro scheme : $with_hydro @@ -795,5 +825,8 @@ AC_MSG_RESULT([ Debugging checks : $enable_debugging_checks ]) +# Make sure the latest git revision string gets included +touch src/version.c + # Generate output. AC_OUTPUT diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 2a5aeba7d1db0b1e1e56a9a6eed3059aba6a09ff..0df1f91194b6d1e7e98cb1b75be7d3eaaca7fc32 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -762,6 +762,7 @@ WARN_LOGFILE = INPUT = @top_srcdir@ @top_srcdir@/src @top_srcdir@/tests @top_srcdir@/examples INPUT += @top_srcdir@/src/hydro/Minimal INPUT += @top_srcdir@/src/gravity/Default +INPUT += @top_srcdir@/src/stars/Default INPUT += @top_srcdir@/src/riemann INPUT += @top_srcdir@/src/potential/point_mass INPUT += @top_srcdir@/src/cooling/const_du diff --git a/examples/CoolingBox/coolingBox.yml b/examples/CoolingBox/coolingBox.yml index b90ae61e5c862753227b82ebcec4cbf8f3083fab..7b8dbf4bddf8d994dabf34ea68ea55b32a3b4d8a 100644 --- a/examples/CoolingBox/coolingBox.yml +++ b/examples/CoolingBox/coolingBox.yml @@ -1,27 +1,27 @@ # Define the system of units to use internally. 
InternalUnitSystem: - UnitMass_in_cgs: 2.0e33 # Solar masses - UnitLength_in_cgs: 3.0857e21 # Kiloparsecs - UnitVelocity_in_cgs: 1.0e5 # Time unit is cooling time - UnitCurrent_in_cgs: 1 # Amperes - UnitTemp_in_cgs: 1 # Kelvin + UnitMass_in_cgs: 2.0e33 # Solar masses + UnitLength_in_cgs: 3.0857e21 # Kiloparsecs + UnitVelocity_in_cgs: 1.0e5 # Kilometers per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin # Parameters governing the time integration TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). - time_end: 4. # The end time of the simulation (in internal units). - dt_min: 1e-4 # The minimal time-step size of the simulation (in internal units). - dt_max: 1e-4 # The maximal time-step size of the simulation (in internal units). + time_end: 0.25 # The end time of the simulation (in internal units). + dt_min: 1e-5 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). # Parameters governing the snapshots Snapshots: basename: coolingBox # Common part of the name of output files time_first: 0. 
# Time of the first output (in internal units) - delta_time: 1.0e-1 # Time difference between consecutive outputs (in internal units) + delta_time: 1e-2 # Time difference between consecutive outputs (in internal units) # Parameters governing the conserved quantities statistics Statistics: - delta_time: 1e-2 # Time between statistics output + delta_time: 1e-3 # Time between statistics output # Parameters for the hydrodynamics scheme SPH: @@ -35,8 +35,8 @@ InitialConditions: # Dimensionless pre-factor for the time-step condition LambdaCooling: - lambda_cgs: 1.0e-22 # Cooling rate (in cgs units) - minimum_temperature: 1.0e4 # Minimal temperature (Kelvin) - mean_molecular_weight: 0.59 # Mean molecular weight - hydrogen_mass_abundance: 0.75 # Hydrogen mass abundance (dimensionless) - cooling_tstep_mult: 1.0 # Dimensionless pre-factor for the time-step condition + lambda_cgs: 1.0e-22 # Cooling rate (in cgs units) + minimum_temperature: 1.0e4 # Minimal temperature (Kelvin) + mean_molecular_weight: 0.59 # Mean molecular weight + hydrogen_mass_abundance: 0.75 # Hydrogen mass abundance (dimensionless) + cooling_tstep_mult: 1.0 # Dimensionless pre-factor for the time-step condition diff --git a/examples/CoolingBox/energy_plot.py b/examples/CoolingBox/energy_plot.py index 00e6fd1dfa0ee9bfbb9b5147282776f635b060f5..c8948e7e209c2786ffdecbb2b8b606e73d703238 100644 --- a/examples/CoolingBox/energy_plot.py +++ b/examples/CoolingBox/energy_plot.py @@ -1,99 +1,128 @@ +import matplotlib +matplotlib.use("Agg") +from pylab import * +import h5py + +# Plot parameters +params = {'axes.labelsize': 10, +'axes.titlesize': 10, +'font.size': 12, +'legend.fontsize': 12, +'xtick.labelsize': 10, +'ytick.labelsize': 10, +'text.usetex': True, + 'figure.figsize' : (3.15,3.15), +'figure.subplot.left' : 0.145, +'figure.subplot.right' : 0.99, +'figure.subplot.bottom' : 0.11, +'figure.subplot.top' : 0.99, +'figure.subplot.wspace' : 0.15, +'figure.subplot.hspace' : 0.12, +'lines.markersize' : 6, 
+'lines.linewidth' : 3., +'text.latex.unicode': True +} +rcParams.update(params) +rc('font',**{'family':'sans-serif','sans-serif':['Times']}) + + import numpy as np -import matplotlib.pyplot as plt import h5py as h5 import sys +# File containing the total energy stats_filename = "./energy.txt" + +# First snapshot snap_filename = "coolingBox_000.hdf5" -#plot_dir = "./" -#some constants in cgs units +# Some constants in cgs units k_b = 1.38E-16 #boltzmann m_p = 1.67e-24 #proton mass -#initial conditions set in makeIC.py -rho = 3.2e3 -P = 4.5e6 -#n_H_cgs = 0.0001 -gamma = 5./3. + +# Initial conditions set in makeIC.py T_init = 1.0e5 -#Read the units parameters from the snapshot +# Read the initial state of the gas f = h5.File(snap_filename,'r') +rho = np.mean(f["/PartType0/Density"]) +pressure = np.mean(f["/PartType0/Pressure"]) + +# Read the units parameters from the snapshot units = f["InternalCodeUnits"] unit_mass = units.attrs["Unit mass in cgs (U_M)"] unit_length = units.attrs["Unit length in cgs (U_L)"] unit_time = units.attrs["Unit time in cgs (U_t)"] + +# Read the properties of the cooling function parameters = f["Parameters"] cooling_lambda = float(parameters.attrs["LambdaCooling:lambda_cgs"]) min_T = float(parameters.attrs["LambdaCooling:minimum_temperature"]) mu = float(parameters.attrs["LambdaCooling:mean_molecular_weight"]) X_H = float(parameters.attrs["LambdaCooling:hydrogen_mass_abundance"]) -#get number of particles -header = f["Header"] -n_particles = header.attrs["NumPart_ThisFile"][0] +# Read the adiabatic index +gamma = float(f["HydroScheme"].attrs["Adiabatic index"]) + +print "Initial density :", rho +print "Initial pressure:", pressure +print "Adiabatic index :", gamma -#read energy and time arrays +# Read energy and time arrays array = np.genfromtxt(stats_filename,skip_header = 1) time = array[:,0] -kin_plus_therm = array[:,2] -radiated = array[:,6] total_mass = array[:,1] - -#ignore first row where there are just zeros -time = time[1:] 
-kin_plus_therm = kin_plus_therm[1:] -radiated = radiated[1:] -total_mass = total_mass[1:] - -total_energy = kin_plus_therm + radiated +total_energy = array[:,2] +kinetic_energy = array[:,3] +internal_energy = array[:,4] +radiated_energy = array[:,8] initial_energy = total_energy[0] -#conversions to cgs + +# Conversions to cgs rho_cgs = rho * unit_mass / (unit_length)**3 time_cgs = time * unit_time -initial_energy_cgs = initial_energy/total_mass[0] * unit_length**2 / (unit_time)**2 -n_H_cgs = X_H * rho_cgs / m_p +total_energy_cgs = total_energy / total_mass[0] * unit_length**2 / (unit_time)**2 +kinetic_energy_cgs = kinetic_energy / total_mass[0] * unit_length**2 / (unit_time)**2 +internal_energy_cgs = internal_energy / total_mass[0] * unit_length**2 / (unit_time)**2 +radiated_energy_cgs = radiated_energy / total_mass[0] * unit_length**2 / (unit_time)**2 -#find the energy floor +# Find the energy floor u_floor_cgs = k_b * min_T / (mu * m_p * (gamma - 1.)) -#find analytic solution -analytic_time_cgs = np.linspace(0,time_cgs[-1],1000) +# Find analytic solution +initial_energy_cgs = initial_energy/total_mass[0] * unit_length**2 / (unit_time)**2 +n_H_cgs = X_H * rho_cgs / m_p du_dt_cgs = -cooling_lambda * n_H_cgs**2 / rho_cgs +cooling_time_cgs = (initial_energy_cgs/(-du_dt_cgs))[0] +analytic_time_cgs = np.linspace(0, cooling_time_cgs * 1.8, 1000) u_analytic_cgs = du_dt_cgs*analytic_time_cgs + initial_energy_cgs -cooling_time_cgs = initial_energy_cgs/(-du_dt_cgs) - -for i in range(u_analytic_cgs.size): - if u_analytic_cgs[i]<u_floor_cgs: - u_analytic_cgs[i] = u_floor_cgs - -#rescale analytic solution -u_analytic = u_analytic_cgs/initial_energy_cgs - -#put time in units of cooling_time -time=time_cgs/cooling_time_cgs -analytic_time = analytic_time_cgs/cooling_time_cgs - -#rescale (numerical) energy by initial energy -radiated /= initial_energy -kin_plus_therm /= initial_energy -total_energy = kin_plus_therm + radiated -plt.plot(time,kin_plus_therm,'kd',label = "Kinetic + 
thermal energy") -plt.plot(time,radiated,'bo',label = "Radiated energy") -plt.plot(time,total_energy,'g',label = "Total energy") -plt.plot(analytic_time,u_analytic,'r',lw = 2.0,label = "Analytic Solution") -#plt.plot(analytic_time,1-u_analytic,'k',lw = 2.0) -#plt.plot((cooling_time,cooling_time),(0,1),'b',label = "Cooling time") -#plt.plot((time[1]-time_cgs[0],time_cgs[1]-time_cgs[0]),(0,1),'m',label = "First output") -#plt.title(r"$n_H = %1.1e \, \mathrm{cm}^{-3}$" %n_H_cgs) -plt.xlabel("Time / cooling time") -plt.ylabel("Energy / Initial energy") -#plt.ylim(0,1.1) -plt.ylim(0.999,1.001) -#plt.xlim(0,min(10,time[-1])) -plt.legend(loc = "upper right") -if (int(sys.argv[1])==0): - plt.show() -else: - plt.savefig(full_plot_filename,format = "png") - plt.close() +u_analytic_cgs[u_analytic_cgs < u_floor_cgs] = u_floor_cgs + +print "Cooling time:", cooling_time_cgs, "[s]" + +# Read snapshots +u_snapshots_cgs = zeros(25) +t_snapshots_cgs = zeros(25) +for i in range(25): + snap = h5.File("coolingBox_%0.3d.hdf5"%i,'r') + u_snapshots_cgs[i] = sum(snap["/PartType0/InternalEnergy"][:] * snap["/PartType0/Masses"][:]) / total_mass[0] * unit_length**2 / (unit_time)**2 + t_snapshots_cgs[i] = snap["/Header"].attrs["Time"] * unit_time + + +figure() +plot(time_cgs, total_energy_cgs, 'r-', lw=1.6, label="Gas total energy") +plot(t_snapshots_cgs, u_snapshots_cgs, 'rD', ms=3) +plot(time_cgs, radiated_energy_cgs, 'g-', lw=1.6, label="Radiated energy") +plot(time_cgs, total_energy_cgs + radiated_energy_cgs, 'b-', lw=0.6, label="Gas total + radiated") + +plot(analytic_time_cgs, u_analytic_cgs, '--', color='k', alpha=0.8, lw=1.0, label="Analytic solution") + +legend(loc="upper right", fontsize=8, frameon=False, handlelength=3, ncol=1) +xlabel("${\\rm{Time~[s]}}$", labelpad=0) +ylabel("${\\rm{Energy~[erg]}}$") +xlim(0, 1.5*cooling_time_cgs) +ylim(0, 1.5*u_analytic_cgs[0]) + +savefig("energy.png", dpi=200) + + diff --git a/examples/CoolingBox/getGlass.sh b/examples/CoolingBox/getGlass.sh new 
file mode 100755 index 0000000000000000000000000000000000000000..ffd92e88deae6e91237059adac2a6c2067caee46 --- /dev/null +++ b/examples/CoolingBox/getGlass.sh @@ -0,0 +1,2 @@ +#!/bin/bash +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/glassCube_32.hdf5 diff --git a/examples/CoolingBox/makeIC.py b/examples/CoolingBox/makeIC.py index 5de012a17af4eef71e56548602e7956faef529f5..f863e174b1fcd404ae178fe324c7a165598b4af0 100644 --- a/examples/CoolingBox/makeIC.py +++ b/examples/CoolingBox/makeIC.py @@ -1,6 +1,6 @@ ############################################################################### # This file is part of SWIFT. - # Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk), + # Copyright (c) 2016 Stefan Arridge (stefan.arridge@durham.ac.uk) # Matthieu Schaller (matthieu.schaller@durham.ac.uk) # # This program is free software: you can redistribute it and/or modify @@ -22,13 +22,11 @@ import h5py import sys from numpy import * -# Generates a swift IC file containing a cartesian distribution of particles -# at a constant density and pressure in a cubic box +# Generates a SWIFT IC file with a constant density and pressure # Parameters periodic= 1 # 1 For periodic box boxSize = 1 # 1 kiloparsec -L = int(sys.argv[1]) # Number of particles along one axis rho = 3.2e3 # Density in code units (3.2e6 is 0.1 hydrogen atoms per cm^3) P = 4.5e6 # Pressure in code units (at 10^5K) gamma = 5./3. 
# Gas adiabatic index @@ -36,12 +34,17 @@ eta = 1.2349 # 48 ngbs with cubic spline kernel fileName = "coolingBox.hdf5" #--------------------------------------------------- -numPart = L**3 -mass = boxSize**3 * rho / numPart -print mass -internalEnergy = P / ((gamma - 1.)*rho) -#-------------------------------------------------- +# Read id, position and h from glass +glass = h5py.File("glassCube_32.hdf5", "r") +ids = glass["/PartType0/ParticleIDs"][:] +pos = glass["/PartType0/Coordinates"][:,:] * boxSize +h = glass["/PartType0/SmoothingLength"][:] * boxSize + +# Compute basic properties +numPart = size(pos) / 3 +mass = boxSize**3 * rho / numPart +internalEnergy = P / ((gamma - 1.) * rho) #File file = h5py.File(fileName, 'w') @@ -57,11 +60,11 @@ grp.attrs["NumFilesPerSnapshot"] = 1 grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] grp.attrs["Flag_Entropy_ICs"] = 0 -#Runtime parameters +# Runtime parameters grp = file.create_group("/RuntimePars") grp.attrs["PeriodicBoundariesOn"] = periodic -#Units +# Units grp = file.create_group("/Units") grp.attrs["Unit length in cgs (U_L)"] = 3.0857e21 grp.attrs["Unit mass in cgs (U_M)"] = 2.0e33 @@ -75,35 +78,26 @@ grp = file.create_group("/PartType0") v = zeros((numPart, 3)) ds = grp.create_dataset('Velocities', (numPart, 3), 'f') ds[()] = v -v = zeros(1) m = full((numPart, 1), mass) ds = grp.create_dataset('Masses', (numPart,1), 'f') ds[()] = m -m = zeros(1) -h = full((numPart, 1), eta * boxSize / L) -ds = grp.create_dataset('SmoothingLength', (numPart,1), 'f') +h = reshape(h, (numPart, 1)) +ds = grp.create_dataset('SmoothingLength', (numPart, 1), 'f') ds[()] = h -h = zeros(1) u = full((numPart, 1), internalEnergy) ds = grp.create_dataset('InternalEnergy', (numPart,1), 'f') ds[()] = u -u = zeros(1) - -ids = linspace(0, numPart, numPart, endpoint=False).reshape((numPart,1)) +ids = reshape(ids, (numPart, 1)) ds = grp.create_dataset('ParticleIDs', (numPart, 1), 'L') -ds[()] = ids + 1 -x = ids % L; -y = ((ids - x) / L) % L; -z 
= (ids - x - L * y) / L**2; -coords = zeros((numPart, 3)) -coords[:,0] = z[:,0] * boxSize / L + boxSize / (2*L) -coords[:,1] = y[:,0] * boxSize / L + boxSize / (2*L) -coords[:,2] = x[:,0] * boxSize / L + boxSize / (2*L) +ds[()] = ids + ds = grp.create_dataset('Coordinates', (numPart, 3), 'd') -ds[()] = coords +ds[()] = pos file.close() + +print numPart diff --git a/examples/CoolingBox/run.sh b/examples/CoolingBox/run.sh index cb3264808d57b435c9f65bf5a684a94ff9f878fd..19e787df716145c1f5aa7744f4c7204c1c7f1064 100755 --- a/examples/CoolingBox/run.sh +++ b/examples/CoolingBox/run.sh @@ -1,14 +1,20 @@ + #!/bin/bash # Generate the initial conditions if they are not present. -echo "Generating initial conditions for the cooling box example..." - -python makeIC.py 10 - -../swift -s -C -t 16 coolingBox.yml - -#-C 2>&1 | tee output.log +if [ ! -e glassCube_32.hdf5 ] +then + echo "Fetching initial glass file for the cooling box example..." + ./getGlass.sh +fi +if [ ! -e coolingBox.hdf5 ] +then + echo "Generating initial conditions for the cooling box example..." + python makeIC.py +fi -python energy_plot.py 0 +# Run SWIFT +../swift -s -C -t 1 coolingBox.yml -#python test_energy_conservation.py 0 +# Check energy conservation and cooling rate +python energy_plot.py diff --git a/examples/CoolingBox/test_energy_conservation.py b/examples/CoolingBox/test_energy_conservation.py deleted file mode 100644 index bb15071c0668d71580015351ce75ce18390c8cf0..0000000000000000000000000000000000000000 --- a/examples/CoolingBox/test_energy_conservation.py +++ /dev/null @@ -1,116 +0,0 @@ -import numpy as np -import matplotlib.pyplot as plt -import h5py as h5 -import sys - -stats_filename = "./energy.txt" -snap_filename = "coolingBox_000.hdf5" -#plot_dir = "./" -n_snaps = 41 -time_end = 4.0 -dt_snap = 0.1 -#some constants in cgs units -k_b = 1.38E-16 #boltzmann -m_p = 1.67e-24 #proton mass -#initial conditions set in makeIC.py -rho = 4.8e3 -P = 4.5e6 -#n_H_cgs = 0.0001 -gamma = 5./3. 
-T_init = 1.0e5 - -#find the sound speed - -#Read the units parameters from the snapshot -f = h5.File(snap_filename,'r') -units = f["InternalCodeUnits"] -unit_mass = units.attrs["Unit mass in cgs (U_M)"] -unit_length = units.attrs["Unit length in cgs (U_L)"] -unit_time = units.attrs["Unit time in cgs (U_t)"] -parameters = f["Parameters"] -cooling_lambda = float(parameters.attrs["LambdaCooling:lambda_cgs"]) -min_T = float(parameters.attrs["LambdaCooling:minimum_temperature"]) -mu = float(parameters.attrs["LambdaCooling:mean_molecular_weight"]) -X_H = float(parameters.attrs["LambdaCooling:hydrogen_mass_abundance"]) - -#get number of particles -header = f["Header"] -n_particles = header.attrs["NumPart_ThisFile"][0] -#read energy and time arrays -array = np.genfromtxt(stats_filename,skip_header = 1) -time = array[:,0] -total_energy = array[:,2] -total_mass = array[:,1] - -time = time[1:] -total_energy = total_energy[1:] -total_mass = total_mass[1:] - -#conversions to cgs -rho_cgs = rho * unit_mass / (unit_length)**3 -time_cgs = time * unit_time -u_init_cgs = total_energy[0]/(total_mass[0]) * unit_length**2 / (unit_time)**2 -n_H_cgs = X_H * rho_cgs / m_p - -#find the sound speed in cgs -c_s = np.sqrt((gamma - 1.)*k_b*T_init/(mu*m_p)) -#assume box size is unit length -sound_crossing_time = unit_length/c_s - -print "Sound speed = %g cm/s" %c_s -print "Sound crossing time = %g s" %sound_crossing_time -#find the energy floor -u_floor_cgs = k_b * min_T / (mu * m_p * (gamma - 1.)) -#find analytic solution -analytic_time_cgs = np.linspace(time_cgs[0],time_cgs[-1],1000) -du_dt_cgs = -cooling_lambda * n_H_cgs**2 / rho_cgs -u_analytic = du_dt_cgs*(analytic_time_cgs - analytic_time_cgs[0]) + u_init_cgs -cooling_time = u_init_cgs/(-du_dt_cgs) - -#put time in units of sound crossing time -time=time_cgs/sound_crossing_time -analytic_time = analytic_time_cgs/sound_crossing_time -#rescale energy to initial energy -total_energy /= total_energy[0] -u_analytic /= u_init_cgs -u_floor_cgs 
/= u_init_cgs -# plot_title = r"$\Lambda \, = \, %1.1g \mathrm{erg}\mathrm{cm^3}\mathrm{s^{-1}} \, \, T_{init} = %1.1g\mathrm{K} \, \, T_{floor} = %1.1g\mathrm{K} \, \, n_H = %1.1g\mathrm{cm^{-3}}$" %(cooling_lambda,T_init,T_floor,n_H) -# plot_filename = "energy_plot_creasey_no_cooling_T_init_1p0e5_n_H_0p1.png" -#analytic_solution = np.zeros(n_snaps-1) -for i in range(u_analytic.size): - if u_analytic[i]<u_floor_cgs: - u_analytic[i] = u_floor_cgs -plt.plot(time-time[0],total_energy,'k',label = "Numerical solution from energy.txt") -plt.plot(analytic_time-analytic_time[0],u_analytic,'r',lw = 2.0,label = "Analytic Solution") - -#now get energies from the snapshots -snapshot_time = np.linspace(0,time_end,num = n_snaps) -snapshot_time = snapshot_time[1:] -snapshot_time_cgs = snapshot_time * unit_time -snapshot_time = snapshot_time_cgs/ sound_crossing_time -snapshot_time -= snapshot_time[0] -snapshot_energy = np.zeros(n_snaps) -for i in range(0,n_snaps): - snap_filename = "coolingBox_%03d.hdf5" %i - f = h5.File(snap_filename,'r') - snapshot_internal_energy_array = np.array(f["PartType0/InternalEnergy"]) - total_internal_energy = np.sum(snapshot_internal_energy_array) - velocity_array = np.array(f["PartType0/Velocities"]) - total_kinetic_energy = 0.5*np.sum(velocity_array**2) - snapshot_energy[i] = total_internal_energy + total_kinetic_energy -snapshot_energy/=snapshot_energy[0] -snapshot_energy = snapshot_energy[1:] - -plt.plot(snapshot_time,snapshot_energy,'bd',label = "Numerical solution from snapshots") - -#plt.title(r"$n_H = %1.1e \, \mathrm{cm}^{-3}$" %n_H_cgs) -plt.xlabel("Time (sound crossing time)") -plt.ylabel("Energy/Initial energy") -plt.ylim(0.99,1.01) -#plt.xlim(0,min(10,time[-1])) -plt.legend(loc = "upper right") -if (int(sys.argv[1])==0): - plt.show() -else: - plt.savefig(full_plot_filename,format = "png") - plt.close() diff --git a/examples/CoolingHalo/cooling_halo.yml b/examples/CoolingHalo/cooling_halo.yml index 
c06b099eb0dd06d39040e0ecc8e8f1320a89ac6b..e8978ad6c96017d9b5fbe35346555e6b59bc7e7d 100644 --- a/examples/CoolingHalo/cooling_halo.yml +++ b/examples/CoolingHalo/cooling_halo.yml @@ -37,7 +37,7 @@ InitialConditions: shift_z: 0. # External potential parameters -SoftenedIsothermalPotential: +IsothermalPotential: position_x: 0. # location of centre of isothermal potential in internal units position_y: 0. position_z: 0. diff --git a/examples/CoolingHaloWithSpin/cooling_halo.yml b/examples/CoolingHaloWithSpin/cooling_halo.yml index 684dd11fcf7adc9477d199e599dfb5b76faa91f6..fc5094f9f5dcae62bb936d2b5510f41e3c70504e 100644 --- a/examples/CoolingHaloWithSpin/cooling_halo.yml +++ b/examples/CoolingHaloWithSpin/cooling_halo.yml @@ -9,8 +9,8 @@ InternalUnitSystem: # Parameters governing the time integration TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). - time_end: 10. # The end time of the simulation (in internal units). - dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). + time_end: 10. # The end time of the simulation (in internal units). + dt_min: 1e-5 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-1 # The maximal time-step size of the simulation (in internal units). # Parameters governing the conserved quantities statistics @@ -34,13 +34,13 @@ InitialConditions: file_name: CoolingHalo.hdf5 # The file to read # External potential parameters -SoftenedIsothermalPotential: - position_x: 0. # location of centre of isothermal potential in internal units +IsothermalPotential: + position_x: 0. # Location of centre of isothermal potential in internal units position_y: 0. position_z: 0. - vrot: 200. # rotation speed of isothermal potential in internal units - timestep_mult: 0.03 # controls time step - epsilon: 1.0 #softening for the isothermal potential + vrot: 200. 
# Rotation speed of isothermal potential in internal units + timestep_mult: 0.03 # Controls time step + epsilon: 1.0 # Softening for the isothermal potential # Cooling parameters LambdaCooling: diff --git a/examples/CoolingHaloWithSpin/density_profile.py b/examples/CoolingHaloWithSpin/density_profile.py index ea282328e5b75530a128eab2dec5f065e46cf819..fb88ddd6aea71603a6f6fcb36b13771106737e6a 100644 --- a/examples/CoolingHaloWithSpin/density_profile.py +++ b/examples/CoolingHaloWithSpin/density_profile.py @@ -28,7 +28,7 @@ unit_mass_cgs = float(params.attrs["InternalUnitSystem:UnitMass_in_cgs"]) unit_length_cgs = float(params.attrs["InternalUnitSystem:UnitLength_in_cgs"]) unit_velocity_cgs = float(params.attrs["InternalUnitSystem:UnitVelocity_in_cgs"]) unit_time_cgs = unit_length_cgs / unit_velocity_cgs -v_c = float(params.attrs["SoftenedIsothermalPotential:vrot"]) +v_c = float(params.attrs["IsothermalPotential:vrot"]) v_c_cgs = v_c * unit_velocity_cgs lambda_cgs = float(params.attrs["LambdaCooling:lambda_cgs"]) X_H = float(params.attrs["LambdaCooling:hydrogen_mass_abundance"]) @@ -101,18 +101,18 @@ for i in range(n_snaps): rho_0 = density[0] rho_analytic_init = rho_0 * (radial_bin_mids/r_0)**(-2) - plt.plot(radial_bin_mids,density/rho_analytic_init,'ko',label = "Average density of shell") - #plt.plot(t,rho_analytic,label = "Initial analytic density profile" + plt.plot(radial_bin_mids,density,'ko',label = "Average density of shell") + plt.plot(radial_bin_mids,rho_analytic_init,label = "Initial analytic density profile") plt.xlabel(r"$r / r_{vir}$") - plt.ylabel(r"$\rho / \rho_{init})$") + plt.ylabel(r"$(\rho / \rho_{init})$") plt.title(r"$\mathrm{Time}= %.3g \, s \, , \, %d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(snap_time_cgs,N,v_c)) #plt.ylim((1.e-2,1.e1)) - plt.plot((r_cool_over_r_vir,r_cool_over_r_vir),(0,20),'r',label = "Cooling radius") + plt.plot((r_cool_over_r_vir,r_cool_over_r_vir),(1.0e-4,1.0e4),'r',label = "Cooling radius") 
plt.xlim((radial_bin_mids[0],max_r)) - plt.ylim((0,20)) - plt.plot((0,max_r),(1,1)) + plt.ylim((1.0e-4,1.0e4)) + #plt.plot((0,max_r),(1,1)) #plt.xscale('log') - #plt.yscale('log') + plt.yscale('log') plt.legend(loc = "upper right") plot_filename = "density_profile_%03d.png" %i plt.savefig(plot_filename,format = "png") diff --git a/examples/CoolingHaloWithSpin/internal_energy_profile.py b/examples/CoolingHaloWithSpin/internal_energy_profile.py index a3e470cc24a939c9bc915371e927d9bd39196bff..5f71d69ca7a978de242559f84ec390faa86a27f0 100644 --- a/examples/CoolingHaloWithSpin/internal_energy_profile.py +++ b/examples/CoolingHaloWithSpin/internal_energy_profile.py @@ -46,7 +46,7 @@ unit_mass_cgs = float(params.attrs["InternalUnitSystem:UnitMass_in_cgs"]) unit_length_cgs = float(params.attrs["InternalUnitSystem:UnitLength_in_cgs"]) unit_velocity_cgs = float(params.attrs["InternalUnitSystem:UnitVelocity_in_cgs"]) unit_time_cgs = unit_length_cgs / unit_velocity_cgs -v_c = float(params.attrs["SoftenedIsothermalPotential:vrot"]) +v_c = float(params.attrs["IsothermalPotential:vrot"]) v_c_cgs = v_c * unit_velocity_cgs lambda_cgs = float(params.attrs["LambdaCooling:lambda_cgs"]) X_H = float(params.attrs["LambdaCooling:hydrogen_mass_abundance"]) diff --git a/examples/CoolingHaloWithSpin/makeIC.py b/examples/CoolingHaloWithSpin/makeIC.py index 8970fbaa70578532a4f41bab7a096d8fa3565d26..a6d57868ad7542498b27007a5c3ef9234b9feb84 100644 --- a/examples/CoolingHaloWithSpin/makeIC.py +++ b/examples/CoolingHaloWithSpin/makeIC.py @@ -36,6 +36,7 @@ h = 0.67777 # hubble parameter gamma = 5./3. eta = 1.2349 spin_lambda = 0.05 #spin parameter +f_b = 0.2 #baryon fraction # First set unit velocity and then the circular velocity parameter for the isothermal potential const_unit_velocity_in_cgs = 1.e5 #kms^-1 @@ -99,6 +100,8 @@ grp.attrs["PeriodicBoundariesOn"] = periodic # set seed for random number np.random.seed(1234) +gas_mass = f_b * np.sqrt(3.) / 2. 
#virial mass of halo is 1, virial radius is 1, enclosed mass scales with r +gas_particle_mass = gas_mass / float(N) # Positions # r^(-2) distribution corresponds to uniform distribution in radius @@ -164,12 +167,12 @@ N = x_coords.size print "Number of particles in the box = " , N #make the coords and radius arrays again -coords_2 = np.zeros((N,3)) -coords_2[:,0] = x_coords -coords_2[:,1] = y_coords -coords_2[:,2] = z_coords +coords= np.zeros((N,3)) +coords[:,0] = x_coords +coords[:,1] = y_coords +coords[:,2] = z_coords -radius = np.sqrt((coords_2[:,0]-boxSize/2.)**2 + (coords_2[:,1]-boxSize/2.)**2 + (coords_2[:,2]-boxSize/2.)**2) +radius = np.sqrt((coords[:,0]-boxSize/2.)**2 + (coords[:,1]-boxSize/2.)**2 + (coords[:,2]-boxSize/2.)**2) #now give particle's velocities v = np.zeros((N,3)) @@ -184,7 +187,7 @@ print "J =", J omega = np.zeros((N,3)) for i in range(N): omega[i,2] = 3.*J / radius[i] - v[i,:] = np.cross(omega[i,:],(coords_2[i,:]-boxSize/2.)) + v[i,:] = np.cross(omega[i,:],(coords[i,:]-boxSize/2.)) # Header grp = file.create_group("/Header") @@ -202,16 +205,15 @@ grp.attrs["Dimension"] = 3 grp = file.create_group("/PartType0") ds = grp.create_dataset('Coordinates', (N, 3), 'd') -ds[()] = coords_2 -coords_2 = np.zeros(1) +ds[()] = coords +coords = np.zeros(1) ds = grp.create_dataset('Velocities', (N, 3), 'f') ds[()] = v v = np.zeros(1) # All particles of equal mass -mass = 1. / N -m = np.full((N,),mass) +m = np.full((N,),gas_particle_mass) ds = grp.create_dataset('Masses', (N, ), 'f') ds[()] = m m = np.zeros(1) diff --git a/examples/CoolingHaloWithSpin/run.sh b/examples/CoolingHaloWithSpin/run.sh index 3a0d9c02000e760b030a96107038d3c6163f3227..131fbf3cb10d2014546683b5f43194840544fd55 100755 --- a/examples/CoolingHaloWithSpin/run.sh +++ b/examples/CoolingHaloWithSpin/run.sh @@ -4,7 +4,8 @@ echo "Generating initial conditions for the isothermal potential box example..." 
python makeIC.py 10000 -../swift -g -s -C -t 16 cooling_halo.yml 2>&1 | tee output.log +# Run SWIFT with external potential, SPH and cooling +../swift -g -s -C -t 1 cooling_halo.yml 2>&1 | tee output.log # python radial_profile.py 10 diff --git a/examples/CoolingHaloWithSpin/velocity_profile.py b/examples/CoolingHaloWithSpin/velocity_profile.py index d64d255b18482bc26578f21f46199aa3540ae7b5..07df8e1b0751307513c30a5b128773b193c3a9cd 100644 --- a/examples/CoolingHaloWithSpin/velocity_profile.py +++ b/examples/CoolingHaloWithSpin/velocity_profile.py @@ -46,7 +46,7 @@ unit_mass_cgs = float(params.attrs["InternalUnitSystem:UnitMass_in_cgs"]) unit_length_cgs = float(params.attrs["InternalUnitSystem:UnitLength_in_cgs"]) unit_velocity_cgs = float(params.attrs["InternalUnitSystem:UnitVelocity_in_cgs"]) unit_time_cgs = unit_length_cgs / unit_velocity_cgs -v_c = float(params.attrs["SoftenedIsothermalPotential:vrot"]) +v_c = float(params.attrs["IsothermalPotential:vrot"]) v_c_cgs = v_c * unit_velocity_cgs header = f["Header"] N = header.attrs["NumPart_Total"][0] diff --git a/examples/EAGLE_100/README b/examples/EAGLE_100/README new file mode 100644 index 0000000000000000000000000000000000000000..e3af3c0e1281f8e9ba9e0aae3fa6dd8475359a47 --- /dev/null +++ b/examples/EAGLE_100/README @@ -0,0 +1,16 @@ +ICs extracted from the EAGLE suite of simulations. + +WARNING: The ICs are 217GB in size. They contain ~3.4G DM particles, +~3.2G gas particles and ~170M star particles + +The particle distribution here is the snapshot 27 (z=0.1) of the 100Mpc +Ref-model. h- and a- factors from the original Gadget code have been +corrected for. Variables not used in a pure hydro & gravity code have +been removed. +Everything is ready to be run without cosmological integration. + +The particle load of the main EAGLE simulation can be reproduced by +running these ICs on 4096 cores. 
+ +MD5 checksum of the ICs: +2301ea73e14207b541bbb04163c5269e EAGLE_ICs_100.hdf5 diff --git a/examples/EAGLE_100/eagle_100.yml b/examples/EAGLE_100/eagle_100.yml new file mode 100644 index 0000000000000000000000000000000000000000..a9b83b81f085e66b36d115c5265b66d6093ffdfb --- /dev/null +++ b/examples/EAGLE_100/eagle_100.yml @@ -0,0 +1,35 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.989e43 # 10^10 M_sun in grams + UnitLength_in_cgs: 3.085678e24 # Mpc in centimeters + UnitVelocity_in_cgs: 1e5 # km/s in centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 1e-2 # The end time of the simulation (in internal units). + dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-4 # The maximal time-step size of the simulation (in internal units). + +# Parameters governing the snapshots +Snapshots: + basename: eagle # Common part of the name of output files + time_first: 0. # Time of the first output (in internal units) + delta_time: 1e-3 # Time difference between consecutive outputs (in internal units) + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1e-2 # Time between statistics output + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + delta_neighbours: 0.1 # The tolerance for the targetted number of neighbours. + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. 
+ +# Parameters related to the initial conditions +InitialConditions: + file_name: ./EAGLE_ICs_100.hdf5 # The file to read + diff --git a/examples/EAGLE_100/getIC.sh b/examples/EAGLE_100/getIC.sh new file mode 100755 index 0000000000000000000000000000000000000000..227df3f9f79d294cd8ccbfd3b72b02dfbea2ebd6 --- /dev/null +++ b/examples/EAGLE_100/getIC.sh @@ -0,0 +1,2 @@ +#!/bin/bash +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/EAGLE_ICs_100.hdf5 diff --git a/examples/EAGLE_100/run.sh b/examples/EAGLE_100/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..6ef47d5d98172cc8a318242923ede37332bd5590 --- /dev/null +++ b/examples/EAGLE_100/run.sh @@ -0,0 +1,11 @@ +#!/bin/bash + + # Generate the initial conditions if they are not present. +if [ ! -e EAGLE_ICs_100.hdf5 ] +then + echo "Fetching initial conditions for the EAGLE 100Mpc example..." + ./getIC.sh +fi + +../swift -s -t 16 eagle_100.yml 2>&1 | tee output.log + diff --git a/examples/ExternalPointMass/energy_plot.py b/examples/ExternalPointMass/energy_plot.py new file mode 100644 index 0000000000000000000000000000000000000000..a75fcb835d33b3695170aab822092556f12db7d1 --- /dev/null +++ b/examples/ExternalPointMass/energy_plot.py @@ -0,0 +1,121 @@ +import matplotlib +matplotlib.use("Agg") +from pylab import * +import h5py + +# Plot parameters +params = {'axes.labelsize': 10, +'axes.titlesize': 10, +'font.size': 12, +'legend.fontsize': 12, +'xtick.labelsize': 10, +'ytick.labelsize': 10, +'text.usetex': True, + 'figure.figsize' : (3.15,3.15), +'figure.subplot.left' : 0.145, +'figure.subplot.right' : 0.99, +'figure.subplot.bottom' : 0.11, +'figure.subplot.top' : 0.99, +'figure.subplot.wspace' : 0.15, +'figure.subplot.hspace' : 0.12, +'lines.markersize' : 6, +'lines.linewidth' : 3., +'text.latex.unicode': True +} +rcParams.update(params) +rc('font',**{'family':'sans-serif','sans-serif':['Times']}) + + +import numpy as np +import h5py as h5 +import sys + +# File containing the total energy 
+stats_filename = "./energy.txt" + +# First snapshot +snap_filename = "pointMass_000.hdf5" +f = h5.File(snap_filename,'r') + +# Read the units parameters from the snapshot +units = f["InternalCodeUnits"] +unit_mass = units.attrs["Unit mass in cgs (U_M)"] +unit_length = units.attrs["Unit length in cgs (U_L)"] +unit_time = units.attrs["Unit time in cgs (U_t)"] + +G = 6.67408e-8 * unit_mass * unit_time**2 / unit_length**3 + +# Read the header +header = f["Header"] +box_size = float(header.attrs["BoxSize"][0]) + +# Read the properties of the potential +parameters = f["Parameters"] +mass = float(parameters.attrs["PointMassPotential:mass"]) +centre = [box_size/2, box_size/2, box_size/2] +f.close() + +# Read the statistics summary +file_energy = np.loadtxt("energy.txt") +time_stats = file_energy[:,0] +E_kin_stats = file_energy[:,3] +E_pot_stats = file_energy[:,5] +E_tot_stats = E_kin_stats + E_pot_stats + +# Read the snapshots +time_snap = np.zeros(402) +E_kin_snap = np.zeros(402) +E_pot_snap = np.zeros(402) +E_tot_snap = np.zeros(402) +Lz_snap = np.zeros(402) + +# Read all the particles from the snapshots +for i in range(402): + snap_filename = "pointMass_%0.3d.hdf5"%i + f = h5.File(snap_filename,'r') + + pos_x = f["PartType1/Coordinates"][:,0] + pos_y = f["PartType1/Coordinates"][:,1] + pos_z = f["PartType1/Coordinates"][:,2] + vel_x = f["PartType1/Velocities"][:,0] + vel_y = f["PartType1/Velocities"][:,1] + vel_z = f["PartType1/Velocities"][:,2] + m = f["/PartType1/Masses"][:] + + r = np.sqrt((pos_x[:] - centre[0])**2 + (pos_y[:] - centre[1])**2 + (pos_z[:] - centre[2])**2) + Lz = (pos_x[:] - centre[0]) * vel_y[:] - (pos_y[:] - centre[1]) * vel_x[:] + + time_snap[i] = f["Header"].attrs["Time"] + E_kin_snap[i] = np.sum(0.5 * m * (vel_x[:]**2 + vel_y[:]**2 + vel_z[:]**2)) + E_pot_snap[i] = np.sum(-mass * m * G / r) + E_tot_snap[i] = E_kin_snap[i] + E_pot_snap[i] + Lz_snap[i] = np.sum(Lz) + +# Plot energy evolution +figure() +plot(time_stats, E_kin_stats, "r-", lw=0.5, 
label="Kinetic energy") +plot(time_stats, E_pot_stats, "g-", lw=0.5, label="Potential energy") +plot(time_stats, E_tot_stats, "k-", lw=0.5, label="Total energy") + +plot(time_snap[::10], E_kin_snap[::10], "rD", lw=0.5, ms=2) +plot(time_snap[::10], E_pot_snap[::10], "gD", lw=0.5, ms=2) +plot(time_snap[::10], E_tot_snap[::10], "kD", lw=0.5, ms=2) + +legend(loc="center right", fontsize=8, frameon=False, handlelength=3, ncol=1) +xlabel("${\\rm{Time}}$", labelpad=0) +ylabel("${\\rm{Energy}}$",labelpad=0) +xlim(0, 8) + +savefig("energy.png", dpi=200) + +# Plot angular momentum evolution +figure() +plot(time_snap, Lz_snap, "k-", lw=0.5, ms=2) + +xlabel("${\\rm{Time}}$", labelpad=0) +ylabel("${\\rm{Angular~momentum}}$",labelpad=0) +xlim(0, 8) + +savefig("angular_momentum.png", dpi=200) + + diff --git a/examples/ExternalPointMass/externalPointMass.yml b/examples/ExternalPointMass/externalPointMass.yml index 621a66bbc39838ac8d3d8a8a3992b2a7be3157a8..20b5bb3aa613d553d8c401e968d8ebfc0572e610 100644 --- a/examples/ExternalPointMass/externalPointMass.yml +++ b/examples/ExternalPointMass/externalPointMass.yml @@ -9,7 +9,7 @@ InternalUnitSystem: # Parameters governing the time integration TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). - time_end: 1. # The end time of the simulation (in internal units). + time_end: 8. # The end time of the simulation (in internal units). dt_min: 1e-6 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). @@ -31,7 +31,7 @@ SPH: # Parameters related to the initial conditions InitialConditions: - file_name: Sphere.hdf5 # The file to read + file_name: PointMass.hdf5 # The file to read shift_x: 50. # A shift to apply to all particles read from the ICs (in internal units). shift_y: 50. shift_z: 50. 
diff --git a/examples/ExternalPointMass/makeIC.py b/examples/ExternalPointMass/makeIC.py index 326183398933c88d7348e72e00343064b3e3a64c..ba415daf9e03058239599cc08039fc89e0929393 100644 --- a/examples/ExternalPointMass/makeIC.py +++ b/examples/ExternalPointMass/makeIC.py @@ -24,10 +24,10 @@ import numpy import math import random -# Generates a random distriution of particles, for motion in an external potnetial centred at (0,0,0) +# Generates a random distribution of particles, for motion in an external potential centred at (0,0,0) # physical constants in cgs -NEWTON_GRAVITY_CGS = 6.672e-8 +NEWTON_GRAVITY_CGS = 6.67408e-8 SOLAR_MASS_IN_CGS = 1.9885e33 PARSEC_IN_CGS = 3.0856776e18 @@ -39,34 +39,28 @@ const_unit_velocity_in_cgs = (1e5) print "UnitMass_in_cgs: ", const_unit_mass_in_cgs print "UnitLength_in_cgs: ", const_unit_length_in_cgs print "UnitVelocity_in_cgs: ", const_unit_velocity_in_cgs +print "UnitTime_in_cgs: ", const_unit_length_in_cgs / const_unit_velocity_in_cgs # derived units const_unit_time_in_cgs = (const_unit_length_in_cgs / const_unit_velocity_in_cgs) const_G = ((NEWTON_GRAVITY_CGS*const_unit_mass_in_cgs*const_unit_time_in_cgs*const_unit_time_in_cgs/(const_unit_length_in_cgs*const_unit_length_in_cgs*const_unit_length_in_cgs))) -print 'G=', const_G +print '---------------------' +print 'G in internal units: ', const_G # Parameters -periodic= 1 # 1 For periodic box -boxSize = 100. # -Radius = boxSize / 4. # maximum radius of particles -G = const_G -Mass = 1e10 +periodic = 1 # 1 For periodic box +boxSize = 100. # +max_radius = boxSize / 4. # maximum radius of particles +Mass = 1e10 +print "Mass at the centre: ", Mass -N = int(sys.argv[1]) # Number of particles -L = N**(1./3.) +numPart = int(sys.argv[1]) # Number of particles +mass = 1. -# these are not used but necessary for I/O -rho = 2. # Density -P = 1. # Pressure -gamma = 5./3. 
# Gas adiabatic index -fileName = "Sphere.hdf5" +fileName = "PointMass.hdf5" -#--------------------------------------------------- -numPart = N -mass = 1 -internalEnergy = P / ((gamma - 1.)*rho) #-------------------------------------------------- @@ -98,25 +92,26 @@ grp.attrs["Unit current in cgs (U_I)"] = 1. grp.attrs["Unit temperature in cgs (U_T)"] = 1. #Particle group -#grp0 = file.create_group("/PartType0") grp1 = file.create_group("/PartType1") + #generate particle positions -radius = Radius * (numpy.random.rand(N))**(1./3.) -ctheta = -1. + 2 * numpy.random.rand(N) -stheta = numpy.sqrt(1.-ctheta**2) -phi = 2 * math.pi * numpy.random.rand(N) +radius = max_radius * (numpy.random.rand(numPart))**(1./3.) +print '---------------------' +print 'Radius: minimum = ',min(radius) +print 'Radius: maximum = ',max(radius) +radius = numpy.sort(radius) r = numpy.zeros((numPart, 3)) -# r[:,0] = radius * stheta * numpy.cos(phi) -# r[:,1] = radius * stheta * numpy.sin(phi) -# r[:,2] = radius * ctheta r[:,0] = radius -# -speed = numpy.sqrt(G * Mass / radius) -v = numpy.zeros((numPart, 3)) + +#generate particle velocities +speed = numpy.sqrt(const_G * Mass / radius) omega = speed / radius period = 2.*math.pi/omega -print 'period = minimum = ',min(period), ' maximum = ',max(period) +print '---------------------' +print 'Period: minimum = ',min(period) +print 'Period: maximum = ',max(period) +v = numpy.zeros((numPart, 3)) v[:,0] = -omega * r[:,1] v[:,1] = omega * r[:,0] @@ -129,17 +124,6 @@ ds = grp1.create_dataset('Masses', (numPart,), 'f') ds[()] = m m = numpy.zeros(1) -h = numpy.full((numPart, ), 1.1255 * boxSize / L) -ds = grp1.create_dataset('SmoothingLength', (numPart,), 'f') -ds[()] = h -h = numpy.zeros(1) - -u = numpy.full((numPart, ), internalEnergy) -ds = grp1.create_dataset('InternalEnergy', (numPart,), 'f') -ds[()] = u -u = numpy.zeros(1) - - ids = 1 + numpy.linspace(0, numPart, numPart, endpoint=False) ds = grp1.create_dataset('ParticleIDs', (numPart, ), 'L') ds[()] = 
ids diff --git a/examples/ExternalPointMass/run.sh b/examples/ExternalPointMass/run.sh index 4ac513f09cb8ac8dcefc256a68478e215b8bc320..e074c384c4e002a161c7d8258e9068663204099f 100755 --- a/examples/ExternalPointMass/run.sh +++ b/examples/ExternalPointMass/run.sh @@ -1,10 +1,13 @@ #!/bin/bash # Generate the initial conditions if they are not present. -if [ ! -e Sphere.hdf5 ] +if [ ! -e PointMass.hdf5 ] then echo "Generating initial conditions for the point mass potential box example..." python makeIC.py 10000 fi +rm -rf pointMass_*.hdf5 ../swift -g -t 1 externalPointMass.yml 2>&1 | tee output.log + +python energy_plot.py diff --git a/examples/ExternalPointMass/test.pro b/examples/ExternalPointMass/test.pro deleted file mode 100644 index 21c10e9d27daa45b085c6a659ba3cf7260f017fb..0000000000000000000000000000000000000000 --- a/examples/ExternalPointMass/test.pro +++ /dev/null @@ -1,65 +0,0 @@ -; -; test energy / angular momentum conservation of test problem -; -@physunits - -indir = '/gpfs/data/tt/Codes/Swift-git/swiftsim/examples/' -basefile = 'output_' -nfiles = 657 -nfollow = 100 ; number of particles to follow -eout = fltarr(nfollow, nfiles) -ekin = fltarr(nfollow, nfiles) -epot = fltarr(nfollow, nfiles) -tout = fltarr(nfiles) -; set properties of potential -uL = 1e3 * phys.pc ; unit of length -uM = phys.msun ; unit of mass -uV = 1d5 ; unit of velocity - -; derived units -constG = 10.^(alog10(phys.g)+alog10(uM)-2d0*alog10(uV)-alog10(uL)) ; -pcentre = [50.,50.,50.] 
* 1d3 * pc / uL -mextern = 1d10 * msun / uM -; -; -; -ifile = 0 -for ifile=0,nfiles-1 do begin -;for ifile=0,3 do begin - inf = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5' - time = h5ra(inf, 'Header','Time') - p = h5rd(inf,'PartType1/Coordinates') - v = h5rd(inf,'PartType1/Velocities') - id = h5rd(inf,'PartType1/ParticleIDs') - indx = sort(id) -; - id = id[indx] - for ic=0,2 do begin - tmp = reform(p[ic,*]) & p[ic,*] = tmp[indx] - tmp = reform(v[ic,*]) & v[ic,*] = tmp[indx] - endfor -; calculate energy - dd = size(p,/dimen) & npart = dd[1] - ener = fltarr(npart) - dr = fltarr(npart) & dv = dr - for ic=0,2 do dr[*] = dr[*] + (p[ic,*]-pcentre[ic])^2 - for ic=0,2 do dv[*] = dv[*] + v[ic,*]^2 - dr = sqrt(dr) -; print,'time = ',time,p[0,0],v[0,0],id[0] - ek = 0.5 * dv - ep = - constG * mextern / dr - ener = ek + ep - tout(ifile) = time - eout(*,ifile) = ener[0:nfollow-1] - ekin(*,ifile) = ek[0:nfollow-1] - epot(*,ifile) = ep[0:nfollow-1] -endfor - -; calculate relative energy change -de = 0.0 * eout -for ifile=1, nfiles -1 do de[*,ifile] = (eout[*,ifile]-eout[*,0])/eout[*,0] - - -end - - diff --git a/examples/Feedback/feedback.pro b/examples/Feedback/feedback.pro deleted file mode 100644 index 02d616fc82f0aeb7011d022d13db9d1d1030e89c..0000000000000000000000000000000000000000 --- a/examples/Feedback/feedback.pro +++ /dev/null @@ -1,24 +0,0 @@ -base = 'Feedback' -inf = 'Feedback_005.hdf5' - -blast = [5.650488e-01, 5.004371e-01, 5.010494e-01] ; location of blast -pos = h5rd(inf,'PartType0/Coordinates') -vel = h5rd(inf,'PartType0/Velocities') -rho = h5rd(inf,'PartType0/Density') -utherm = h5rd(inf,'PartType0/InternalEnergy') - -; shift to centre -for ic=0,2 do pos[ic,*] = pos[ic,*] - blast[ic] - -;; distance from centre -dist = fltarr(n_elements(rho)) -for ic=0,2 do dist = dist + pos[ic,*]^2 -dist = sqrt(dist) - -; radial velocity -vr = fltarr(n_elements(rho)) -for ic=0,2 do vr = vr + pos[ic,*]*vel[ic,*] -vr = vr / dist - -; -end diff --git 
a/examples/Feedback/feedback.yml b/examples/Feedback/feedback.yml deleted file mode 100644 index de4f7abef1ef538a97a5e38c72b4db5ce2647976..0000000000000000000000000000000000000000 --- a/examples/Feedback/feedback.yml +++ /dev/null @@ -1,43 +0,0 @@ -# Define the system of units to use internally. -InternalUnitSystem: - UnitMass_in_cgs: 1 # Grams - UnitLength_in_cgs: 1 # Centimeters - UnitVelocity_in_cgs: 1 # Centimeters per second - UnitCurrent_in_cgs: 1 # Amperes - UnitTemp_in_cgs: 1 # Kelvin - -# Parameters governing the time integration -TimeIntegration: - time_begin: 0. # The starting time of the simulation (in internal units). - time_end: 5e-2 # The end time of the simulation (in internal units). - dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). - dt_max: 1e-4 # The maximal time-step size of the simulation (in internal units). - -# Parameters governing the snapshots -Snapshots: - basename: Feedback # Common part of the name of output files - time_first: 0. # Time of the first output (in internal units) - delta_time: 1e-2 # Time difference between consecutive outputs (in internal units) - -# Parameters governing the conserved quantities statistics -Statistics: - delta_time: 1e-3 # Time between statistics output - -# Parameters for the hydrodynamics scheme -SPH: - resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). - delta_neighbours: 0.1 # The tolerance for the targetted number of neighbours. - CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. 
- -# Parameters related to the initial conditions -InitialConditions: - file_name: ./Feedback.hdf5 # The file to read - -# Parameters for feedback - -SN: - time: 0.001 # time the SN explodes (internal units) - energy: 1.0 # energy of the explosion (internal units) - x: 0.5 # x-position of explostion (internal units) - y: 0.5 # y-position of explostion (internal units) - z: 0.5 # z-position of explostion (internal units) diff --git a/examples/Feedback/makeIC.py b/examples/Feedback/makeIC.py deleted file mode 100644 index bd1081a9c275616038f5fa4e3eb943c36cb4c3eb..0000000000000000000000000000000000000000 --- a/examples/Feedback/makeIC.py +++ /dev/null @@ -1,109 +0,0 @@ -############################################################################### - # This file is part of SWIFT. - # Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk), - # Matthieu Schaller (matthieu.schaller@durham.ac.uk) - # 2016 Tom Theuns (tom.theuns@durham.ac.uk) - # - # This program is free software: you can redistribute it and/or modify - # it under the terms of the GNU Lesser General Public License as published - # by the Free Software Foundation, either version 3 of the License, or - # (at your option) any later version. - # - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. - # - # You should have received a copy of the GNU Lesser General Public License - # along with this program. If not, see <http://www.gnu.org/licenses/>. - # - ############################################################################## - -import h5py -import sys -from numpy import * - -# Generates a swift IC file containing a cartesian distribution of particles -# at a constant density and pressure in a cubic box - -# Parameters -periodic= 1 # 1 For periodic box -boxSize = 1. 
-L = int(sys.argv[1]) # Number of particles along one axis -rho = 1. # Density -P = 1.e-6 # Pressure -gamma = 5./3. # Gas adiabatic index -eta = 1.2349 # 48 ngbs with cubic spline kernel -fileName = "Feedback.hdf5" - -#--------------------------------------------------- -numPart = L**3 -mass = boxSize**3 * rho / numPart -internalEnergy = P / ((gamma - 1.)*rho) - -#-------------------------------------------------- - -#File -file = h5py.File(fileName, 'w') - -# Header -grp = file.create_group("/Header") -grp.attrs["BoxSize"] = boxSize -grp.attrs["NumPart_Total"] = [numPart, 0, 0, 0, 0, 0] -grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0] -grp.attrs["NumPart_ThisFile"] = [numPart, 0, 0, 0, 0, 0] -grp.attrs["Time"] = 0.0 -grp.attrs["NumFilesPerSnapshot"] = 1 -grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] -grp.attrs["Flag_Entropy_ICs"] = 0 - -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = periodic - -#Units -grp = file.create_group("/Units") -grp.attrs["Unit length in cgs (U_L)"] = 1. -grp.attrs["Unit mass in cgs (U_M)"] = 1. -grp.attrs["Unit time in cgs (U_t)"] = 1. -grp.attrs["Unit current in cgs (U_I)"] = 1. -grp.attrs["Unit temperature in cgs (U_T)"] = 1. 
- -#Particle group -grp = file.create_group("/PartType0") - -v = zeros((numPart, 3)) -ds = grp.create_dataset('Velocities', (numPart, 3), 'f') -ds[()] = v -v = zeros(1) - -m = full((numPart, 1), mass) -ds = grp.create_dataset('Masses', (numPart,1), 'f') -ds[()] = m -m = zeros(1) - -h = full((numPart, 1), eta * boxSize / L) -ds = grp.create_dataset('SmoothingLength', (numPart,1), 'f') -ds[()] = h -h = zeros(1) - -u = full((numPart, 1), internalEnergy) -ds = grp.create_dataset('InternalEnergy', (numPart,1), 'f') -ds[()] = u -u = zeros(1) - - -ids = linspace(0, numPart, numPart, endpoint=False).reshape((numPart,1)) -ds = grp.create_dataset('ParticleIDs', (numPart, 1), 'L') -ds[()] = ids + 1 -x = ids % L; -y = ((ids - x) / L) % L; -z = (ids - x - L * y) / L**2; -coords = zeros((numPart, 3)) -coords[:,0] = z[:,0] * boxSize / L + boxSize / (2*L) -coords[:,1] = y[:,0] * boxSize / L + boxSize / (2*L) -coords[:,2] = x[:,0] * boxSize / L + boxSize / (2*L) -ds = grp.create_dataset('Coordinates', (numPart, 3), 'd') -ds[()] = coords - -file.close() diff --git a/examples/HydrostaticHalo/hydrostatic.yml b/examples/HydrostaticHalo/hydrostatic.yml index 39a91a4ec475a70ef4e61b9cdc59b8221a74093e..d20d6018f323de0628a0500d8ba767018711fd0a 100644 --- a/examples/HydrostaticHalo/hydrostatic.yml +++ b/examples/HydrostaticHalo/hydrostatic.yml @@ -34,7 +34,7 @@ InitialConditions: file_name: Hydrostatic.hdf5 # The file to read # External potential parameters -SoftenedIsothermalPotential: +IsothermalPotential: position_x: 0. # location of centre of isothermal potential in internal units position_y: 0. position_z: 0. 
diff --git a/examples/IsothermalPotential/energy_plot.py b/examples/IsothermalPotential/energy_plot.py new file mode 100644 index 0000000000000000000000000000000000000000..0afa6fa93fa2a992e6ddeab3c9d33538c0b41de3 --- /dev/null +++ b/examples/IsothermalPotential/energy_plot.py @@ -0,0 +1,120 @@ +import matplotlib +matplotlib.use("Agg") +from pylab import * +import h5py + +# Plot parameters +params = {'axes.labelsize': 10, +'axes.titlesize': 10, +'font.size': 12, +'legend.fontsize': 12, +'xtick.labelsize': 10, +'ytick.labelsize': 10, +'text.usetex': True, + 'figure.figsize' : (3.15,3.15), +'figure.subplot.left' : 0.145, +'figure.subplot.right' : 0.99, +'figure.subplot.bottom' : 0.11, +'figure.subplot.top' : 0.99, +'figure.subplot.wspace' : 0.15, +'figure.subplot.hspace' : 0.12, +'lines.markersize' : 6, +'lines.linewidth' : 3., +'text.latex.unicode': True +} +rcParams.update(params) +rc('font',**{'family':'sans-serif','sans-serif':['Times']}) + + +import numpy as np +import h5py as h5 +import sys + +# File containing the total energy +stats_filename = "./energy.txt" + +# First snapshot +snap_filename = "Isothermal_000.hdf5" +f = h5.File(snap_filename,'r') + +# Read the units parameters from the snapshot +units = f["InternalCodeUnits"] +unit_mass = units.attrs["Unit mass in cgs (U_M)"] +unit_length = units.attrs["Unit length in cgs (U_L)"] +unit_time = units.attrs["Unit time in cgs (U_t)"] + +# Read the header +header = f["Header"] +box_size = float(header.attrs["BoxSize"][0]) + +# Read the properties of the potential +parameters = f["Parameters"] +R200 = 100 +Vrot = float(parameters.attrs["IsothermalPotential:vrot"]) +centre = [box_size/2, box_size/2, box_size/2] +f.close() + +# Read the statistics summary +file_energy = np.loadtxt("energy.txt") +time_stats = file_energy[:,0] +E_kin_stats = file_energy[:,3] +E_pot_stats = file_energy[:,5] +E_tot_stats = E_kin_stats + E_pot_stats + +# Read the snapshots +time_snap = np.zeros(402) +E_kin_snap = np.zeros(402) +E_pot_snap 
= np.zeros(402) +E_tot_snap = np.zeros(402) +Lz_snap = np.zeros(402) + +# Read all the particles from the snapshots +for i in range(402): + snap_filename = "Isothermal_%0.3d.hdf5"%i + f = h5.File(snap_filename,'r') + + pos_x = f["PartType1/Coordinates"][:,0] + pos_y = f["PartType1/Coordinates"][:,1] + pos_z = f["PartType1/Coordinates"][:,2] + vel_x = f["PartType1/Velocities"][:,0] + vel_y = f["PartType1/Velocities"][:,1] + vel_z = f["PartType1/Velocities"][:,2] + mass = f["/PartType1/Masses"][:] + + r = np.sqrt((pos_x[:] - centre[0])**2 + (pos_y[:] - centre[1])**2 + (pos_z[:] - centre[2])**2) + Lz = (pos_x[:] - centre[0]) * vel_y[:] - (pos_y[:] - centre[1]) * vel_x[:] + + time_snap[i] = f["Header"].attrs["Time"] + E_kin_snap[i] = np.sum(0.5 * mass * (vel_x[:]**2 + vel_y[:]**2 + vel_z[:]**2)) + E_pot_snap[i] = np.sum(-mass * Vrot**2 * log(r)) + E_tot_snap[i] = E_kin_snap[i] + E_pot_snap[i] + Lz_snap[i] = np.sum(Lz) + +# Plot energy evolution +figure() +plot(time_stats, E_kin_stats, "r-", lw=0.5, label="Kinetic energy") +plot(time_stats, E_pot_stats, "g-", lw=0.5, label="Potential energy") +plot(time_stats, E_tot_stats, "k-", lw=0.5, label="Total energy") + +plot(time_snap[::10], E_kin_snap[::10], "rD", lw=0.5, ms=2) +plot(time_snap[::10], E_pot_snap[::10], "gD", lw=0.5, ms=2) +plot(time_snap[::10], E_tot_snap[::10], "kD", lw=0.5, ms=2) + +legend(loc="center right", fontsize=8, frameon=False, handlelength=3, ncol=1) +xlabel("${\\rm{Time}}$", labelpad=0) +ylabel("${\\rm{Energy}}$",labelpad=0) +xlim(0, 8) + +savefig("energy.png", dpi=200) + +# Plot angular momentum evolution +figure() +plot(time_snap, Lz_snap, "k-", lw=0.5, ms=2) + +xlabel("${\\rm{Time}}$", labelpad=0) +ylabel("${\\rm{Angular~momentum}}$",labelpad=0) +xlim(0, 8) + +savefig("angular_momentum.png", dpi=200) + + diff --git a/examples/IsothermalPotential/isothermal.yml b/examples/IsothermalPotential/isothermal.yml index 0de99779f07591a5b71be11b75bc56ec741ddaed..8d9ec3875e405d95a89b3486bca5fd3465a3e20d 
100644 --- a/examples/IsothermalPotential/isothermal.yml +++ b/examples/IsothermalPotential/isothermal.yml @@ -15,7 +15,7 @@ TimeIntegration: # Parameters governing the conserved quantities statistics Statistics: - delta_time: 1e-2 # Time between statistics output + delta_time: 1e-3 # Time between statistics output # Parameters governing the snapshots Snapshots: @@ -23,25 +23,18 @@ Snapshots: time_first: 0. # Time of the first output (in internal units) delta_time: 0.02 # Time difference between consecutive outputs (in internal units) -# Parameters for the hydrodynamics scheme -SPH: - resolution_eta: 1.2349 # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel). - delta_neighbours: 1. # The tolerance for the targetted number of neighbours. - CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. - max_smoothing_length: 40. # Maximal smoothing length allowed (in internal units). - # Parameters related to the initial conditions InitialConditions: file_name: Isothermal.hdf5 # The file to read - shift_x: 100. # A shift to apply to all particles read from the ICs (in internal units). - shift_y: 100. - shift_z: 100. + shift_x: 200. # Shift all particles to be in the potential + shift_y: 200. + shift_z: 200. # External potential parameters IsothermalPotential: - position_x: 100. # location of centre of isothermal potential in internal units - position_y: 100. - position_z: 100. + position_x: 0. # location of centre of isothermal potential in internal units + position_y: 0. + position_z: 0. vrot: 200. # rotation speed of isothermal potential in internal units - timestep_mult: 0.03 # controls time step - + timestep_mult: 0.01 # controls time step + epsilon: 0. 
# No softening at the centre of the halo diff --git a/examples/IsothermalPotential/makeIC.py b/examples/IsothermalPotential/makeIC.py index 976119f0a312c5acc81fab943ba3cf5769102269..7d1c5361f9a255365517226e49c55a8a50c4d6ce 100644 --- a/examples/IsothermalPotential/makeIC.py +++ b/examples/IsothermalPotential/makeIC.py @@ -30,10 +30,10 @@ import random # all particles move in the xy plane, and start at y=0 # physical constants in cgs -NEWTON_GRAVITY_CGS = 6.672e-8 +NEWTON_GRAVITY_CGS = 6.67408e-8 SOLAR_MASS_IN_CGS = 1.9885e33 PARSEC_IN_CGS = 3.0856776e18 -PROTON_MASS_IN_CGS = 1.6726231e24 +PROTON_MASS_IN_CGS = 1.672621898e24 YEAR_IN_CGS = 3.154e+7 # choice of units @@ -66,17 +66,12 @@ N = int(sys.argv[1]) # Number of particles icirc = int(sys.argv[2]) # if = 0, all particles are on circular orbits, if = 1, Lz/Lcirc uniform in ]0,1[ L = N**(1./3.) -# these are not used but necessary for I/O -rho = 2. # Density -P = 1. # Pressure -gamma = 5./3. # Gas adiabatic index fileName = "Isothermal.hdf5" #--------------------------------------------------- numPart = N mass = 1 -internalEnergy = P / ((gamma - 1.)*rho) #-------------------------------------------------- @@ -111,7 +106,6 @@ grp.attrs["PeriodicBoundariesOn"] = periodic numpy.random.seed(1234) #Particle group -#grp0 = file.create_group("/PartType0") grp1 = file.create_group("/PartType1") #generate particle positions radius = Radius * (numpy.random.rand(N))**(1./3.) @@ -119,10 +113,8 @@ ctheta = -1. 
+ 2 * numpy.random.rand(N) stheta = numpy.sqrt(1.-ctheta**2) phi = 2 * math.pi * numpy.random.rand(N) r = numpy.zeros((numPart, 3)) -#r[:,0] = radius * stheta * numpy.cos(phi) -#r[:,1] = radius * stheta * numpy.sin(phi) -#r[:,2] = radius * ctheta r[:,0] = radius + # speed = vrot v = numpy.zeros((numPart, 3)) @@ -146,17 +138,6 @@ ds = grp1.create_dataset('Masses', (numPart,), 'f') ds[()] = m m = numpy.zeros(1) -h = numpy.full((numPart, ), 1.1255 * boxSize / L, dtype='f') -ds = grp1.create_dataset('SmoothingLength', (numPart,), 'f') -ds[()] = h -h = numpy.zeros(1) - -u = numpy.full((numPart, ), internalEnergy, dtype='f') -ds = grp1.create_dataset('InternalEnergy', (numPart,), 'f') -ds[()] = u -u = numpy.zeros(1) - - ids = 1 + numpy.linspace(0, numPart, numPart, endpoint=False, dtype='L') ds = grp1.create_dataset('ParticleIDs', (numPart, ), 'L') ds[()] = ids diff --git a/examples/IsothermalPotential/run.sh b/examples/IsothermalPotential/run.sh index 28a3cc0910f986f84bcd603091543643356f1c4a..976fbddc01cf7a3dcbb114d437ddb8f03b4d54bd 100755 --- a/examples/IsothermalPotential/run.sh +++ b/examples/IsothermalPotential/run.sh @@ -7,4 +7,7 @@ then python makeIC.py 1000 1 fi +rm -rf Isothermal_*.hdf5 ../swift -g -t 1 isothermal.yml 2>&1 | tee output.log + +python energy_plot.py diff --git a/examples/IsothermalPotential/test.pro b/examples/IsothermalPotential/test.pro deleted file mode 100644 index edfa50121d2e5adb7e039f3c38d6d4c0b4d5e34f..0000000000000000000000000000000000000000 --- a/examples/IsothermalPotential/test.pro +++ /dev/null @@ -1,168 +0,0 @@ -; -; test energy / angular momentum conservation of test problem -; - -iplot = 1 ; if iplot = 1, make plot of E/Lz conservation, else, simply compare final and initial energy - -; set physical constants -@physunits - -indir = './' -basefile = 'Isothermal_' - -; set properties of potential -uL = 1e3 * phys.pc ; unit of length -uM = phys.msun ; unit of mass -uV = 1d5 ; unit of velocity -vrot = 200. ; km/s -r200 = 100. 
; virial radius - -; derived units -constG = 10.^(alog10(phys.g)+alog10(uM)-2d0*alog10(uV)-alog10(uL)) ; -pcentre = [100.,100.,100.] * 1d3 * pc / uL - -; -infile = indir + basefile + '*' -spawn,'ls -1 '+infile,res -nfiles = n_elements(res) - - - -; choose: calculate change of energy and Lz, comparing first and last -; snapshots for all particles, or do so for a subset - -; compare all -ifile = 0 -inf = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5' -id = h5rd(inf,'PartType1/ParticleIDs') -nfollow = n_elements(id) - -; follow a subset -nfollow = 500 ; number of particles to follow - -; -if (iplot eq 1) then begin - nskip = 1 - nsave = nfiles -endif else begin - nskip = nfiles - 2 - nsave = 2 -endelse - -; -lout = fltarr(nfollow, nsave) ; Lz -xout = fltarr(nfollow, nsave) ; x -yout = fltarr(nfollow, nsave) ; y -zout = fltarr(nfollow, nsave) ; z -eout = fltarr(nfollow, nsave) ; energies -ekin = fltarr(nfollow, nsave) -epot = fltarr(nfollow, nsave) -tout = fltarr(nsave) - - - -ifile = 0 -isave = 0 -for ifile=0,nfiles-1,nskip do begin - inf = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5' - time = h5ra(inf, 'Header','Time') - p = h5rd(inf,'PartType1/Coordinates') - v = h5rd(inf,'PartType1/Velocities') - id = h5rd(inf,'PartType1/ParticleIDs') - indx = sort(id) -; - id = id[indx] - for ic=0,2 do begin - tmp = reform(p[ic,*]) & p[ic,*] = tmp[indx] - tmp = reform(v[ic,*]) & v[ic,*] = tmp[indx] - endfor - - -; calculate energy - dd = size(p,/dimen) & npart = dd[1] - ener = fltarr(npart) - dr = fltarr(npart) & dv = dr - for ic=0,2 do dr[*] = dr[*] + (p[ic,*]-pcentre[ic])^2 - for ic=0,2 do dv[*] = dv[*] + v[ic,*]^2 - xout[*,isave] = p[0,0:nfollow-1]-pcentre[0] - yout[*,isave] = p[1,0:nfollow-1]-pcentre[1] - zout[*,isave] = p[2,0:nfollow-1]-pcentre[2] - Lz = (p[0,*]-pcentre[0]) * v[1,*] - (p[1,*]-pcentre[1]) * v[0,*] - dr = sqrt(dr) -; print,'time = ',time,p[0,0],v[0,0],id[0] - ek = 0.5 * dv -; ep = - constG * mextern / dr - ep = -vrot*vrot * (1 
+ alog(r200/dr)) - ener = ek + ep - tout(isave) = time - lout[*,isave] = lz[0:nfollow-1] - eout(*,isave) = ener[0:nfollow-1] - ekin(*,isave) = ek[0:nfollow-1] - epot(*,isave) = ep[0:nfollow-1] - -; write some output -; print,' time= ',time,' e= ',eout[0],' Lz= ',lz[0],format='(%a %f %a -; %f)' - print,format='('' time= '',f7.1,'' E= '',f9.2,'' Lz= '',e9.2)', time,eout[0],lz[0] - isave = isave + 1 - -endfor -x0 = reform(xout[0,*]) -y0 = reform(xout[1,*]) -z0 = reform(xout[2,*]) - -; calculate relative energy change -de = 0.0 * eout -dl = 0.0 * lout -nsave = isave -for ifile=1, nsave-1 do de[*,ifile] = (eout[*,ifile]-eout[*,0])/eout[*,0] -for ifile=1, nsave-1 do dl[*,ifile] = (lout[*,ifile] - lout[*,0])/lout[*,0] - - -; calculate statistics of energy changes -print,' relatve energy change: (per cent) ',minmax(de) * 100. -print,' relative Lz change: (per cent) ',minmax(dl) * 100. - -; plot enery and Lz conservation for some particles -if(iplot eq 1) then begin -; plot results on energy conservation for some particles - nplot = min(10, nfollow) - win,0 - xr = [min(tout), max(tout)] - yr = [-2,2]*1d-2 ; in percent - plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/nodata,xtitle='time',ytitle='dE/E, dL/L (%)' - for i=0,nplot-1 do oplot,tout,de[i,*] - for i=0,nplot-1 do oplot,tout,dl[i,*],color=red - legend,['dE/E','dL/L'],linestyle=[0,0],color=[black,red],box=0,/bottom,/left - screen_to_png,'e-time.png' - -; plot orbits of those particles - win,2 - xr = [-100,100] - yr = xr - plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/iso,/nodata,xtitle='x',ytitle='y' - color = floor(findgen(nplot)*255/float(nplot)) - for i=0,nplot-1 do oplot,xout[i,*],yout[i,*],color=color(i) - screen_to_png,'orbit.png' - -; plot radial position of these particles - win,4 - xr = [min(tout), max(tout)] - yr = [0,80] - plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/nodata,xtitle='t',ytitle='r' - color = floor(findgen(nplot)*255/float(nplot)) -for i=0,nplot-1 do begin dr = sqrt(reform(xout[i,*])^2 + reform(yout[i,*])^2) & 
oplot,tout,dr,color=color[i] & endfor - screen_to_png,'r-time.png' - -; make histogram of energy changes at end - win,6 - ohist,de,x,y,-0.05,0.05,0.001 - plot,x,y,psym=10,xtitle='de (%)' - screen_to_png,'de-hist.png' - - -endif - -end - - diff --git a/examples/Makefile.am b/examples/Makefile.am index 4da84788a485dacd2103fe85ad3e729ade6b582a..dd13fb7eb4b82fbbfbb1ae450e20d01b13f2a455 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -24,7 +24,7 @@ AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS) AM_LDFLAGS = $(HDF5_LDFLAGS) # Extra libraries. -EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) +EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) # MPI libraries. MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS) @@ -63,11 +63,11 @@ EXTRA_DIST = BigCosmoVolume/makeIC.py \ EAGLE_12/eagle_12.yml EAGLE_12/getIC.sh EAGLE_12/README EAGLE_12/run.sh \ EAGLE_25/eagle_25.yml EAGLE_25/getIC.sh EAGLE_25/README EAGLE_25/run.sh \ EAGLE_50/eagle_50.yml EAGLE_50/getIC.sh EAGLE_50/README EAGLE_50/run.sh \ - ExternalPointMass/externalPointMass.yml ExternalPointMass/makeIC.py ExternalPointMass/run.sh ExternalPointMass/test.pro \ + ExternalPointMass/externalPointMass.yml ExternalPointMass/makeIC.py ExternalPointMass/run.sh ExternalPointMass/energy_plot.py \ GreshoVortex_2D/getGlass.sh GreshoVortex_2D/gresho.yml GreshoVortex_2D/makeIC.py GreshoVortex_2D/plotSolution.py GreshoVortex_2D/run.sh \ HydrostaticHalo/README HydrostaticHalo/hydrostatic.yml HydrostaticHalo/makeIC.py HydrostaticHalo/run.sh \ HydrostaticHalo/density_profile.py HydrostaticHalo/velocity_profile.py HydrostaticHalo/internal_energy_profile.py HydrostaticHalo/test_energy_conservation.py \ - IsothermalPotential/README IsothermalPotential/run.sh IsothermalPotential/test.pro IsothermalPotential/isothermal.yml IsothermalPotential/makeIC.py \ + IsothermalPotential/README IsothermalPotential/run.sh IsothermalPotential/energy_plot.py 
IsothermalPotential/isothermal.yml IsothermalPotential/makeIC.py \ KelvinHelmholtz_2D/kelvinHelmholtz.yml KelvinHelmholtz_2D/makeIC.py KelvinHelmholtz_2D/plotSolution.py KelvinHelmholtz_2D/run.sh \ MultiTypes/makeIC.py MultiTypes/multiTypes.yml MultiTypes/run.sh \ PerturbedBox_2D/makeIC.py PerturbedBox_2D/perturbedPlane.yml \ diff --git a/examples/MultiTypes/makeIC.py b/examples/MultiTypes/makeIC.py index 229450b67c02258553b588483d7cbd4fef887817..88330a6de25e23bf007615f9e9ca24e66065413c 100644 --- a/examples/MultiTypes/makeIC.py +++ b/examples/MultiTypes/makeIC.py @@ -36,6 +36,9 @@ eta = 1.2349 # 48 ngbs with cubic spline kernel rhoDM = 1. Ldm = int(sys.argv[2]) # Number of particles along one axis +massStars = 0.1 +Lstars = int(sys.argv[3]) # Number of particles along one axis + fileName = "multiTypes.hdf5" #--------------------------------------------------- @@ -46,6 +49,10 @@ internalEnergy = P / ((gamma - 1.)*rhoGas) numDM = Ldm**3 massDM = boxSize**3 * rhoDM / numDM +numStars = Lstars**3 +massStars = massDM * massStars + + #-------------------------------------------------- #File @@ -54,9 +61,9 @@ file = h5py.File(fileName, 'w') # Header grp = file.create_group("/Header") grp.attrs["BoxSize"] = boxSize -grp.attrs["NumPart_Total"] = [numGas, numDM, 0, 0, 0, 0] +grp.attrs["NumPart_Total"] = [numGas, numDM, 0, 0, numStars, 0] grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0] -grp.attrs["NumPart_ThisFile"] = [numGas, numDM, 0, 0, 0, 0] +grp.attrs["NumPart_ThisFile"] = [numGas, numDM, 0, 0, numStars, 0] grp.attrs["Time"] = 0.0 grp.attrs["NumFilesPerSnapshot"] = 1 grp.attrs["MassTable"] = [0.0, massDM, 0.0, 0.0, 0.0, 0.0] @@ -142,4 +149,33 @@ coords[:,2] = x[:,0] * boxSize / Ldm + boxSize / (2*Ldm) ds = grp.create_dataset('Coordinates', (numDM, 3), 'd') ds[()] = coords + + +# Star Particle group +grp = file.create_group("/PartType4") + +v = zeros((numStars, 3)) +ds = grp.create_dataset('Velocities', (numStars, 3), 'f') +ds[()] = v +v = zeros(1) + +m = 
full((numStars, 1), massStars) +ds = grp.create_dataset('Masses', (numStars,1), 'f') +ds[()] = m +m = zeros(1) + +ids = linspace(0, numStars, numStars, endpoint=False).reshape((numStars,1)) +ds = grp.create_dataset('ParticleIDs', (numStars, 1), 'L') +ds[()] = ids + Lgas**3 + 1 +x = ids % Lstars; +y = ((ids - x) / Lstars) % Lstars; +z = (ids - x - Lstars * y) / Lstars**2; +coords = zeros((numStars, 3)) +coords[:,0] = z[:,0] * boxSize / Lstars + boxSize / (2*Lstars) +coords[:,1] = y[:,0] * boxSize / Lstars + boxSize / (2*Lstars) +coords[:,2] = x[:,0] * boxSize / Lstars + boxSize / (2*Lstars) +ds = grp.create_dataset('Coordinates', (numStars, 3), 'd') +ds[()] = coords + + file.close() diff --git a/examples/MultiTypes/run.sh b/examples/MultiTypes/run.sh index 57465ce0ba6dde3988359df990f2a93323dbc617..508a5097f8961f446a51204e889875e33d4f634e 100755 --- a/examples/MultiTypes/run.sh +++ b/examples/MultiTypes/run.sh @@ -4,7 +4,7 @@ if [ ! -e multiTypes.hdf5 ] then echo "Generating initial conditions for the multitype box example..."
- python makeIC.py 50 60 + python makeIC.py 17 24 12 fi -../swift -s -g -t 16 multiTypes.yml 2>&1 | tee output.log +../swift -s -g -S -t 1 multiTypes.yml 2>&1 | tee output.log diff --git a/examples/Stellar_Disk/makeIC.py b/examples/Stellar_Disk/makeIC.py new file mode 100644 index 0000000000000000000000000000000000000000..b5ec65d3fa76d8b377bdaacf3b43a36ab560115e --- /dev/null +++ b/examples/Stellar_Disk/makeIC.py @@ -0,0 +1,3 @@ +import numpy as np +import h5py as h5 + diff --git a/examples/Stellar_Disk/stellar_disk.yml b/examples/Stellar_Disk/stellar_disk.yml new file mode 100644 index 0000000000000000000000000000000000000000..1d17f96be7cf476f1959544fd221d7c8b7919915 --- /dev/null +++ b/examples/Stellar_Disk/stellar_disk.yml @@ -0,0 +1 @@ +IsothermalPotential \ No newline at end of file diff --git a/examples/main.c b/examples/main.c index 8b00c569e4e151fc00a695376528949462e529c6..9426597bca62174f68b09a2813aa3381962c4fe0 100644 --- a/examples/main.c +++ b/examples/main.c @@ -83,6 +83,7 @@ void print_help_message() { "Execute a fixed number of time steps. When unset use the time_end " "parameter to stop."); printf(" %2s %8s %s\n", "-s", "", "Run with SPH"); + printf(" %2s %8s %s\n", "-S", "", "Run with stars"); printf(" %2s %8s %s\n", "-t", "{int}", "The number of threads to use on each MPI rank. 
Defaults to 1 if not " "specified."); @@ -156,6 +157,7 @@ int main(int argc, char *argv[]) { int with_cooling = 0; int with_self_gravity = 0; int with_hydro = 0; + int with_stars = 0; int with_fp_exceptions = 0; int with_drift_all = 0; int verbose = 0; @@ -165,7 +167,7 @@ int main(int argc, char *argv[]) { /* Parse the parameters */ int c; - while ((c = getopt(argc, argv, "acCdDef:FgGhn:st:v:y:")) != -1) switch (c) { + while ((c = getopt(argc, argv, "acCdDef:FgGhn:sSt:v:y:")) != -1) switch (c) { case 'a': with_aff = 1; break; @@ -213,6 +215,9 @@ int main(int argc, char *argv[]) { case 's': with_hydro = 1; break; + case 'S': + with_stars = 1; + break; case 't': if (sscanf(optarg, "%d", &nr_threads) != 1) { if (myrank == 0) @@ -269,6 +274,9 @@ int main(int argc, char *argv[]) { /* Genesis 1.1: And then, there was time ! */ clocks_set_cpufreq(cpufreq); + /* How vocal are we ? */ + const int talking = (verbose == 1 && myrank == 0) || (verbose == 2); + if (myrank == 0 && dry_run) message( "Executing a dry run. No i/o or time integration will be performed."); @@ -281,7 +289,7 @@ int main(int argc, char *argv[]) { /* Report host name(s). */ #ifdef WITH_MPI - if (myrank == 0 || verbose > 1) { + if (talking) { message("Rank %d running on: %s", myrank, hostname()); } #else @@ -290,27 +298,27 @@ int main(int argc, char *argv[]) { /* Do we have debugging checks ? */ #ifdef SWIFT_DEBUG_CHECKS - message("WARNING: Debugging checks activated. Code will be slower !"); + if (myrank == 0) + message("WARNING: Debugging checks activated. Code will be slower !"); #endif /* Do we choke on FP-exceptions ? */ if (with_fp_exceptions) { feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); - if (myrank == 0) message("Floating point exceptions will be reported."); + if (myrank == 0) + message("WARNING: Floating point exceptions will be reported."); } /* How large are the parts? 
*/ if (myrank == 0) { message("sizeof(struct part) is %4zi bytes.", sizeof(struct part)); message("sizeof(struct xpart) is %4zi bytes.", sizeof(struct xpart)); + message("sizeof(struct spart) is %4zi bytes.", sizeof(struct spart)); message("sizeof(struct gpart) is %4zi bytes.", sizeof(struct gpart)); message("sizeof(struct task) is %4zi bytes.", sizeof(struct task)); message("sizeof(struct cell) is %4zi bytes.", sizeof(struct cell)); } - /* How vocal are we ? */ - const int talking = (verbose == 1 && myrank == 0) || (verbose == 2); - /* Read the parameter file */ struct swift_params *params = malloc(sizeof(struct swift_params)); if (params == NULL) error("Error allocating memory for the parameter file."); @@ -358,7 +366,7 @@ int main(int argc, char *argv[]) { /* Initialise the hydro properties */ struct hydro_props hydro_properties; - hydro_props_init(&hydro_properties, params); + if (with_hydro) hydro_props_init(&hydro_properties, params); /* Read particles and space information from (GADGET) ICs */ char ICfileName[200] = ""; @@ -366,26 +374,32 @@ int main(int argc, char *argv[]) { if (myrank == 0) message("Reading ICs from file '%s'", ICfileName); fflush(stdout); + /* Get ready to read particles of all kinds */ struct part *parts = NULL; struct gpart *gparts = NULL; - size_t Ngas = 0, Ngpart = 0; + struct spart *sparts = NULL; + size_t Ngas = 0, Ngpart = 0, Nspart = 0; double dim[3] = {0., 0., 0.}; int periodic = 0; int flag_entropy_ICs = 0; if (myrank == 0) clocks_gettime(&tic); #if defined(WITH_MPI) #if defined(HAVE_PARALLEL_HDF5) - read_ic_parallel(ICfileName, &us, dim, &parts, &gparts, &Ngas, &Ngpart, - &periodic, &flag_entropy_ICs, myrank, nr_nodes, - MPI_COMM_WORLD, MPI_INFO_NULL, dry_run); + read_ic_parallel(ICfileName, &us, dim, &parts, &gparts, &sparts, &Ngas, + &Ngpart, &Nspart, &periodic, &flag_entropy_ICs, with_hydro, + (with_external_gravity || with_self_gravity), with_stars, + myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, dry_run); #else - 
read_ic_serial(ICfileName, &us, dim, &parts, &gparts, &Ngas, &Ngpart, - &periodic, &flag_entropy_ICs, myrank, nr_nodes, MPI_COMM_WORLD, - MPI_INFO_NULL, dry_run); + read_ic_serial(ICfileName, &us, dim, &parts, &gparts, &sparts, &Ngas, &Ngpart, + &Nspart, &periodic, &flag_entropy_ICs, with_hydro, + (with_external_gravity || with_self_gravity), with_stars, + myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL, dry_run); #endif #else - read_ic_single(ICfileName, &us, dim, &parts, &gparts, &Ngas, &Ngpart, - &periodic, &flag_entropy_ICs, dry_run); + read_ic_single(ICfileName, &us, dim, &parts, &gparts, &sparts, &Ngas, &Ngpart, + &Nspart, &periodic, &flag_entropy_ICs, with_hydro, + (with_external_gravity || with_self_gravity), with_stars, + dry_run); #endif if (myrank == 0) { clocks_gettime(&toc); @@ -394,40 +408,40 @@ int main(int argc, char *argv[]) { fflush(stdout); } - /* Discard gparts if we don't have gravity - * (Better implementation of i/o will come)*/ - if (!with_external_gravity && !with_self_gravity) { - free(gparts); - gparts = NULL; - for (size_t k = 0; k < Ngas; ++k) parts[k].gpart = NULL; - Ngpart = 0; +#ifdef SWIFT_DEBUG_CHECKS + /* Check once and for all that we don't have unwanted links */ + if (!with_stars) { + for (size_t k = 0; k < Ngpart; ++k) + if (gparts[k].type == swift_type_star) error("Linking problem"); } if (!with_hydro) { - free(parts); - parts = NULL; for (size_t k = 0; k < Ngpart; ++k) - if (gparts[k].id_or_neg_offset < 0) error("Linking problem"); - Ngas = 0; + if (gparts[k].type == swift_type_gas) error("Linking problem"); } +#endif /* Get the total number of particles across all nodes. 
*/ - long long N_total[2] = {0, 0}; + long long N_total[3] = {0, 0, 0}; #if defined(WITH_MPI) - long long N_long[2] = {Ngas, Ngpart}; - MPI_Reduce(&N_long, &N_total, 2, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); + long long N_long[3] = {Ngas, Ngpart, Nspart}; + MPI_Reduce(&N_long, &N_total, 3, MPI_LONG_LONG_INT, MPI_SUM, 0, + MPI_COMM_WORLD); #else N_total[0] = Ngas; N_total[1] = Ngpart; + N_total[2] = Nspart; #endif if (myrank == 0) - message("Read %lld gas particles and %lld gparts from the ICs.", N_total[0], - N_total[1]); + message( + "Read %lld gas particles, %lld star particles and %lld gparts from the " + "ICs.", + N_total[0], N_total[2], N_total[1]); /* Initialize the space with these data. */ if (myrank == 0) clocks_gettime(&tic); struct space s; - space_init(&s, params, dim, parts, gparts, Ngas, Ngpart, periodic, - with_self_gravity, talking, dry_run); + space_init(&s, params, dim, parts, gparts, sparts, Ngas, Ngpart, Nspart, + periodic, with_self_gravity, talking, dry_run); if (myrank == 0) { clocks_gettime(&toc); message("space_init took %.3f %s.", clocks_diff(&tic, &toc), @@ -487,6 +501,7 @@ int main(int argc, char *argv[]) { if (with_cosmology) engine_policies |= engine_policy_cosmology; if (with_cooling) engine_policies |= engine_policy_cooling; if (with_sourceterms) engine_policies |= engine_policy_sourceterms; + if (with_stars) engine_policies |= engine_policy_stars; /* Initialize the engine with the space and policies. */ if (myrank == 0) clocks_gettime(&tic); @@ -508,11 +523,16 @@ int main(int argc, char *argv[]) { /* Get some info to the user. */ if (myrank == 0) { + long long N_DM = N_total[1] - N_total[2] - N_total[0]; + message( + "Running on %lld gas particles, %lld star particles and %lld DM " + "particles (%lld gravity particles)", + N_total[0], N_total[2], N_total[1] > 0 ? 
N_DM : 0, N_total[1]); message( - "Running on %lld gas particles and %lld DM particles from t=%.3e until " - "t=%.3e with %d threads and %d queues (dt_min=%.3e, dt_max=%.3e)...", - N_total[0], N_total[1], e.timeBegin, e.timeEnd, e.nr_threads, - e.sched.nr_queues, e.dt_min, e.dt_max); + "from t=%.3e until t=%.3e with %d threads and %d queues (dt_min=%.3e, " + "dt_max=%.3e)...", + e.timeBegin, e.timeEnd, e.nr_threads, e.sched.nr_queues, e.dt_min, + e.dt_max); fflush(stdout); } @@ -543,8 +563,9 @@ int main(int argc, char *argv[]) { /* Legend */ if (myrank == 0) - printf("# %6s %14s %14s %10s %10s %16s [%s]\n", "Step", "Time", "Time-step", - "Updates", "g-Updates", "Wall-clock time", clocks_getunit()); + printf("# %6s %14s %14s %10s %10s %10s %16s [%s]\n", "Step", "Time", + "Time-step", "Updates", "g-Updates", "s-Updates", "Wall-clock time", + clocks_getunit()); /* Main simulation loop */ for (int j = 0; !engine_is_done(&e) && e.step != nsteps; j++) { diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index 6300d4831a50187b84c0de3d438e7692ef5719a3..6a35968e65f2fca0202320aea22bf75bd5d5e1b8 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -82,15 +82,7 @@ IsothermalPotential: position_z: 100. vrot: 200. # Rotation speed of isothermal potential (internal units) timestep_mult: 0.03 # Dimensionless pre-factor for the time-step condition - -# External potential parameters -SoftenedIsothermalPotential: - position_x: 0. # Location of centre of isothermal potential with respect to centre of the box (internal units) - position_y: 0. - position_z: 0. - vrot: 200. 
# rotation speed of isothermal potential (internal units) epsilon: 0.1 # Softening size (internal units) - timestep_mult: 0.03 # controls time step # Disk-patch potential parameters DiscPatchPotential: diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py index 6295c81a5f2fdb1e726cdf0a8fb43713004800f1..978448b3cd049c6ff31a92c7255851390ccc700c 100755 --- a/examples/plot_tasks.py +++ b/examples/plot_tasks.py @@ -55,40 +55,44 @@ PLOT_PARAMS = {"axes.labelsize": 10, pl.rcParams.update(PLOT_PARAMS) # Tasks and subtypes. Indexed as in tasks.h. -TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init", "ghost", - "extra_ghost", "kick", "send", "recv", - "grav_gather_m", "grav_fft", "grav_mm", "grav_up", - "grav_external", "cooling", "count"] - -TASKCOLOURS = {"none": "black", - "sort": "lightblue", - "self": "greenyellow", - "pair": "navy", - "sub_self": "greenyellow", - "sub_pair": "navy", - "init": "indigo", - "ghost": "cyan", - "extra_ghost": "cyan", - "kick": "green", - "send": "yellow", - "recv": "magenta", - "grav_gather_m": "mediumorchid", - "grav_fft": "mediumnightblue", - "grav_mm": "mediumturquoise", - "grav_up": "mediumvioletred", - "grav_external": "darkred", - "cooling": "darkblue", - "count": "powerblue"} - -SUBTYPES = ["none", "density", "gradient", "force", "grav", "tend", "count"] - -SUBCOLOURS = {"none": "black", - "density": "red", - "gradient": "powerblue", - "force": "blue", - "grav": "indigo", - "tend": "grey", - "count": "black"} +TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", + "init", "ghost", "extra_ghost", "drift", "kick1", "kick2", + "timestep", "send", "recv", "grav_gather_m", "grav_fft", + "grav_mm", "grav_up", "cooling", "sourceterms", "count"] +SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav", + "tend", "xv", "rho", "gpart", "count"] + +# Task/subtypes of interest. 
+FULLTYPES = ["self/force", "self/density", "sub_self/force", + "sub_self/density", "pair/force", "pair/density", "sub_pair/force", + "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho", + "recv/tend", "send/tend"] + +# Get a number of colours for the various types. +colours = ["black", "gray", "rosybrown", "firebrick", "red", "darksalmon", + "sienna", "sandybrown", "bisque", "tan", "moccasin", "gold", "darkkhaki", + "lightgoldenrodyellow", "olivedrab", "chartreuse", "darksage", "lightgreen", + "green", "mediumseagreen", "mediumaquamarine", "mediumturquoise", "darkslategrey", + "cyan", "cadetblue", "skyblue", "dodgerblue", "slategray", "darkblue", + "slateblue", "blueviolet", "mediumorchid", "purple", "magenta", "hotpink", + "pink"] +maxcolours = len(colours) + +# Set colours of task/subtype. +TASKCOLOURS = {} +ncolours = 0 +for task in TASKTYPES: + TASKCOLOURS[task] = colours[ncolours] + ncolours = (ncolours + 1) % maxcolours + +SUBCOLOURS = {} +for task in SUBTYPES: + SUBCOLOURS[task] = colours[ncolours] + ncolours = (ncolours + 1) % maxcolours + +for task in FULLTYPES: + SUBCOLOURS[task] = colours[ncolours] + ncolours = (ncolours + 1) % maxcolours # Show docs if help is requested. 
if len( sys.argv ) == 2 and ( sys.argv[1][0:2] == "-h" or sys.argv[1][0:3] == "--h" ): @@ -149,39 +153,26 @@ num_lines = pl.size(data) / 10 for line in range(num_lines): thread = int(data[line,0]) tasks[thread].append({}) - tasks[thread][-1]["type"] = TASKTYPES[int(data[line,1])] - tasks[thread][-1]["subtype"] = SUBTYPES[int(data[line,2])] + tasktype = TASKTYPES[int(data[line,1])] + subtype = SUBTYPES[int(data[line,2])] + tasks[thread][-1]["type"] = tasktype + tasks[thread][-1]["subtype"] = subtype tic = int(data[line,4]) / CPU_CLOCK * 1000 toc = int(data[line,5]) / CPU_CLOCK * 1000 tasks[thread][-1]["tic"] = tic tasks[thread][-1]["toc"] = toc tasks[thread][-1]["t"] = (toc + tic)/ 2 + if "self" in tasktype or "pair" in tasktype: + fulltype = tasktype + "/" + subtype + if fulltype in SUBCOLOURS: + tasks[thread][-1]["colour"] = SUBCOLOURS[fulltype] + else: + tasks[thread][-1]["colour"] = SUBCOLOURS[subtype] + else: + tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype] -combtasks = {} -combtasks[-1] = [] -for i in range(nthread): - combtasks[i] = [] - for thread in range(nthread): tasks[thread] = sorted(tasks[thread], key=lambda l: l["t"]) - lasttype = "" - types = [] - for task in tasks[thread]: - if task["type"] not in types: - types.append(task["type"]) - if lasttype == "" or not lasttype == task["type"]: - combtasks[thread].append({}) - combtasks[thread][-1]["type"] = task["type"] - combtasks[thread][-1]["subtype"] = task["subtype"] - combtasks[thread][-1]["tic"] = task["tic"] - combtasks[thread][-1]["toc"] = task["toc"] - if task["type"] == "self" or task["type"] == "pair" or task["type"] == "sub": - combtasks[thread][-1]["colour"] = SUBCOLOURS[task["subtype"]] - else: - combtasks[thread][-1]["colour"] = TASKCOLOURS[task["type"]] - lasttype = task["type"] - else: - combtasks[thread][-1]["toc"] = task["toc"] typesseen = [] fig = pl.figure() @@ -192,11 +183,11 @@ tictoc = np.zeros(2) for i in range(nthread): # Collect ranges and colours into arrays. 
- tictocs = np.zeros(len(combtasks[i])*2) - colours = np.empty(len(combtasks[i])*2, dtype='object') + tictocs = np.zeros(len(tasks[i])*2) + colours = np.empty(len(tasks[i])*2, dtype='object') coloursseen = [] j = 0 - for task in combtasks[i]: + for task in tasks[i]: tictocs[j] = task["tic"] tictocs[j+1] = task["toc"] colours[j] = task["colour"] diff --git a/examples/plot_tasks_MPI.py b/examples/plot_tasks_MPI.py index 734918b8cbf388ef8f1a064e014cfd28775edde2..c95bfa1fd2d087cc907b57201c1a1397cbeb1460 100755 --- a/examples/plot_tasks_MPI.py +++ b/examples/plot_tasks_MPI.py @@ -63,40 +63,44 @@ PLOT_PARAMS = {"axes.labelsize": 10, pl.rcParams.update(PLOT_PARAMS) # Tasks and subtypes. Indexed as in tasks.h. -TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init", - "ghost", "extra_ghost", "kick", "send", "recv", - "grav_gather_m", "grav_fft", "grav_mm", "grav_up", - "grav_external", "cooling", "count"] - -TASKCOLOURS = {"none": "black", - "sort": "lightblue", - "self": "greenyellow", - "pair": "navy", - "sub_self": "greenyellow", - "sub_pair": "navy", - "init": "indigo", - "ghost": "cyan", - "extra_ghost": "cyan", - "kick": "green", - "send": "yellow", - "recv": "magenta", - "grav_gather_m": "mediumorchid", - "grav_fft": "mediumnightblue", - "grav_mm": "mediumturquoise", - "grav_up": "mediumvioletred", - "grav_external": "darkred", - "cooling": "darkblue", - "count": "powerblue"} - -SUBTYPES = ["none", "density", "gradient", "force", "grav", "tend", "count"] - -SUBCOLOURS = {"none": "black", - "density": "red", - "gradient": "powerblue", - "force": "blue", - "grav": "indigo", - "tend": "grey", - "count": "black"} +TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", + "init", "ghost", "extra_ghost", "drift", "kick1", "kick2", + "timestep", "send", "recv", "grav_gather_m", "grav_fft", + "grav_mm", "grav_up", "cooling", "sourceterms", "count"] +SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav", + "tend", "xv", 
"rho", "gpart", "count"] + +# Task/subtypes of interest. +FULLTYPES = ["self/force", "self/density", "sub_self/force", + "sub_self/density", "pair/force", "pair/density", "sub_pair/force", + "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho", + "recv/tend", "send/tend"] + +# Get a number of colours for the various types. +colours = ["black", "gray", "rosybrown", "firebrick", "red", "darksalmon", + "sienna", "sandybrown", "bisque", "tan", "moccasin", "gold", "darkkhaki", + "lightgoldenrodyellow", "olivedrab", "chartreuse", "darksage", "lightgreen", + "green", "mediumseagreen", "mediumaquamarine", "mediumturquoise", "darkslategrey", + "cyan", "cadetblue", "skyblue", "dodgerblue", "slategray", "darkblue", + "slateblue", "blueviolet", "mediumorchid", "purple", "magenta", "hotpink", + "pink"] +maxcolours = len(colours) + +# Set colours of task/subtype. +TASKCOLOURS = {} +ncolours = 0 +for task in TASKTYPES: + TASKCOLOURS[task] = colours[ncolours] + ncolours = (ncolours + 1) % maxcolours + +SUBCOLOURS = {} +for task in SUBTYPES: + SUBCOLOURS[task] = colours[ncolours] + ncolours = (ncolours + 1) % maxcolours + +for task in FULLTYPES: + SUBCOLOURS[task] = colours[ncolours] + ncolours = (ncolours + 1) % maxcolours # Show docs if help is requested. 
if len( sys.argv ) == 2 and ( sys.argv[1][0:2] == "-h" or sys.argv[1][0:3] == "--h" ): @@ -185,39 +189,26 @@ for rank in range(nranks): for line in range(num_lines): thread = int(data[line,1]) tasks[thread].append({}) - tasks[thread][-1]["type"] = TASKTYPES[int(data[line,2])] - tasks[thread][-1]["subtype"] = SUBTYPES[int(data[line,3])] + tasktype = TASKTYPES[int(data[line,2])] + subtype = SUBTYPES[int(data[line,3])] + tasks[thread][-1]["type"] = tasktype + tasks[thread][-1]["subtype"] = subtype tic = int(data[line,5]) / CPU_CLOCK * 1000 toc = int(data[line,6]) / CPU_CLOCK * 1000 tasks[thread][-1]["tic"] = tic tasks[thread][-1]["toc"] = toc tasks[thread][-1]["t"] = (toc + tic)/ 2 - - combtasks = {} - combtasks[-1] = [] - for i in range(nthread): - combtasks[i] = [] + if "self" in tasktype or "pair" in tasktype or "recv" in tasktype or "send" in tasktype: + fulltype = tasktype + "/" + subtype + if fulltype in SUBCOLOURS: + tasks[thread][-1]["colour"] = SUBCOLOURS[fulltype] + else: + tasks[thread][-1]["colour"] = SUBCOLOURS[subtype] + else: + tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype] for thread in range(nthread): tasks[thread] = sorted(tasks[thread], key=lambda l: l["t"]) - lasttype = "" - types = [] - for task in tasks[thread]: - if task["type"] not in types: - types.append(task["type"]) - if lasttype == "" or not lasttype == task["type"]: - combtasks[thread].append({}) - combtasks[thread][-1]["type"] = task["type"] - combtasks[thread][-1]["subtype"] = task["subtype"] - combtasks[thread][-1]["tic"] = task["tic"] - combtasks[thread][-1]["toc"] = task["toc"] - if task["type"] == "self" or task["type"] == "pair" or task["type"] == "sub": - combtasks[thread][-1]["colour"] = SUBCOLOURS[task["subtype"]] - else: - combtasks[thread][-1]["colour"] = TASKCOLOURS[task["type"]] - lasttype = task["type"] - else: - combtasks[thread][-1]["toc"] = task["toc"] fig = pl.figure() ax = fig.add_subplot(1,1,1) @@ -227,11 +218,11 @@ for rank in range(nranks): for i in 
range(nthread): # Collect ranges and colours into arrays. - tictocs = np.zeros(len(combtasks[i])*2) - colours = np.empty(len(combtasks[i])*2, dtype='object') + tictocs = np.zeros(len(tasks[i])*2) + colours = np.empty(len(tasks[i])*2, dtype='object') coloursseen = [] j = 0 - for task in combtasks[i]: + for task in tasks[i]: tictocs[j] = task["tic"] tictocs[j+1] = task["toc"] colours[j] = task["colour"] diff --git a/src/Makefile.am b/src/Makefile.am index 826ec687d0b9c72768a798ba692755258320227f..515b8aed02e92334e92fb8414a5b4e90db5cbbe1 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -25,7 +25,7 @@ AM_LDFLAGS = $(HDF5_LDFLAGS) $(FFTW_LIBS) -version-info 0:0:0 GIT_CMD = @GIT_CMD@ # Additional dependencies for shared libraries. -EXTRA_LIBS = $(HDF5_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) +EXTRA_LIBS = $(HDF5_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) $(JEMALLOC_LIBS) # MPI libraries. MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS) @@ -44,7 +44,8 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h \ physical_constants.h physical_constants_cgs.h potential.h version.h \ hydro_properties.h riemann.h threadpool.h cooling.h cooling_struct.h sourceterms.h \ - sourceterms_struct.h statistics.h memswap.h profiler.h + sourceterms_struct.h statistics.h memswap.h cache.h runner_doiact_vec.h profiler.h \ + dump.h logger.h active.h timeline.h # Common source files AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ @@ -53,13 +54,13 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \ physical_constants.c potential.c hydro_properties.c \ runner_doiact_fft.c threadpool.c cooling.c sourceterms.c \ - statistics.c profiler.c + statistics.c runner_doiact_vec.c profiler.c dump.c logger.c # Include files for distribution, not installation. 
nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ - kernel_long_gravity.h vector.h runner_doiact.h runner_doiact_grav.h runner_doiact_fft.h \ - units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \ - dimension.h equation_of_state.h active.h \ + kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h runner_doiact_fft.h \ + runner_doiact_nosort.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \ + dimension.h equation_of_state.h part_type.h \ gravity.h gravity_io.h \ gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \ gravity/Default/gravity_debug.h gravity/Default/gravity_part.h \ @@ -77,13 +78,15 @@ nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h hydro/Gizmo/hydro_debug.h hydro/Gizmo/hydro_part.h \ riemann/riemann_hllc.h riemann/riemann_trrs.h \ riemann/riemann_exact.h riemann/riemann_vacuum.h \ + stars.h stars_io.h \ + stars/Default/star.h stars/Default/star_iact.h stars/Default/star_io.h \ + stars/Default/star_debug.h stars/Default/star_part.h \ potential/none/potential.h potential/point_mass/potential.h \ potential/isothermal/potential.h potential/disc_patch/potential.h \ - potential/softened_isothermal/potential.h \ cooling/none/cooling.h cooling/none/cooling_struct.h \ cooling/const_du/cooling.h cooling/const_du/cooling_struct.h \ cooling/const_lambda/cooling.h cooling/const_lambda/cooling_struct.h \ - memswap.h + memswap.h dump.h logger.h # Sources and flags for regular library diff --git a/src/active.h b/src/active.h index e33f8baf6e5bd5d799e122e4e04610a7cab443bf..0c22a745fed4fbdf72ef1377fad45b78c86f178f 100644 --- a/src/active.h +++ b/src/active.h @@ -26,33 +26,28 @@ #include "cell.h" #include "engine.h" #include "part.h" +#include "timeline.h" /** * @brief Check that a cell been drifted to the current time. 
* - * Only used for debugging. Calls error() if the cell has not - * been drifted. Does nothing if SWIFT_DEBUG_CHECKS is not defined. - * * @param c The #cell. * @param e The #engine containing information about the current time. + * @return 1 if the #cell has been drifted to the current time, 0 otherwise. */ -__attribute__((always_inline)) INLINE static void cell_is_drifted( +__attribute__((always_inline)) INLINE static int cell_is_drifted( const struct cell *c, const struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS if (c->ti_old > e->ti_current) error( - "Cell has been drifted too far forward in time! c->ti_old=%d " - "e->ti_current=%d", - c->ti_old, e->ti_current); - - if (c->ti_old != e->ti_current) { - error( - "Cell has not been drifted to the current time c->ti_old=%d, " - "e->ti_current=%d", - c->ti_old, e->ti_current); - } + "Cell has been drifted too far forward in time! c->ti_old=%lld (t=%e) " + "and e->ti_current=%lld (t=%e)", + c->ti_old, c->ti_old * e->timeBase, e->ti_current, + e->ti_current * e->timeBase); #endif + + return (c->ti_old == e->ti_current); } /** @@ -60,14 +55,18 @@ __attribute__((always_inline)) INLINE static void cell_is_drifted( * * @param c The #cell. * @param e The #engine containing information about the current time. + * @return 1 if the #cell contains at least an active particle, 0 otherwise. */ __attribute__((always_inline)) INLINE static int cell_is_active( const struct cell *c, const struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS if (c->ti_end_min < e->ti_current) - error("cell in an impossible time-zone! c->ti_end_min=%d e->ti_current=%d", - c->ti_end_min, e->ti_current); + error( + "cell in an impossible time-zone! c->ti_end_min=%lld (t=%e) and " + "e->ti_current=%lld (t=%e)", + c->ti_end_min, c->ti_end_min * e->timeBase, e->ti_current, + e->ti_current * e->timeBase); #endif return (c->ti_end_min == e->ti_current); @@ -78,14 +77,17 @@ __attribute__((always_inline)) INLINE static int cell_is_active( * * @param c The #cell. 
* @param e The #engine containing information about the current time. + * @return 1 if all particles in a #cell are active, 0 otherwise. */ __attribute__((always_inline)) INLINE static int cell_is_all_active( const struct cell *c, const struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS if (c->ti_end_max < e->ti_current) - error("cell in an impossible time-zone! c->ti_end_max=%d e->ti_current=%d", - c->ti_end_max, e->ti_current); + error( + "cell in an impossible time-zone! c->ti_end_max=%lld " + "e->ti_current=%lld", + c->ti_end_max, e->ti_current); #endif return (c->ti_end_max == e->ti_current); @@ -96,17 +98,23 @@ __attribute__((always_inline)) INLINE static int cell_is_all_active( * * @param p The #part. * @param e The #engine containing information about the current time. + * @return 1 if the #part is active, 0 otherwise. */ __attribute__((always_inline)) INLINE static int part_is_active( const struct part *p, const struct engine *e) { + const integertime_t ti_current = e->ti_current; + const integertime_t ti_end = get_integer_time_end(ti_current, p->time_bin); + #ifdef SWIFT_DEBUG_CHECKS - if (p->ti_end < e->ti_current) - error("particle in an impossible time-zone! p->ti_end=%d e->ti_current=%d", - p->ti_end, e->ti_current); + if (ti_end < ti_current) + error( + "particle in an impossible time-zone! p->ti_end=%lld " + "e->ti_current=%lld", + ti_end, ti_current); #endif - return (p->ti_end == e->ti_current); + return (ti_end == ti_current); } /** @@ -114,18 +122,47 @@ __attribute__((always_inline)) INLINE static int part_is_active( * * @param gp The #gpart. * @param e The #engine containing information about the current time. + * @return 1 if the #gpart is active, 0 otherwise. 
*/ __attribute__((always_inline)) INLINE static int gpart_is_active( const struct gpart *gp, const struct engine *e) { + const integertime_t ti_current = e->ti_current; + const integertime_t ti_end = get_integer_time_end(ti_current, gp->time_bin); + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_end < ti_current) + error( + "g-particle in an impossible time-zone! gp->ti_end=%lld " + "e->ti_current=%lld", + ti_end, ti_current); +#endif + + return (ti_end == ti_current); +} + +/** + * @brief Is this s-particle active ? + * + * @param sp The #spart. + * @param e The #engine containing information about the current time. + * @return 1 if the #spart is active, 0 otherwise. + */ +__attribute__((always_inline)) INLINE static int spart_is_active( + const struct spart *sp, const struct engine *e) { + + const integertime_t ti_current = e->ti_current; + const integertime_t ti_end = get_integer_time_end(ti_current, sp->time_bin); + #ifdef SWIFT_DEBUG_CHECKS - if (gp->ti_end < e->ti_current) + if (ti_end < ti_current) error( - "g-particle in an impossible time-zone! gp->ti_end=%d e->ti_current=%d", - gp->ti_end, e->ti_current); + "s-particle in an impossible time-zone! gp->ti_end=%lld " + "e->ti_current=%lld", + ti_end, ti_current); #endif - return (gp->ti_end == e->ti_current); + return (ti_end == ti_current); } #endif /* SWIFT_ACTIVE_H */ diff --git a/src/align.h b/src/align.h index 84e2909c0866c18f0f8378df9d0efc8d0f6545b5..915af33e6e2ba59be1a0849c4de0e2f1bd5b0d96 100644 --- a/src/align.h +++ b/src/align.h @@ -19,9 +19,13 @@ #ifndef SWIFT_ALIGN_H #define SWIFT_ALIGN_H +/** + * @brief The default struct alignment in SWIFT. 
+ */ +#define SWIFT_STRUCT_ALIGNMENT 32 /** * @brief Defines alignment of structures */ -#define SWIFT_STRUCT_ALIGN __attribute__((aligned(32))) +#define SWIFT_STRUCT_ALIGN __attribute__((aligned(SWIFT_STRUCT_ALIGNMENT))) #endif /* SWIFT_ALIGN_H */ diff --git a/src/cache.h b/src/cache.h new file mode 100644 index 0000000000000000000000000000000000000000..19d61b657b3aa1fe8675ee413fcde146071381e9 --- /dev/null +++ b/src/cache.h @@ -0,0 +1,183 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 James Willis (jame.s.willis@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_CACHE_H +#define SWIFT_CACHE_H + +/* Config parameters. */ +#include "../config.h" + +/* Local headers */ +#include "cell.h" +#include "error.h" +#include "part.h" +#include "vector.h" + +#define NUM_VEC_PROC 2 +#define C2_CACHE_SIZE (NUM_VEC_PROC * VEC_SIZE * 6) + (NUM_VEC_PROC * VEC_SIZE) +#define C2_CACHE_ALIGN sizeof(float) * VEC_SIZE + +/* Cache struct to hold a local copy of a cells' particle + * properties required for density/force calculations.*/ +struct cache { + + /* Particle x position. 
*/ + float *restrict x __attribute__((aligned(sizeof(float) * VEC_SIZE))); + + /* Particle y position. */ + float *restrict y __attribute__((aligned(sizeof(float) * VEC_SIZE))); + + /* Particle z position. */ + float *restrict z __attribute__((aligned(sizeof(float) * VEC_SIZE))); + + /* Particle smoothing length. */ + float *restrict h __attribute__((aligned(sizeof(float) * VEC_SIZE))); + + /* Particle mass. */ + float *restrict m __attribute__((aligned(sizeof(float) * VEC_SIZE))); + + /* Particle x velocity. */ + float *restrict vx __attribute__((aligned(sizeof(float) * VEC_SIZE))); + + /* Particle y velocity. */ + float *restrict vy __attribute__((aligned(sizeof(float) * VEC_SIZE))); + + /* Particle z velocity. */ + float *restrict vz __attribute__((aligned(sizeof(float) * VEC_SIZE))); + + /* Cache size. */ + int count; +}; + +/* Secondary cache struct to hold a list of interactions between two + * particles.*/ +struct c2_cache { + + /* Separation between two particles squared. */ + float r2q[C2_CACHE_SIZE] __attribute__((aligned(C2_CACHE_ALIGN))); + + /* x separation between two particles. */ + float dxq[C2_CACHE_SIZE] __attribute__((aligned(C2_CACHE_ALIGN))); + + /* y separation between two particles. */ + float dyq[C2_CACHE_SIZE] __attribute__((aligned(C2_CACHE_ALIGN))); + + /* z separation between two particles. */ + float dzq[C2_CACHE_SIZE] __attribute__((aligned(C2_CACHE_ALIGN))); + + /* Mass of particle pj. */ + float mq[C2_CACHE_SIZE] __attribute__((aligned(C2_CACHE_ALIGN))); + + /* x velocity of particle pj. */ + float vxq[C2_CACHE_SIZE] __attribute__((aligned(C2_CACHE_ALIGN))); + + /* y velocity of particle pj. */ + float vyq[C2_CACHE_SIZE] __attribute__((aligned(C2_CACHE_ALIGN))); + + /* z velocity of particle pj. */ + float vzq[C2_CACHE_SIZE] __attribute__((aligned(C2_CACHE_ALIGN))); +}; + +/** + * @brief Allocate memory and initialise cache. + * + * @param c The cache. + * @param count Number of particles to allocate space for. 
+ */ +__attribute__((always_inline)) INLINE void cache_init(struct cache *c, + size_t count) { + + /* Align cache on correct byte boundary and pad cache size to include 2 vector + * lengths for remainder operations. */ + unsigned long alignment = sizeof(float) * VEC_SIZE; + unsigned int sizeBytes = (count + (2 * VEC_SIZE)) * sizeof(float); + int error = 0; + + /* Free memory if cache has already been allocated. */ + if (c->count > 0) { + free(c->x); + free(c->y); + free(c->z); + free(c->m); + free(c->vx); + free(c->vy); + free(c->vz); + free(c->h); + } + + error += posix_memalign((void **)&c->x, alignment, sizeBytes); + error += posix_memalign((void **)&c->y, alignment, sizeBytes); + error += posix_memalign((void **)&c->z, alignment, sizeBytes); + error += posix_memalign((void **)&c->m, alignment, sizeBytes); + error += posix_memalign((void **)&c->vx, alignment, sizeBytes); + error += posix_memalign((void **)&c->vy, alignment, sizeBytes); + error += posix_memalign((void **)&c->vz, alignment, sizeBytes); + error += posix_memalign((void **)&c->h, alignment, sizeBytes); + + if (error != 0) + error("Couldn't allocate cache, no. of particles: %d", (int)count); + c->count = count; +} + +/** + * @brief Populate cache by reading in the particles in unsorted order. + * + * @param ci The #cell. + * @param ci_cache The cache. + */ +__attribute__((always_inline)) INLINE void cache_read_particles( + const struct cell *const ci, struct cache *const ci_cache) { + +#if defined(GADGET2_SPH) + + /* Shift the particles positions to a local frame so single precision can be + * used instead of double precision. 
*/ + for (int i = 0; i < ci->count; i++) { + ci_cache->x[i] = ci->parts[i].x[0] - ci->loc[0]; + ci_cache->y[i] = ci->parts[i].x[1] - ci->loc[1]; + ci_cache->z[i] = ci->parts[i].x[2] - ci->loc[2]; + ci_cache->h[i] = ci->parts[i].h; + + ci_cache->m[i] = ci->parts[i].mass; + ci_cache->vx[i] = ci->parts[i].v[0]; + ci_cache->vy[i] = ci->parts[i].v[1]; + ci_cache->vz[i] = ci->parts[i].v[2]; + } + +#endif +} + +/** + * @brief Clean the memory allocated by a #cache object. + * + * @param c The #cache to clean. + */ +static INLINE void cache_clean(struct cache *c) { + if (c->count > 0) { + free(c->x); + free(c->y); + free(c->z); + free(c->m); + free(c->vx); + free(c->vy); + free(c->vz); + free(c->h); + } +} + +#endif /* SWIFT_CACHE_H */ diff --git a/src/cell.c b/src/cell.c index e2767cdaa9e1189ec87b5ef51cc578c91f8cfe4c..c31f28236a21bdc452f1aa9585ead5c98dd65c4c 100644 --- a/src/cell.c +++ b/src/cell.c @@ -49,6 +49,7 @@ /* Local headers. */ #include "active.h" #include "atomic.h" +#include "drift.h" #include "error.h" #include "gravity.h" #include "hydro.h" @@ -98,8 +99,10 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { c->h_max = pc->h_max; c->ti_end_min = pc->ti_end_min; c->ti_end_max = pc->ti_end_max; + c->ti_old = pc->ti_old; c->count = pc->count; c->gcount = pc->gcount; + c->scount = pc->scount; c->tag = pc->tag; /* Number of new cells created. */ @@ -108,9 +111,11 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { /* Fill the progeny recursively, depth-first. */ for (int k = 0; k < 8; k++) if (pc->progeny[k] >= 0) { - struct cell *temp = space_getcell(s); + struct cell *temp; + space_getcells(s, 1, &temp); temp->count = 0; temp->gcount = 0; + temp->scount = 0; temp->loc[0] = c->loc[0]; temp->loc[1] = c->loc[1]; temp->loc[2] = c->loc[2]; @@ -191,6 +196,31 @@ int cell_link_gparts(struct cell *c, struct gpart *gparts) { return c->gcount; } +/** + * @brief Link the cells recursively to the given #spart array. 
+ * + * @param c The #cell. + * @param sparts The #spart array. + * + * @return The number of particles linked. + */ +int cell_link_sparts(struct cell *c, struct spart *sparts) { + + c->sparts = sparts; + + /* Fill the progeny recursively, depth-first. */ + if (c->split) { + int offset = 0; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) + offset += cell_link_sparts(c->progeny[k], &sparts[offset]); + } + } + + /* Return the total number of linked particles. */ + return c->scount; +} + /** * @brief Pack the data of the given cell and all it's sub-cells. * @@ -208,8 +238,10 @@ int cell_pack(struct cell *c, struct pcell *pc) { pc->h_max = c->h_max; pc->ti_end_min = c->ti_end_min; pc->ti_end_max = c->ti_end_max; + pc->ti_old = c->ti_old; pc->count = c->count; pc->gcount = c->gcount; + pc->scount = c->scount; c->tag = pc->tag = atomic_inc(&cell_next_tag) % cell_max_tag; /* Fill in the progeny, depth-first recursion. */ @@ -239,7 +271,7 @@ int cell_pack(struct cell *c, struct pcell *pc) { * * @return The number of packed cells. */ -int cell_pack_ti_ends(struct cell *c, int *ti_ends) { +int cell_pack_ti_ends(struct cell *c, integertime_t *ti_ends) { #ifdef WITH_MPI @@ -270,7 +302,7 @@ int cell_pack_ti_ends(struct cell *c, int *ti_ends) { * * @return The number of cells created. */ -int cell_unpack_ti_ends(struct cell *c, int *ti_ends) { +int cell_unpack_ti_ends(struct cell *c, integertime_t *ti_ends) { #ifdef WITH_MPI @@ -421,6 +453,70 @@ int cell_glocktree(struct cell *c) { } } +/** + * @brief Lock a cell for access to its array of #spart and hold its parents. + * + * @param c The #cell. + * @return 0 on success, 1 on failure + */ +int cell_slocktree(struct cell *c) { + + TIMER_TIC + + /* First of all, try to lock this cell. */ + if (c->shold || lock_trylock(&c->slock) != 0) { + TIMER_TOC(timer_locktree); + return 1; + } + + /* Did somebody hold this cell in the meantime? */ + if (c->shold) { + + /* Unlock this cell. 
*/ + if (lock_unlock(&c->slock) != 0) error("Failed to unlock cell."); + + /* Admit defeat. */ + TIMER_TOC(timer_locktree); + return 1; + } + + /* Climb up the tree and lock/hold/unlock. */ + struct cell *finger; + for (finger = c->parent; finger != NULL; finger = finger->parent) { + + /* Lock this cell. */ + if (lock_trylock(&finger->slock) != 0) break; + + /* Increment the hold. */ + atomic_inc(&finger->shold); + + /* Unlock the cell. */ + if (lock_unlock(&finger->slock) != 0) error("Failed to unlock cell."); + } + + /* If we reached the top of the tree, we're done. */ + if (finger == NULL) { + TIMER_TOC(timer_locktree); + return 0; + } + + /* Otherwise, we hit a snag. */ + else { + + /* Undo the holds up to finger. */ + for (struct cell *finger2 = c->parent; finger2 != finger; + finger2 = finger2->parent) + atomic_dec(&finger2->shold); + + /* Unlock this cell. */ + if (lock_unlock(&c->slock) != 0) error("Failed to unlock cell."); + + /* Admit defeat. */ + TIMER_TOC(timer_locktree); + return 1; + } +} + /** * @brief Unlock a cell's parents for access to #part array. * @@ -459,39 +555,80 @@ void cell_gunlocktree(struct cell *c) { TIMER_TOC(timer_locktree); } +/** + * @brief Unlock a cell's parents for access to #spart array. + * + * @param c The #cell. + */ +void cell_sunlocktree(struct cell *c) { + + TIMER_TIC + + /* First of all, try to unlock this cell. */ + if (lock_unlock(&c->slock) != 0) error("Failed to unlock cell."); + + /* Climb up the tree and unhold the parents. */ + for (struct cell *finger = c->parent; finger != NULL; finger = finger->parent) + atomic_dec(&finger->shold); + + TIMER_TOC(timer_locktree); +} + /** * @brief Sort the parts into eight bins along the given pivots. * * @param c The #cell array to be sorted. * @param parts_offset Offset of the cell parts array relative to the * space's parts array, i.e. c->parts - s->parts. + * @param sparts_offset Offset of the cell sparts array relative to the + * space's sparts array, i.e. 
c->sparts - s->sparts. * @param buff A buffer with at least max(c->count, c->gcount) entries, * used for sorting indices. + * @param sbuff A buffer with at least max(c->scount, c->gcount) entries, + * used for sorting indices for the sparts. + * @param gbuff A buffer with at least max(c->count, c->gcount) entries, + * used for sorting indices for the gparts. */ -void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) { +void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, + struct cell_buff *buff, struct cell_buff *sbuff, + struct cell_buff *gbuff) { - const int count = c->count, gcount = c->gcount; + const int count = c->count, gcount = c->gcount, scount = c->scount; struct part *parts = c->parts; struct xpart *xparts = c->xparts; struct gpart *gparts = c->gparts; + struct spart *sparts = c->sparts; const double pivot[3] = {c->loc[0] + c->width[0] / 2, c->loc[1] + c->width[1] / 2, c->loc[2] + c->width[2] / 2}; int bucket_count[8] = {0, 0, 0, 0, 0, 0, 0, 0}; int bucket_offset[9]; - /* If the buff is NULL, allocate it, and remember to free it. */ - const int allocate_buffer = (buff == NULL); - if (allocate_buffer && - (buff = (int *)malloc(sizeof(int) * max(count, gcount))) == NULL) - error("Failed to allocate temporary indices."); +#ifdef SWIFT_DEBUG_CHECKS + /* Check that the buffs are OK. */ + for (int k = 0; k < count; k++) { + if (buff[k].x[0] != parts[k].x[0] || buff[k].x[1] != parts[k].x[1] || + buff[k].x[2] != parts[k].x[2]) + error("Inconsistent buff contents."); + } + for (int k = 0; k < gcount; k++) { + if (gbuff[k].x[0] != gparts[k].x[0] || gbuff[k].x[1] != gparts[k].x[1] || + gbuff[k].x[2] != gparts[k].x[2]) + error("Inconsistent gbuff contents."); + } + for (int k = 0; k < scount; k++) { + if (sbuff[k].x[0] != sparts[k].x[0] || sbuff[k].x[1] != sparts[k].x[1] || + sbuff[k].x[2] != sparts[k].x[2]) + error("Inconsistent sbuff contents."); + } +#endif /* SWIFT_DEBUG_CHECKS */ /* Fill the buffer with the indices. 
*/ for (int k = 0; k < count; k++) { - const int bid = (parts[k].x[0] > pivot[0]) * 4 + - (parts[k].x[1] > pivot[1]) * 2 + (parts[k].x[2] > pivot[2]); + const int bid = (buff[k].x[0] > pivot[0]) * 4 + + (buff[k].x[1] > pivot[1]) * 2 + (buff[k].x[2] > pivot[2]); bucket_count[bid]++; - buff[k] = bid; + buff[k].ind = bid; } /* Set the buffer offsets. */ @@ -505,23 +642,25 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) { for (int bucket = 0; bucket < 8; bucket++) { for (int k = bucket_offset[bucket] + bucket_count[bucket]; k < bucket_offset[bucket + 1]; k++) { - int bid = buff[k]; + int bid = buff[k].ind; if (bid != bucket) { struct part part = parts[k]; struct xpart xpart = xparts[k]; + struct cell_buff temp_buff = buff[k]; while (bid != bucket) { int j = bucket_offset[bid] + bucket_count[bid]++; - while (buff[j] == bid) { + while (buff[j].ind == bid) { j++; bucket_count[bid]++; } memswap(&parts[j], &part, sizeof(struct part)); memswap(&xparts[j], &xpart, sizeof(struct xpart)); - memswap(&buff[j], &bid, sizeof(int)); + memswap(&buff[j], &temp_buff, sizeof(struct cell_buff)); + bid = temp_buff.ind; } parts[k] = part; xparts[k] = xpart; - buff[k] = bid; + buff[k] = temp_buff; } bucket_count[bid]++; } @@ -535,9 +674,18 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) { } /* Re-link the gparts. */ - if (count > 0 && gcount > 0) part_relink_gparts(parts, count, parts_offset); + if (count > 0 && gcount > 0) + part_relink_gparts_to_parts(parts, count, parts_offset); #ifdef SWIFT_DEBUG_CHECKS + /* Check that the buffs are OK. */ + for (int k = 1; k < count; k++) { + if (buff[k].ind < buff[k - 1].ind) error("Buff not sorted."); + if (buff[k].x[0] != parts[k].x[0] || buff[k].x[1] != parts[k].x[1] || + buff[k].x[2] != parts[k].x[2]) + error("Inconsistent buff contents (k=%i).", k); + } + /* Verify that _all_ the parts have been assigned to a cell. 
*/ for (int k = 1; k < 8; k++) if (&c->progeny[k - 1]->parts[c->progeny[k - 1]->count] != @@ -564,18 +712,95 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) { c->progeny[2]->parts[k].x[1] <= pivot[1] || c->progeny[2]->parts[k].x[2] > pivot[2]) error("Sorting failed (progeny=2)."); + for (int k = 0; k < c->progeny[3]->count; k++) + if (c->progeny[3]->parts[k].x[0] > pivot[0] || + c->progeny[3]->parts[k].x[1] <= pivot[1] || + c->progeny[3]->parts[k].x[2] <= pivot[2]) + error("Sorting failed (progeny=3)."); + for (int k = 0; k < c->progeny[4]->count; k++) + if (c->progeny[4]->parts[k].x[0] <= pivot[0] || + c->progeny[4]->parts[k].x[1] > pivot[1] || + c->progeny[4]->parts[k].x[2] > pivot[2]) + error("Sorting failed (progeny=4)."); + for (int k = 0; k < c->progeny[5]->count; k++) + if (c->progeny[5]->parts[k].x[0] <= pivot[0] || + c->progeny[5]->parts[k].x[1] > pivot[1] || + c->progeny[5]->parts[k].x[2] <= pivot[2]) + error("Sorting failed (progeny=5)."); + for (int k = 0; k < c->progeny[6]->count; k++) + if (c->progeny[6]->parts[k].x[0] <= pivot[0] || + c->progeny[6]->parts[k].x[1] <= pivot[1] || + c->progeny[6]->parts[k].x[2] > pivot[2]) + error("Sorting failed (progeny=6)."); + for (int k = 0; k < c->progeny[7]->count; k++) + if (c->progeny[7]->parts[k].x[0] <= pivot[0] || + c->progeny[7]->parts[k].x[1] <= pivot[1] || + c->progeny[7]->parts[k].x[2] <= pivot[2]) + error("Sorting failed (progeny=7)."); #endif - /* Now do the same song and dance for the gparts. */ + /* Now do the same song and dance for the sparts. */ + for (int k = 0; k < 8; k++) bucket_count[k] = 0; + + /* Fill the buffer with the indices. */ + for (int k = 0; k < scount; k++) { + const int bid = (sbuff[k].x[0] > pivot[0]) * 4 + + (sbuff[k].x[1] > pivot[1]) * 2 + (sbuff[k].x[2] > pivot[2]); + bucket_count[bid]++; + sbuff[k].ind = bid; + } + + /* Set the buffer offsets. 
*/ + bucket_offset[0] = 0; + for (int k = 1; k <= 8; k++) { + bucket_offset[k] = bucket_offset[k - 1] + bucket_count[k - 1]; + bucket_count[k - 1] = 0; + } + + /* Run through the buckets, and swap particles to their correct spot. */ + for (int bucket = 0; bucket < 8; bucket++) { + for (int k = bucket_offset[bucket] + bucket_count[bucket]; + k < bucket_offset[bucket + 1]; k++) { + int bid = sbuff[k].ind; + if (bid != bucket) { + struct spart spart = sparts[k]; + struct cell_buff temp_buff = sbuff[k]; + while (bid != bucket) { + int j = bucket_offset[bid] + bucket_count[bid]++; + while (sbuff[j].ind == bid) { + j++; + bucket_count[bid]++; + } + memswap(&sparts[j], &spart, sizeof(struct spart)); + memswap(&sbuff[j], &temp_buff, sizeof(struct cell_buff)); + bid = temp_buff.ind; + } + sparts[k] = spart; + sbuff[k] = temp_buff; + } + bucket_count[bid]++; + } + } + + /* Store the counts and offsets. */ + for (int k = 0; k < 8; k++) { + c->progeny[k]->scount = bucket_count[k]; + c->progeny[k]->sparts = &c->sparts[bucket_offset[k]]; + } + + /* Re-link the gparts. */ + if (scount > 0 && gcount > 0) + part_relink_gparts_to_sparts(sparts, scount, sparts_offset); + + /* Finally, do the same song and dance for the gparts. */ for (int k = 0; k < 8; k++) bucket_count[k] = 0; /* Fill the buffer with the indices. */ for (int k = 0; k < gcount; k++) { - const int bid = (gparts[k].x[0] > pivot[0]) * 4 + - (gparts[k].x[1] > pivot[1]) * 2 + - (gparts[k].x[2] > pivot[2]); + const int bid = (gbuff[k].x[0] > pivot[0]) * 4 + + (gbuff[k].x[1] > pivot[1]) * 2 + (gbuff[k].x[2] > pivot[2]); bucket_count[bid]++; - buff[k] = bid; + gbuff[k].ind = bid; } /* Set the buffer offsets. 
*/ @@ -589,20 +814,22 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) { for (int bucket = 0; bucket < 8; bucket++) { for (int k = bucket_offset[bucket] + bucket_count[bucket]; k < bucket_offset[bucket + 1]; k++) { - int bid = buff[k]; + int bid = gbuff[k].ind; if (bid != bucket) { struct gpart gpart = gparts[k]; + struct cell_buff temp_buff = gbuff[k]; while (bid != bucket) { int j = bucket_offset[bid] + bucket_count[bid]++; - while (buff[j] == bid) { + while (gbuff[j].ind == bid) { j++; bucket_count[bid]++; } memswap(&gparts[j], &gpart, sizeof(struct gpart)); - memswap(&buff[j], &bid, sizeof(int)); + memswap(&gbuff[j], &temp_buff, sizeof(struct cell_buff)); + bid = temp_buff.ind; } gparts[k] = gpart; - buff[k] = bid; + gbuff[k] = temp_buff; } bucket_count[bid]++; } @@ -616,7 +843,11 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) { /* Re-link the parts. */ if (count > 0 && gcount > 0) - part_relink_parts(gparts, gcount, parts - parts_offset); + part_relink_parts_to_gparts(gparts, gcount, parts - parts_offset); + + /* Re-link the sparts. */ + if (scount > 0 && gcount > 0) + part_relink_sparts_to_gparts(gparts, gcount, sparts - sparts_offset); } /** @@ -682,9 +913,10 @@ void cell_sanitize(struct cell *c) { void cell_convert_hydro(struct cell *c, void *data) { struct part *p = c->parts; + struct xpart *xp = c->xparts; for (int i = 0; i < c->count; ++i) { - hydro_convert_quantities(&p[i]); + hydro_convert_quantities(&p[i], &xp[i]); } } @@ -711,10 +943,10 @@ void cell_clean_links(struct cell *c, void *data) { */ void cell_check_drift_point(struct cell *c, void *data) { - const int ti_current = *(int *)data; + integertime_t ti_current = *(integertime_t *)data; - if (c->ti_old != ti_current) - error("Cell in an incorrect time-zone! c->ti_old=%d ti_current=%d", + if (c->ti_old != ti_current && c->nodeID == engine_rank) + error("Cell in an incorrect time-zone! 
c->ti_old=%lld ti_current=%lld", c->ti_old, ti_current); } @@ -859,6 +1091,10 @@ int cell_is_drift_needed(struct cell *c, const struct engine *e) { */ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { +#ifdef WITH_MPI + struct engine *e = s->space->e; +#endif + /* Un-skip the density tasks involved with this cell. */ for (struct link *l = c->density; l != NULL; l = l->next) { struct task *t = l->t; @@ -892,8 +1128,10 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { /* Activate the tasks to recv foreign cell ci's data. */ scheduler_activate(s, ci->recv_xv); - scheduler_activate(s, ci->recv_rho); - scheduler_activate(s, ci->recv_ti); + if (cell_is_active(ci, e)) { + scheduler_activate(s, ci->recv_rho); + scheduler_activate(s, ci->recv_ti); + } /* Look for the local cell cj's send tasks. */ struct link *l = NULL; @@ -903,24 +1141,34 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { if (l == NULL) error("Missing link to send_xv task."); scheduler_activate(s, l->t); - for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; - l = l->next) - ; - if (l == NULL) error("Missing link to send_rho task."); - scheduler_activate(s, l->t); - - for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID; - l = l->next) - ; - if (l == NULL) error("Missing link to send_ti task."); - scheduler_activate(s, l->t); + if (cj->super->drift) + scheduler_activate(s, cj->super->drift); + else + error("Drift task missing !"); + + if (cell_is_active(cj, e)) { + for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_rho task."); + scheduler_activate(s, l->t); + + for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_ti task."); + scheduler_activate(s, l->t); + } } else if (cj->nodeID != engine_rank) { /* Activate the tasks to recv foreign cell cj's data. 
*/ scheduler_activate(s, cj->recv_xv); - scheduler_activate(s, cj->recv_rho); - scheduler_activate(s, cj->recv_ti); + if (cell_is_active(cj, e)) { + scheduler_activate(s, cj->recv_rho); + scheduler_activate(s, cj->recv_ti); + } + /* Look for the local cell ci's send tasks. */ struct link *l = NULL; for (l = ci->send_xv; l != NULL && l->t->cj->nodeID != cj->nodeID; @@ -929,17 +1177,24 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { if (l == NULL) error("Missing link to send_xv task."); scheduler_activate(s, l->t); - for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; - l = l->next) - ; - if (l == NULL) error("Missing link to send_rho task."); - scheduler_activate(s, l->t); - - for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID; - l = l->next) - ; - if (l == NULL) error("Missing link to send_ti task."); - scheduler_activate(s, l->t); + if (ci->super->drift) + scheduler_activate(s, ci->super->drift); + else + error("Drift task missing !"); + + if (cell_is_active(ci, e)) { + for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_rho task."); + scheduler_activate(s, l->t); + + for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_ti task."); + scheduler_activate(s, l->t); + } } #endif } @@ -955,7 +1210,10 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { if (c->extra_ghost != NULL) scheduler_activate(s, c->extra_ghost); if (c->ghost != NULL) scheduler_activate(s, c->ghost); if (c->init != NULL) scheduler_activate(s, c->init); - if (c->kick != NULL) scheduler_activate(s, c->kick); + if (c->drift != NULL) scheduler_activate(s, c->drift); + if (c->kick1 != NULL) scheduler_activate(s, c->kick1); + if (c->kick2 != NULL) scheduler_activate(s, c->kick2); + if (c->timestep != NULL) scheduler_activate(s, c->timestep); if (c->cooling != NULL) scheduler_activate(s, 
c->cooling); if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms); @@ -981,3 +1239,130 @@ void cell_set_super(struct cell *c, struct cell *super) { for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) cell_set_super(c->progeny[k], super); } + +/** + * @brief Recursively drifts all particles and g-particles in a cell hierarchy. + * + * @param c The #cell. + * @param e The #engine (to get ti_current). + */ +void cell_drift(struct cell *c, const struct engine *e) { + + const double timeBase = e->timeBase; + const integertime_t ti_old = c->ti_old; + const integertime_t ti_current = e->ti_current; + struct part *const parts = c->parts; + struct xpart *const xparts = c->xparts; + struct gpart *const gparts = c->gparts; + struct spart *const sparts = c->sparts; + + /* Drift from the last time the cell was drifted to the current time */ + const double dt = (ti_current - ti_old) * timeBase; + float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f; + + /* Check that we are actually going to move forward. */ + if (ti_current < ti_old) error("Attempt to drift to the past"); + + /* Are we not in a leaf ? */ + if (c->split) { + + /* Loop over the progeny and collect their data. */ + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) { + struct cell *cp = c->progeny[k]; + cell_drift(cp, e); + dx_max = max(dx_max, cp->dx_max); + h_max = max(h_max, cp->h_max); + } + + } else if (ti_current > ti_old) { + + /* Loop over all the g-particles in the cell */ + const size_t nr_gparts = c->gcount; + for (size_t k = 0; k < nr_gparts; k++) { + + /* Get a handle on the gpart. */ + struct gpart *const gp = &gparts[k]; + + /* Drift... */ + drift_gpart(gp, dt, timeBase, ti_old, ti_current); + + /* Compute (square of) motion since last cell construction */ + const float dx2 = gp->x_diff[0] * gp->x_diff[0] + + gp->x_diff[1] * gp->x_diff[1] + + gp->x_diff[2] * gp->x_diff[2]; + dx2_max = (dx2_max > dx2) ? 
dx2_max : dx2; + } + + /* Loop over all the gas particles in the cell */ + const size_t nr_parts = c->count; + for (size_t k = 0; k < nr_parts; k++) { + + /* Get a handle on the part. */ + struct part *const p = &parts[k]; + struct xpart *const xp = &xparts[k]; + + /* Drift... */ + drift_part(p, xp, dt, timeBase, ti_old, ti_current); + + /* Compute (square of) motion since last cell construction */ + const float dx2 = xp->x_diff[0] * xp->x_diff[0] + + xp->x_diff[1] * xp->x_diff[1] + + xp->x_diff[2] * xp->x_diff[2]; + dx2_max = (dx2_max > dx2) ? dx2_max : dx2; + + /* Maximal smoothing length */ + h_max = (h_max > p->h) ? h_max : p->h; + } + + /* Loop over all the star particles in the cell */ + const size_t nr_sparts = c->scount; + for (size_t k = 0; k < nr_sparts; k++) { + + /* Get a handle on the spart. */ + struct spart *const sp = &sparts[k]; + + /* Drift... */ + drift_spart(sp, dt, timeBase, ti_old, ti_current); + + /* Note: no need to compute dx_max as all spart have a gpart */ + } + + /* Now, get the maximal particle motion from its square */ + dx_max = sqrtf(dx2_max); + + } else { + + h_max = c->h_max; + dx_max = c->dx_max; + } + + /* Store the values */ + c->h_max = h_max; + c->dx_max = dx_max; + + /* Update the time of the last drift */ + c->ti_old = ti_current; +} + +/** + * @brief Recursively checks that all particles in a cell have a time-step + */ +void cell_check_timesteps(struct cell *c) { +#ifdef SWIFT_DEBUG_CHECKS + + if (c->ti_end_min == 0 && c->nr_tasks > 0) + error("Cell without assigned time-step"); + + if (c->split) { + for (int k = 0; k < 8; ++k) + if (c->progeny[k] != NULL) cell_check_timesteps(c->progeny[k]); + } else { + + if (c->nodeID == engine_rank) + for (int i = 0; i < c->count; ++i) + if (c->parts[i].time_bin == 0) + error("Particle without assigned time-bin"); + } +#endif +} diff --git a/src/cell.h b/src/cell.h index 2cd13cf2ab6b934f6aab84bcbacf510270892866..5e170580015e9113aa61575b26a0de09b12e3c1f 100644 --- a/src/cell.h +++ 
b/src/cell.h @@ -35,6 +35,7 @@ #include "multipole.h" #include "part.h" #include "task.h" +#include "timeline.h" /* Avoid cyclic inclusions */ struct engine; @@ -52,6 +53,12 @@ struct scheduler; /* Global variables. */ extern int cell_next_tag; +/* Struct to temporarily buffer the particle locations and bin id. */ +struct cell_buff { + double x[3]; + int ind; +} SWIFT_STRUCT_ALIGN; + /* Mini struct to link cells to tasks. Used as a linked list. */ struct link { @@ -67,10 +74,10 @@ struct pcell { /* Stats on this cell's particles. */ double h_max; - int ti_end_min, ti_end_max; + integertime_t ti_end_min, ti_end_max, ti_old; /* Number of particles in this cell. */ - int count, gcount; + int count, gcount, scount; /* tag used for MPI communication. */ int tag; @@ -111,6 +118,9 @@ struct cell { /*! Pointer to the #gpart data. */ struct gpart *gparts; + /*! Pointer to the #spart data. */ + struct spart *sparts; + /*! Pointer for the sorted indices. */ struct entry *sort; @@ -147,8 +157,17 @@ struct cell { /*! The extra ghost task for complex hydro schemes */ struct task *extra_ghost; - /*! The kick task */ - struct task *kick; + /*! The drift task */ + struct task *drift; + + /*! The first kick task */ + struct task *kick1; + + /*! The second kick task */ + struct task *kick2; + + /*! The task to compute time-steps */ + struct task *timestep; /*! Task constructing the multipole from the particles */ struct task *grav_up; @@ -203,13 +222,13 @@ struct cell { #endif /*! Minimum end of (integer) time step in this cell. */ - int ti_end_min; + integertime_t ti_end_min; /*! Maximum end of (integer) time step in this cell. */ - int ti_end_max; + integertime_t ti_end_max; /*! Last (integer) time the cell's content was drifted forward in time. */ - int ti_old; + integertime_t ti_old; /*! Minimum dimension, i.e. smallest edge of this cell (min(width)). */ float dmin; @@ -223,6 +242,9 @@ struct cell { /*! Nr of #gpart in this cell. */ int gcount; + /*! Nr of #spart in this cell. 
*/ + int scount; + /*! The size of the sort array */ int sortsize; @@ -235,6 +257,9 @@ struct cell { /*! Spin lock for various uses (#gpart case). */ swift_lock_type glock; + /*! Spin lock for various uses (#spart case). */ + swift_lock_type slock; + /*! ID of the previous owner, e.g. runner. */ int owner; @@ -244,6 +269,9 @@ struct cell { /*! Number of #gpart updated in this cell. */ int g_updated; + /*! Number of #spart updated in this cell. */ + int s_updated; + /*! ID of the node this cell lives on. */ int nodeID; @@ -253,6 +281,9 @@ struct cell { /*! Is the #gpart data of this cell being used in a sub-cell? */ int ghold; + /*! Is the #spart data of this cell being used in a sub-cell? */ + int shold; + /*! Number of tasks that are associated with this cell. */ short int nr_tasks; @@ -272,19 +303,24 @@ struct cell { ((int)(k) + (cdim)[2] * ((int)(j) + (cdim)[1] * (int)(i))) /* Function prototypes. */ -void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff); +void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, + struct cell_buff *buff, struct cell_buff *sbuff, + struct cell_buff *gbuff); void cell_sanitize(struct cell *c); int cell_locktree(struct cell *c); void cell_unlocktree(struct cell *c); int cell_glocktree(struct cell *c); void cell_gunlocktree(struct cell *c); +int cell_slocktree(struct cell *c); +void cell_sunlocktree(struct cell *c); int cell_pack(struct cell *c, struct pcell *pc); int cell_unpack(struct pcell *pc, struct cell *c, struct space *s); -int cell_pack_ti_ends(struct cell *c, int *ti_ends); -int cell_unpack_ti_ends(struct cell *c, int *ti_ends); +int cell_pack_ti_ends(struct cell *c, integertime_t *ti_ends); +int cell_unpack_ti_ends(struct cell *c, integertime_t *ti_ends); int cell_getsize(struct cell *c); int cell_link_parts(struct cell *c, struct part *parts); int cell_link_gparts(struct cell *c, struct gpart *gparts); +int cell_link_sparts(struct cell *c, struct spart *sparts); void 
cell_convert_hydro(struct cell *c, void *data); void cell_clean_links(struct cell *c, void *data); int cell_are_neighbours(const struct cell *restrict ci, @@ -295,5 +331,7 @@ void cell_check_drift_point(struct cell *c, void *data); int cell_is_drift_needed(struct cell *c, const struct engine *e); int cell_unskip_tasks(struct cell *c, struct scheduler *s); void cell_set_super(struct cell *c, struct cell *super); +void cell_drift(struct cell *c, const struct engine *e); +void cell_check_timesteps(struct cell *c); #endif /* SWIFT_CELL_H */ diff --git a/src/common_io.c b/src/common_io.c index 1f1ec401547c81e137b4e7d836ab58cb87280d8b..82c00cf5bed7118276e0595e3d9c590d29bdda74 100644 --- a/src/common_io.c +++ b/src/common_io.c @@ -390,6 +390,8 @@ void writeCodeDescription(hid_t h_file) { H5Gclose(h_grpcode); } +#endif /* HAVE_HDF5 */ + /* ------------------------------------------------------------------------------------------------ * This part writes the XMF file descriptor enabling a visualisation through * ParaView @@ -586,6 +588,9 @@ void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm) { if (gparts[i].id_or_neg_offset <= 0) error("0 or negative ID for DM particle %zu: ID=%lld", i, gparts[i].id_or_neg_offset); + + /* Set gpart type */ + gparts[i].type = swift_type_dark_matter; } } @@ -597,7 +602,7 @@ void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm) { * * @param parts The array of #part freshly read in. * @param gparts The array of #gpart freshly read in with all the DM particles - *at the start + * at the start * @param Ngas The number of gas particles read in. * @param Ndm The number of DM particles read in. 
*/ @@ -618,12 +623,53 @@ void duplicate_hydro_gparts(struct part* const parts, gparts[i + Ndm].mass = hydro_get_mass(&parts[i]); + /* Set gpart type */ + gparts[i + Ndm].type = swift_type_gas; + /* Link the particles */ gparts[i + Ndm].id_or_neg_offset = -i; parts[i].gpart = &gparts[i + Ndm]; } } +/** + * @brief Copy every #spart into the corresponding #gpart and link them. + * + * This function assumes that the DM particles and gas particles are all at + * the start of the gparts array and adds the star particles afterwards + * + * @param sparts The array of #spart freshly read in. + * @param gparts The array of #gpart freshly read in with all the DM and gas + * particles at the start. + * @param Nstars The number of stars particles read in. + * @param Ndm The number of DM and gas particles read in. + */ +void duplicate_star_gparts(struct spart* const sparts, + struct gpart* const gparts, size_t Nstars, + size_t Ndm) { + + for (size_t i = 0; i < Nstars; ++i) { + + /* Duplicate the crucial information */ + gparts[i + Ndm].x[0] = sparts[i].x[0]; + gparts[i + Ndm].x[1] = sparts[i].x[1]; + gparts[i + Ndm].x[2] = sparts[i].x[2]; + + gparts[i + Ndm].v_full[0] = sparts[i].v[0]; + gparts[i + Ndm].v_full[1] = sparts[i].v[1]; + gparts[i + Ndm].v_full[2] = sparts[i].v[2]; + + gparts[i + Ndm].mass = sparts[i].mass; + + /* Set gpart type */ + gparts[i + Ndm].type = swift_type_star; + + /* Link the particles */ + gparts[i + Ndm].id_or_neg_offset = -i; + sparts[i].gpart = &gparts[i + Ndm]; + } +} + /** * @brief Copy every DM #gpart into the dmparts array. * @@ -644,7 +690,7 @@ void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot, * gparts[i].part); */ /* And collect the DM ones */ - if (gparts[i].id_or_neg_offset > 0) { + if (gparts[i].type == swift_type_dark_matter) { dmparts[count] = gparts[i]; count++; } @@ -655,5 +701,3 @@ void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot, error("Collected the wrong number of dm particles (%zu vs. 
%zu expected)", count, Ndm); } - -#endif diff --git a/src/common_io.h b/src/common_io.h index 7aedee0f2624dcff916a8398e244009a87109915..bf1840d497c46f58568d1bed7cb3409f60e047ee 100644 --- a/src/common_io.h +++ b/src/common_io.h @@ -75,6 +75,9 @@ void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm); void duplicate_hydro_gparts(struct part* const parts, struct gpart* const gparts, size_t Ngas, size_t Ndm); +void duplicate_star_gparts(struct spart* const sparts, + struct gpart* const gparts, size_t Nstars, + size_t Ndm); void readAttribute(hid_t grp, char* name, enum DATA_TYPE type, void* data); diff --git a/src/cooling/const_du/cooling.h b/src/cooling/const_du/cooling.h index 448af9c3765e3bb6d4cbf4cc94e245a3976d5314..30ae644bdecbe795794505f64ba1ed767419d82b 100644 --- a/src/cooling/const_du/cooling.h +++ b/src/cooling/const_du/cooling.h @@ -62,26 +62,30 @@ __attribute__((always_inline)) INLINE static void cooling_cool_part( const struct cooling_function_data* restrict cooling, struct part* restrict p, struct xpart* restrict xp, float dt) { - /* Get current internal energy (dt=0) */ - const float u_old = hydro_get_internal_energy(p, 0.f); + /* Internal energy floor */ + const float u_floor = cooling->min_energy; + + /* Get current internal energy */ + const float u_old = hydro_get_internal_energy(p); + + /* Current du_dt */ + const float hydro_du_dt = hydro_get_internal_energy_dt(p); /* Get cooling function properties */ - const float du_dt = -cooling->cooling_rate; - const float u_floor = cooling->min_energy; + float cooling_du_dt = -cooling->cooling_rate; - /* Constant cooling with a minimal floor */ - float u_new; - if (u_old - du_dt * dt > u_floor) { - u_new = u_old + du_dt * dt; - } else { - u_new = u_floor; + /* Integrate cooling equation to enforce energy floor */ + if (u_old + hydro_du_dt * dt < u_floor) { + cooling_du_dt = 0.f; + } else if (u_old + (hydro_du_dt + cooling_du_dt) * dt < u_floor) { + cooling_du_dt = (u_old + dt * hydro_du_dt - u_floor) 
/ dt; } - /* Update the internal energy */ - hydro_set_internal_energy(p, u_new); + /* Update the internal energy time derivative */ + hydro_set_internal_energy_dt(p, hydro_du_dt + cooling_du_dt); /* Store the radiated energy */ - xp->cooling_data.radiated_energy += hydro_get_mass(p) * (u_old - u_new); + xp->cooling_data.radiated_energy += -hydro_get_mass(p) * cooling_du_dt * dt; } /** @@ -102,7 +106,7 @@ __attribute__((always_inline)) INLINE static float cooling_timestep( const struct UnitSystem* restrict us, const struct part* restrict p) { const float cooling_rate = cooling->cooling_rate; - const float internal_energy = hydro_get_internal_energy(p, 0); + const float internal_energy = hydro_get_internal_energy(p); return cooling->cooling_tstep_mult * internal_energy / fabsf(cooling_rate); } diff --git a/src/cooling/const_lambda/cooling.h b/src/cooling/const_lambda/cooling.h index cb9db2dc34a6014ea15a24d368a006fee3838d67..9fadd51e3c2a3c5462c8476e0aac893e3a2d530d 100644 --- a/src/cooling/const_lambda/cooling.h +++ b/src/cooling/const_lambda/cooling.h @@ -76,31 +76,29 @@ __attribute__((always_inline)) INLINE static void cooling_cool_part( const struct cooling_function_data* restrict cooling, struct part* restrict p, struct xpart* restrict xp, float dt) { - /* Get current internal energy (dt=0) */ - const float u_old = hydro_get_internal_energy(p, 0.f); - /* Internal energy floor */ const float u_floor = cooling->min_energy; - /* Calculate du_dt */ - const float du_dt = cooling_rate(phys_const, us, cooling, p); + /* Current energy */ + const float u_old = hydro_get_internal_energy(p); - /* Integrate cooling equation, but enforce energy floor */ - float u_new; - if (u_old + du_dt * dt > u_floor) { - u_new = u_old + du_dt * dt; - } else { - u_new = u_floor; - } + /* Current du_dt */ + const float hydro_du_dt = hydro_get_internal_energy_dt(p); + + /* Calculate cooling du_dt */ + float cooling_du_dt = cooling_rate(phys_const, us, cooling, p); - /* Don't allow particle to 
cool too much in one timestep */ - if (u_new < 0.5f * u_old) u_new = 0.5f * u_old; + /* Integrate cooling equation to enforce energy floor */ + /* Factor of 1.5 included since timestep could potentially double */ + if (u_old + (hydro_du_dt + cooling_du_dt) * 1.5f * dt < u_floor) { + cooling_du_dt = -(u_old + 1.5f * dt * hydro_du_dt - u_floor) / (1.5f * dt); + } - /* Update the internal energy */ - hydro_set_internal_energy(p, u_new); + /* Update the internal energy time derivative */ + hydro_set_internal_energy_dt(p, hydro_du_dt + cooling_du_dt); /* Store the radiated energy */ - xp->cooling_data.radiated_energy += hydro_get_mass(p) * (u_old - u_new); + xp->cooling_data.radiated_energy += -hydro_get_mass(p) * cooling_du_dt * dt; } /** @@ -116,12 +114,11 @@ __attribute__((always_inline)) INLINE static float cooling_timestep( const struct phys_const* restrict phys_const, const struct UnitSystem* restrict us, const struct part* restrict p) { - /* Get current internal energy (dt=0) */ - const float u = hydro_get_internal_energy(p, 0.f); + /* Get current internal energy */ + const float u = hydro_get_internal_energy(p); const float du_dt = cooling_rate(phys_const, us, cooling, p); - /* If we are close to (or below) the energy floor, we ignore cooling timestep - */ + /* If we are close to (or below) the energy floor, we ignore the condition */ if (u < 1.01f * cooling->min_energy) return FLT_MAX; else diff --git a/src/debug.c b/src/debug.c index 48572df7f046944613d2598b0d340e949ad3ab7e..f5f2f4974a6f2d0e8da8fce71e98233a2ed3deeb 100644 --- a/src/debug.c +++ b/src/debug.c @@ -194,7 +194,8 @@ int checkSpacehmax(struct space *s) { /** * @brief Check if the h_max and dx_max values of a cell's hierarchy are - * consistent with the particles. Report verbosely if not. + * consistent with the particles. Also checks if particles are correctly + * in a cell. Report verbosely if not. * * @param c the top cell of the hierarchy. * @param depth the recursion depth for use in messages. 
Set to 0 initially. @@ -206,24 +207,50 @@ int checkCellhdxmax(const struct cell *c, int *depth) { float h_max = 0.0f; float dx_max = 0.0f; - if (!c->split) { - const size_t nr_parts = c->count; - struct part *parts = c->parts; - for (size_t k = 0; k < nr_parts; k++) { - h_max = (h_max > parts[k].h) ? h_max : parts[k].h; + int result = 1; + + const double loc_min[3] = {c->loc[0], c->loc[1], c->loc[2]}; + const double loc_max[3] = {c->loc[0] + c->width[0], c->loc[1] + c->width[1], + c->loc[2] + c->width[2]}; + + const size_t nr_parts = c->count; + struct part *parts = c->parts; + struct xpart *xparts = c->xparts; + for (size_t k = 0; k < nr_parts; k++) { + + struct part *const p = &parts[k]; + struct xpart *const xp = &xparts[k]; + + if (p->x[0] < loc_min[0] || p->x[0] > loc_max[0] || p->x[1] < loc_min[1] || + p->x[1] > loc_max[1] || p->x[2] < loc_min[2] || p->x[2] > loc_max[2]) { + + message( + "Inconsistent part position p->x=[%e %e %e], c->loc=[%e %e %e] " + "c->width=[%e %e %e]", + p->x[0], p->x[1], p->x[2], c->loc[0], c->loc[1], c->loc[2], + c->width[0], c->width[1], c->width[2]); + + result = 0; } - } else { - for (int k = 0; k < 8; k++) + + const float dx2 = xp->x_diff[0] * xp->x_diff[0] + + xp->x_diff[1] * xp->x_diff[1] + + xp->x_diff[2] * xp->x_diff[2]; + + h_max = max(h_max, p->h); + dx_max = max(dx_max, sqrt(dx2)); + } + + if (c->split) { + for (int k = 0; k < 8; k++) { if (c->progeny[k] != NULL) { struct cell *cp = c->progeny[k]; checkCellhdxmax(cp, depth); - dx_max = max(dx_max, cp->dx_max); - h_max = max(h_max, cp->h_max); } + } } /* Check. */ - int result = 1; if (c->h_max != h_max) { message("%d Inconsistent h_max: cell %f != parts %f", *depth, c->h_max, h_max); @@ -236,13 +263,6 @@ int checkCellhdxmax(const struct cell *c, int *depth) { result = 0; } - /* Check rebuild criterion. 
*/ - if (h_max > c->dmin) { - message("%d Inconsistent c->dmin: %f > %f", *depth, h_max, c->dmin); - message("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]); - result = 0; - } - return result; } diff --git a/src/drift.h b/src/drift.h index bd1b35926740d49a67291ede4676f3387cd66748..687f8d8885a5fedca489f76d65ea8113101626c6 100644 --- a/src/drift.h +++ b/src/drift.h @@ -39,8 +39,8 @@ * @param ti_current Integer end of time-step */ __attribute__((always_inline)) INLINE static void drift_gpart( - struct gpart *restrict gp, float dt, double timeBase, int ti_old, - int ti_current) { + struct gpart *restrict gp, float dt, double timeBase, integertime_t ti_old, + integertime_t ti_current) { /* Drift... */ gp->x[0] += gp->v_full[0] * dt; gp->x[1] += gp->v_full[1] * dt; @@ -64,7 +64,17 @@ __attribute__((always_inline)) INLINE static void drift_gpart( */ __attribute__((always_inline)) INLINE static void drift_part( struct part *restrict p, struct xpart *restrict xp, float dt, - double timeBase, int ti_old, int ti_current) { + double timeBase, integertime_t ti_old, integertime_t ti_current) { + +#ifdef SWIFT_DEBUG_CHECKS + if (p->ti_drift != ti_old) + error( + "Particle has not been drifted to the current time p->ti_drift=%lld, " + "c->ti_old=%lld, ti_current=%lld", + p->ti_drift, ti_old, ti_current); + + p->ti_drift = ti_current; +#endif /* Drift... */ p->x[0] += xp->v_full[0] * dt; @@ -77,7 +87,7 @@ __attribute__((always_inline)) INLINE static void drift_part( p->v[2] += p->a_hydro[2] * dt; /* Predict the values of the extra fields */ - hydro_predict_extra(p, xp, dt, ti_old, ti_current, timeBase); + hydro_predict_extra(p, xp, dt); /* Compute offset since last cell construction */ xp->x_diff[0] -= xp->v_full[0] * dt; @@ -85,4 +95,23 @@ __attribute__((always_inline)) INLINE static void drift_part( xp->x_diff[2] -= xp->v_full[2] * dt; } +/** + * @brief Perform the 'drift' operation on a #spart + * + * @param sp The #spart to drift. 
+ * @param dt The drift time-step + * @param timeBase The minimal allowed time-step size. + * @param ti_old Integer start of time-step + * @param ti_current Integer end of time-step + */ +__attribute__((always_inline)) INLINE static void drift_spart( + struct spart *restrict sp, float dt, double timeBase, integertime_t ti_old, + integertime_t ti_current) { + + /* Drift... */ + sp->x[0] += sp->v[0] * dt; + sp->x[1] += sp->v[1] * dt; + sp->x[2] += sp->v[2] * dt; +} + #endif /* SWIFT_DRIFT_H */ diff --git a/src/dump.c b/src/dump.c new file mode 100644 index 0000000000000000000000000000000000000000..2c0cf221ebd897bab0d047c196ce8a2aeddc6eae --- /dev/null +++ b/src/dump.c @@ -0,0 +1,153 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +/* This object's header. */ +#include "dump.h" + +/* Local headers. 
*/ +#include "atomic.h" +#include "error.h" + +/** + * @brief Obtain a chunk of memory from a dump. + * + * @param d The #dump. + * @param count The number of bytes requested. + * @param offset The offset of the returned memory address within the dump file. + * @return A pointer to the memory-mapped chunk of data. + */ + +void *dump_get(struct dump *d, size_t count, size_t *offset) { + size_t local_offset = atomic_add(&d->count, count); + *offset = local_offset + d->file_offset; + return (char *)d->data + local_offset; +} + +/** + * @brief Ensure that at least size bytes are available in the #dump. + */ + +void dump_ensure(struct dump *d, size_t size) { + + /* If we have enough space already, just bail. */ + if (d->size - d->count > size) return; + + /* Unmap the current data. */ + size_t trunc_count = d->count & d->page_mask; + if (munmap(d->data, trunc_count > 0 ? trunc_count : 1) != 0) { + error("Failed to unmap %zi bytes of dump data (%s).", trunc_count, + strerror(errno)); + } + + /* Update the size and count. */ + d->file_offset += trunc_count; + d->count -= trunc_count; + d->size = (size * dump_grow_ensure_factor + ~d->page_mask) & d->page_mask; + + /* Re-allocate the file size. */ + if (posix_fallocate(d->fd, d->file_offset, d->size) != 0) { + error("Failed to pre-allocate the dump file."); + } + + /* Re-map starting at the end of the file. */ + if ((d->data = mmap(NULL, d->size, PROT_WRITE, MAP_SHARED, d->fd, + d->file_offset)) == MAP_FAILED) { + error("Failed to allocate map of size %zi bytes (%s).", d->size, + strerror(errno)); + } +} + +/** + * @brief Flush the #dump to disk. + */ + +void dump_sync(struct dump *d) { + if (msync(d->data, d->count, MS_SYNC) != 0) + error("Failed to sync memory-mapped data."); +} + +/** + * @brief Finalize the #dump. + */ + +void dump_close(struct dump *d) { + /* Unmap the data in memory. 
*/ + if (munmap(d->data, d->count) != 0) { + error("Failed to unmap dump data (%s).", strerror(errno)); + } + + /* Truncate the file to the correct length. */ + if (ftruncate(d->fd, d->file_offset + d->count) != 0) { + error("Failed to truncate dump file (%s).", strerror(errno)); + } + + /* Close the memory-mapped file. */ + if (close(d->fd) != 0) error("Failed to close memory-mapped file."); +} + +/** + * @brief Initialize a file dump. + * + * @param d The #dump to initialize. + * @param filename The fully qualified name of the file in which to dump, + * note that it will be overwritten. + * @param size The initial buffer size for this #dump. + */ + +void dump_init(struct dump *d, const char *filename, size_t size) { + + /* Create the output file. */ + if ((d->fd = open(filename, O_CREAT | O_RDWR, 0660)) == -1) { + error("Failed to create dump file '%s' (%s).", filename, strerror(errno)); + } + + /* Adjust the size to be at least the page size. */ + const size_t page_mask = ~(sysconf(_SC_PAGE_SIZE) - 1); + size = (size + ~page_mask) & page_mask; + + /* Pre-allocate the file size. */ + if (posix_fallocate(d->fd, 0, size) != 0) { + error("Failed to pre-allocate the dump file."); + } + + /* Map memory to the created file. */ + if ((d->data = mmap(NULL, size, PROT_WRITE, MAP_SHARED, d->fd, 0)) == + MAP_FAILED) { + error("Failed to allocate map of size %zi bytes (%s).", size, + strerror(errno)); + } + + /* Init some counters. */ + d->size = size; + d->count = 0; + d->file_offset = 0; + d->page_mask = page_mask; +} diff --git a/src/dump.h b/src/dump.h new file mode 100644 index 0000000000000000000000000000000000000000..a7e934218c271d2f82b99d39f278e5af3047be6e --- /dev/null +++ b/src/dump.h @@ -0,0 +1,57 @@ +/******************************************************************************* + * This file is part of SWIFT. 
+ * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_DUMP_H +#define SWIFT_DUMP_H + +/* Includes. */ +#include "lock.h" + +/* Some constants. */ +#define dump_grow_ensure_factor 10 + +/** The dump struct. */ +struct dump { + + /* The memory-mapped data of this dump. */ + void *data; + + /* The size of the memory-mapped data, in bytes. */ + size_t size; + + /* The number of bytes that have been dumped. */ + size_t count; + + /* The offset of the data within the current file. */ + size_t file_offset; + + /* The file with which this memory is associated. */ + int fd; + + /* Mask containing the significant bits for page addresses. */ + size_t page_mask; +}; + +/* Function prototypes. */ +void dump_init(struct dump *d, const char *filename, size_t size); +void dump_ensure(struct dump *d, size_t size); +void dump_sync(struct dump *d); +void dump_close(struct dump *d); +void *dump_get(struct dump *d, size_t count, size_t *offset); + +#endif /* SWIFT_DUMP_H */ diff --git a/src/engine.c b/src/engine.c index 88406493a174a85bab9b2fc4c77024bd43821ed3..2019d54a7715e5b75e48f5673ca24d7c6cc67b0c 100644 --- a/src/engine.c +++ b/src/engine.c @@ -48,6 +48,7 @@ #include "engine.h" /* Local headers. 
*/ +#include "active.h" #include "atomic.h" #include "cell.h" #include "clocks.h" @@ -70,6 +71,9 @@ #include "units.h" #include "version.h" +/* Particle cache size. */ +#define CACHE_SIZE 512 + const char *engine_policy_names[16] = {"none", "rand", "steal", @@ -84,7 +88,8 @@ const char *engine_policy_names[16] = {"none", "cosmology_integration", "drift_all", "cooling", - "sourceterms"}; + "sourceterms", + "stars"}; /** The rank of the engine as a global variable (for messages). */ int engine_rank; @@ -140,8 +145,25 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0, c, NULL, 0); - c->kick = scheduler_addtask(s, task_type_kick, task_subtype_none, 0, 0, c, - NULL, 0); + /* Add the two half kicks */ + c->kick1 = scheduler_addtask(s, task_type_kick1, task_subtype_none, 0, 0, + c, NULL, 0); + + c->kick2 = scheduler_addtask(s, task_type_kick2, task_subtype_none, 0, 0, + c, NULL, 0); + + /* Add the time-step calculation task and its dependency */ + c->timestep = scheduler_addtask(s, task_type_timestep, task_subtype_none, + 0, 0, c, NULL, 0); + + scheduler_addunlock(s, c->kick2, c->timestep); + + /* Add the drift task and its dependencies. */ + c->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0, 0, + c, NULL, 0); + + scheduler_addunlock(s, c->kick1, c->drift); + scheduler_addunlock(s, c->drift, c->init); /* Generate the ghost task. 
*/ if (is_hydro) @@ -156,13 +178,18 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { #endif /* Cooling task */ - if (is_with_cooling) + if (is_with_cooling) { c->cooling = scheduler_addtask(s, task_type_cooling, task_subtype_none, 0, 0, c, NULL, 0); + + scheduler_addunlock(s, c->cooling, c->kick2); + } + /* add source terms */ - if (is_with_sourceterms) + if (is_with_sourceterms) { c->sourceterms = scheduler_addtask(s, task_type_sourceterms, task_subtype_none, 0, 0, c, NULL, 0); + } } } else { /* We are above the super-cell so need to go deeper */ @@ -210,24 +237,30 @@ void engine_redistribute(struct engine *e) { struct part *parts = s->parts; struct xpart *xparts = s->xparts; struct gpart *gparts = s->gparts; + struct spart *sparts = s->sparts; ticks tic = getticks(); /* Allocate temporary arrays to store the counts of particles to be sent and the destination of each particle */ - int *counts, *g_counts; + int *counts, *g_counts, *s_counts; if ((counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL) - error("Failed to allocate count temporary buffer."); + error("Failed to allocate counts temporary buffer."); if ((g_counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL) - error("Failed to allocate gcount temporary buffer."); + error("Failed to allocate g_gcount temporary buffer."); + if ((s_counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL) + error("Failed to allocate s_counts temporary buffer."); bzero(counts, sizeof(int) * nr_nodes * nr_nodes); bzero(g_counts, sizeof(int) * nr_nodes * nr_nodes); + bzero(s_counts, sizeof(int) * nr_nodes * nr_nodes); /* Allocate the destination index arrays. 
*/ - int *dest, *g_dest; + int *dest, *g_dest, *s_dest; if ((dest = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL) error("Failed to allocate dest temporary buffer."); if ((g_dest = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL) error("Failed to allocate g_dest temporary buffer."); + if ((s_dest = (int *)malloc(sizeof(int) * s->nr_sparts)) == NULL) + error("Failed to allocate s_dest temporary buffer."); /* Get destination of each particle */ for (size_t k = 0; k < s->nr_parts; k++) { @@ -255,7 +288,32 @@ void engine_redistribute(struct engine *e) { } /* Sort the particles according to their cell index. */ - space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1, e->verbose); + if (s->nr_parts > 0) + space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1, e->verbose); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that the part have been sorted correctly. */ + for (size_t k = 0; k < s->nr_parts; k++) { + const struct part *p = &s->parts[k]; + + /* New cell index */ + const int new_cid = + cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1], + p->x[2] * s->iwidth[2]); + + /* New cell of this part */ + const struct cell *c = &s->cells_top[new_cid]; + const int new_node = c->nodeID; + + if (dest[k] != new_node) + error("part's new node index not matching sorted index."); + + if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->width[0] || + p->x[1] < c->loc[1] || p->x[1] > c->loc[1] + c->width[1] || + p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2]) + error("part not sorted into the right top-level cell!"); + } +#endif /* We need to re-link the gpart partners of parts. */ if (s->nr_parts > 0) { @@ -267,7 +325,7 @@ void engine_redistribute(struct engine *e) { /* As the addresses will be invalidated by the communications, we will * instead store the absolute index from the start of the sub-array of * particles to be sent to a given node. - * Recall that gparts without partners have a negative id. 
+ * Recall that gparts without partners have a positive id. * We will restore the pointers on the receiving node later on. */ if (dest[k] != current_dest) { current_dest = dest[k]; @@ -275,7 +333,7 @@ void engine_redistribute(struct engine *e) { } #ifdef SWIFT_DEBUG_CHECKS - if (s->parts[k].gpart->id_or_neg_offset >= 0) + if (s->parts[k].gpart->id_or_neg_offset > 0) error("Trying to link a partnerless gpart !"); #endif @@ -285,6 +343,87 @@ void engine_redistribute(struct engine *e) { } } + /* Get destination of each s-particle */ + for (size_t k = 0; k < s->nr_sparts; k++) { + + /* Periodic boundary conditions */ + for (int j = 0; j < 3; j++) { + if (sparts[k].x[j] < 0.0) + sparts[k].x[j] += dim[j]; + else if (sparts[k].x[j] >= dim[j]) + sparts[k].x[j] -= dim[j]; + } + const int cid = + cell_getid(cdim, sparts[k].x[0] * iwidth[0], sparts[k].x[1] * iwidth[1], + sparts[k].x[2] * iwidth[2]); +#ifdef SWIFT_DEBUG_CHECKS + if (cid < 0 || cid >= s->nr_cells) + error("Bad cell id %i for part %zu at [%.3e,%.3e,%.3e].", cid, k, + sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]); +#endif + + s_dest[k] = cells[cid].nodeID; + + /* The counts array is indexed as count[from * nr_nodes + to]. */ + s_counts[nodeID * nr_nodes + s_dest[k]] += 1; + } + + /* Sort the particles according to their cell index. */ + if (s->nr_sparts > 0) + space_sparts_sort(s, s_dest, s->nr_sparts, 0, nr_nodes - 1, e->verbose); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that the spart have been sorted correctly. 
*/ + for (size_t k = 0; k < s->nr_sparts; k++) { + const struct spart *sp = &s->sparts[k]; + + /* New cell index */ + const int new_cid = + cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1], + sp->x[2] * s->iwidth[2]); + + /* New cell of this spart */ + const struct cell *c = &s->cells_top[new_cid]; + const int new_node = c->nodeID; + + if (s_dest[k] != new_node) + error("spart's new node index not matching sorted index."); + + if (sp->x[0] < c->loc[0] || sp->x[0] > c->loc[0] + c->width[0] || + sp->x[1] < c->loc[1] || sp->x[1] > c->loc[1] + c->width[1] || + sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2]) + error("spart not sorted into the right top-level cell!"); + } +#endif + + /* We need to re-link the gpart partners of sparts. */ + if (s->nr_sparts > 0) { + int current_dest = s_dest[0]; + size_t count_this_dest = 0; + for (size_t k = 0; k < s->nr_sparts; ++k) { + if (s->sparts[k].gpart != NULL) { + + /* As the addresses will be invalidated by the communications, we will + * instead store the absolute index from the start of the sub-array of + * particles to be sent to a given node. + * Recall that gparts without partners have a positive id. + * We will restore the pointers on the receiving node later on. */ + if (s_dest[k] != current_dest) { + current_dest = s_dest[k]; + count_this_dest = 0; + } + +#ifdef SWIFT_DEBUG_CHECKS + if (s->sparts[k].gpart->id_or_neg_offset > 0) + error("Trying to link a partnerless gpart !"); +#endif + + s->sparts[k].gpart->id_or_neg_offset = -count_this_dest; + count_this_dest++; + } + } + } + /* Get destination of each g-particle */ for (size_t k = 0; k < s->nr_gparts; k++) { @@ -311,48 +450,96 @@ void engine_redistribute(struct engine *e) { } /* Sort the gparticles according to their cell index. 
*/ - space_gparts_sort(s, g_dest, s->nr_gparts, 0, nr_nodes - 1, e->verbose); + if (s->nr_gparts > 0) + space_gparts_sort(s, g_dest, s->nr_gparts, 0, nr_nodes - 1, e->verbose); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that the gpart have been sorted correctly. */ + for (size_t k = 0; k < s->nr_gparts; k++) { + const struct gpart *gp = &s->gparts[k]; + + /* New cell index */ + const int new_cid = + cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1], + gp->x[2] * s->iwidth[2]); + + /* New cell of this gpart */ + const struct cell *c = &s->cells_top[new_cid]; + const int new_node = c->nodeID; + + if (g_dest[k] != new_node) + error("gpart's new node index not matching sorted index."); + + if (gp->x[0] < c->loc[0] || gp->x[0] > c->loc[0] + c->width[0] || + gp->x[1] < c->loc[1] || gp->x[1] > c->loc[1] + c->width[1] || + gp->x[2] < c->loc[2] || gp->x[2] > c->loc[2] + c->width[2]) + error("gpart not sorted into the right top-level cell!"); + } +#endif /* Get all the counts from all the nodes. */ if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) error("Failed to allreduce particle transfer counts."); + /* Get all the g_counts from all the nodes. */ + if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT, + MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to allreduce gparticle transfer counts."); + + /* Get all the s_counts from all the nodes. */ + if (MPI_Allreduce(MPI_IN_PLACE, s_counts, nr_nodes * nr_nodes, MPI_INT, + MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to allreduce sparticle transfer counts."); + /* Report how many particles will be moved. 
*/ if (e->verbose) { if (e->nodeID == 0) { - size_t total = 0; - size_t unmoved = 0; + size_t total = 0, g_total = 0, s_total = 0; + size_t unmoved = 0, g_unmoved = 0, s_unmoved = 0; for (int p = 0, r = 0; p < nr_nodes; p++) { for (int s = 0; s < nr_nodes; s++) { total += counts[r]; - if (p == s) unmoved += counts[r]; + g_total += g_counts[r]; + s_total += s_counts[r]; + if (p == s) { + unmoved += counts[r]; + g_unmoved += g_counts[r]; + s_unmoved += s_counts[r]; + } r++; } } - message("%ld of %ld (%.2f%%) of particles moved", total - unmoved, total, - 100.0 * (double)(total - unmoved) / (double)total); + if (total > 0) + message("%ld of %ld (%.2f%%) of particles moved", total - unmoved, + total, 100.0 * (double)(total - unmoved) / (double)total); + if (g_total > 0) + message("%ld of %ld (%.2f%%) of g-particles moved", g_total - g_unmoved, + g_total, + 100.0 * (double)(g_total - g_unmoved) / (double)g_total); + if (s_total > 0) + message("%ld of %ld (%.2f%%) of s-particles moved", s_total - s_unmoved, + s_total, + 100.0 * (double)(s_total - s_unmoved) / (double)s_total); } } - /* Get all the g_counts from all the nodes. */ - if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT, - MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to allreduce gparticle transfer counts."); - - /* Each node knows how many parts and gparts will be transferred to every - other node. We can start preparing to receive data */ + /* Each node knows how many parts, sparts and gparts will be transferred + to every other node. 
We can start preparing to receive data */ /* Get the new number of parts and gparts for this node */ - size_t nr_parts = 0, nr_gparts = 0; + size_t nr_parts = 0, nr_gparts = 0, nr_sparts = 0; for (int k = 0; k < nr_nodes; k++) nr_parts += counts[k * nr_nodes + nodeID]; for (int k = 0; k < nr_nodes; k++) nr_gparts += g_counts[k * nr_nodes + nodeID]; + for (int k = 0; k < nr_nodes; k++) + nr_sparts += s_counts[k * nr_nodes + nodeID]; /* Allocate the new arrays with some extra margin */ struct part *parts_new = NULL; struct xpart *xparts_new = NULL; struct gpart *gparts_new = NULL; + struct spart *sparts_new = NULL; if (posix_memalign((void **)&parts_new, part_align, sizeof(struct part) * nr_parts * engine_redistribute_alloc_margin) != 0) @@ -365,17 +552,22 @@ void engine_redistribute(struct engine *e) { sizeof(struct gpart) * nr_gparts * engine_redistribute_alloc_margin) != 0) error("Failed to allocate new gpart data."); + if (posix_memalign((void **)&sparts_new, spart_align, + sizeof(struct spart) * nr_sparts * + engine_redistribute_alloc_margin) != 0) + error("Failed to allocate new spart data."); /* Prepare MPI requests for the asynchronous communications */ MPI_Request *reqs; - if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 6 * nr_nodes)) == + if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 8 * nr_nodes)) == NULL) error("Failed to allocate MPI request list."); - for (int k = 0; k < 6 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL; + for (int k = 0; k < 8 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL; /* Emit the sends and recvs for the particle and gparticle data. 
*/ size_t offset_send = 0, offset_recv = 0; size_t g_offset_send = 0, g_offset_recv = 0; + size_t s_offset_send = 0, s_offset_recv = 0; for (int k = 0; k < nr_nodes; k++) { /* Indices in the count arrays of the node of interest */ @@ -399,12 +591,12 @@ void engine_redistribute(struct engine *e) { /* Else, emit some communications */ } else { if (MPI_Isend(&s->parts[offset_send], counts[ind_send], part_mpi_type, - k, 3 * ind_send + 0, MPI_COMM_WORLD, - &reqs[6 * k]) != MPI_SUCCESS) + k, 4 * ind_send + 0, MPI_COMM_WORLD, + &reqs[8 * k + 0]) != MPI_SUCCESS) error("Failed to isend parts to node %i.", k); if (MPI_Isend(&s->xparts[offset_send], counts[ind_send], xpart_mpi_type, - k, 3 * ind_send + 1, MPI_COMM_WORLD, - &reqs[6 * k + 1]) != MPI_SUCCESS) + k, 4 * ind_send + 1, MPI_COMM_WORLD, + &reqs[8 * k + 1]) != MPI_SUCCESS) error("Failed to isend xparts to node %i.", k); offset_send += counts[ind_send]; } @@ -425,24 +617,46 @@ void engine_redistribute(struct engine *e) { /* Else, emit some communications */ } else { if (MPI_Isend(&s->gparts[g_offset_send], g_counts[ind_send], - gpart_mpi_type, k, 3 * ind_send + 2, MPI_COMM_WORLD, - &reqs[6 * k + 2]) != MPI_SUCCESS) + gpart_mpi_type, k, 4 * ind_send + 2, MPI_COMM_WORLD, + &reqs[8 * k + 2]) != MPI_SUCCESS) error("Failed to isend gparts to node %i.", k); g_offset_send += g_counts[ind_send]; } } + /* Are we sending any spart ? 
*/ + if (s_counts[ind_send] > 0) { + + /* message("Sending %d spart to node %d", s_counts[ind_send], k); */ + + /* If the send is to the same node, just copy */ + if (k == nodeID) { + memcpy(&sparts_new[s_offset_recv], &s->sparts[s_offset_send], + sizeof(struct spart) * s_counts[ind_recv]); + s_offset_send += s_counts[ind_send]; + s_offset_recv += s_counts[ind_recv]; + + /* Else, emit some communications */ + } else { + if (MPI_Isend(&s->sparts[s_offset_send], s_counts[ind_send], + spart_mpi_type, k, 4 * ind_send + 3, MPI_COMM_WORLD, + &reqs[8 * k + 3]) != MPI_SUCCESS) + error("Failed to isend gparts to node %i.", k); + s_offset_send += s_counts[ind_send]; + } + } + /* Now emit the corresponding Irecv() */ /* Are we receiving any part/xpart from this node ? */ if (k != nodeID && counts[ind_recv] > 0) { if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], part_mpi_type, k, - 3 * ind_recv + 0, MPI_COMM_WORLD, - &reqs[6 * k + 3]) != MPI_SUCCESS) + 4 * ind_recv + 0, MPI_COMM_WORLD, + &reqs[8 * k + 4]) != MPI_SUCCESS) error("Failed to emit irecv of parts from node %i.", k); if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv], xpart_mpi_type, - k, 3 * ind_recv + 1, MPI_COMM_WORLD, - &reqs[6 * k + 4]) != MPI_SUCCESS) + k, 4 * ind_recv + 1, MPI_COMM_WORLD, + &reqs[8 * k + 5]) != MPI_SUCCESS) error("Failed to emit irecv of xparts from node %i.", k); offset_recv += counts[ind_recv]; } @@ -450,18 +664,27 @@ void engine_redistribute(struct engine *e) { /* Are we receiving any gpart from this node ? */ if (k != nodeID && g_counts[ind_recv] > 0) { if (MPI_Irecv(&gparts_new[g_offset_recv], g_counts[ind_recv], - gpart_mpi_type, k, 3 * ind_recv + 2, MPI_COMM_WORLD, - &reqs[6 * k + 5]) != MPI_SUCCESS) + gpart_mpi_type, k, 4 * ind_recv + 2, MPI_COMM_WORLD, + &reqs[8 * k + 6]) != MPI_SUCCESS) error("Failed to emit irecv of gparts from node %i.", k); g_offset_recv += g_counts[ind_recv]; } + + /* Are we receiving any spart from this node ? 
*/ + if (k != nodeID && s_counts[ind_recv] > 0) { + if (MPI_Irecv(&sparts_new[s_offset_recv], s_counts[ind_recv], + spart_mpi_type, k, 4 * ind_recv + 3, MPI_COMM_WORLD, + &reqs[8 * k + 7]) != MPI_SUCCESS) + error("Failed to emit irecv of sparts from node %i.", k); + s_offset_recv += s_counts[ind_recv]; + } } /* Wait for all the sends and recvs to tumble in. */ - MPI_Status stats[6 * nr_nodes]; + MPI_Status stats[8 * nr_nodes]; int res; - if ((res = MPI_Waitall(6 * nr_nodes, reqs, stats)) != MPI_SUCCESS) { - for (int k = 0; k < 6 * nr_nodes; k++) { + if ((res = MPI_Waitall(8 * nr_nodes, reqs, stats)) != MPI_SUCCESS) { + for (int k = 0; k < 8 * nr_nodes; k++) { char buff[MPI_MAX_ERROR_STRING]; MPI_Error_string(stats[k].MPI_ERROR, buff, &res); message("request %i has error '%s'.", k, buff); @@ -469,19 +692,23 @@ void engine_redistribute(struct engine *e) { error("Failed during waitall for part data."); } - /* We now need to restore the part<->gpart links */ - size_t offset_parts = 0, offset_gparts = 0; + /* All particles have now arrived. Time for some final operations on the + stuff we just received */ + + /* Restore the part<->gpart and spart<->gpart links */ + size_t offset_parts = 0, offset_sparts = 0, offset_gparts = 0; for (int node = 0; node < nr_nodes; ++node) { const int ind_recv = node * nr_nodes + nodeID; const size_t count_parts = counts[ind_recv]; const size_t count_gparts = g_counts[ind_recv]; + const size_t count_sparts = s_counts[ind_recv]; /* Loop over the gparts received from that node */ for (size_t k = offset_gparts; k < offset_gparts + count_gparts; ++k) { - /* Does this gpart have a partner ? */ - if (gparts_new[k].id_or_neg_offset <= 0) { + /* Does this gpart have a gas partner ? 
*/ + if (gparts_new[k].type == swift_type_gas) { const ptrdiff_t partner_index = offset_parts - gparts_new[k].id_or_neg_offset; @@ -490,10 +717,22 @@ void engine_redistribute(struct engine *e) { gparts_new[k].id_or_neg_offset = -partner_index; parts_new[partner_index].gpart = &gparts_new[k]; } + + /* Does this gpart have a star partner ? */ + if (gparts_new[k].type == swift_type_star) { + + const ptrdiff_t partner_index = + offset_sparts - gparts_new[k].id_or_neg_offset; + + /* Re-link */ + gparts_new[k].id_or_neg_offset = -partner_index; + sparts_new[partner_index].gpart = &gparts_new[k]; + } } offset_parts += count_parts; offset_gparts += count_gparts; + offset_sparts += count_sparts; } #ifdef SWIFT_DEBUG_CHECKS @@ -506,41 +745,43 @@ void engine_redistribute(struct engine *e) { error("Received particle (%zu) that does not belong here (nodeID=%i).", k, cells[cid].nodeID); } - - /* Verify that the links are correct */ - for (size_t k = 0; k < nr_gparts; ++k) { - - if (gparts_new[k].id_or_neg_offset <= 0) { - - struct part *part = &parts_new[-gparts_new[k].id_or_neg_offset]; - - if (part->gpart != &gparts_new[k]) error("Linking problem !"); - - if (gparts_new[k].x[0] != part->x[0] || - gparts_new[k].x[1] != part->x[1] || gparts_new[k].x[2] != part->x[2]) - error("Linked particles are not at the same position !"); - } + for (size_t k = 0; k < nr_gparts; k++) { + const int cid = cell_getid(cdim, gparts_new[k].x[0] * iwidth[0], + gparts_new[k].x[1] * iwidth[1], + gparts_new[k].x[2] * iwidth[2]); + if (cells[cid].nodeID != nodeID) + error("Received g-particle (%zu) that does not belong here (nodeID=%i).", + k, cells[cid].nodeID); } - for (size_t k = 0; k < nr_parts; ++k) { - - if (parts_new[k].gpart != NULL && - parts_new[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) { - error("Linking problem !"); - } + for (size_t k = 0; k < nr_sparts; k++) { + const int cid = cell_getid(cdim, sparts_new[k].x[0] * iwidth[0], + sparts_new[k].x[1] * iwidth[1], + sparts_new[k].x[2] * 
iwidth[2]); + if (cells[cid].nodeID != nodeID) + error("Received s-particle (%zu) that does not belong here (nodeID=%i).", + k, cells[cid].nodeID); } + + /* Verify that the links are correct */ + part_verify_links(parts_new, gparts_new, sparts_new, nr_parts, nr_gparts, + nr_sparts, e->verbose); #endif /* Set the new part data, free the old. */ free(parts); free(xparts); free(gparts); + free(sparts); s->parts = parts_new; s->xparts = xparts_new; s->gparts = gparts_new; + s->sparts = sparts_new; s->nr_parts = nr_parts; s->nr_gparts = nr_gparts; + s->nr_sparts = nr_sparts; s->size_parts = engine_redistribute_alloc_margin * nr_parts; s->size_gparts = engine_redistribute_alloc_margin * nr_gparts; + s->size_sparts = engine_redistribute_alloc_margin * nr_sparts; /* Clean up the temporary stuff. */ free(reqs); @@ -552,8 +793,8 @@ void engine_redistribute(struct engine *e) { int my_cells = 0; for (int k = 0; k < nr_cells; k++) if (cells[k].nodeID == nodeID) my_cells += 1; - message("node %i now has %zu parts and %zu gparts in %i cells.", nodeID, - nr_parts, nr_gparts, my_cells); + message("node %i now has %zu parts, %zu sparts and %zu gparts in %i cells.", + nodeID, nr_parts, nr_sparts, nr_gparts, my_cells); } if (e->verbose) @@ -576,6 +817,10 @@ void engine_repartition(struct engine *e) { ticks tic = getticks(); #ifdef SWIFT_DEBUG_CHECKS + /* Be verbose about this. */ + if (e->nodeID == 0 || e->verbose) message("repartitioning space"); + fflush(stdout); + /* Check that all cells have been drifted to the current time */ space_check_drift_point(e->s, e->ti_current); #endif @@ -668,20 +913,25 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, /* Create the tasks and their dependencies? 
*/ if (t_xv == NULL) { - t_xv = scheduler_addtask(s, task_type_send, task_subtype_none, - 4 * ci->tag, 0, ci, cj, 0); - t_rho = scheduler_addtask(s, task_type_send, task_subtype_none, + + if (ci->super->drift == NULL) + ci->super->drift = scheduler_addtask( + s, task_type_drift, task_subtype_none, 0, 0, ci->super, NULL, 0); + + t_xv = scheduler_addtask(s, task_type_send, task_subtype_xv, 4 * ci->tag, + 0, ci, cj, 0); + t_rho = scheduler_addtask(s, task_type_send, task_subtype_rho, 4 * ci->tag + 1, 0, ci, cj, 0); t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend, 4 * ci->tag + 2, 0, ci, cj, 0); #ifdef EXTRA_HYDRO_LOOP - t_gradient = scheduler_addtask(s, task_type_send, task_subtype_none, + t_gradient = scheduler_addtask(s, task_type_send, task_subtype_gradient, 4 * ci->tag + 3, 0, ci, cj, 0); #endif #ifdef EXTRA_HYDRO_LOOP - scheduler_addunlock(s, t_gradient, ci->super->kick); + scheduler_addunlock(s, t_gradient, ci->super->kick2); scheduler_addunlock(s, ci->super->extra_ghost, t_gradient); @@ -696,17 +946,21 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, #else /* The send_rho task should unlock the super-cell's kick task. */ - scheduler_addunlock(s, t_rho, ci->super->kick); + scheduler_addunlock(s, t_rho, ci->super->kick2); /* The send_rho task depends on the cell's ghost task. */ scheduler_addunlock(s, ci->super->ghost, t_rho); /* The send_xv task should unlock the super-cell's ghost task. */ scheduler_addunlock(s, t_xv, ci->super->ghost); + #endif - /* The super-cell's kick task should unlock the send_ti task. */ - if (t_ti != NULL) scheduler_addunlock(s, ci->super->kick, t_ti); + /* Drift before you send */ + scheduler_addunlock(s, ci->super->drift, t_xv); + + /* The super-cell's timestep task should unlock the send_ti task. */ + scheduler_addunlock(s, ci->super->timestep, t_ti); } /* Add them to the local cell. 
*/ @@ -715,7 +969,7 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, #ifdef EXTRA_HYDRO_LOOP engine_addlink(e, &ci->send_gradient, t_gradient); #endif - if (t_ti != NULL) engine_addlink(e, &ci->send_ti, t_ti); + engine_addlink(e, &ci->send_ti, t_ti); } /* Recurse? */ @@ -753,14 +1007,14 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv, if (t_xv == NULL && c->density != NULL) { /* Create the tasks. */ - t_xv = scheduler_addtask(s, task_type_recv, task_subtype_none, 4 * c->tag, - 0, c, NULL, 0); - t_rho = scheduler_addtask(s, task_type_recv, task_subtype_none, + t_xv = scheduler_addtask(s, task_type_recv, task_subtype_xv, 4 * c->tag, 0, + c, NULL, 0); + t_rho = scheduler_addtask(s, task_type_recv, task_subtype_rho, 4 * c->tag + 1, 0, c, NULL, 0); t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend, 4 * c->tag + 2, 0, c, NULL, 0); #ifdef EXTRA_HYDRO_LOOP - t_gradient = scheduler_addtask(s, task_type_recv, task_subtype_none, + t_gradient = scheduler_addtask(s, task_type_recv, task_subtype_gradient, 4 * c->tag + 3, 0, c, NULL, 0); #endif } @@ -781,7 +1035,7 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv, } for (struct link *l = c->force; l != NULL; l = l->next) { scheduler_addunlock(s, t_gradient, l->t); - if (t_ti != NULL) scheduler_addunlock(s, l->t, t_ti); + scheduler_addunlock(s, l->t, t_ti); } if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts); #else @@ -791,7 +1045,7 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv, } for (struct link *l = c->force; l != NULL; l = l->next) { scheduler_addunlock(s, t_rho, l->t); - if (t_ti != NULL) scheduler_addunlock(s, l->t, t_ti); + scheduler_addunlock(s, l->t, t_ti); } if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts); #endif @@ -894,11 +1148,12 @@ void engine_exchange_cells(struct engine *e) { /* Count the number of particles we need to import and re-allocate the 
buffer if needed. */ - size_t count_parts_in = 0, count_gparts_in = 0; + size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0; for (int k = 0; k < nr_proxies; k++) for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { count_parts_in += e->proxies[k].cells_in[j]->count; count_gparts_in += e->proxies[k].cells_in[j]->gcount; + count_sparts_in += e->proxies[k].cells_in[j]->scount; } if (count_parts_in > s->size_parts_foreign) { if (s->parts_foreign != NULL) free(s->parts_foreign); @@ -914,20 +1169,31 @@ void engine_exchange_cells(struct engine *e) { sizeof(struct gpart) * s->size_gparts_foreign) != 0) error("Failed to allocate foreign gpart data."); } + if (count_sparts_in > s->size_sparts_foreign) { + if (s->sparts_foreign != NULL) free(s->sparts_foreign); + s->size_sparts_foreign = 1.1 * count_sparts_in; + if (posix_memalign((void **)&s->sparts_foreign, spart_align, + sizeof(struct spart) * s->size_sparts_foreign) != 0) + error("Failed to allocate foreign spart data."); + } /* Unpack the cells and link to the particle data. */ struct part *parts = s->parts_foreign; struct gpart *gparts = s->gparts_foreign; + struct spart *sparts = s->sparts_foreign; for (int k = 0; k < nr_proxies; k++) { for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { cell_link_parts(e->proxies[k].cells_in[j], parts); cell_link_gparts(e->proxies[k].cells_in[j], gparts); + cell_link_sparts(e->proxies[k].cells_in[j], sparts); parts = &parts[e->proxies[k].cells_in[j]->count]; gparts = &gparts[e->proxies[k].cells_in[j]->gcount]; + sparts = &sparts[e->proxies[k].cells_in[j]->scount]; } } s->nr_parts_foreign = parts - s->parts_foreign; s->nr_gparts_foreign = gparts - s->gparts_foreign; + s->nr_sparts_foreign = sparts - s->sparts_foreign; /* Free the pcell buffer. */ free(pcells); @@ -942,7 +1208,7 @@ void engine_exchange_cells(struct engine *e) { } /** - * @brief Exchange straying parts with other nodes. + * @brief Exchange straying particles with other nodes. * * @param e The #engine. 
* @param offset_parts The index in the parts array as of which the foreign @@ -955,13 +1221,20 @@ void engine_exchange_cells(struct engine *e) { * @param ind_gpart The foreign #cell ID of each gpart. * @param Ngpart The number of stray gparts, contains the number of gparts * received on return. + * @param offset_sparts The index in the sparts array as of which the foreign + * parts reside. + * @param ind_spart The foreign #cell ID of each spart. + * @param Nspart The number of stray sparts, contains the number of sparts + * received on return. * * Note that this function does not mess-up the linkage between parts and * gparts, i.e. the received particles have correct linkeage. */ void engine_exchange_strays(struct engine *e, size_t offset_parts, int *ind_part, size_t *Npart, size_t offset_gparts, - int *ind_gpart, size_t *Ngpart) { + int *ind_gpart, size_t *Ngpart, + size_t offset_sparts, int *ind_spart, + size_t *Nspart) { #ifdef WITH_MPI @@ -972,9 +1245,10 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, for (int k = 0; k < e->nr_proxies; k++) { e->proxies[k].nr_parts_out = 0; e->proxies[k].nr_gparts_out = 0; + e->proxies[k].nr_sparts_out = 0; } - /* Put the parts and gparts into the corresponding proxies. */ + /* Put the parts into the corresponding proxies. */ for (size_t k = 0; k < *Npart; k++) { /* Get the target node and proxy ID. */ const int node_id = e->s->cells_top[ind_part[k]].nodeID; @@ -1000,6 +1274,32 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, proxy_parts_load(&e->proxies[pid], &s->parts[offset_parts + k], &s->xparts[offset_parts + k], 1); } + + /* Put the sparts into the corresponding proxies. 
*/ + for (size_t k = 0; k < *Nspart; k++) { + const int node_id = e->s->cells_top[ind_spart[k]].nodeID; + if (node_id < 0 || node_id >= e->nr_nodes) + error("Bad node ID %i.", node_id); + const int pid = e->proxy_ind[node_id]; + if (pid < 0) + error( + "Do not have a proxy for the requested nodeID %i for part with " + "id=%lld, x=[%e,%e,%e].", + node_id, s->sparts[offset_sparts + k].id, + s->sparts[offset_sparts + k].x[0], s->sparts[offset_sparts + k].x[1], + s->sparts[offset_sparts + k].x[2]); + + /* Re-link the associated gpart with the buffer offset of the spart. */ + if (s->sparts[offset_sparts + k].gpart != NULL) { + s->sparts[offset_sparts + k].gpart->id_or_neg_offset = + -e->proxies[pid].nr_sparts_out; + } + + /* Load the spart into the proxy */ + proxy_sparts_load(&e->proxies[pid], &s->sparts[offset_sparts + k], 1); + } + + /* Put the gparts into the corresponding proxies. */ for (size_t k = 0; k < *Ngpart; k++) { const int node_id = e->s->cells_top[ind_gpart[k]].nodeID; if (node_id < 0 || node_id >= e->nr_nodes) @@ -1009,15 +1309,17 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, error( "Do not have a proxy for the requested nodeID %i for part with " "id=%lli, x=[%e,%e,%e].", - node_id, s->gparts[offset_parts + k].id_or_neg_offset, - s->gparts[offset_gparts + k].x[0], s->gparts[offset_parts + k].x[1], + node_id, s->gparts[offset_gparts + k].id_or_neg_offset, + s->gparts[offset_gparts + k].x[0], s->gparts[offset_gparts + k].x[1], s->gparts[offset_gparts + k].x[2]); + + /* Load the gpart into the proxy */ proxy_gparts_load(&e->proxies[pid], &s->gparts[offset_gparts + k], 1); } /* Launch the proxies. 
*/ - MPI_Request reqs_in[3 * engine_maxproxies]; - MPI_Request reqs_out[3 * engine_maxproxies]; + MPI_Request reqs_in[4 * engine_maxproxies]; + MPI_Request reqs_out[4 * engine_maxproxies]; for (int k = 0; k < e->nr_proxies; k++) { proxy_parts_exch1(&e->proxies[k]); reqs_in[k] = e->proxies[k].req_parts_count_in; @@ -1043,14 +1345,19 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, enough space to accommodate them. */ int count_parts_in = 0; int count_gparts_in = 0; + int count_sparts_in = 0; for (int k = 0; k < e->nr_proxies; k++) { count_parts_in += e->proxies[k].nr_parts_in; count_gparts_in += e->proxies[k].nr_gparts_in; + count_sparts_in += e->proxies[k].nr_sparts_in; } if (e->verbose) { - message("sent out %zu/%zu parts/gparts, got %i/%i back.", *Npart, *Ngpart, - count_parts_in, count_gparts_in); + message("sent out %zu/%zu/%zu parts/gparts/sparts, got %i/%i/%i back.", + *Npart, *Ngpart, *Nspart, count_parts_in, count_gparts_in, + count_sparts_in); } + + /* Reallocate the particle arrays if necessary */ if (offset_parts + count_parts_in > s->size_parts) { message("re-allocating parts array."); s->size_parts = (offset_parts + count_parts_in) * engine_parts_size_grow; @@ -1073,6 +1380,22 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, } } } + if (offset_sparts + count_sparts_in > s->size_sparts) { + message("re-allocating sparts array."); + s->size_sparts = (offset_sparts + count_sparts_in) * engine_parts_size_grow; + struct spart *sparts_new = NULL; + if (posix_memalign((void **)&sparts_new, spart_align, + sizeof(struct spart) * s->size_sparts) != 0) + error("Failed to allocate new spart data."); + memcpy(sparts_new, s->sparts, sizeof(struct spart) * offset_sparts); + free(s->sparts); + s->sparts = sparts_new; + for (size_t k = 0; k < offset_sparts; k++) { + if (s->sparts[k].gpart != NULL) { + s->sparts[k].gpart->id_or_neg_offset = -k; + } + } + } if (offset_gparts + count_gparts_in > s->size_gparts) { 
message("re-allocating gparts array."); s->size_gparts = (offset_gparts + count_gparts_in) * engine_parts_size_grow; @@ -1083,9 +1406,12 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, memcpy(gparts_new, s->gparts, sizeof(struct gpart) * offset_gparts); free(s->gparts); s->gparts = gparts_new; + for (size_t k = 0; k < offset_gparts; k++) { - if (s->gparts[k].id_or_neg_offset < 0) { + if (s->gparts[k].type == swift_type_gas) { s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; + } else if (s->gparts[k].type == swift_type_star) { + s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; } } } @@ -1094,39 +1420,52 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, int nr_in = 0, nr_out = 0; for (int k = 0; k < e->nr_proxies; k++) { if (e->proxies[k].nr_parts_in > 0) { - reqs_in[3 * k] = e->proxies[k].req_parts_in; - reqs_in[3 * k + 1] = e->proxies[k].req_xparts_in; + reqs_in[4 * k] = e->proxies[k].req_parts_in; + reqs_in[4 * k + 1] = e->proxies[k].req_xparts_in; nr_in += 2; } else { - reqs_in[3 * k] = reqs_in[3 * k + 1] = MPI_REQUEST_NULL; + reqs_in[4 * k] = reqs_in[4 * k + 1] = MPI_REQUEST_NULL; } if (e->proxies[k].nr_gparts_in > 0) { - reqs_in[3 * k + 2] = e->proxies[k].req_gparts_in; + reqs_in[4 * k + 2] = e->proxies[k].req_gparts_in; + nr_in += 1; + } else { + reqs_in[4 * k + 2] = MPI_REQUEST_NULL; + } + if (e->proxies[k].nr_sparts_in > 0) { + reqs_in[4 * k + 3] = e->proxies[k].req_sparts_in; nr_in += 1; } else { - reqs_in[3 * k + 2] = MPI_REQUEST_NULL; + reqs_in[4 * k + 3] = MPI_REQUEST_NULL; } + if (e->proxies[k].nr_parts_out > 0) { - reqs_out[3 * k] = e->proxies[k].req_parts_out; - reqs_out[3 * k + 1] = e->proxies[k].req_xparts_out; + reqs_out[4 * k] = e->proxies[k].req_parts_out; + reqs_out[4 * k + 1] = e->proxies[k].req_xparts_out; nr_out += 2; } else { - reqs_out[3 * k] = reqs_out[3 * k + 1] = MPI_REQUEST_NULL; + reqs_out[4 * k] = reqs_out[4 * k + 1] = MPI_REQUEST_NULL; } if 
(e->proxies[k].nr_gparts_out > 0) { - reqs_out[3 * k + 2] = e->proxies[k].req_gparts_out; + reqs_out[4 * k + 2] = e->proxies[k].req_gparts_out; nr_out += 1; } else { - reqs_out[3 * k + 2] = MPI_REQUEST_NULL; + reqs_out[4 * k + 2] = MPI_REQUEST_NULL; + } + if (e->proxies[k].nr_sparts_out > 0) { + reqs_out[4 * k + 3] = e->proxies[k].req_sparts_out; + nr_out += 1; + } else { + reqs_out[4 * k + 3] = MPI_REQUEST_NULL; } } /* Wait for each part array to come in and collect the new parts from the proxies. */ - int count_parts = 0, count_gparts = 0; + int count_parts = 0, count_gparts = 0, count_sparts = 0; for (int k = 0; k < nr_in; k++) { int err, pid; - if ((err = MPI_Waitany(3 * e->nr_proxies, reqs_in, &pid, + if ((err = MPI_Waitany(4 * e->nr_proxies, reqs_in, &pid, MPI_STATUS_IGNORE)) != MPI_SUCCESS) { char buff[MPI_MAX_ERROR_STRING]; int res; @@ -1134,21 +1473,24 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, error("MPI_Waitany failed (%s).", buff); } if (pid == MPI_UNDEFINED) break; - // message( "request from proxy %i has arrived." , pid / 3 ); - pid = 3 * (pid / 3); + // message( "request from proxy %i has arrived." , pid / 4 ); + pid = 4 * (pid / 4); /* If all the requests for a given proxy have arrived... */ if (reqs_in[pid + 0] == MPI_REQUEST_NULL && reqs_in[pid + 1] == MPI_REQUEST_NULL && - reqs_in[pid + 2] == MPI_REQUEST_NULL) { + reqs_in[pid + 2] == MPI_REQUEST_NULL && + reqs_in[pid + 3] == MPI_REQUEST_NULL) { /* Copy the particle data to the part/xpart/gpart arrays. 
*/ - struct proxy *prox = &e->proxies[pid / 3]; + struct proxy *prox = &e->proxies[pid / 4]; memcpy(&s->parts[offset_parts + count_parts], prox->parts_in, sizeof(struct part) * prox->nr_parts_in); memcpy(&s->xparts[offset_parts + count_parts], prox->xparts_in, sizeof(struct xpart) * prox->nr_parts_in); memcpy(&s->gparts[offset_gparts + count_gparts], prox->gparts_in, sizeof(struct gpart) * prox->nr_gparts_in); + memcpy(&s->sparts[offset_sparts + count_sparts], prox->sparts_in, + sizeof(struct spart) * prox->nr_sparts_in); /* for (int k = offset; k < offset + count; k++) message( "received particle %lli, x=[%.3e %.3e %.3e], h=%.3e, from node %i.", @@ -1158,23 +1500,30 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, /* Re-link the gparts. */ for (int kk = 0; kk < prox->nr_gparts_in; kk++) { struct gpart *gp = &s->gparts[offset_gparts + count_gparts + kk]; - if (gp->id_or_neg_offset <= 0) { + + if (gp->type == swift_type_gas) { struct part *p = - &s->parts[offset_gparts + count_parts - gp->id_or_neg_offset]; + &s->parts[offset_parts + count_parts - gp->id_or_neg_offset]; gp->id_or_neg_offset = s->parts - p; p->gpart = gp; + } else if (gp->type == swift_type_star) { + struct spart *sp = + &s->sparts[offset_sparts + count_sparts - gp->id_or_neg_offset]; + gp->id_or_neg_offset = s->sparts - sp; + sp->gpart = gp; } } /* Advance the counters. */ count_parts += prox->nr_parts_in; count_gparts += prox->nr_gparts_in; + count_sparts += prox->nr_sparts_in; } } /* Wait for all the sends to have finished too. */ if (nr_out > 0) - if (MPI_Waitall(3 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != + if (MPI_Waitall(4 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) error("MPI_Waitall on sends failed."); @@ -1185,6 +1534,7 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, /* Return the number of harvested parts. 
*/ *Npart = count_parts; *Ngpart = count_gparts; + *Nspart = count_sparts; #else error("SWIFT was not compiled with MPI support."); @@ -1441,7 +1791,7 @@ static inline void engine_make_gravity_dependencies(struct scheduler *sched, /* init --> gravity --> kick */ scheduler_addunlock(sched, c->super->init, gravity); - scheduler_addunlock(sched, gravity, c->super->kick); + scheduler_addunlock(sched, gravity, c->super->kick2); /* grav_up --> gravity ( --> kick) */ scheduler_addunlock(sched, c->super->grav_up, gravity); @@ -1460,7 +1810,7 @@ static inline void engine_make_external_gravity_dependencies( /* init --> external gravity --> kick */ scheduler_addunlock(sched, c->super->init, gravity); - scheduler_addunlock(sched, gravity, c->super->kick); + scheduler_addunlock(sched, gravity, c->super->kick2); } /** @@ -1499,7 +1849,7 @@ void engine_link_gravity_tasks(struct engine *e) { /* Gather the multipoles --> mm interaction --> kick */ scheduler_addunlock(sched, gather, t); - scheduler_addunlock(sched, t, t->ci->super->kick); + scheduler_addunlock(sched, t, t->ci->super->kick2); /* init --> mm interaction */ scheduler_addunlock(sched, t->ci->super->init, t); @@ -1578,19 +1928,24 @@ void engine_link_gravity_tasks(struct engine *e) { * @param force The force task to link. * @param c The cell. 
*/ -static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched, - struct task *density, - struct task *gradient, - struct task *force, - struct cell *c) { +static inline void engine_make_hydro_loops_dependencies( + struct scheduler *sched, struct task *density, struct task *gradient, + struct task *force, struct cell *c, int with_cooling) { /* init --> density loop --> ghost --> gradient loop --> extra_ghost */ - /* extra_ghost --> force loop --> kick */ + /* extra_ghost --> force loop */ scheduler_addunlock(sched, c->super->init, density); scheduler_addunlock(sched, density, c->super->ghost); scheduler_addunlock(sched, c->super->ghost, gradient); scheduler_addunlock(sched, gradient, c->super->extra_ghost); scheduler_addunlock(sched, c->super->extra_ghost, force); - scheduler_addunlock(sched, force, c->super->kick); + + if (with_cooling) { + /* force loop --> cooling (--> kick2) */ + scheduler_addunlock(sched, force, c->super->cooling); + } else { + /* force loop --> kick2 */ + scheduler_addunlock(sched, force, c->super->kick2); + } } #else @@ -1602,16 +1957,25 @@ static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched, * @param density The density task to link. * @param force The force task to link. * @param c The cell. + * @param with_cooling Are we running with cooling switched on ? 
*/ static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched, struct task *density, struct task *force, - struct cell *c) { - /* init --> density loop --> ghost --> force loop --> kick */ + struct cell *c, + int with_cooling) { + /* init --> density loop --> ghost --> force loop */ scheduler_addunlock(sched, c->super->init, density); scheduler_addunlock(sched, density, c->super->ghost); scheduler_addunlock(sched, c->super->ghost, force); - scheduler_addunlock(sched, force, c->super->kick); + + if (with_cooling) { + /* force loop --> cooling (--> kick2) */ + scheduler_addunlock(sched, force, c->super->cooling); + } else { + /* force loop --> kick2 */ + scheduler_addunlock(sched, force, c->super->kick2); + } } #endif @@ -1632,6 +1996,7 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { struct scheduler *sched = &e->sched; const int nr_tasks = sched->nr_tasks; const int nodeID = e->nodeID; + const int with_cooling = (e->policy & engine_policy_cooling); for (int ind = 0; ind < nr_tasks; ind++) { struct task *t = &sched->tasks[ind]; @@ -1651,7 +2016,8 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { engine_addlink(e, &t->ci->force, t3); /* Now, build all the dependencies for the hydro */ - engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci); + engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, + with_cooling); #else @@ -1663,7 +2029,7 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { engine_addlink(e, &t->ci->force, t2); /* Now, build all the dependencies for the hydro */ - engine_make_hydro_loops_dependencies(sched, t, t2, t->ci); + engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); #endif } @@ -1686,10 +2052,12 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Now, build all the dependencies for the hydro for the cells */ /* that are local and are not descendant of the same super-cells */ if (t->ci->nodeID == nodeID) { - 
engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci); + engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, + with_cooling); } if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { - engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj); + engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj, + with_cooling); } #else @@ -1705,10 +2073,10 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Now, build all the dependencies for the hydro for the cells */ /* that are local and are not descendant of the same super-cells */ if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t->ci); + engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); } if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { - engine_make_hydro_loops_dependencies(sched, t, t2, t->cj); + engine_make_hydro_loops_dependencies(sched, t, t2, t->cj, with_cooling); } #endif @@ -1736,7 +2104,8 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Now, build all the dependencies for the hydro for the cells */ /* that are local and are not descendant of the same super-cells */ if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci); + engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, + with_cooling); } #else @@ -1751,7 +2120,7 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Now, build all the dependencies for the hydro for the cells */ /* that are local and are not descendant of the same super-cells */ if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t->ci); + engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); } #endif } @@ -1779,10 +2148,12 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Now, build all the dependencies for the hydro for the cells */ /* that are local and are not descendant of the same super-cells */ if (t->ci->nodeID == nodeID) { - 
engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci); + engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, + with_cooling); } if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { - engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj); + engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj, + with_cooling); } #else @@ -1798,25 +2169,13 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Now, build all the dependencies for the hydro for the cells */ /* that are local and are not descendant of the same super-cells */ if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t->ci); + engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); } if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { - engine_make_hydro_loops_dependencies(sched, t, t2, t->cj); + engine_make_hydro_loops_dependencies(sched, t, t2, t->cj, with_cooling); } #endif } - /* Cooling tasks should depend on kick and unlock sourceterms */ - else if (t->type == task_type_cooling) { - scheduler_addunlock(sched, t->ci->kick, t); - } - /* source terms depend on cooling if performed, else on kick. It is the last - task */ - else if (t->type == task_type_sourceterms) { - if (e->policy == engine_policy_cooling) - scheduler_addunlock(sched, t->ci->cooling, t); - else - scheduler_addunlock(sched, t->ci->kick, t); - } } } @@ -1985,9 +2344,9 @@ void engine_marktasks_mapper(void *map_data, int num_elements, void *extra_data) { /* Unpack the arguments. 
*/ struct task *tasks = (struct task *)map_data; - const int ti_end = ((size_t *)extra_data)[0]; size_t *rebuild_space = &((size_t *)extra_data)[1]; struct scheduler *s = (struct scheduler *)(((size_t *)extra_data)[2]); + struct engine *e = (struct engine *)((size_t *)extra_data)[0]; for (int ind = 0; ind < num_elements; ind++) { struct task *t = &tasks[ind]; @@ -1998,7 +2357,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, t->type == task_type_sourceterms || t->type == task_type_sub_self) { /* Set this task's skip. */ - if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t); + if (cell_is_active(t->ci, e)) scheduler_activate(s, t); } /* Pair? */ @@ -2016,7 +2375,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, *rebuild_space = 1; /* Set this task's skip, otherwise nothing to do. */ - if (ci->ti_end_min <= ti_end || cj->ti_end_min <= ti_end) + if (cell_is_active(t->ci, e) || cell_is_active(t->cj, e)) scheduler_activate(s, t); else continue; @@ -2043,8 +2402,10 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Activate the tasks to recv foreign cell ci's data. */ scheduler_activate(s, ci->recv_xv); - scheduler_activate(s, ci->recv_rho); - scheduler_activate(s, ci->recv_ti); + if (cell_is_active(ci, e)) { + scheduler_activate(s, ci->recv_rho); + scheduler_activate(s, ci->recv_ti); + } /* Look for the local cell cj's send tasks. 
*/ struct link *l = NULL; @@ -2054,24 +2415,33 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (l == NULL) error("Missing link to send_xv task."); scheduler_activate(s, l->t); - for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; - l = l->next) - ; - if (l == NULL) error("Missing link to send_rho task."); - scheduler_activate(s, l->t); - - for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID; - l = l->next) - ; - if (l == NULL) error("Missing link to send_ti task."); - scheduler_activate(s, l->t); + if (cj->super->drift) + scheduler_activate(s, cj->super->drift); + else + error("Drift task missing !"); + + if (cell_is_active(cj, e)) { + for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_rho task."); + scheduler_activate(s, l->t); + + for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_ti task."); + scheduler_activate(s, l->t); + } } else if (cj->nodeID != engine_rank) { /* Activate the tasks to recv foreign cell cj's data. */ scheduler_activate(s, cj->recv_xv); - scheduler_activate(s, cj->recv_rho); - scheduler_activate(s, cj->recv_ti); + if (cell_is_active(cj, e)) { + scheduler_activate(s, cj->recv_rho); + scheduler_activate(s, cj->recv_ti); + } /* Look for the local cell ci's send tasks. 
*/ struct link *l = NULL; @@ -2081,32 +2451,41 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (l == NULL) error("Missing link to send_xv task."); scheduler_activate(s, l->t); - for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; - l = l->next) - ; - if (l == NULL) error("Missing link to send_rho task."); - scheduler_activate(s, l->t); - - for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID; - l = l->next) - ; - if (l == NULL) error("Missing link to send_ti task."); - scheduler_activate(s, l->t); + if (ci->super->drift) + scheduler_activate(s, ci->super->drift); + else + error("Drift task missing !"); + + if (cell_is_active(ci, e)) { + for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_rho task."); + scheduler_activate(s, l->t); + + for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_ti task."); + scheduler_activate(s, l->t); + } } #endif } - /* Kick? */ - else if (t->type == task_type_kick) { - t->ci->updated = 0; - t->ci->g_updated = 0; - if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t); + /* Kick/Drift/Init? */ + else if (t->type == task_type_kick1 || t->type == task_type_kick2 || + t->type == task_type_drift || t->type == task_type_init) { + if (cell_is_active(t->ci, e)) scheduler_activate(s, t); } - /* Init? */ - else if (t->type == task_type_init) { - if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t); + /* Time-step? */ + else if (t->type == task_type_timestep) { + t->ci->updated = 0; + t->ci->g_updated = 0; + t->ci->s_updated = 0; + if (cell_is_active(t->ci, e)) scheduler_activate(s, t); } /* Tasks with no cells should not be skipped? */ @@ -2129,7 +2508,7 @@ int engine_marktasks(struct engine *e) { int rebuild_space = 0; /* Run through the tasks and mark as skip or not. 
*/ - size_t extra_data[3] = {e->ti_current, rebuild_space, (size_t)&e->sched}; + size_t extra_data[3] = {(size_t)e, rebuild_space, (size_t)&e->sched}; threadpool_map(&e->threadpool, engine_marktasks_mapper, s->tasks, s->nr_tasks, sizeof(struct task), 10000, extra_data); rebuild_space = extra_data[1]; @@ -2225,14 +2604,20 @@ void engine_rebuild(struct engine *e) { * @brief Prepare the #engine by re-building the cells and tasks. * * @param e The #engine to prepare. - * @param nodrift Whether to drift particles before rebuilding or not. Will + * @param drift_all Whether to drift particles before rebuilding or not. Will * not be necessary if all particles have already been * drifted (before repartitioning for instance). + * @param postrepart If we have just repartitioned, if so we need to defer the + * skip until after the rebuild and not check the if all + * cells have been drifted. */ -void engine_prepare(struct engine *e, int nodrift) { +void engine_prepare(struct engine *e, int drift_all, int postrepart) { TIMER_TIC; + /* Unskip active tasks and check for rebuild */ + if (!postrepart) engine_unskip(e); + /* Run through the tasks and mark as skip or not. */ int rebuild = e->forcerebuild; @@ -2249,21 +2634,18 @@ void engine_prepare(struct engine *e, int nodrift) { if (rebuild) { /* Drift all particles to the current time if needed. */ - if (!nodrift) { - e->drift_all = 1; - engine_drift(e); - - /* Restore the default drifting policy */ - e->drift_all = (e->policy & engine_policy_drift_all); - } + if (drift_all) engine_drift_all(e); #ifdef SWIFT_DEBUG_CHECKS - /* Check that all cells have been drifted to the current time */ - space_check_drift_point(e->s, e->ti_current); + /* Check that all cells have been drifted to the current time, unless + * we have just repartitioned, that can include cells that have not + * previously been active on this rank. 
*/ + if (!postrepart) space_check_drift_point(e->s, e->ti_current); #endif engine_rebuild(e); } + if (postrepart) engine_unskip(e); /* Re-rank the tasks every now and then. */ if (e->tasks_age % engine_tasksreweight == 1) { @@ -2324,36 +2706,35 @@ void engine_barrier(struct engine *e, int tid) { */ void engine_collect_kick(struct cell *c) { - /* Skip super-cells (Their values are already set) */ - if (c->kick != NULL) return; +/* Skip super-cells (Their values are already set) */ +#ifdef WITH_MPI + if (c->timestep != NULL || c->recv_ti != NULL) return; +#else + if (c->timestep != NULL) return; +#endif /* WITH_MPI */ /* Counters for the different quantities. */ - int updated = 0, g_updated = 0; - int ti_end_min = max_nr_timesteps; - - /* Only do something is the cell is non-empty */ - if (c->count != 0 || c->gcount != 0) { - - /* If this cell is not split, I'm in trouble. */ - if (!c->split) error("Cell is not split."); + int updated = 0, g_updated = 0, s_updated = 0; + integertime_t ti_end_min = max_nr_timesteps; - /* Collect the values from the progeny. */ - for (int k = 0; k < 8; k++) { - struct cell *cp = c->progeny[k]; - if (cp != NULL) { + /* Collect the values from the progeny. */ + for (int k = 0; k < 8; k++) { + struct cell *cp = c->progeny[k]; + if (cp != NULL && (cp->count > 0 || cp->gcount > 0 || cp->scount > 0)) { - /* Recurse */ - engine_collect_kick(cp); + /* Recurse */ + engine_collect_kick(cp); - /* And update */ - ti_end_min = min(ti_end_min, cp->ti_end_min); - updated += cp->updated; - g_updated += cp->g_updated; + /* And update */ + ti_end_min = min(ti_end_min, cp->ti_end_min); + updated += cp->updated; + g_updated += cp->g_updated; + s_updated += cp->s_updated; - /* Collected, so clear for next time. */ - cp->updated = 0; - cp->g_updated = 0; - } + /* Collected, so clear for next time. 
*/ + cp->updated = 0; + cp->g_updated = 0; + cp->s_updated = 0; } } @@ -2361,6 +2742,7 @@ void engine_collect_kick(struct cell *c) { c->ti_end_min = ti_end_min; c->updated = updated; c->g_updated = g_updated; + c->s_updated = s_updated; } /** @@ -2372,14 +2754,14 @@ void engine_collect_kick(struct cell *c) { void engine_collect_timestep(struct engine *e) { const ticks tic = getticks(); - int updates = 0, g_updates = 0; - int ti_end_min = max_nr_timesteps; + int updates = 0, g_updates = 0, s_updates = 0; + integertime_t ti_end_min = max_nr_timesteps; const struct space *s = e->s; /* Collect the cell data. */ - for (int k = 0; k < s->nr_cells; k++) - if (s->cells_top[k].nodeID == e->nodeID) { - struct cell *c = &s->cells_top[k]; + for (int k = 0; k < s->nr_cells; k++) { + struct cell *c = &s->cells_top[k]; + if (c->count > 0 || c->gcount > 0 || c->scount > 0) { /* Make the top-cells recurse */ engine_collect_kick(c); @@ -2388,38 +2770,44 @@ void engine_collect_timestep(struct engine *e) { ti_end_min = min(ti_end_min, c->ti_end_min); updates += c->updated; g_updates += c->g_updated; + s_updates += c->s_updated; /* Collected, so clear for next time. */ c->updated = 0; c->g_updated = 0; + c->s_updated = 0; } + } /* Aggregate the data from the different nodes. 
*/ #ifdef WITH_MPI { - int in_i[1], out_i[1]; + integertime_t in_i[1], out_i[1]; in_i[0] = 0; out_i[0] = ti_end_min; - if (MPI_Allreduce(out_i, in_i, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD) != - MPI_SUCCESS) + if (MPI_Allreduce(out_i, in_i, 1, MPI_LONG_LONG_INT, MPI_MIN, + MPI_COMM_WORLD) != MPI_SUCCESS) error("Failed to aggregate t_end_min."); ti_end_min = in_i[0]; } { - unsigned long long in_ll[2], out_ll[2]; + long long in_ll[3], out_ll[3]; out_ll[0] = updates; out_ll[1] = g_updates; - if (MPI_Allreduce(out_ll, in_ll, 2, MPI_LONG_LONG_INT, MPI_SUM, + out_ll[2] = s_updates; + if (MPI_Allreduce(out_ll, in_ll, 3, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) error("Failed to aggregate energies."); updates = in_ll[0]; g_updates = in_ll[1]; + s_updates = in_ll[2]; } #endif e->ti_end_min = ti_end_min; e->updates = updates; e->g_updates = g_updates; + e->s_updates = s_updates; if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -2463,7 +2851,7 @@ void engine_print_stats(struct engine *e) { } /** - * @brief Sets all the force and kick tasks to be skipped. + * @brief Sets all the force, drift and kick tasks to be skipped. * * @param e The #engine to act on. */ @@ -2477,12 +2865,33 @@ void engine_skip_force_and_kick(struct engine *e) { struct task *t = &tasks[i]; /* Skip everything that updates the particles */ - if (t->subtype == task_subtype_force || t->type == task_type_kick || - t->type == task_type_cooling || t->type == task_type_sourceterms) + if (t->type == task_type_drift || t->type == task_type_kick1 || + t->type == task_type_kick2 || t->type == task_type_timestep || + t->subtype == task_subtype_force || t->type == task_type_cooling || + t->type == task_type_sourceterms) t->skip = 1; } } +/** + * @brief Sets all the drift and first kick tasks to be skipped. + * + * @param e The #engine to act on. 
+ */ +void engine_skip_drift_and_kick(struct engine *e) { + + struct task *tasks = e->sched.tasks; + const int nr_tasks = e->sched.nr_tasks; + + for (int i = 0; i < nr_tasks; ++i) { + + struct task *t = &tasks[i]; + + /* Skip everything that updates the particles */ + if (t->type == task_type_drift || t->type == task_type_kick1) t->skip = 1; + } +} + /** * @brief Launch the runners. * @@ -2538,9 +2947,9 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) { struct clocks_time time1, time2; clocks_gettime(&time1); - if (e->nodeID == 0) message("Running initialisation fake time-step."); + if (e->nodeID == 0) message("Computing initial gas densities."); - engine_prepare(e, 1); + engine_prepare(e, 0, 0); engine_marktasks(e); @@ -2555,8 +2964,12 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) { /* Apply some conversions (e.g. internal energy -> entropy) */ if (!flag_entropy_ICs) { + if (e->nodeID == 0) message("Converting internal energy variable."); + /* Apply the conversion */ - space_map_cells_pre(s, 0, cell_convert_hydro, NULL); + // space_map_cells_pre(s, 0, cell_convert_hydro, NULL); + for (size_t i = 0; i < s->nr_parts; ++i) + hydro_convert_quantities(&s->parts[i], &s->xparts[i]); /* Correct what we did (e.g. 
in PE-SPH, need to recompute rho_bar) */ if (hydro_need_extra_init_loop) { @@ -2566,10 +2979,25 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) { } } + /* Now time to get ready for the first time-step */ + if (e->nodeID == 0) message("Running initial fake time-step."); + + engine_marktasks(e); + + engine_skip_drift_and_kick(e); + + engine_launch(e, e->nr_threads); + clocks_gettime(&time2); +#ifdef SWIFT_DEBUG_CHECKS + space_check_timesteps(e->s); + part_verify_links(e->s->parts, e->s->gparts, e->s->sparts, e->s->nr_parts, + e->s->nr_gparts, e->s->nr_sparts, e->verbose); +#endif + /* Ready to go */ - e->step = -1; + e->step = 0; e->forcerebuild = 1; e->wallclock_time = (float)clocks_diff(&time1, &time2); @@ -2649,11 +3077,7 @@ void engine_step(struct engine *e, struct repartition *repartition) { snapshot_drift_time = e->timeStep; /* Drift everybody to the snapshot position */ - e->drift_all = 1; - engine_drift(e); - - /* Restore the default drifting policy */ - e->drift_all = (e->policy & engine_policy_drift_all); + engine_drift_all(e); /* Dump... 
*/ engine_dump_snapshot(e); @@ -2673,43 +3097,42 @@ void engine_step(struct engine *e, struct repartition *repartition) { if (e->nodeID == 0) { /* Print some information to the screen */ - printf(" %6d %14e %14e %10zu %10zu %21.3f\n", e->step, e->time, - e->timeStep, e->updates, e->g_updates, e->wallclock_time); + printf(" %6d %14e %14e %10zu %10zu %10zu %21.3f\n", e->step, e->time, + e->timeStep, e->updates, e->g_updates, e->s_updates, + e->wallclock_time); fflush(stdout); - fprintf(e->file_timesteps, " %6d %14e %14e %10zu %10zu %21.3f\n", e->step, - e->time, e->timeStep, e->updates, e->g_updates, e->wallclock_time); + fprintf(e->file_timesteps, " %6d %14e %14e %10zu %10zu %10zu %21.3f\n", + e->step, e->time, e->timeStep, e->updates, e->g_updates, + e->s_updates, e->wallclock_time); fflush(e->file_timesteps); } /* Drift only the necessary particles, that means all particles * if we are about to repartition. */ const int repart = (e->forcerepart != REPART_NONE); - e->drift_all = repart || e->drift_all; - engine_drift(e); + const int drift_all = (e->policy & engine_policy_drift_all); + if (repart || drift_all) engine_drift_all(e); /* Re-distribute the particles amongst the nodes? */ if (repart) engine_repartition(e); /* Prepare the space. */ - engine_prepare(e, e->drift_all); - - /* Restore the default drifting policy */ - e->drift_all = (e->policy & engine_policy_drift_all); + engine_prepare(e, !(drift_all || repart), repart); if (e->verbose) engine_print_task_counts(e); - /* Send off the runners. */ - TIMER_TIC; - engine_launch(e, e->nr_threads); - TIMER_TOC(timer_runners); - /* Save some statistics */ if (e->time - e->timeLastStatistics >= e->deltaTimeStatistics) { engine_print_stats(e); e->timeLastStatistics += e->deltaTimeStatistics; } + /* Send off the runners. 
*/ + TIMER_TIC; + engine_launch(e, e->nr_threads); + TIMER_TOC(timer_runners); + TIMER_TOC2(timer_step); clocks_gettime(&time2); @@ -2734,19 +3157,40 @@ int engine_is_done(struct engine *e) { } /** - * @brief Drift particles using the current engine drift policy. + * @brief Unskip all the tasks that act on active cells at this time. * * @param e The #engine. */ -void engine_drift(struct engine *e) { +void engine_unskip(struct engine *e) { + + const ticks tic = getticks(); + threadpool_map(&e->threadpool, runner_do_unskip_mapper, e->s->cells_top, + e->s->nr_cells, sizeof(struct cell), 1, e); + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + +/** + * @brief Drift *all* particles forward to the current time. + * + * @param e The #engine. + */ +void engine_drift_all(struct engine *e) { const ticks tic = getticks(); threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 1, e); +#ifdef SWIFT_DEBUG_CHECKS + /* Check that all cells have been drifted to the current time. */ + space_check_drift_point(e->s, e->ti_current); +#endif + if (e->verbose) - message("took %.3f %s (including task unskipping).", - clocks_from_ticks(getticks() - tic), clocks_getunit()); + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } /** @@ -2889,9 +3333,26 @@ void engine_split(struct engine *e, struct partition *initial_partition) { s->parts = parts_new; s->xparts = xparts_new; - /* Re-link the gparts. */ + /* Re-link the gparts to their parts. */ if (s->nr_parts > 0 && s->nr_gparts > 0) - part_relink_gparts(s->parts, s->nr_parts, 0); + part_relink_gparts_to_parts(s->parts, s->nr_parts, 0); + + /* Re-allocate the local sparts. 
*/ + if (e->verbose) + message("Re-allocating sparts array from %zu to %zu.", s->size_sparts, + (size_t)(s->nr_sparts * 1.2)); + s->size_sparts = s->nr_sparts * 1.2; + struct spart *sparts_new = NULL; + if (posix_memalign((void **)&sparts_new, spart_align, + sizeof(struct spart) * s->size_sparts) != 0) + error("Failed to allocate new spart data."); + memcpy(sparts_new, s->sparts, sizeof(struct spart) * s->nr_sparts); + free(s->sparts); + s->sparts = sparts_new; + + /* Re-link the gparts to their sparts. */ + if (s->nr_sparts > 0 && s->nr_gparts > 0) + part_relink_gparts_to_sparts(s->sparts, s->nr_sparts, 0); /* Re-allocate the local gparts. */ if (e->verbose) @@ -2908,31 +3369,17 @@ void engine_split(struct engine *e, struct partition *initial_partition) { /* Re-link the parts. */ if (s->nr_parts > 0 && s->nr_gparts > 0) - part_relink_parts(s->gparts, s->nr_gparts, s->parts); + part_relink_parts_to_gparts(s->gparts, s->nr_gparts, s->parts); + + /* Re-link the sparts. */ + if (s->nr_sparts > 0 && s->nr_gparts > 0) + part_relink_sparts_to_gparts(s->gparts, s->nr_gparts, s->sparts); #ifdef SWIFT_DEBUG_CHECKS /* Verify that the links are correct */ - for (size_t k = 0; k < s->nr_gparts; ++k) { - - if (s->gparts[k].id_or_neg_offset <= 0) { - - struct part *part = &s->parts[-s->gparts[k].id_or_neg_offset]; - - if (part->gpart != &s->gparts[k]) error("Linking problem !"); - - if (s->gparts[k].x[0] != part->x[0] || s->gparts[k].x[1] != part->x[1] || - s->gparts[k].x[2] != part->x[2]) - error("Linked particles are not at the same position !"); - } - } - for (size_t k = 0; k < s->nr_parts; ++k) { - - if (s->parts[k].gpart != NULL && - s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) - error("Linking problem !"); - } - + part_verify_links(s->parts, s->gparts, s->sparts, s->nr_parts, s->nr_gparts, + s->nr_sparts, e->verbose); #endif #else @@ -2995,7 +3442,7 @@ static cpu_set_t *engine_entry_affinity() { /** * @brief Ensure the NUMA node on which we initialise (first 
touch) everything - * doesn't change before engine_init allocates NUMA-local workers. + * doesn't change before engine_init allocates NUMA-local workers. */ void engine_pin() { @@ -3088,7 +3535,6 @@ void engine_init(struct engine *e, struct space *s, e->timeStep = 0.; e->timeBase = 0.; e->timeBase_inv = 0.; - e->drift_all = (policy & engine_policy_drift_all); e->internalUnits = internal_units; e->timeFirstSnapshot = parser_get_param_double(params, "Snapshots:time_first"); @@ -3233,6 +3679,7 @@ void engine_init(struct engine *e, struct space *s, #endif if (with_aff) { +#ifdef HAVE_SETAFFINITY #ifdef WITH_MPI printf("[%04i] %s engine_init: cpu map is [ ", nodeID, clocks_get_timesincestart()); @@ -3241,6 +3688,7 @@ void engine_init(struct engine *e, struct space *s, #endif for (int i = 0; i < nr_affinity_cores; i++) printf("%i ", cpuid[i]); printf("].\n"); +#endif } /* Are we doing stuff in parallel? */ @@ -3293,9 +3741,9 @@ void engine_init(struct engine *e, struct space *s, e->hydro_properties->delta_neighbours, e->hydro_properties->eta_neighbours); - fprintf(e->file_timesteps, "# %6s %14s %14s %10s %10s %16s [%s]\n", "Step", - "Time", "Time-step", "Updates", "g-Updates", "Wall-clock time", - clocks_getunit()); + fprintf(e->file_timesteps, "# %6s %14s %14s %10s %10s %10s %16s [%s]\n", + "Step", "Time", "Time-step", "Updates", "g-Updates", "s-Updates", + "Wall-clock time", clocks_getunit()); fflush(e->file_timesteps); } @@ -3430,6 +3878,11 @@ void engine_init(struct engine *e, struct space *s, e->runners[k].cpuid = k; e->runners[k].qid = k * nr_queues / e->nr_threads; } + + /* Allocate particle cache. 
*/ + e->runners[k].par_cache.count = 0; + cache_init(&e->runners[k].par_cache, CACHE_SIZE); + if (verbose) { if (with_aff) message("runner %i on cpuid=%i with qid=%i.", e->runners[k].id, @@ -3473,7 +3926,7 @@ void engine_print_policy(struct engine *e) { #else printf("%s engine_policy: engine policies are [ ", clocks_get_timesincestart()); - for (int k = 1; k < 32; k++) + for (int k = 1; k < 31; k++) if (e->policy & (1 << k)) printf(" %s ", engine_policy_names[k + 1]); printf(" ]\n"); fflush(stdout); @@ -3515,6 +3968,8 @@ void engine_compute_next_snapshot_time(struct engine *e) { */ void engine_clean(struct engine *e) { + for (int i = 0; i < e->nr_threads; ++i) cache_clean(&e->runners[i].par_cache); + free(e->runners); free(e->snapshotUnits); free(e->links); scheduler_clean(&e->sched); diff --git a/src/engine.h b/src/engine.h index 8490a8907203e92b0e73738b7f469f61e9c49f2b..2514179078d6dc679f371ec14c3e53737c5b682e 100644 --- a/src/engine.h +++ b/src/engine.h @@ -65,7 +65,8 @@ enum engine_policy { engine_policy_cosmology = (1 << 10), engine_policy_drift_all = (1 << 11), engine_policy_cooling = (1 << 12), - engine_policy_sourceterms = (1 << 13) + engine_policy_sourceterms = (1 << 13), + engine_policy_stars = (1 << 14) }; extern const char *engine_policy_names[]; @@ -82,9 +83,6 @@ extern const char *engine_policy_names[]; /* The rank of the engine as a global variable (for messages). */ extern int engine_rank; -/* The maximal number of timesteps in a simulation */ -#define max_nr_timesteps (1 << 28) - /* Data structure for the engine. */ struct engine { @@ -117,11 +115,11 @@ struct engine { /* The previous system time. */ double timeOld; - int ti_old; + integertime_t ti_old; /* The current system time. */ double time; - int ti_current; + integertime_t ti_current; /* Time step */ double timeStep; @@ -131,13 +129,10 @@ struct engine { double timeBase_inv; /* Minimal ti_end for the next time-step */ - int ti_end_min; - - /* Are we drifting all particles now ? 
*/ - int drift_all; + integertime_t ti_end_min; /* Number of particles updated */ - size_t updates, g_updates; + size_t updates, g_updates, s_updates; /* Total numbers of particles in the system. */ size_t total_nr_parts, total_nr_gparts; @@ -148,7 +143,7 @@ struct engine { /* Snapshot information */ double timeFirstSnapshot; double deltaTimeSnapshot; - int ti_nextSnapshot; + integertime_t ti_nextSnapshot; char snapshotBaseName[200]; int snapshotCompression; struct UnitSystem *snapshotUnits; @@ -228,7 +223,8 @@ struct engine { /* Function prototypes. */ void engine_barrier(struct engine *e, int tid); void engine_compute_next_snapshot_time(struct engine *e); -void engine_drift(struct engine *e); +void engine_unskip(struct engine *e); +void engine_drift_all(struct engine *e); void engine_dump_snapshot(struct engine *e); void engine_init(struct engine *e, struct space *s, const struct swift_params *params, int nr_nodes, int nodeID, @@ -240,7 +236,7 @@ void engine_init(struct engine *e, struct space *s, const struct cooling_function_data *cooling, struct sourceterms *sourceterms); void engine_launch(struct engine *e, int nr_runners); -void engine_prepare(struct engine *e, int nodrift); +void engine_prepare(struct engine *e, int drift_all, int postrepart); void engine_print(struct engine *e); void engine_init_particles(struct engine *e, int flag_entropy_ICs); void engine_step(struct engine *e, struct repartition *repartition); @@ -248,7 +244,9 @@ void engine_maketasks(struct engine *e); void engine_split(struct engine *e, struct partition *initial_partition); void engine_exchange_strays(struct engine *e, size_t offset_parts, int *ind_part, size_t *Npart, size_t offset_gparts, - int *ind_gpart, size_t *Ngpart); + int *ind_gpart, size_t *Ngpart, + size_t offset_sparts, int *ind_spart, + size_t *Nspart); void engine_rebuild(struct engine *e); void engine_repartition(struct engine *e); void engine_makeproxies(struct engine *e); diff --git a/src/gravity/Default/gravity.h 
b/src/gravity/Default/gravity.h index 9e0ca81edff06b8a32afb185f24a88b41dc87da7..a0bfee05f8b7f93cce65e8b9a3e7e322e166569d 100644 --- a/src/gravity/Default/gravity.h +++ b/src/gravity/Default/gravity.h @@ -42,22 +42,6 @@ gravity_compute_timestep_self(const struct gpart* const gp) { return dt; } -/** - * @brief Initialises the g-particles for the first time - * - * This function is called only once just after the ICs have been - * read in to do some conversions. - * - * @param gp The particle to act upon - */ -__attribute__((always_inline)) INLINE static void gravity_first_init_gpart( - struct gpart* gp) { - - gp->ti_begin = 0; - gp->ti_end = 0; - gp->epsilon = 0.; // MATTHIEU -} - /** * @brief Prepares a g-particle for the gravity calculation * @@ -97,9 +81,25 @@ __attribute__((always_inline)) INLINE static void gravity_end_force( * * @param gp The particle to act upon * @param dt The time-step for this kick - * @param half_dt The half time-step for this kick */ __attribute__((always_inline)) INLINE static void gravity_kick_extra( - struct gpart* gp, float dt, float half_dt) {} + struct gpart* gp, float dt) {} + +/** + * @brief Initialises the g-particles for the first time + * + * This function is called only once just after the ICs have been + * read in to do some conversions. 
+ * + * @param gp The particle to act upon + */ +__attribute__((always_inline)) INLINE static void gravity_first_init_gpart( + struct gpart* gp) { + + gp->time_bin = 0; + gp->epsilon = 0.; // MATTHIEU + + gravity_init_gpart(gp); +} #endif /* SWIFT_DEFAULT_GRAVITY_H */ diff --git a/src/gravity/Default/gravity_debug.h b/src/gravity/Default/gravity_debug.h index c284f543b3be06297600c010e302423eb683adc9..f0d145647ab3f973f3c0ffc2f995ee01d534bc72 100644 --- a/src/gravity/Default/gravity_debug.h +++ b/src/gravity/Default/gravity_debug.h @@ -22,12 +22,10 @@ __attribute__((always_inline)) INLINE static void gravity_debug_particle( const struct gpart* p) { printf( - "x=[%.3e,%.3e,%.3e], " - "v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " - "mass=%.3e t_begin=%d, t_end=%d\n", - p->x[0], p->x[1], p->x[2], p->v_full[0], p->v_full[1], p->v_full[2], - p->a_grav[0], p->a_grav[1], p->a_grav[2], p->mass, p->ti_begin, - p->ti_end); + "mass=%.3e epsilon=%.5e time_bin=%d\n" + "x=[%.5e,%.5e,%.5e], v_full=[%.5e,%.5e,%.5e], a=[%.5e,%.5e,%.5e]\n", + p->mass, p->epsilon, p->time_bin, p->x[0], p->x[1], p->x[2], p->v_full[0], + p->v_full[1], p->v_full[2], p->a_grav[0], p->a_grav[1], p->a_grav[2]); } #endif /* SWIFT_DEFAULT_GRAVITY_DEBUG_H */ diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h index f06e65e5b30ebcd609c0c6204de33da17b770add..f484b13663059fa5f4f822aa78748fe4ef9d5926 100644 --- a/src/gravity/Default/gravity_part.h +++ b/src/gravity/Default/gravity_part.h @@ -19,12 +19,13 @@ #ifndef SWIFT_DEFAULT_GRAVITY_PART_H #define SWIFT_DEFAULT_GRAVITY_PART_H -/* Some standard headers. */ -#include <stdlib.h> - /* Gravity particle. */ struct gpart { + /* Particle ID. If negative, it is the negative offset of the #part with + which this gpart is linked. */ + long long id_or_neg_offset; + /* Particle position. */ double x[3]; @@ -43,15 +44,21 @@ struct gpart { /* Softening length */ float epsilon; - /* Particle time of beginning of time-step. 
*/ - int ti_begin; + /* Time-step length */ + timebin_t time_bin; - /* Particle time of end of time-step. */ - int ti_end; + /* Type of the #gpart (DM, gas, star, ...) */ + enum part_type type; - /* Particle ID. If negative, it is the negative offset of the #part with - which this gpart is linked. */ - long long id_or_neg_offset; +#ifdef SWIFT_DEBUG_CHECKS + + /* Time of the last drift */ + integertime_t ti_drift; + + /* Time of the last kick */ + integertime_t ti_kick; + +#endif } SWIFT_STRUCT_ALIGN; diff --git a/src/hydro/Default/hydro.h b/src/hydro/Default/hydro.h index 3fd357a2d8778f5ca8b014935d538350eccb99c6..bfb5cd1ce39a9908573c66406f41b56561a870d6 100644 --- a/src/hydro/Default/hydro.h +++ b/src/hydro/Default/hydro.h @@ -33,7 +33,7 @@ * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy( - const struct part *restrict p, float dt) { + const struct part *restrict p) { return p->u; } @@ -45,7 +45,7 @@ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy( * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_pressure( - const struct part *restrict p, float dt) { + const struct part *restrict p) { return gas_pressure_from_internal_energy(p->rho, p->u); } @@ -57,7 +57,7 @@ __attribute__((always_inline)) INLINE static float hydro_get_pressure( * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_entropy( - const struct part *restrict p, float dt) { + const struct part *restrict p) { return gas_entropy_from_internal_energy(p->rho, p->u); } @@ -69,7 +69,7 @@ __attribute__((always_inline)) INLINE static float hydro_get_entropy( * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_soundspeed( - const struct part *restrict p, float dt) { + const struct part *restrict p) { return p->force.soundspeed; } @@ -97,34 +97,30 @@ 
__attribute__((always_inline)) INLINE static float hydro_get_mass( } /** - * @brief Modifies the thermal state of a particle to the imposed internal - * energy + * @brief Returns the time derivative of internal energy of a particle * - * This overrides the current state of the particle but does *not* change its - * time-derivatives + * We assume a constant density. * - * @param p The particle - * @param u The new internal energy + * @param p The particle of interest */ -__attribute__((always_inline)) INLINE static void hydro_set_internal_energy( - struct part *restrict p, float u) { +__attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( + const struct part *restrict p) { - p->u = u; + return p->force.u_dt; } /** - * @brief Modifies the thermal state of a particle to the imposed entropy + * @brief Returns the time derivative of internal energy of a particle * - * This overrides the current state of the particle but does *not* change its - * time-derivatives + * We assume a constant density. * - * @param p The particle - * @param S The new entropy + * @param p The particle of interest. + * @param du_dt The new time derivative of the internal energy. */ -__attribute__((always_inline)) INLINE static void hydro_set_entropy( - struct part *restrict p, float S) { +__attribute__((always_inline)) INLINE static void hydro_set_internal_energy_dt( + struct part *restrict p, float du_dt) { - p->u = gas_internal_energy_from_entropy(p->rho, S); + p->force.u_dt = du_dt; } /** @@ -152,26 +148,6 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep( return min(dt_cfl, dt_u_change); } -/** - * @brief Initialises the particles for the first time - * - * This function is called only once just after the ICs have been - * read in to do some conversions. 
- * - * @param p The particle to act upon - * @param xp The extended particle data to act upon - */ -__attribute__((always_inline)) INLINE static void hydro_first_init_part( - struct part *restrict p, struct xpart *restrict xp) { - - p->ti_begin = 0; - p->ti_end = 0; - xp->v_full[0] = p->v[0]; - xp->v_full[1] = p->v[1]; - xp->v_full[2] = p->v[2]; - xp->u_full = p->u; -} - /** * @brief Prepares a particle for the density calculation. * @@ -244,8 +220,7 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( * @param time The current time */ __attribute__((always_inline)) INLINE static void hydro_prepare_force( - struct part *restrict p, struct xpart *restrict xp, int ti_current, - double timeBase) { + struct part *restrict p, struct xpart *restrict xp) { /* Some smoothing length multiples. */ const float h = p->h; @@ -270,17 +245,18 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( p->force.balsara = normDiv_v / (normDiv_v + normRot_v + 0.0001f * fc * h_inv); /* Viscosity parameter decay time */ - const float tau = h / (2.f * const_viscosity_length * p->force.soundspeed); + /* const float tau = h / (2.f * const_viscosity_length * p->force.soundspeed); + */ /* Viscosity source term */ - const float S = max(-normDiv_v, 0.f); + /* const float S = max(-normDiv_v, 0.f); */ /* Compute the particle's viscosity parameter time derivative */ - const float alpha_dot = (const_viscosity_alpha_min - p->alpha) / tau + - (const_viscosity_alpha_max - p->alpha) * S; + /* const float alpha_dot = (const_viscosity_alpha_min - p->alpha) / tau + */ + /* (const_viscosity_alpha_max - p->alpha) * S; */ /* Update particle's viscosity paramter */ - p->alpha += alpha_dot * (p->ti_end - p->ti_begin) * timeBase; + /* p->alpha += alpha_dot * (p->ti_end - p->ti_begin) * timeBase; */ // MATTHIEU } /** @@ -305,6 +281,22 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration( p->force.v_sig = 0.0f; } +/** + * @brief Sets the values to be 
predicted in the drifts to their values at a + * kick time + * + * @param p The particle. + * @param xp The extended data of this particle. + */ +__attribute__((always_inline)) INLINE static void hydro_reset_predicted_values( + struct part *restrict p, const struct xpart *restrict xp) { + + /* Re-set the predicted velocities */ + p->v[0] = xp->v_full[0]; + p->v[1] = xp->v_full[1]; + p->v[2] = xp->v_full[2]; +} + /** * @brief Predict additional particle fields forward in time when drifting * @@ -316,8 +308,7 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration( * @param timeBase The minimal time-step size */ __attribute__((always_inline)) INLINE static void hydro_predict_extra( - struct part *restrict p, struct xpart *restrict xp, float dt, int t0, - int t1, double timeBase) { + struct part *restrict p, struct xpart *restrict xp, float dt) { float u, w; const float h_inv = 1.f / p->h; @@ -368,8 +359,7 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( * @param half_dt The half time-step for this kick */ __attribute__((always_inline)) INLINE static void hydro_kick_extra( - struct part *restrict p, struct xpart *restrict xp, float dt, - float half_dt) {} + struct part *restrict p, struct xpart *restrict xp, float dt) {} /** * @brief Converts hydro quantity of a particle at the start of a run @@ -379,6 +369,28 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( * @param p The particle to act upon */ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( - struct part *restrict p) {} + struct part *restrict p, struct xpart *restrict xp) {} + +/** + * @brief Initialises the particles for the first time + * + * This function is called only once just after the ICs have been + * read in to do some conversions. 
+ * + * @param p The particle to act upon + * @param xp The extended particle data to act upon + */ +__attribute__((always_inline)) INLINE static void hydro_first_init_part( + struct part *restrict p, struct xpart *restrict xp) { + + p->time_bin = 0; + xp->v_full[0] = p->v[0]; + xp->v_full[1] = p->v[1]; + xp->v_full[2] = p->v[2]; + xp->u_full = p->u; + + hydro_reset_acceleration(p); + hydro_init_part(p); +} #endif /* SWIFT_DEFAULT_HYDRO_H */ diff --git a/src/hydro/Default/hydro_debug.h b/src/hydro/Default/hydro_debug.h index d02d3ef82c1b3d751731f49850c06df4b146b164..3be9c9e1760591423edbd218d19b46ddf9aad01e 100644 --- a/src/hydro/Default/hydro_debug.h +++ b/src/hydro/Default/hydro_debug.h @@ -25,11 +25,10 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "x=[%.3e,%.3e,%.3e], " "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " "h=%.3e, " - "wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, t_begin=%d, t_end=%d\n", + "wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, time_bin=%d\n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], - p->h, (int)p->density.wcount, p->mass, p->rho_dh, p->rho, p->ti_begin, - p->ti_end); + p->h, (int)p->density.wcount, p->mass, p->rho_dh, p->rho, p->time_bin); } #endif /* SWIFT_DEFAULT_HYDRO_DEBUG_H */ diff --git a/src/hydro/Default/hydro_part.h b/src/hydro/Default/hydro_part.h index c7464bcf338b1c5b81ffa91d92264c2bd35e9313..332eecb27fb65a6b4da48cbb595450a432c44615 100644 --- a/src/hydro/Default/hydro_part.h +++ b/src/hydro/Default/hydro_part.h @@ -55,12 +55,6 @@ struct part { /* Particle cutoff radius. */ float h; - /* Particle time of beginning of time-step. */ - int ti_begin; - - /* Particle time of end of time-step. */ - int ti_end; - /* Particle internal energy. */ float u; @@ -125,6 +119,9 @@ struct part { /* Pointer to corresponding gravity part. 
*/ struct gpart* gpart; + /* Particle time-bin */ + timebin_t time_bin; + } SWIFT_STRUCT_ALIGN; #endif /* SWIFT_DEFAULT_HYDRO_PART_H */ diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h index 157893bc9e27806d2b97ac5f5a81d0f6fbb1c589..160a2d8b5d25a97cefb2afd5e22d8e6bcea0006e 100644 --- a/src/hydro/Gadget2/hydro.h +++ b/src/hydro/Gadget2/hydro.h @@ -43,50 +43,42 @@ * @brief Returns the internal energy of a particle * * @param p The particle of interest - * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy( - const struct part *restrict p, float dt) { - - const float entropy = p->entropy + p->entropy_dt * dt; + const struct part *restrict p) { - return gas_internal_energy_from_entropy(p->rho, entropy); + return gas_internal_energy_from_entropy(p->rho, p->entropy); } /** * @brief Returns the pressure of a particle * * @param p The particle of interest - * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_pressure( - const struct part *restrict p, float dt) { - - const float entropy = p->entropy + p->entropy_dt * dt; + const struct part *restrict p) { - return gas_pressure_from_entropy(p->rho, entropy); + return gas_pressure_from_entropy(p->rho, p->entropy); } /** * @brief Returns the entropy of a particle * * @param p The particle of interest - * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_entropy( - const struct part *restrict p, float dt) { + const struct part *restrict p) { - return p->entropy + p->entropy_dt * dt; + return p->entropy; } /** * @brief Returns the sound speed of a particle * * @param p The particle of interest - * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_soundspeed( - const struct part *restrict p, float dt) { + const struct part *restrict p) { return p->force.soundspeed; } @@ -114,70 +106,30 @@ 
__attribute__((always_inline)) INLINE static float hydro_get_mass( } /** - * @brief Modifies the thermal state of a particle to the imposed internal - * energy + * @brief Returns the time derivative of internal energy of a particle * - * This overwrites the current state of the particle but does *not* change its - * time-derivatives. Entropy, pressure, sound-speed and signal velocity will be - * updated. + * We assume a constant density. * - * @param p The particle - * @param u The new internal energy + * @param p The particle of interest */ -__attribute__((always_inline)) INLINE static void hydro_set_internal_energy( - struct part *restrict p, float u) { - - p->entropy = gas_entropy_from_internal_energy(p->rho, u); - - /* Compute the new pressure */ - const float pressure = gas_pressure_from_internal_energy(p->rho, u); - - /* Compute the new sound speed */ - const float soundspeed = gas_soundspeed_from_pressure(p->rho, pressure); - - /* Update the signal velocity */ - const float v_sig_old = p->force.v_sig; - const float v_sig_new = p->force.v_sig - p->force.soundspeed + soundspeed; - const float v_sig = max(v_sig_old, v_sig_new); - - const float rho_inv = 1.f / p->rho; +__attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( + const struct part *restrict p) { - p->force.soundspeed = soundspeed; - p->force.P_over_rho2 = pressure * rho_inv * rho_inv; - p->force.v_sig = v_sig; + return gas_internal_energy_from_entropy(p->rho, p->entropy_dt); } /** - * @brief Modifies the thermal state of a particle to the imposed entropy + * @brief Returns the time derivative of internal energy of a particle * - * This overwrites the current state of the particle but does *not* change its - * time-derivatives. Entropy, pressure, sound-speed and signal velocity will be - * updated. + * We assume a constant density. * - * @param p The particle - * @param S The new entropy + * @param p The particle of interest. 
+ * @param du_dt The new time derivative of the internal energy. */ -__attribute__((always_inline)) INLINE static void hydro_set_entropy( - struct part *restrict p, float S) { - - p->entropy = S; - - /* Compute the pressure */ - const float pressure = gas_pressure_from_entropy(p->rho, p->entropy); - - /* Compute the new sound speed */ - const float soundspeed = gas_soundspeed_from_pressure(p->rho, pressure); - - /* Update the signal velocity */ - const float v_sig_old = p->force.v_sig; - const float v_sig_new = p->force.v_sig - p->force.soundspeed + soundspeed; - const float v_sig = max(v_sig_old, v_sig_new); +__attribute__((always_inline)) INLINE static void hydro_set_internal_energy_dt( + struct part *restrict p, float du_dt) { - const float rho_inv = 1.f / p->rho; - - p->force.soundspeed = soundspeed; - p->force.P_over_rho2 = pressure * rho_inv * rho_inv; - p->force.v_sig = v_sig; + p->entropy_dt = gas_entropy_from_internal_energy(p->rho, du_dt); } /** @@ -200,25 +152,6 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep( return dt_cfl; } -/** - * @brief Initialises the particles for the first time - * - * This function is called only once just after the ICs have been - * read in to do some conversions. - * - * @param p The particle to act upon - * @param xp The extended particle data to act upon - */ -__attribute__((always_inline)) INLINE static void hydro_first_init_part( - struct part *restrict p, struct xpart *restrict xp) { - - p->ti_begin = 0; - p->ti_end = 0; - xp->v_full[0] = p->v[0]; - xp->v_full[1] = p->v[1]; - xp->v_full[2] = p->v[2]; -} - /** * @brief Prepares a particle for the density calculation. 
* @@ -229,9 +162,10 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part( */ __attribute__((always_inline)) INLINE static void hydro_init_part( struct part *restrict p) { + + p->rho = 0.f; p->density.wcount = 0.f; p->density.wcount_dh = 0.f; - p->rho = 0.f; p->density.rho_dh = 0.f; p->density.div_v = 0.f; p->density.rot_v[0] = 0.f; @@ -289,8 +223,7 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( * @param timeBase The minimal time-step size */ __attribute__((always_inline)) INLINE static void hydro_prepare_force( - struct part *restrict p, struct xpart *restrict xp, int ti_current, - double timeBase) { + struct part *restrict p, struct xpart *restrict xp) { const float fac_mu = 1.f; /* Will change with cosmological integration */ @@ -303,8 +236,7 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( const float abs_div_v = fabsf(p->density.div_v); /* Compute the pressure */ - const float half_dt = (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase; - const float pressure = hydro_get_pressure(p, half_dt); + const float pressure = gas_pressure_from_entropy(p->rho, p->entropy); /* Compute the sound speed */ const float soundspeed = gas_soundspeed_from_pressure(p->rho, pressure); @@ -352,6 +284,25 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration( p->force.v_sig = 0.0f; } +/** + * @brief Sets the values to be predicted in the drifts to their values at a + * kick time + * + * @param p The particle. + * @param xp The extended data of this particle. 
+ */ +__attribute__((always_inline)) INLINE static void hydro_reset_predicted_values( + struct part *restrict p, const struct xpart *restrict xp) { + + /* Re-set the predicted velocities */ + p->v[0] = xp->v_full[0]; + p->v[1] = xp->v_full[1]; + p->v[2] = xp->v_full[2]; + + /* Re-set the entropy */ + p->entropy = xp->entropy_full; +} + /** * @brief Predict additional particle fields forward in time when drifting * @@ -363,8 +314,7 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration( * @param timeBase The minimal time-step size */ __attribute__((always_inline)) INLINE static void hydro_predict_extra( - struct part *restrict p, const struct xpart *restrict xp, float dt, int t0, - int t1, double timeBase) { + struct part *restrict p, const struct xpart *restrict xp, float dt) { const float h_inv = 1.f / p->h; @@ -382,9 +332,11 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( else p->rho *= expf(w2); - /* Drift the pressure */ - const float dt_entr = (t1 - (p->ti_begin + p->ti_end) / 2) * timeBase; - const float pressure = hydro_get_pressure(p, dt_entr); + /* Predict the entropy */ + p->entropy += p->entropy_dt * dt; + + /* Re-compute the pressure */ + const float pressure = gas_pressure_from_entropy(p->rho, p->entropy); /* Compute the new sound speed */ const float soundspeed = gas_soundspeed_from_pressure(p->rho, pressure); @@ -420,25 +372,21 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( * @param p The particle to act upon * @param xp The particle extended data to act upon * @param dt The time-step for this kick - * @param half_dt The half time-step for this kick */ __attribute__((always_inline)) INLINE static void hydro_kick_extra( - struct part *restrict p, struct xpart *restrict xp, float dt, - float half_dt) { - - /* Do not decrease the entropy (temperature) by more than a factor of 2*/ - const float entropy_change = p->entropy_dt * dt; - if (entropy_change > -0.5f * p->entropy) - 
p->entropy += entropy_change; - else - p->entropy *= 0.5f; + struct part *restrict p, struct xpart *restrict xp, float dt) { - /* Do not 'overcool' when timestep increases */ - if (p->entropy + p->entropy_dt * half_dt < 0.5f * p->entropy) - p->entropy_dt = -0.5f * p->entropy / half_dt; + /* Do not decrease the entropy by more than a factor of 2 */ + if (dt > 0. && p->entropy_dt * dt < -0.5f * xp->entropy_full) { + /* message("Warning! Limiting entropy_dt. Possible cooling error.\n + * entropy_full = %g \n entropy_dt * dt =%g \n", */ + /* xp->entropy_full,p->entropy_dt * dt); */ + p->entropy_dt = -0.5f * xp->entropy_full / dt; + } + xp->entropy_full += p->entropy_dt * dt; /* Compute the pressure */ - const float pressure = gas_pressure_from_entropy(p->rho, p->entropy); + const float pressure = gas_pressure_from_entropy(p->rho, xp->entropy_full); /* Compute the new sound speed */ const float soundspeed = gas_soundspeed_from_pressure(p->rho, pressure); @@ -459,10 +407,11 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( * @param p The particle to act upon */ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( - struct part *restrict p) { + struct part *restrict p, struct xpart *restrict xp) { /* We read u in the entropy field. We now get S from u */ - p->entropy = gas_entropy_from_internal_energy(p->rho, p->entropy); + xp->entropy_full = gas_entropy_from_internal_energy(p->rho, p->entropy); + p->entropy = xp->entropy_full; /* Compute the pressure */ const float pressure = gas_pressure_from_entropy(p->rho, p->entropy); @@ -478,4 +427,26 @@ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( p->force.P_over_rho2 = P_over_rho2; } +/** + * @brief Initialises the particles for the first time + * + * This function is called only once just after the ICs have been + * read in to do some conversions. 
+ * + * @param p The particle to act upon + * @param xp The extended particle data to act upon + */ +__attribute__((always_inline)) INLINE static void hydro_first_init_part( + struct part *restrict p, struct xpart *restrict xp) { + + p->time_bin = 0; + xp->v_full[0] = p->v[0]; + xp->v_full[1] = p->v[1]; + xp->v_full[2] = p->v[2]; + xp->entropy_full = p->entropy; + + hydro_reset_acceleration(p); + hydro_init_part(p); +} + #endif /* SWIFT_GADGET2_HYDRO_H */ diff --git a/src/hydro/Gadget2/hydro_debug.h b/src/hydro/Gadget2/hydro_debug.h index 656299b38374f68824ec20d85ece169d5f1fd599..6500d1126bd5b5a65d3e511c13afb8364574e0ba 100644 --- a/src/hydro/Gadget2/hydro_debug.h +++ b/src/hydro/Gadget2/hydro_debug.h @@ -27,14 +27,14 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "h=%.3e, wcount=%.3f, wcount_dh=%.3e, m=%.3e, dh_drho=%.3e, rho=%.3e, " "P=%.3e, P_over_rho2=%.3e, S=%.3e, dS/dt=%.3e, c=%.3e\n" "divV=%.3e, rotV=[%.3e,%.3e,%.3e], balsara=%.3e \n " - "v_sig=%e dh/dt=%.3e t_begin=%d, t_end=%d\n", + "v_sig=%e dh/dt=%.3e time_bin=%d\n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->h, p->density.wcount, p->density.wcount_dh, p->mass, p->density.rho_dh, - p->rho, hydro_get_pressure(p, 0.), p->force.P_over_rho2, p->entropy, + p->rho, hydro_get_pressure(p), p->force.P_over_rho2, p->entropy, p->entropy_dt, p->force.soundspeed, p->density.div_v, p->density.rot_v[0], p->density.rot_v[1], p->density.rot_v[2], p->force.balsara, - p->force.v_sig, p->force.h_dt, p->ti_begin, p->ti_end); + p->force.v_sig, p->force.h_dt, p->time_bin); } #endif /* SWIFT_GADGET2_HYDRO_DEBUG_H */ diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index 08fb2b37db566e191bd74d82488b5d68e764573b..3fef18b4f487f1734a5f93c4bad46cf4e6968240 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -155,20 +155,15 @@ 
__attribute__((always_inline)) INLINE static void runner_iact_vec_density( /* Get the radius and inverse radius. */ r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - /*vec_rsqrt does not have the level of accuracy we need, so an extra term is - * added below.*/ - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); + ri = vec_reciprocal_sqrt(r2); r.v = r2.v * ri.v; hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f)); + hi_inv = vec_reciprocal(hi); xi.v = r.v * hi_inv.v; hj.v = vec_load(Hj); - hj_inv.v = vec_rcp(hj.v); - hj_inv.v = hj_inv.v - hj_inv.v * (hj_inv.v * hj.v - vec_set1(1.0f)); + hj_inv = vec_reciprocal(hj); xj.v = r.v * hj_inv.v; /* Compute the kernel function. */ @@ -327,15 +322,11 @@ runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, /* Get the radius and inverse radius. */ r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - /*vec_rsqrt does not have the level of accuracy we need, so an extra term is - * added below.*/ - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); + ri = vec_reciprocal_sqrt(r2); r.v = r2.v * ri.v; hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f)); + hi_inv = vec_reciprocal(hi); xi.v = r.v * hi_inv.v; kernel_deval_vec(&xi, &wi, &wi_dx); @@ -382,6 +373,176 @@ runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, #endif } +#ifdef WITH_VECTORIZATION +/** + * @brief Density interaction computed using 2 interleaved vectors + * (non-symmetric vectorized version). 
+ */ +__attribute__((always_inline)) INLINE static void +runner_iact_nonsym_2_vec_density( + float *R2, float *Dx, float *Dy, float *Dz, vector hi_inv, vector vix, + vector viy, vector viz, float *Vjx, float *Vjy, float *Vjz, float *Mj, + vector *rhoSum, vector *rho_dhSum, vector *wcountSum, vector *wcount_dhSum, + vector *div_vSum, vector *curlvxSum, vector *curlvySum, vector *curlvzSum, + vector mask, vector mask2, int knlMask, int knlMask2) { + + vector r, ri, r2, xi, wi, wi_dx; + vector mj; + vector dx, dy, dz, dvx, dvy, dvz; + vector vjx, vjy, vjz; + vector dvdr; + vector curlvrx, curlvry, curlvrz; + vector r_2, ri2, r2_2, xi2, wi2, wi_dx2; + vector mj2; + vector dx2, dy2, dz2, dvx2, dvy2, dvz2; + vector vjx2, vjy2, vjz2; + vector dvdr2; + vector curlvrx2, curlvry2, curlvrz2; + + /* Fill the vectors. */ + mj.v = vec_load(Mj); + mj2.v = vec_load(&Mj[VEC_SIZE]); + vjx.v = vec_load(Vjx); + vjx2.v = vec_load(&Vjx[VEC_SIZE]); + vjy.v = vec_load(Vjy); + vjy2.v = vec_load(&Vjy[VEC_SIZE]); + vjz.v = vec_load(Vjz); + vjz2.v = vec_load(&Vjz[VEC_SIZE]); + dx.v = vec_load(Dx); + dx2.v = vec_load(&Dx[VEC_SIZE]); + dy.v = vec_load(Dy); + dy2.v = vec_load(&Dy[VEC_SIZE]); + dz.v = vec_load(Dz); + dz2.v = vec_load(&Dz[VEC_SIZE]); + + /* Get the radius and inverse radius. */ + r2.v = vec_load(R2); + r2_2.v = vec_load(&R2[VEC_SIZE]); + ri = vec_reciprocal_sqrt(r2); + ri2 = vec_reciprocal_sqrt(r2_2); + r.v = vec_mul(r2.v, ri.v); + r_2.v = vec_mul(r2_2.v, ri2.v); + + xi.v = vec_mul(r.v, hi_inv.v); + xi2.v = vec_mul(r_2.v, hi_inv.v); + + /* Calculate the kernel for two particles. */ + kernel_deval_2_vec(&xi, &wi, &wi_dx, &xi2, &wi2, &wi_dx2); + + /* Compute dv. 
*/ + dvx.v = vec_sub(vix.v, vjx.v); + dvx2.v = vec_sub(vix.v, vjx2.v); + dvy.v = vec_sub(viy.v, vjy.v); + dvy2.v = vec_sub(viy.v, vjy2.v); + dvz.v = vec_sub(viz.v, vjz.v); + dvz2.v = vec_sub(viz.v, vjz2.v); + + /* Compute dv dot r */ + dvdr.v = vec_fma(dvx.v, dx.v, vec_fma(dvy.v, dy.v, vec_mul(dvz.v, dz.v))); + dvdr2.v = + vec_fma(dvx2.v, dx2.v, vec_fma(dvy2.v, dy2.v, vec_mul(dvz2.v, dz2.v))); + dvdr.v = vec_mul(dvdr.v, ri.v); + dvdr2.v = vec_mul(dvdr2.v, ri2.v); + + /* Compute dv cross r */ + curlvrx.v = + vec_fma(dvy.v, dz.v, vec_mul(vec_set1(-1.0f), vec_mul(dvz.v, dy.v))); + curlvrx2.v = + vec_fma(dvy2.v, dz2.v, vec_mul(vec_set1(-1.0f), vec_mul(dvz2.v, dy2.v))); + curlvry.v = + vec_fma(dvz.v, dx.v, vec_mul(vec_set1(-1.0f), vec_mul(dvx.v, dz.v))); + curlvry2.v = + vec_fma(dvz2.v, dx2.v, vec_mul(vec_set1(-1.0f), vec_mul(dvx2.v, dz2.v))); + curlvrz.v = + vec_fma(dvx.v, dy.v, vec_mul(vec_set1(-1.0f), vec_mul(dvy.v, dx.v))); + curlvrz2.v = + vec_fma(dvx2.v, dy2.v, vec_mul(vec_set1(-1.0f), vec_mul(dvy2.v, dx2.v))); + curlvrx.v = vec_mul(curlvrx.v, ri.v); + curlvrx2.v = vec_mul(curlvrx2.v, ri2.v); + curlvry.v = vec_mul(curlvry.v, ri.v); + curlvry2.v = vec_mul(curlvry2.v, ri2.v); + curlvrz.v = vec_mul(curlvrz.v, ri.v); + curlvrz2.v = vec_mul(curlvrz2.v, ri2.v); + +/* Mask updates to intermediate vector sums for particle pi. 
*/ +#ifdef HAVE_AVX512_F + rhoSum->v = + _mm512_mask_add_ps(rhoSum->v, knlMask, vec_mul(mj.v, wi.v), rhoSum->v); + rhoSum->v = + _mm512_mask_add_ps(rhoSum->v, knlMask2, vec_mul(mj2.v, wi2.v), rhoSum->v); + + rho_dhSum->v = + _mm512_mask_sub_ps(rho_dhSum->v, knlMask, rho_dhSum->v, + vec_mul(mj.v, vec_fma(vec_set1(hydro_dimension), wi.v, + vec_mul(xi.v, wi_dx.v)))); + rho_dhSum->v = _mm512_mask_sub_ps( + rho_dhSum->v, knlMask2, rho_dhSum->v, + vec_mul(mj2.v, vec_fma(vec_set1(hydro_dimension), wi2.v, + vec_mul(xi2.v, wi_dx2.v)))); + + wcountSum->v = _mm512_mask_add_ps(wcountSum->v, knlMask, wi.v, wcountSum->v); + wcountSum->v = + _mm512_mask_add_ps(wcountSum->v, knlMask2, wi2.v, wcountSum->v); + + wcount_dhSum->v = _mm512_mask_sub_ps(wcount_dhSum->v, knlMask, + wcount_dhSum->v, vec_mul(xi.v, wi_dx.v)); + wcount_dhSum->v = _mm512_mask_sub_ps( + wcount_dhSum->v, knlMask2, wcount_dhSum->v, vec_mul(xi2.v, wi_dx2.v)); + + div_vSum->v = _mm512_mask_sub_ps(div_vSum->v, knlMask, div_vSum->v, + vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v))); + div_vSum->v = _mm512_mask_sub_ps(div_vSum->v, knlMask2, div_vSum->v, + vec_mul(mj2.v, vec_mul(dvdr2.v, wi_dx2.v))); + + curlvxSum->v = _mm512_mask_add_ps(curlvxSum->v, knlMask, + vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v)), + curlvxSum->v); + curlvxSum->v = _mm512_mask_add_ps( + curlvxSum->v, knlMask2, vec_mul(mj2.v, vec_mul(curlvrx2.v, wi_dx2.v)), + curlvxSum->v); + + curlvySum->v = _mm512_mask_add_ps(curlvySum->v, knlMask, + vec_mul(mj.v, vec_mul(curlvry.v, wi_dx.v)), + curlvySum->v); + curlvySum->v = _mm512_mask_add_ps( + curlvySum->v, knlMask2, vec_mul(mj2.v, vec_mul(curlvry2.v, wi_dx2.v)), + curlvySum->v); + + curlvzSum->v = _mm512_mask_add_ps(curlvzSum->v, knlMask, + vec_mul(mj.v, vec_mul(curlvrz.v, wi_dx.v)), + curlvzSum->v); + curlvzSum->v = _mm512_mask_add_ps( + curlvzSum->v, knlMask2, vec_mul(mj2.v, vec_mul(curlvrz2.v, wi_dx2.v)), + curlvzSum->v); +#else + rhoSum->v += vec_and(vec_mul(mj.v, wi.v), mask.v); + rhoSum->v += 
vec_and(vec_mul(mj2.v, wi2.v), mask2.v); + rho_dhSum->v -= vec_and(vec_mul(mj.v, vec_fma(vec_set1(hydro_dimension), wi.v, + vec_mul(xi.v, wi_dx.v))), + mask.v); + rho_dhSum->v -= + vec_and(vec_mul(mj2.v, vec_fma(vec_set1(hydro_dimension), wi2.v, + vec_mul(xi2.v, wi_dx2.v))), + mask2.v); + wcountSum->v += vec_and(wi.v, mask.v); + wcountSum->v += vec_and(wi2.v, mask2.v); + wcount_dhSum->v -= vec_and(vec_mul(xi.v, wi_dx.v), mask.v); + wcount_dhSum->v -= vec_and(vec_mul(xi2.v, wi_dx2.v), mask2.v); + div_vSum->v -= vec_and(vec_mul(mj.v, vec_mul(dvdr.v, wi_dx.v)), mask.v); + div_vSum->v -= vec_and(vec_mul(mj2.v, vec_mul(dvdr2.v, wi_dx2.v)), mask2.v); + curlvxSum->v += vec_and(vec_mul(mj.v, vec_mul(curlvrx.v, wi_dx.v)), mask.v); + curlvxSum->v += + vec_and(vec_mul(mj2.v, vec_mul(curlvrx2.v, wi_dx2.v)), mask2.v); + curlvySum->v += vec_and(vec_mul(mj.v, vec_mul(curlvry.v, wi_dx.v)), mask.v); + curlvySum->v += + vec_and(vec_mul(mj2.v, vec_mul(curlvry2.v, wi_dx2.v)), mask2.v); + curlvzSum->v += vec_and(vec_mul(mj.v, vec_mul(curlvrz.v, wi_dx.v)), mask.v); + curlvzSum->v += + vec_and(vec_mul(mj2.v, vec_mul(curlvrz2.v, wi_dx2.v)), mask2.v); +#endif +} +#endif + /** * @brief Force loop */ @@ -492,9 +653,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( vector hi, hj, hi_inv, hj_inv; vector hid_inv, hjd_inv; vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; - vector piPOrho, pjPOrho, pirho, pjrho; + vector piPOrho2, pjPOrho2, pirho, pjrho; vector mi, mj; vector f; + vector grad_hi, grad_hj; vector dx[3]; vector vi[3], vj[3]; vector pia[3], pja[3]; @@ -512,14 +674,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, - 
pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, - pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); - pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, - pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, - pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); + piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, + pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, + pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, + pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); + pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, + pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, + pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, + pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); + grad_hi.v = + vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f, + pi[4]->force.f, pi[5]->force.f, pi[6]->force.f, pi[7]->force.f); + grad_hj.v = + vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f, + pj[4]->force.f, pj[5]->force.f, pj[6]->force.f, pj[7]->force.f); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, pi[5]->rho, pi[6]->rho, pi[7]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, @@ -551,10 +719,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( #elif VEC_SIZE == 4 mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); - pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); + piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, + pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); + pjPOrho2.v = 
vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, + pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); + grad_hi.v = + vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f); + grad_hj.v = + vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, @@ -577,14 +749,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( /* Get the radius and inverse radius. */ r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); + ri = vec_reciprocal_sqrt(r2); r.v = r2.v * ri.v; /* Get the kernel for hi. */ hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f)); + hi_inv = vec_reciprocal(hi); hid_inv = pow_dimension_plus_one_vec(hi_inv); /* 1/h^(d+1) */ xi.v = r.v * hi_inv.v; kernel_deval_vec(&xi, &wi, &wi_dx); @@ -592,8 +762,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( /* Get the kernel for hj. 
*/ hj.v = vec_load(Hj); - hj_inv.v = vec_rcp(hj.v); - hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f)); + hj_inv = vec_reciprocal(hj); hjd_inv = pow_dimension_plus_one_vec(hj_inv); /* 1/h^(d+1) */ xj.v = r.v * hj_inv.v; kernel_deval_vec(&xj, &wj, &wj_dx); @@ -619,7 +788,9 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( /* Now, convolve with the kernel */ visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v; - sph_term.v = (piPOrho.v * wi_dr.v + pjPOrho.v * wj_dr.v) * ri.v; + sph_term.v = + (grad_hi.v * piPOrho2.v * wi_dr.v + grad_hj.v * pjPOrho2.v * wj_dr.v) * + ri.v; /* Eventually get the acceleration */ acc.v = visc_term.v + sph_term.v; @@ -764,9 +935,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( vector hi, hj, hi_inv, hj_inv; vector hid_inv, hjd_inv; vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; - vector piPOrho, pjPOrho, pirho, pjrho; + vector piPOrho2, pjPOrho2, pirho, pjrho; vector mj; vector f; + vector grad_hi, grad_hj; vector dx[3]; vector vi[3], vj[3]; vector pia[3]; @@ -782,14 +954,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( #if VEC_SIZE == 8 mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, - pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, - pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); - pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, - pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, - pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); + piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, + pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2, + pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2, + 
pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2); + pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, + pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2, + pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2, + pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2); + grad_hi.v = + vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f, + pi[4]->force.f, pi[5]->force.f, pi[6]->force.f, pi[7]->force.f); + grad_hj.v = + vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f, + pj[4]->force.f, pj[5]->force.f, pj[6]->force.f, pj[7]->force.f); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, pi[5]->rho, pi[6]->rho, pi[7]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, @@ -820,10 +998,14 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( pj[6]->force.balsara, pj[7]->force.balsara); #elif VEC_SIZE == 4 mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, - pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); - pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, - pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); + piPOrho2.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2, + pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2); + pjPOrho2.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2, + pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2); + grad_hi.v = + vec_set(pi[0]->force.f, pi[1]->force.f, pi[2]->force.f, pi[3]->force.f); + grad_hj.v = + vec_set(pj[0]->force.f, pj[1]->force.f, pj[2]->force.f, pj[3]->force.f); pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed, @@ -846,14 +1028,12 @@ __attribute__((always_inline)) INLINE static void 
runner_iact_nonsym_vec_force( /* Get the radius and inverse radius. */ r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); + ri = vec_reciprocal_sqrt(r2); r.v = r2.v * ri.v; /* Get the kernel for hi. */ hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f)); + hi_inv = vec_reciprocal(hi); hid_inv = pow_dimension_plus_one_vec(hi_inv); xi.v = r.v * hi_inv.v; kernel_deval_vec(&xi, &wi, &wi_dx); @@ -861,8 +1041,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( /* Get the kernel for hj. */ hj.v = vec_load(Hj); - hj_inv.v = vec_rcp(hj.v); - hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f)); + hj_inv = vec_reciprocal(hj); hjd_inv = pow_dimension_plus_one_vec(hj_inv); xj.v = r.v * hj_inv.v; kernel_deval_vec(&xj, &wj, &wj_dx); @@ -888,7 +1067,9 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( /* Now, convolve with the kernel */ visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v; - sph_term.v = (piPOrho.v * wi_dr.v + pjPOrho.v * wj_dr.v) * ri.v; + sph_term.v = + (grad_hi.v * piPOrho2.v * wi_dr.v + grad_hj.v * pjPOrho2.v * wj_dr.v) * + ri.v; /* Eventually get the acceleration */ acc.v = visc_term.v + sph_term.v; diff --git a/src/hydro/Gadget2/hydro_io.h b/src/hydro/Gadget2/hydro_io.h index 433aef64c388c8bc4989e883f10a8f0d3eeb30e9..162d368dd073be2fd0f06f4ecbc1431fb34e7798 100644 --- a/src/hydro/Gadget2/hydro_io.h +++ b/src/hydro/Gadget2/hydro_io.h @@ -57,12 +57,12 @@ void hydro_read_particles(struct part* parts, struct io_props* list, float convert_u(struct engine* e, struct part* p) { - return hydro_get_internal_energy(p, 0); + return hydro_get_internal_energy(p); } float convert_P(struct engine* e, struct part* p) { - return hydro_get_pressure(p, 0); + return hydro_get_pressure(p); } /** diff --git a/src/hydro/Gadget2/hydro_part.h 
b/src/hydro/Gadget2/hydro_part.h index 4bbbf0aede12b692b15442b71a03ffbbcf2f8378..69ae79666e1db4e4f405c653cfc533606989a73a 100644 --- a/src/hydro/Gadget2/hydro_part.h +++ b/src/hydro/Gadget2/hydro_part.h @@ -42,6 +42,9 @@ struct xpart { /* Velocity at the last full step. */ float v_full[3]; + /* Entropy at the last full step. */ + float entropy_full; + /* Additional data used to record cooling information */ struct cooling_xpart_data cooling_data; @@ -50,6 +53,12 @@ struct xpart { /* Data of a single particle. */ struct part { + /* Particle ID. */ + long long id; + + /* Pointer to corresponding gravity part. */ + struct gpart* gpart; + /* Particle position. */ double x[3]; @@ -65,12 +74,6 @@ struct part { /* Particle mass. */ float mass; - /* Particle time of beginning of time-step. */ - int ti_begin; - - /* Particle time of end of time-step. */ - int ti_end; - /* Particle density. */ float rho; @@ -124,11 +127,18 @@ struct part { } force; }; - /* Particle ID. */ - long long id; + /* Time-step length */ + timebin_t time_bin; - /* Pointer to corresponding gravity part. */ - struct gpart* gpart; +#ifdef SWIFT_DEBUG_CHECKS + + /* Time of the last drift */ + integertime_t ti_drift; + + /* Time of the last kick */ + integertime_t ti_kick; + +#endif } SWIFT_STRUCT_ALIGN; diff --git a/src/hydro/Gizmo/hydro.h b/src/hydro/Gizmo/hydro.h index 1c64291ee64dd770b1f1a76371f67a34230365c7..c59af05460157a756c15d8ca84af8a7834fde2d3 100644 --- a/src/hydro/Gizmo/hydro.h +++ b/src/hydro/Gizmo/hydro.h @@ -178,11 +178,10 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( * @param timeBase Conversion factor between integer time and physical time. 
*/ __attribute__((always_inline)) INLINE static void hydro_prepare_force( - struct part* restrict p, struct xpart* restrict xp, int ti_current, - double timeBase) { + struct part* restrict p, struct xpart* restrict xp) { /* Set the physical time step */ - p->force.dt = (p->ti_end - p->ti_begin) * timeBase; + p->force.dt = get_timestep(p->time_bin, 0.); // MATTHIEU 0 /* Initialize time step criterion variables */ p->timestepvars.vmax = 0.0f; @@ -233,6 +232,16 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration( p->force.h_dt = 0.0f; } +/** + * @brief Sets the values to be predicted in the drifts to their values at a + * kick time + * + * @param p The particle. + * @param xp The extended data of this particle. + */ +__attribute__((always_inline)) INLINE static void hydro_reset_predicted_values( + struct part* restrict p, const struct xpart* restrict xp) {} + /** * @brief Converts the hydrodynamic variables from the initial condition file to * conserved variables that can be used during the integration @@ -250,7 +259,7 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration( * @param p The particle to act upon. */ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( - struct part* p) { + struct part* p, struct xpart* xp) { const float volume = p->geometry.volume; const float m = p->conserved.mass; @@ -283,8 +292,7 @@ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( * @param timeBase Conversion factor between integer and physical time. */ __attribute__((always_inline)) INLINE static void hydro_predict_extra( - struct part* p, struct xpart* xp, float dt, int t0, int t1, - double timeBase) { + struct part* p, struct xpart* xp, float dt) { const float h_inv = 1.0f / p->h; @@ -367,9 +375,10 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( * @param half_dt Half the physical time step. 
*/ __attribute__((always_inline)) INLINE static void hydro_kick_extra( - struct part* p, struct xpart* xp, float dt, float half_dt) { + struct part* p, struct xpart* xp, float dt) { float oldm, oldp[3], anew[3]; + const float half_dt = 0.5f * dt; // MATTHIEU /* Retrieve the current value of the gravitational acceleration from the gpart. We are only allowed to do this because this is the kick. We still @@ -441,10 +450,9 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( * @brief Returns the internal energy of a particle * * @param p The particle of interest. - * @param dt Time since the last kick. */ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy( - const struct part* restrict p, float dt) { + const struct part* restrict p) { return p->primitives.P / hydro_gamma_minus_one / p->primitives.rho; } @@ -453,10 +461,9 @@ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy( * @brief Returns the entropy of a particle * * @param p The particle of interest. - * @param dt Time since the last kick. */ __attribute__((always_inline)) INLINE static float hydro_get_entropy( - const struct part* restrict p, float dt) { + const struct part* restrict p) { return p->primitives.P / pow_gamma(p->primitives.rho); } @@ -465,10 +472,9 @@ __attribute__((always_inline)) INLINE static float hydro_get_entropy( * @brief Returns the sound speed of a particle * * @param p The particle of interest. - * @param dt Time since the last kick. 
*/ __attribute__((always_inline)) INLINE static float hydro_get_soundspeed( - const struct part* restrict p, float dt) { + const struct part* restrict p) { return sqrtf(hydro_gamma * p->primitives.P / p->primitives.rho); } @@ -477,10 +483,9 @@ __attribute__((always_inline)) INLINE static float hydro_get_soundspeed( * @brief Returns the pressure of a particle * * @param p The particle of interest - * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_pressure( - const struct part* restrict p, float dt) { + const struct part* restrict p) { return p->primitives.P; } diff --git a/src/hydro/Gizmo/hydro_debug.h b/src/hydro/Gizmo/hydro_debug.h index f4c071023a627b177fd06373856f25611fc9485d..a05ff9a7d96f04ca3354235540adc31386a2d2e3 100644 --- a/src/hydro/Gizmo/hydro_debug.h +++ b/src/hydro/Gizmo/hydro_debug.h @@ -24,8 +24,7 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "v=[%.3e,%.3e,%.3e], " "a=[%.3e,%.3e,%.3e], " "h=%.3e, " - "ti_begin=%d, " - "ti_end=%d, " + "time_bin=%d, " "primitives={" "v=[%.3e,%.3e,%.3e], " "rho=%.3e, " @@ -54,9 +53,9 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "curl_v=[%.3e,%.3e,%.3e], " "wcount=%.3e}\n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->a_hydro[0], - p->a_hydro[1], p->a_hydro[2], p->h, p->ti_begin, p->ti_end, - p->primitives.v[0], p->primitives.v[1], p->primitives.v[2], - p->primitives.rho, p->primitives.P, p->primitives.gradients.rho[0], + p->a_hydro[1], p->a_hydro[2], p->h, p->time_bin, p->primitives.v[0], + p->primitives.v[1], p->primitives.v[2], p->primitives.rho, + p->primitives.P, p->primitives.gradients.rho[0], p->primitives.gradients.rho[1], p->primitives.gradients.rho[2], p->primitives.gradients.v[0][0], p->primitives.gradients.v[0][1], p->primitives.gradients.v[0][2], p->primitives.gradients.v[1][0], diff --git a/src/hydro/Gizmo/hydro_iact.h b/src/hydro/Gizmo/hydro_iact.h index 
cf2b9a223b49c3ce2fbd6874b83c523e8213a5ce..aba6bd53c1c9557929426c11a0986e5f02888874 100644 --- a/src/hydro/Gizmo/hydro_iact.h +++ b/src/hydro/Gizmo/hydro_iact.h @@ -411,7 +411,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common( UPDATE particle j. ==> we update particle j if (MODE IS 1) OR (j IS INACTIVE) */ - if (mode == 1 || pj->ti_end > pi->ti_end) { + + // MATTHIEU + const integertime_t pj_ti_end = 0; // get_integer_time_end(pj->time_bin); + const integertime_t pi_ti_end = 0; // get_integer_time_end(pi->time_bin); + + if (mode == 1 || pj_ti_end > pi_ti_end) { /* Store mass flux */ mflux = dtj * Anorm * totflux[0]; pj->gravity.mflux[0] -= mflux * dx[0]; diff --git a/src/hydro/Gizmo/hydro_part.h b/src/hydro/Gizmo/hydro_part.h index c4919ff173c64a4a83a5d1bf61ab82697cc03096..f6592ca107d8d2c6970f34ebd3929e226b53a355 100644 --- a/src/hydro/Gizmo/hydro_part.h +++ b/src/hydro/Gizmo/hydro_part.h @@ -38,6 +38,12 @@ struct xpart { /* Data of a single particle. */ struct part { + /* Particle ID. */ + long long id; + + /* Associated gravitas. */ + struct gpart *gpart; + /* Particle position. */ double x[3]; @@ -50,12 +56,6 @@ struct part { /* Particle smoothing length. */ float h; - /* Particle time of beginning of time-step. */ - int ti_begin; - - /* Particle time of end of time-step. */ - int ti_end; - /* Old internal energy flux */ float du_dt; @@ -197,11 +197,18 @@ struct part { } gravity; - /* Particle ID. */ - long long id; + /* Time-step length */ + timebin_t time_bin; - /* Associated gravitas. 
*/ - struct gpart *gpart; +#ifdef SWIFT_DEBUG_CHECKS + + /* Time of the last drift */ + integertime_t ti_drift; + + /* Time of the last kick */ + integertime_t ti_kick; + +#endif } SWIFT_STRUCT_ALIGN; diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h index beb6f98b8c0d781aa709fb6ee3ca564a52704db2..20856b7e038855e22aa3776a74ba9f495ff6c93f 100644 --- a/src/hydro/Minimal/hydro.h +++ b/src/hydro/Minimal/hydro.h @@ -49,26 +49,22 @@ * energy from the thermodynamic variable. * * @param p The particle of interest - * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy( - const struct part *restrict p, float dt) { + const struct part *restrict p) { - return p->u + p->u_dt * dt; + return p->u; } /** * @brief Returns the pressure of a particle * * @param p The particle of interest - * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_pressure( - const struct part *restrict p, float dt) { - - const float u = p->u + p->u_dt * dt; + const struct part *restrict p) { - return gas_pressure_from_internal_energy(p->rho, u); + return gas_pressure_from_internal_energy(p->rho, p->u); } /** @@ -79,24 +75,20 @@ __attribute__((always_inline)) INLINE static float hydro_get_pressure( * the thermodynamic variable. 
* * @param p The particle of interest - * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_entropy( - const struct part *restrict p, float dt) { - - const float u = p->u + p->u_dt * dt; + const struct part *restrict p) { - return gas_entropy_from_internal_energy(p->rho, u); + return gas_entropy_from_internal_energy(p->rho, p->u); } /** * @brief Returns the sound speed of a particle * * @param p The particle of interest - * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_soundspeed( - const struct part *restrict p, float dt) { + const struct part *restrict p) { return p->force.soundspeed; } @@ -124,68 +116,31 @@ __attribute__((always_inline)) INLINE static float hydro_get_mass( } /** - * @brief Modifies the thermal state of a particle to the imposed internal - * energy + * @brief Returns the time derivative of internal energy of a particle * - * This overwrites the current state of the particle but does *not* change its - * time-derivatives. Internal energy, pressure, sound-speed and signal velocity - * will be updated. + * We assume a constant density. 
* * @param p The particle - * @param u The new internal energy + * @param p The particle of interest */ -__attribute__((always_inline)) INLINE static void hydro_set_internal_energy( - struct part *restrict p, float u) { - - p->u = u; - - /* Compute the new pressure */ - const float pressure = gas_pressure_from_internal_energy(p->rho, p->u); - - /* Compute the new sound speed */ - const float soundspeed = gas_soundspeed_from_internal_energy(p->rho, p->u); - - /* Update the signal velocity */ - const float v_sig_old = p->force.v_sig; - const float v_sig_new = p->force.v_sig - p->force.soundspeed + soundspeed; - const float v_sig = max(v_sig_old, v_sig_new); +__attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( + const struct part *restrict p) { - p->force.soundspeed = soundspeed; - p->force.pressure = pressure; - p->force.v_sig = v_sig; + return p->u_dt; } /** - * @brief Modifies the thermal state of a particle to the imposed entropy + * @brief Sets the time derivative of internal energy of a particle * - * This overwrites the current state of the particle but does *not* change its - * time-derivatives. Internal energy, pressure, sound-speed and signal velocity - * will be updated. + * We assume a constant density. * - * @param p The particle - * @param S The new entropy + * @param p The particle of interest. + * @param du_dt The new time derivative of the internal energy.
*/ -__attribute__((always_inline)) INLINE static void hydro_set_entropy( - struct part *restrict p, float S) { - - p->u = gas_internal_energy_from_entropy(p->rho, S); - - /* Compute the pressure */ - const float pressure = gas_pressure_from_internal_energy(p->rho, p->u); - - /* Compute the new sound speed */ - const float soundspeed = gas_soundspeed_from_internal_energy(p->rho, p->u); - - /* Update the signal velocity */ - const float v_sig_old = p->force.v_sig; - const float v_sig_new = p->force.v_sig - p->force.soundspeed + soundspeed; - const float v_sig = max(v_sig_old, v_sig_new); +__attribute__((always_inline)) INLINE static void hydro_set_internal_energy_dt( + struct part *restrict p, float du_dt) { - p->force.soundspeed = soundspeed; - p->force.pressure = pressure; - p->force.v_sig = v_sig; + p->u_dt = du_dt; } - /** * @brief Computes the hydro time-step of a given particle * @@ -210,26 +165,6 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep( return dt_cfl; } -/** - * @brief Initialises the particles for the first time - * - * This function is called only once just after the ICs have been - * read in to do some conversions or assignments between the particle - * and extended particle fields. - * - * @param p The particle to act upon - * @param xp The extended particle data to act upon - */ -__attribute__((always_inline)) INLINE static void hydro_first_init_part( - struct part *restrict p, struct xpart *restrict xp) { - - p->ti_begin = 0; - p->ti_end = 0; - xp->v_full[0] = p->v[0]; - xp->v_full[1] = p->v[1]; - xp->v_full[2] = p->v[2]; -} - /** * @brief Prepares a particle for the density calculation. 
* @@ -292,16 +227,12 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( * * @param p The particle to act upon * @param xp The extended particle data to act upon - * @param ti_current The current time (on the timeline) - * @param timeBase The minimal time-step size */ __attribute__((always_inline)) INLINE static void hydro_prepare_force( - struct part *restrict p, struct xpart *restrict xp, int ti_current, - double timeBase) { + struct part *restrict p, struct xpart *restrict xp) { /* Compute the pressure */ - const float half_dt = (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase; - const float pressure = hydro_get_pressure(p, half_dt); + const float pressure = gas_pressure_from_internal_energy(p->rho, p->u); /* Compute the sound speed */ const float soundspeed = gas_soundspeed_from_pressure(p->rho, pressure); @@ -339,6 +270,25 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration( p->force.v_sig = 0.0f; } +/** + * @brief Sets the values to be predicted in the drifts to their values at a + * kick time + * + * @param p The particle. + * @param xp The extended data of this particle. + */ +__attribute__((always_inline)) INLINE static void hydro_reset_predicted_values( + struct part *restrict p, const struct xpart *restrict xp) { + + /* Re-set the predicted velocities */ + p->v[0] = xp->v_full[0]; + p->v[1] = xp->v_full[1]; + p->v[2] = xp->v_full[2]; + + /* Re-set the internal energy */ + p->u = xp->u_full; +} + /** * @brief Predict additional particle fields forward in time when drifting * @@ -348,13 +298,9 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration( * @param p The particle. * @param xp The extended data of the particle. * @param dt The drift time-step. - * @param t0 The time at the start of the drift (on the timeline). - * @param t1 The time at the end of the drift (on the timeline). - * @param timeBase The minimal time-step size.
*/ __attribute__((always_inline)) INLINE static void hydro_predict_extra( - struct part *restrict p, const struct xpart *restrict xp, float dt, int t0, - int t1, double timeBase) { + struct part *restrict p, const struct xpart *restrict xp, float dt) { const float h_inv = 1.f / p->h; @@ -372,9 +318,11 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( else p->rho *= expf(w2); - /* Drift the pressure */ - const float dt_entr = (t1 - (p->ti_begin + p->ti_end) / 2) * timeBase; - const float pressure = hydro_get_pressure(p, dt_entr); + /* Predict the internal energy */ + p->u += p->u_dt * dt; + + /* Compute the new pressure */ + const float pressure = gas_pressure_from_internal_energy(p->rho, p->u); /* Compute the new sound speed */ const float soundspeed = gas_soundspeed_from_pressure(p->rho, pressure); @@ -407,24 +355,18 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( * @param p The particle to act upon * @param xp The particle extended data to act upon * @param dt The time-step for this kick - * @param half_dt The half time-step for this kick */ __attribute__((always_inline)) INLINE static void hydro_kick_extra( - struct part *restrict p, struct xpart *restrict xp, float dt, - float half_dt) { + struct part *restrict p, struct xpart *restrict xp, float dt) { /* Do not decrease the energy by more than a factor of 2*/ - const float u_change = p->u_dt * dt; - if (u_change > -0.5f * p->u) - p->u += u_change; - else - p->u *= 0.5f; - - /* Do not 'overcool' when timestep increases */ - if (p->u + p->u_dt * half_dt < 0.5f * p->u) p->u_dt = -0.5f * p->u / half_dt; + if (dt > 0. 
&& p->u_dt * dt < -0.5f * xp->u_full) { + p->u_dt = -0.5f * xp->u_full / dt; + } + xp->u_full += p->u_dt * dt; /* Compute the pressure */ - const float pressure = gas_pressure_from_internal_energy(p->rho, p->u); + const float pressure = gas_pressure_from_internal_energy(p->rho, xp->u_full); /* Compute the sound speed */ const float soundspeed = gas_soundspeed_from_internal_energy(p->rho, p->u); @@ -442,9 +384,10 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( * This can be used to convert internal energy into entropy for instance. * * @param p The particle to act upon + * @param xp The extended particle to act upon */ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( - struct part *restrict p) { + struct part *restrict p, struct xpart *restrict xp) { /* Compute the pressure */ const float pressure = gas_pressure_from_internal_energy(p->rho, p->u); @@ -456,4 +399,27 @@ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( p->force.soundspeed = soundspeed; } +/** + * @brief Initialises the particles for the first time + * + * This function is called only once just after the ICs have been + * read in to do some conversions or assignments between the particle + * and extended particle fields. 
+ * + * @param p The particle to act upon + * @param xp The extended particle data to act upon + */ +__attribute__((always_inline)) INLINE static void hydro_first_init_part( + struct part *restrict p, struct xpart *restrict xp) { + + p->time_bin = 0; + xp->v_full[0] = p->v[0]; + xp->v_full[1] = p->v[1]; + xp->v_full[2] = p->v[2]; + xp->u_full = p->u; + + hydro_reset_acceleration(p); + hydro_init_part(p); +} + #endif /* SWIFT_MINIMAL_HYDRO_H */ diff --git a/src/hydro/Minimal/hydro_debug.h b/src/hydro/Minimal/hydro_debug.h index 16ae62413a0d76b7bf871e615fe5684219752fee..876ce148824489d4c43358c2c519aa3b90dcf002 100644 --- a/src/hydro/Minimal/hydro_debug.h +++ b/src/hydro/Minimal/hydro_debug.h @@ -40,12 +40,11 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e], " "u=%.3e, du/dt=%.3e v_sig=%.3e, P=%.3e\n" "h=%.3e, dh/dt=%.3e wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, " - "t_begin=%d, t_end=%d\n", + "time_bin=%d\n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->u, p->u_dt, p->force.v_sig, p->force.pressure, p->h, p->force.h_dt, - (int)p->density.wcount, p->mass, p->density.rho_dh, p->rho, p->ti_begin, - p->ti_end); + (int)p->density.wcount, p->mass, p->density.rho_dh, p->rho, p->time_bin); } #endif /* SWIFT_MINIMAL_HYDRO_DEBUG_H */ diff --git a/src/hydro/Minimal/hydro_io.h b/src/hydro/Minimal/hydro_io.h index 01a75b17fd5577cfcfb48d3afac22579f30fcf7a..8c83349a3e17d6b3375663698af7beeeab0636bc 100644 --- a/src/hydro/Minimal/hydro_io.h +++ b/src/hydro/Minimal/hydro_io.h @@ -71,12 +71,12 @@ void hydro_read_particles(struct part* parts, struct io_props* list, float convert_S(struct engine* e, struct part* p) { - return hydro_get_entropy(p, 0); + return hydro_get_entropy(p); } float convert_P(struct engine* e, struct part* p) { - return hydro_get_pressure(p, 0); + return 
hydro_get_pressure(p); } /** diff --git a/src/hydro/Minimal/hydro_part.h b/src/hydro/Minimal/hydro_part.h index 8542177278998d5e0b830dc164988611549ef24d..dabae1a546d66f61db4f9796c21b71817ca20aac 100644 --- a/src/hydro/Minimal/hydro_part.h +++ b/src/hydro/Minimal/hydro_part.h @@ -49,6 +49,9 @@ struct xpart { /*! Velocity at the last full step. */ float v_full[3]; + /*! Internal energy at the last full step. */ + float u_full; + /*! Additional data used to record cooling information */ struct cooling_xpart_data cooling_data; @@ -63,6 +66,12 @@ struct xpart { */ struct part { + /*! Particle unique ID. */ + long long id; + + /*! Pointer to corresponding gravity part. */ + struct gpart* gpart; + /*! Particle position. */ double x[3]; @@ -78,12 +87,6 @@ struct part { /*! Particle smoothing length. */ float h; - /*! Time at the beginning of time-step. */ - int ti_begin; - - /*! Time at the end of time-step. */ - int ti_end; - /*! Particle internal energy. */ float u; @@ -143,11 +146,18 @@ struct part { } force; }; - /*! Particle unique ID. */ - long long id; + /*! Time-step length */ + timebin_t time_bin; - /*! Pointer to corresponding gravity part. 
*/ - struct gpart* gpart; +#ifdef SWIFT_DEBUG_CHECKS + + /* Time of the last drift */ + integertime_t ti_drift; + + /* Time of the last kick */ + integertime_t ti_kick; + +#endif } SWIFT_STRUCT_ALIGN; diff --git a/src/hydro/PressureEntropy/hydro.h b/src/hydro/PressureEntropy/hydro.h index 8c063596efd3be97ebb4da6b6879ac06122bd357..f22bb8a13a8ba4d896a77bd4c4f5e86bed5a5960 100644 --- a/src/hydro/PressureEntropy/hydro.h +++ b/src/hydro/PressureEntropy/hydro.h @@ -43,50 +43,42 @@ * @brief Returns the internal energy of a particle * * @param p The particle of interest - * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_internal_energy( - const struct part *restrict p, float dt) { - - const float entropy = p->entropy + p->entropy_dt * dt; + const struct part *restrict p) { - return gas_internal_energy_from_entropy(p->rho_bar, entropy); + return gas_internal_energy_from_entropy(p->rho_bar, p->entropy); } /** * @brief Returns the pressure of a particle * * @param p The particle of interest - * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_pressure( - const struct part *restrict p, float dt) { - - const float entropy = p->entropy + p->entropy_dt * dt; + const struct part *restrict p) { - return gas_pressure_from_entropy(p->rho_bar, entropy); + return gas_pressure_from_entropy(p->rho_bar, p->entropy); } /** * @brief Returns the entropy of a particle * * @param p The particle of interest - * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_entropy( - const struct part *restrict p, float dt) { + const struct part *restrict p) { - return p->entropy + p->entropy_dt * dt; + return p->entropy; } /** * @brief Returns the sound speed of a particle * * @param p The particle of interest - * @param dt Time since the last kick */ __attribute__((always_inline)) INLINE static float hydro_get_soundspeed( - const struct part *restrict p, 
float dt) { + const struct part *restrict p) { return p->force.soundspeed; } @@ -114,72 +106,30 @@ __attribute__((always_inline)) INLINE static float hydro_get_mass( } /** - * @brief Modifies the thermal state of a particle to the imposed internal - * energy + * @brief Returns the time derivative of internal energy of a particle * - * This overwrites the current state of the particle but does *not* change its - * time-derivatives. Entropy, pressure, sound-speed and signal velocity will be - * updated. + * We assume a constant density. * - * @param p The particle - * @param u The new internal energy + * @param p The particle of interest */ -__attribute__((always_inline)) INLINE static void hydro_set_internal_energy( - struct part *restrict p, float u) { - - p->entropy = gas_entropy_from_internal_energy(p->rho_bar, u); - p->entropy_one_over_gamma = pow_one_over_gamma(p->entropy); - - /* Compute the pressure */ - const float pressure = gas_pressure_from_entropy(p->rho_bar, p->entropy); - - /* Compute the sound speed from the pressure*/ - const float soundspeed = gas_soundspeed_from_pressure(p->rho_bar, pressure); - - /* Update the signal velocity */ - const float v_sig_old = p->force.v_sig; - const float v_sig_new = p->force.v_sig - p->force.soundspeed + soundspeed; - const float v_sig = max(v_sig_old, v_sig_new); - - const float rho_bar_inv = 1.f / p->rho_bar; +__attribute__((always_inline)) INLINE static float hydro_get_internal_energy_dt( + const struct part *restrict p) { - p->force.soundspeed = soundspeed; - p->force.P_over_rho2 = pressure * rho_bar_inv * rho_bar_inv; - p->force.v_sig = v_sig; + return gas_internal_energy_from_entropy(p->rho_bar, p->entropy_dt); } /** - * @brief Modifies the thermal state of a particle to the imposed entropy + * @brief Sets the time derivative of internal energy of a particle * - * This overwrites the current state of the particle but does *not* change its - * time-derivatives. 
Entropy, pressure, sound-speed and signal velocity will be - * updated. + * We assume a constant density. * - * @param p The particle - * @param S The new entropy + * @param p The particle of interest. + * @param du_dt The new time derivative of the internal energy. */ -__attribute__((always_inline)) INLINE static void hydro_set_entropy( - struct part *restrict p, float S) { +__attribute__((always_inline)) INLINE static void hydro_set_internal_energy_dt( + struct part *restrict p, float du_dt) { - p->entropy = S; - p->entropy_one_over_gamma = pow_one_over_gamma(p->entropy); - - /* Compute the pressure */ - const float pressure = gas_pressure_from_entropy(p->rho_bar, p->entropy); - - /* Compute the sound speed from the pressure*/ - const float soundspeed = gas_soundspeed_from_pressure(p->rho_bar, pressure); - - /* Update the signal velocity */ - const float v_sig_old = p->force.v_sig; - const float v_sig_new = p->force.v_sig - p->force.soundspeed + soundspeed; - const float v_sig = max(v_sig_old, v_sig_new); - - const float rho_bar_inv = 1.f / p->rho_bar; - - p->force.soundspeed = soundspeed; - p->force.P_over_rho2 = pressure * rho_bar_inv * rho_bar_inv; - p->force.v_sig = v_sig; + p->entropy_dt = gas_entropy_from_internal_energy(p->rho_bar, du_dt); } /** @@ -202,27 +152,6 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep( return dt_cfl; } -/** - * @brief Initialises the particles for the first time - * - * This function is called only once just after the ICs have been - * read in to do some conversions. 
- * - * @param p The particle to act upon - * @param xp The extended particle data to act upon - */ -__attribute__((always_inline)) INLINE static void hydro_first_init_part( - struct part *restrict p, struct xpart *restrict xp) { - - p->ti_begin = 0; - p->ti_end = 0; - p->rho_bar = 0.f; - p->entropy_one_over_gamma = pow_one_over_gamma(p->entropy); - xp->v_full[0] = p->v[0]; - xp->v_full[1] = p->v[1]; - xp->v_full[2] = p->v[2]; -} - /** * @brief Prepares a particle for the density calculation. * @@ -302,12 +231,9 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( * * @param p The particle to act upon * @param xp The extended particle data to act upon - * @param ti_current The current time (on the timeline) - * @param timeBase The minimal time-step size */ __attribute__((always_inline)) INLINE static void hydro_prepare_force( - struct part *restrict p, struct xpart *restrict xp, int ti_current, - double timeBase) { + struct part *restrict p, struct xpart *restrict xp) { const float fac_mu = 1.f; /* Will change with cosmological integration */ @@ -320,9 +246,7 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( const float abs_div_v = fabsf(p->density.div_v); /* Compute the pressure */ - const float half_dt = (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase; - const float entropy = hydro_get_entropy(p, half_dt); - const float pressure = gas_pressure_from_entropy(p->rho_bar, entropy); + const float pressure = gas_pressure_from_entropy(p->rho_bar, p->entropy); /* Compute the sound speed from the pressure*/ const float soundspeed = gas_soundspeed_from_pressure(p->rho_bar, pressure); @@ -375,19 +299,34 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration( p->force.v_sig = 0.0f; } +/** + * @brief Sets the values to be predicted in the drifts to their values at a + * kick time + * + * @param p The particle. + * @param xp The extended data of this particle. 
+ */ +__attribute__((always_inline)) INLINE static void hydro_reset_predicted_values( + struct part *restrict p, const struct xpart *restrict xp) { + + /* Re-set the predicted velocities */ + p->v[0] = xp->v_full[0]; + p->v[1] = xp->v_full[1]; + p->v[2] = xp->v_full[2]; + + /* Re-set the entropy */ + p->entropy = xp->entropy_full; +} + /** * @brief Predict additional particle fields forward in time when drifting * * @param p The particle * @param xp The extended data of the particle * @param dt The drift time-step. - * @param t0 The time at the start of the drift (on the timeline). - * @param t1 The time at the end of the drift (on the timeline). - * @param timeBase The minimal time-step size */ __attribute__((always_inline)) INLINE static void hydro_predict_extra( - struct part *restrict p, const struct xpart *restrict xp, float dt, int t0, - int t1, double timeBase) { + struct part *restrict p, const struct xpart *restrict xp, float dt) { const float h_inv = 1.f / p->h; @@ -408,12 +347,11 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( p->rho_bar *= expf(w2); } - /* Drift the entropy */ - const float dt_entr = (t1 - (p->ti_begin + p->ti_end) / 2) * timeBase; - const float entropy = hydro_get_entropy(p, dt_entr); + /* Predict the entropy */ + p->entropy += p->entropy_dt * dt; /* Compute the pressure */ - const float pressure = gas_pressure_from_entropy(p->rho_bar, entropy); + const float pressure = gas_pressure_from_entropy(p->rho_bar, p->entropy); /* Compute the new sound speed */ const float soundspeed = gas_soundspeed_from_pressure(p->rho_bar, pressure); @@ -423,7 +361,7 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( const float P_over_rho2 = pressure * rho_bar_inv * rho_bar_inv; /* Update the variables */ - p->entropy_one_over_gamma = pow_one_over_gamma(entropy); + p->entropy_one_over_gamma = pow_one_over_gamma(p->entropy); p->force.soundspeed = soundspeed; p->force.P_over_rho2 = P_over_rho2; } @@ -453,22 
+391,17 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( * @param half_dt The half time-step for this kick */ __attribute__((always_inline)) INLINE static void hydro_kick_extra( - struct part *restrict p, struct xpart *restrict xp, float dt, - float half_dt) { + struct part *restrict p, struct xpart *restrict xp, float dt) { /* Do not decrease the entropy (temperature) by more than a factor of 2*/ - const float entropy_change = p->entropy_dt * dt; - if (entropy_change > -0.5f * p->entropy) - p->entropy += entropy_change; - else - p->entropy *= 0.5f; - - /* Do not 'overcool' when timestep increases */ - if (p->entropy + p->entropy_dt * half_dt < 0.5f * p->entropy) - p->entropy_dt = -0.5f * p->entropy / half_dt; + if (dt > 0. && p->entropy_dt * dt < -0.5f * xp->entropy_full) { + p->entropy_dt = -0.5f * xp->entropy_full / dt; + } + xp->entropy_full += p->entropy_dt * dt; /* Compute the pressure */ - const float pressure = gas_pressure_from_entropy(p->rho_bar, p->entropy); + const float pressure = + gas_pressure_from_entropy(p->rho_bar, xp->entropy_full); /* Compute the new sound speed */ const float soundspeed = gas_soundspeed_from_pressure(p->rho_bar, pressure); @@ -490,10 +423,11 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( * @param p The particle to act upon */ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( - struct part *restrict p) { + struct part *restrict p, struct xpart *restrict xp) { /* We read u in the entropy field. 
We now get S from u */ - p->entropy = gas_entropy_from_internal_energy(p->rho_bar, p->entropy); + xp->entropy_full = gas_entropy_from_internal_energy(p->rho_bar, p->entropy); + p->entropy = xp->entropy_full; p->entropy_one_over_gamma = pow_one_over_gamma(p->entropy); /* Compute the pressure */ @@ -510,4 +444,27 @@ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( p->force.P_over_rho2 = P_over_rho2; } +/** + * @brief Initialises the particles for the first time + * + * This function is called only once just after the ICs have been + * read in to do some conversions. + * + * @param p The particle to act upon + * @param xp The extended particle data to act upon + */ +__attribute__((always_inline)) INLINE static void hydro_first_init_part( + struct part *restrict p, struct xpart *restrict xp) { + + p->time_bin = 0; + p->rho_bar = 0.f; + p->entropy_one_over_gamma = pow_one_over_gamma(p->entropy); + xp->v_full[0] = p->v[0]; + xp->v_full[1] = p->v[1]; + xp->v_full[2] = p->v[2]; + + hydro_reset_acceleration(p); + hydro_init_part(p); +} + #endif /* SWIFT_PRESSURE_ENTROPY_HYDRO_H */ diff --git a/src/hydro/PressureEntropy/hydro_debug.h b/src/hydro/PressureEntropy/hydro_debug.h index 486543793515795092e7cc97fe7b567b8230be3b..3a0a315a4fa0eb4710042e8020002691ed9c425a 100644 --- a/src/hydro/PressureEntropy/hydro_debug.h +++ b/src/hydro/PressureEntropy/hydro_debug.h @@ -29,7 +29,6 @@ * Follows eqautions (19), (21) and (22) of Hopkins, P., MNRAS, 2013, * Volume 428, Issue 4, pp. 2840-2856 with a simple Balsara viscosity term. 
*/ - __attribute__((always_inline)) INLINE static void hydro_debug_particle( const struct part* p, const struct xpart* xp) { printf( @@ -37,14 +36,14 @@ __attribute__((always_inline)) INLINE static void hydro_debug_particle( "v=[%.3e,%.3e,%.3e],v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " "h=%.3e, wcount=%.3f, wcount_dh=%.3e, m=%.3e, dh_drho=%.3e, rho=%.3e, " "rho_bar=%.3e, P=%.3e, dP_dh=%.3e, P_over_rho2=%.3e, S=%.3e, S^1/g=%.3e, " - "dS/dt=%.3e,\nc=%.3e v_sig=%e dh/dt=%.3e t_begin=%d, t_end=%d\n", + "dS/dt=%.3e,\nc=%.3e v_sig=%e dh/dt=%.3e time_bin=%d\n", p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], xp->v_full[0], xp->v_full[1], xp->v_full[2], p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->h, p->density.wcount, p->density.wcount_dh, p->mass, p->density.rho_dh, - p->rho, p->rho_bar, hydro_get_pressure(p, 0.), p->density.pressure_dh, + p->rho, p->rho_bar, hydro_get_pressure(p), p->density.pressure_dh, p->force.P_over_rho2, p->entropy, p->entropy_one_over_gamma, p->entropy_dt, p->force.soundspeed, p->force.v_sig, p->force.h_dt, - p->ti_begin, p->ti_end); + p->time_bin); } #endif /* SWIFT_PRESSURE_ENTROPY_HYDRO_DEBUG_H */ diff --git a/src/hydro/PressureEntropy/hydro_io.h b/src/hydro/PressureEntropy/hydro_io.h index 9914a656466f3f0d0a5eeb79b511706d7068ffc6..fcc8439f64d299b7dcb59e819f8dd273112ce25a 100644 --- a/src/hydro/PressureEntropy/hydro_io.h +++ b/src/hydro/PressureEntropy/hydro_io.h @@ -69,12 +69,12 @@ void hydro_read_particles(struct part* parts, struct io_props* list, float convert_u(struct engine* e, struct part* p) { - return hydro_get_internal_energy(p, 0); + return hydro_get_internal_energy(p); } float convert_P(struct engine* e, struct part* p) { - return hydro_get_pressure(p, 0); + return hydro_get_pressure(p); } /** diff --git a/src/hydro/PressureEntropy/hydro_part.h b/src/hydro/PressureEntropy/hydro_part.h index cac585ff79bae737f0e5c09860a38536cbf3a38c..b6e496918fa0e7989a8bddcfc5e8ea6b332c338e 100644 --- 
a/src/hydro/PressureEntropy/hydro_part.h +++ b/src/hydro/PressureEntropy/hydro_part.h @@ -41,6 +41,9 @@ struct xpart { /*! Velocity at the last full step. */ float v_full[3]; + /*! Entropy at the last full step. */ + float entropy_full; + /*! Additional data used to record cooling information */ struct cooling_xpart_data cooling_data; @@ -49,6 +52,12 @@ struct xpart { /* Data of a single particle. */ struct part { + /*! Particle ID. */ + long long id; + + /*! Pointer to corresponding gravity part. */ + struct gpart* gpart; + /*! Particle position. */ double x[3]; @@ -64,12 +73,6 @@ struct part { /*! Particle mass. */ float mass; - /*! Particle time of beginning of time-step. */ - int ti_begin; - - /*! Particle time of end of time-step. */ - int ti_end; - /*! Particle density. */ float rho; @@ -132,11 +135,18 @@ struct part { } force; }; - /*! Particle ID. */ - long long id; + /* Time-step length */ + timebin_t time_bin; - /*! Pointer to corresponding gravity part. */ - struct gpart* gpart; +#ifdef SWIFT_DEBUG_CHECKS + + /* Time of the last drift */ + integertime_t ti_drift; + + /* Time of the last kick */ + integertime_t ti_kick; + +#endif } SWIFT_STRUCT_ALIGN; diff --git a/src/kernel_hydro.h b/src/kernel_hydro.h index 8f38fc0d2b98988a48fe36edcbd2f9419d237d41..7bf2e01a719a29b731bb437096093b13ca086e37 100644 --- a/src/kernel_hydro.h +++ b/src/kernel_hydro.h @@ -362,6 +362,117 @@ __attribute__((always_inline)) INLINE static void kernel_deval_vec( dw_dx->v * kernel_constant_vec.v * kernel_gamma_inv_dim_plus_one_vec.v; } +/* Define constant vectors for the Wendland C2 kernel coefficients. 
*/ +#ifdef WENDLAND_C2_KERNEL +static const vector wendland_const_c0 = FILL_VEC(4.f); +static const vector wendland_const_c1 = FILL_VEC(-15.f); +static const vector wendland_const_c2 = FILL_VEC(20.f); +static const vector wendland_const_c3 = FILL_VEC(-10.f); +static const vector wendland_const_c4 = FILL_VEC(0.f); +static const vector wendland_const_c5 = FILL_VEC(1.f); +#endif + +/** + * @brief Computes the kernel function and its derivative for two particles + * using interleaved vectors. + * + * Return 0 if $u > \\gamma = H/h$ + * + * @param u The ratio of the distance to the smoothing length $u = x/h$. + * @param w (return) The value of the kernel function $W(x,h)$. + * @param dw_dx (return) The norm of the gradient of $|\\nabla W(x,h)|$. + * @param u2 The ratio of the distance to the smoothing length $u = x/h$ for + * second particle. + * @param w2 (return) The value of the kernel function $W(x,h)$ for second + * particle. + * @param dw_dx2 (return) The norm of the gradient of $|\\nabla W(x,h)|$ for + * second particle. + */ +__attribute__((always_inline)) INLINE static void kernel_deval_2_vec( + vector *u, vector *w, vector *dw_dx, vector *u2, vector *w2, + vector *dw_dx2) { + + /* Go to the range [0,1[ from [0,H[ */ + vector x, x2; + x.v = vec_mul(u->v, kernel_gamma_inv_vec.v); + x2.v = vec_mul(u2->v, kernel_gamma_inv_vec.v); + +#ifdef WENDLAND_C2_KERNEL + /* Init the iteration for Horner's scheme. 
*/ + w->v = vec_fma(wendland_const_c0.v, x.v, wendland_const_c1.v); + w2->v = vec_fma(wendland_const_c0.v, x2.v, wendland_const_c1.v); + dw_dx->v = wendland_const_c0.v; + dw_dx2->v = wendland_const_c0.v; + + /* Calculate the polynomial interleaving vector operations */ + dw_dx->v = vec_fma(dw_dx->v, x.v, w->v); + dw_dx2->v = vec_fma(dw_dx2->v, x2.v, w2->v); + w->v = vec_fma(x.v, w->v, wendland_const_c2.v); + w2->v = vec_fma(x2.v, w2->v, wendland_const_c2.v); + + dw_dx->v = vec_fma(dw_dx->v, x.v, w->v); + dw_dx2->v = vec_fma(dw_dx2->v, x2.v, w2->v); + w->v = vec_fma(x.v, w->v, wendland_const_c3.v); + w2->v = vec_fma(x2.v, w2->v, wendland_const_c3.v); + + dw_dx->v = vec_fma(dw_dx->v, x.v, w->v); + dw_dx2->v = vec_fma(dw_dx2->v, x2.v, w2->v); + w->v = vec_fma(x.v, w->v, wendland_const_c4.v); + w2->v = vec_fma(x2.v, w2->v, wendland_const_c4.v); + + dw_dx->v = vec_fma(dw_dx->v, x.v, w->v); + dw_dx2->v = vec_fma(dw_dx2->v, x2.v, w2->v); + w->v = vec_fma(x.v, w->v, wendland_const_c5.v); + w2->v = vec_fma(x2.v, w2->v, wendland_const_c5.v); + + /* Return everything */ + w->v = + vec_mul(w->v, vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_vec.v)); + w2->v = vec_mul(w2->v, + vec_mul(kernel_constant_vec.v, kernel_gamma_inv_dim_vec.v)); + dw_dx->v = vec_mul(dw_dx->v, vec_mul(kernel_constant_vec.v, + kernel_gamma_inv_dim_plus_one_vec.v)); + dw_dx2->v = vec_mul(dw_dx2->v, vec_mul(kernel_constant_vec.v, + kernel_gamma_inv_dim_plus_one_vec.v)); +#else + + /* Load x and get the interval id. */ + vector ind, ind2; + ind.m = vec_ftoi(vec_fmin(x.v * kernel_ivals_vec.v, kernel_ivals_vec.v)); + ind2.m = vec_ftoi(vec_fmin(x2.v * kernel_ivals_vec.v, kernel_ivals_vec.v)); + + /* load the coefficients. 
*/ + vector c[kernel_degree + 1], c2[kernel_degree + 1]; + for (int k = 0; k < VEC_SIZE; k++) + for (int j = 0; j < kernel_degree + 1; j++) { + c[j].f[k] = kernel_coeffs[ind.i[k] * (kernel_degree + 1) + j]; + c2[j].f[k] = kernel_coeffs[ind2.i[k] * (kernel_degree + 1) + j]; + } + + /* Init the iteration for Horner's scheme. */ + w->v = (c[0].v * x.v) + c[1].v; + w2->v = (c2[0].v * x2.v) + c2[1].v; + dw_dx->v = c[0].v; + dw_dx2->v = c2[0].v; + + /* And we're off! */ + for (int k = 2; k <= kernel_degree; k++) { + dw_dx->v = (dw_dx->v * x.v) + w->v; + dw_dx2->v = (dw_dx2->v * x2.v) + w2->v; + w->v = (x.v * w->v) + c[k].v; + w2->v = (x2.v * w2->v) + c2[k].v; + } + /* Return everything */ + w->v = w->v * kernel_constant_vec.v * kernel_gamma_inv_dim_vec.v; + w2->v = w2->v * kernel_constant_vec.v * kernel_gamma_inv_dim_vec.v; + dw_dx->v = + dw_dx->v * kernel_constant_vec.v * kernel_gamma_inv_dim_plus_one_vec.v; + dw_dx2->v = + dw_dx2->v * kernel_constant_vec.v * kernel_gamma_inv_dim_plus_one_vec.v; + +#endif +} + #endif /* Some cross-check functions */ diff --git a/src/kick.h b/src/kick.h index e3fa3bf78c7da514abacf697a9d94212020e5a7b..d6c85b5eab92a288f78f22fce2f03862bc34604f 100644 --- a/src/kick.h +++ b/src/kick.h @@ -25,34 +25,31 @@ /* Local headers. */ #include "const.h" #include "debug.h" +#include "stars.h" +#include "timeline.h" /** * @brief Perform the 'kick' operation on a #gpart * * @param gp The #gpart to kick. - * @param new_dti The (integer) time-step for this kick. + * @param ti_start The starting (integer) time of the kick + * @param ti_end The ending (integer) time of the kick * @param timeBase The minimal allowed time-step size. 
*/ __attribute__((always_inline)) INLINE static void kick_gpart( - struct gpart *restrict gp, int new_dti, double timeBase) { + struct gpart *restrict gp, integertime_t ti_start, integertime_t ti_end, + double timeBase) { - /* Compute the time step for this kick */ - const int ti_start = (gp->ti_begin + gp->ti_end) / 2; - const int ti_end = gp->ti_end + new_dti / 2; + /* Time interval for this half-kick */ const float dt = (ti_end - ti_start) * timeBase; - const float half_dt = (ti_end - gp->ti_end) * timeBase; - - /* Move particle forward in time */ - gp->ti_begin = gp->ti_end; - gp->ti_end = gp->ti_begin + new_dti; /* Kick particles in momentum space */ gp->v_full[0] += gp->a_grav[0] * dt; gp->v_full[1] += gp->a_grav[1] * dt; gp->v_full[2] += gp->a_grav[2] * dt; - /* Extra kick work */ - gravity_kick_extra(gp, dt, half_dt); + /* Kick extra variables */ + gravity_kick_extra(gp, dt); } /** @@ -60,26 +57,26 @@ __attribute__((always_inline)) INLINE static void kick_gpart( * * @param p The #part to kick. * @param xp The #xpart of the particle. - * @param new_dti The (integer) time-step for this kick. + * @param ti_start The starting (integer) time of the kick + * @param ti_end The ending (integer) time of the kick * @param timeBase The minimal allowed time-step size. 
*/ __attribute__((always_inline)) INLINE static void kick_part( - struct part *restrict p, struct xpart *restrict xp, int new_dti, - double timeBase) { + struct part *restrict p, struct xpart *restrict xp, integertime_t ti_start, + integertime_t ti_end, double timeBase) { - /* Compute the time step for this kick */ - const int ti_start = (p->ti_begin + p->ti_end) / 2; - const int ti_end = p->ti_end + new_dti / 2; + /* Time interval for this half-kick */ const float dt = (ti_end - ti_start) * timeBase; - const float half_dt = (ti_end - p->ti_end) * timeBase; - /* Move particle forward in time */ - p->ti_begin = p->ti_end; - p->ti_end = p->ti_begin + new_dti; - if (p->gpart != NULL) { - p->gpart->ti_begin = p->ti_begin; - p->gpart->ti_end = p->ti_end; - } +#ifdef SWIFT_DEBUG_CHECKS + if (p->ti_kick != ti_start) + error( + "Particle has not been kicked to the current time p->ti_kick=%lld, " + "ti_start=%lld, ti_end=%lld", + p->ti_kick, ti_start, ti_end); + + p->ti_kick = ti_end; +#endif /* Get the acceleration */ float a_tot[3] = {p->a_hydro[0], p->a_hydro[1], p->a_hydro[2]}; @@ -99,14 +96,40 @@ __attribute__((always_inline)) INLINE static void kick_part( p->gpart->v_full[2] = xp->v_full[2]; } - /* Go back by half-step for the hydro velocity */ - p->v[0] = xp->v_full[0] - half_dt * a_tot[0]; - p->v[1] = xp->v_full[1] - half_dt * a_tot[1]; - p->v[2] = xp->v_full[2] - half_dt * a_tot[2]; - /* Extra kick work */ - hydro_kick_extra(p, xp, dt, half_dt); - if (p->gpart != NULL) gravity_kick_extra(p->gpart, dt, half_dt); + hydro_kick_extra(p, xp, dt); + if (p->gpart != NULL) gravity_kick_extra(p->gpart, dt); +} + +/** + * @brief Perform the 'kick' operation on a #spart + * + * @param sp The #spart to kick. + * @param ti_start The starting (integer) time of the kick + * @param ti_end The ending (integer) time of the kick + * @param timeBase The minimal allowed time-step size. 
+ */ +__attribute__((always_inline)) INLINE static void kick_spart( + struct spart *restrict sp, integertime_t ti_start, integertime_t ti_end, + double timeBase) { + + /* Time interval for this half-kick */ + const float dt = (ti_end - ti_start) * timeBase; + + /* Acceleration from gravity */ + const float a[3] = {sp->gpart->a_grav[0], sp->gpart->a_grav[1], + sp->gpart->a_grav[2]}; + + /* Kick particles in momentum space */ + sp->v[0] += a[0] * dt; + sp->v[1] += a[1] * dt; + sp->v[2] += a[2] * dt; + sp->gpart->v_full[0] = sp->v[0]; + sp->gpart->v_full[1] = sp->v[1]; + sp->gpart->v_full[2] = sp->v[2]; + + /* Kick extra variables */ + star_kick_extra(sp, dt); } #endif /* SWIFT_KICK_H */ diff --git a/src/logger.c b/src/logger.c new file mode 100644 index 0000000000000000000000000000000000000000..b2acf47aa70cef55f53d296033f6f5c6162fd5bd --- /dev/null +++ b/src/logger.c @@ -0,0 +1,446 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2017 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. 
*/ +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +/* This object's header. */ +#include "logger.h" + +/* Local headers. */ +#include "atomic.h" +#include "dump.h" +#include "error.h" +#include "part.h" + +/** + * @brief Compute the size of a message given its mask. + * + * @param mask The mask that will be used to dump a #part or #gpart. + * + * @return The size of the logger message in bytes. + */ + +int logger_size(unsigned int mask) { + + /* Start with 8 bytes for the header. */ + int size = 8; + + /* Is this a particle or a timestamp? */ + if (mask & logger_mask_timestamp) { + + /* The timestamp should not contain any other bits. */ + if (mask != logger_mask_timestamp) + error("Timestamps should not include any other data."); + + /* A timestamp consists of an unsigned long long int. */ + size += sizeof(unsigned long long int); + + } else { + + /* Particle position as three doubles. */ + if (mask & logger_mask_x) size += 3 * sizeof(double); + + /* Particle velocity as three floats. */ + if (mask & logger_mask_v) size += 3 * sizeof(float); + + /* Particle acceleration as three floats. */ + if (mask & logger_mask_a) size += 3 * sizeof(float); + + /* Particle internal energy as a single float. */ + if (mask & logger_mask_u) size += sizeof(float); + + /* Particle smoothing length as a single float. */ + if (mask & logger_mask_h) size += sizeof(float); + + /* Particle density as a single float. */ + if (mask & logger_mask_rho) size += sizeof(float); + + /* Particle constants, which is a bit more complicated. */ + if (mask & logger_mask_rho) { + size += sizeof(float) + // mass + sizeof(long long); // id + } + } + + return size; +} + +/** + * @brief Dump a #part to the log. + * + * @param p The #part to dump. + * @param mask The mask of the data to dump. + * @param offset Pointer to the offset of the previous log of this particle. + * @param dump The #dump in which to log the particle data. 
+ */ + +void logger_log_part(struct part *p, unsigned int mask, size_t *offset, + struct dump *dump) { + + /* Make sure we're not writing a timestamp. */ + if (mask & logger_mask_timestamp) + error("You should not log particles as timestamps."); + + /* Start by computing the size of the message. */ + const int size = logger_size(mask); + + /* Allocate a chunk of memory in the dump of the right size. */ + size_t offset_new; + char *buff = dump_get(dump, size, &offset_new); + + /* Write the header. */ + uint64_t temp = (((uint64_t)(offset_new - *offset)) & 0xffffffffffffffULL) | + ((uint64_t)mask << 56); + memcpy(buff, &temp, 8); + buff += 8; + + /* Particle position as three doubles. */ + if (mask & logger_mask_x) { + memcpy(buff, p->x, 3 * sizeof(double)); + buff += 3 * sizeof(double); + } + + /* Particle velocity as three floats. */ + if (mask & logger_mask_v) { + memcpy(buff, p->v, 3 * sizeof(float)); + buff += 3 * sizeof(float); + } + + /* Particle accelleration as three floats. */ + if (mask & logger_mask_a) { + memcpy(buff, p->a_hydro, 3 * sizeof(float)); + buff += 3 * sizeof(float); + } + +#if defined(GADGET2_SPH) + + /* Particle internal energy as a single float. */ + if (mask & logger_mask_u) { + memcpy(buff, &p->entropy, sizeof(float)); + buff += sizeof(float); + } + + /* Particle smoothing length as a single float. */ + if (mask & logger_mask_h) { + memcpy(buff, &p->h, sizeof(float)); + buff += sizeof(float); + } + + /* Particle density as a single float. */ + if (mask & logger_mask_rho) { + memcpy(buff, &p->rho, sizeof(float)); + buff += sizeof(float); + } + + /* Particle constants, which is a bit more complicated. */ + if (mask & logger_mask_rho) { + memcpy(buff, &p->mass, sizeof(float)); + buff += sizeof(float); + memcpy(buff, &p->id, sizeof(long long)); + buff += sizeof(long long); + } + +#endif + + /* Update the log message offset. */ + *offset = offset_new; +} + +/** + * @brief Dump a #gpart to the log. + * + * @param p The #gpart to dump. 
+ * @param mask The mask of the data to dump. + * @param offset Pointer to the offset of the previous log of this particle. + * @param dump The #dump in which to log the particle data. + */ + +void logger_log_gpart(struct gpart *p, unsigned int mask, size_t *offset, + struct dump *dump) { + + /* Make sure we're not writing a timestamp. */ + if (mask & logger_mask_timestamp) + error("You should not log particles as timestamps."); + + /* Make sure we're not looging fields not supported by gparts. */ + if (mask & (logger_mask_u | logger_mask_rho)) + error("Can't log SPH quantities for gparts."); + + /* Start by computing the size of the message. */ + const int size = logger_size(mask); + + /* Allocate a chunk of memory in the dump of the right size. */ + size_t offset_new; + char *buff = dump_get(dump, size, &offset_new); + + /* Write the header. */ + uint64_t temp = (((uint64_t)(offset_new - *offset)) & 0xffffffffffffffULL) | + ((uint64_t)mask << 56); + memcpy(buff, &temp, 8); + buff += 8; + + /* Particle position as three doubles. */ + if (mask & logger_mask_x) { + memcpy(buff, p->x, 3 * sizeof(double)); + buff += 3 * sizeof(double); + } + + /* Particle velocity as three floats. */ + if (mask & logger_mask_v) { + memcpy(buff, p->v_full, 3 * sizeof(float)); + buff += 3 * sizeof(float); + } + + /* Particle accelleration as three floats. */ + if (mask & logger_mask_a) { + memcpy(buff, p->a_grav, 3 * sizeof(float)); + buff += 3 * sizeof(float); + } + + /* Particle smoothing length as a single float. */ + if (mask & logger_mask_h) { + memcpy(buff, &p->epsilon, sizeof(float)); + buff += sizeof(float); + } + + /* Particle constants, which is a bit more complicated. */ + if (mask & logger_mask_rho) { + memcpy(buff, &p->mass, sizeof(float)); + buff += sizeof(float); + memcpy(buff, &p->id_or_neg_offset, sizeof(long long)); + buff += sizeof(long long); + } + + /* Update the log message offset. 
*/ + *offset = offset_new; +} + +void logger_log_timestamp(unsigned long long int timestamp, size_t *offset, + struct dump *dump) { + + /* Start by computing the size of the message. */ + const int size = logger_size(logger_mask_timestamp); + + /* Allocate a chunk of memory in the dump of the right size. */ + size_t offset_new; + char *buff = dump_get(dump, size, &offset_new); + + /* Write the header. */ + uint64_t temp = (((uint64_t)(offset_new - *offset)) & 0xffffffffffffffULL) | + ((uint64_t)logger_mask_timestamp << 56); + memcpy(buff, &temp, 8); + buff += 8; + + /* Store the timestamp. */ + memcpy(buff, ×tamp, sizeof(unsigned long long int)); + + /* Update the log message offset. */ + *offset = offset_new; +} + +/** + * @brief Read a logger message and store the data in a #part. + * + * @param p The #part in which to store the values. + * @param offset Pointer to the offset of the logger message in the buffer, + * will be overwritten with the offset of the previous message. + * @param buff Pointer to the start of an encoded logger message. + * + * @return The mask containing the values read. + */ + +int logger_read_part(struct part *p, size_t *offset, const char *buff) { + + /* Jump to the offset. */ + buff = &buff[*offset]; + + /* Start by reading the logger mask for this entry. */ + uint64_t temp; + memcpy(&temp, buff, 8); + const int mask = temp >> 56; + *offset -= temp & 0xffffffffffffffULL; + buff += 8; + + /* We are only interested in particle data. */ + if (mask & logger_mask_timestamp) + error("Trying to read timestamp as particle."); + + /* Particle position as three doubles. */ + if (mask & logger_mask_x) { + memcpy(p->x, buff, 3 * sizeof(double)); + buff += 3 * sizeof(double); + } + + /* Particle velocity as three floats. */ + if (mask & logger_mask_v) { + memcpy(p->v, buff, 3 * sizeof(float)); + buff += 3 * sizeof(float); + } + + /* Particle accelleration as three floats. 
*/ + if (mask & logger_mask_a) { + memcpy(p->a_hydro, buff, 3 * sizeof(float)); + buff += 3 * sizeof(float); + } + +#if defined(GADGET2_SPH) + + /* Particle internal energy as a single float. */ + if (mask & logger_mask_u) { + memcpy(&p->entropy, buff, sizeof(float)); + buff += sizeof(float); + } + + /* Particle smoothing length as a single float. */ + if (mask & logger_mask_h) { + memcpy(&p->h, buff, sizeof(float)); + buff += sizeof(float); + } + + /* Particle density as a single float. */ + if (mask & logger_mask_rho) { + memcpy(&p->rho, buff, sizeof(float)); + buff += sizeof(float); + } + + /* Particle constants, which is a bit more complicated. */ + if (mask & logger_mask_rho) { + memcpy(&p->mass, buff, sizeof(float)); + buff += sizeof(float); + memcpy(&p->id, buff, sizeof(long long)); + buff += sizeof(long long); + } + +#endif + + /* Finally, return the mask of the values we just read. */ + return mask; +} + +/** + * @brief Read a logger message and store the data in a #gpart. + * + * @param p The #gpart in which to store the values. + * @param offset Pointer to the offset of the logger message in the buffer, + * will be overwritten with the offset of the previous message. + * @param buff Pointer to the start of an encoded logger message. + * + * @return The mask containing the values read. + */ + +int logger_read_gpart(struct gpart *p, size_t *offset, const char *buff) { + + /* Jump to the offset. */ + buff = &buff[*offset]; + + /* Start by reading the logger mask for this entry. */ + uint64_t temp; + memcpy(&temp, buff, 8); + const int mask = temp >> 56; + *offset -= temp & 0xffffffffffffffULL; + buff += 8; + + /* We are only interested in particle data. */ + if (mask & logger_mask_timestamp) + error("Trying to read timestamp as particle."); + + /* We can't store all part fields in a gpart. */ + if (mask & (logger_mask_u | logger_mask_rho)) + error("Trying to read SPH quantities into a gpart."); + + /* Particle position as three doubles. 
*/ + if (mask & logger_mask_x) { + memcpy(p->x, buff, 3 * sizeof(double)); + buff += 3 * sizeof(double); + } + + /* Particle velocity as three floats. */ + if (mask & logger_mask_v) { + memcpy(p->v_full, buff, 3 * sizeof(float)); + buff += 3 * sizeof(float); + } + + /* Particle accelleration as three floats. */ + if (mask & logger_mask_a) { + memcpy(p->a_grav, buff, 3 * sizeof(float)); + buff += 3 * sizeof(float); + } + + /* Particle smoothing length as a single float. */ + if (mask & logger_mask_h) { + memcpy(&p->epsilon, buff, sizeof(float)); + buff += sizeof(float); + } + + /* Particle constants, which is a bit more complicated. */ + if (mask & logger_mask_rho) { + memcpy(&p->mass, buff, sizeof(float)); + buff += sizeof(float); + memcpy(&p->id_or_neg_offset, buff, sizeof(long long)); + buff += sizeof(long long); + } + + /* Finally, return the mask of the values we just read. */ + return mask; +} + +/** + * @brief Read a logger message for a timestamp. + * + * @param t The timestamp in which to store the value. + * @param offset Pointer to the offset of the logger message in the buffer, + * will be overwritten with the offset of the previous message. + * @param buff Pointer to the start of an encoded logger message. + * + * @return The mask containing the values read. + */ + +int logger_read_timestamp(unsigned long long int *t, size_t *offset, + const char *buff) { + + /* Jump to the offset. */ + buff = &buff[*offset]; + + /* Start by reading the logger mask for this entry. */ + uint64_t temp; + memcpy(&temp, buff, 8); + const int mask = temp >> 56; + *offset -= temp & 0xffffffffffffffULL; + buff += 8; + + /* We are only interested in timestamps. */ + if (!(mask & logger_mask_timestamp)) + error("Trying to read timestamp from a particle."); + + /* Make sure we don't have extra fields. */ + if (mask != logger_mask_timestamp) + error("Timestamp message contains extra fields."); + + /* Copy the timestamp value from the buffer. 
*/ + memcpy(t, buff, sizeof(unsigned long long int)); + + /* Finally, return the mask of the values we just read. */ + return mask; +} diff --git a/src/logger.h b/src/logger.h new file mode 100644 index 0000000000000000000000000000000000000000..32fae752c2ae13a143809d9df3030dbc06b0942d --- /dev/null +++ b/src/logger.h @@ -0,0 +1,87 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2017 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_LOGGER_H +#define SWIFT_LOGGER_H + +/* Includes. */ +#include "dump.h" +#include "part.h" + +/** + * Logger entries contain messages representing the particle data at a given + * point in time during the simulation. + * + * The logger messages always start with an 8-byte header structured as + * follows: + * + * data: [ mask | offset ] + * byte: [ 01 | 02 | 03 | 04 | 05 | 06 | 07 | 08 ] + * + * I.e. a first "mask" byte followed by 7 "offset" bytes. The mask contains + * information on what kind of data is packed after the header. 
/* Logger mask bits, one per field that can follow the 8-byte header.
 * The shift amount is the bit number used in the message format:
 *   0: position, 1: velocity, 2: acceleration, 3: internal energy,
 *   4: smoothing length, 5: density, 6: constants (mass and ID),
 *   7: timestamp. */
#define logger_mask_x (1 << 0)
#define logger_mask_v (1 << 1)
#define logger_mask_a (1 << 2)
#define logger_mask_u (1 << 3)
#define logger_mask_h (1 << 4)
#define logger_mask_rho (1 << 5)
#define logger_mask_consts (1 << 6)
#define logger_mask_timestamp (1 << 7)
/* Swap the data behind a and b in chunks of the given type for as long as at
   least one whole chunk remains. a and b must be char* lvalues and count an
   integer lvalue: all three are advanced/decremented by the macro. */
#define swap_loop(type, a, b, count)                                  \
  for (; count >= sizeof(type);                                       \
       a += sizeof(type), b += sizeof(type), count -= sizeof(type)) { \
    register type held = *(type *)a;                                  \
    *(type *)a = *(type *)b;                                          \
    *(type *)b = held;                                                \
  }
/* Alignment-agnostic variant of the chunked swap loop: every chunk is moved
   through temporaries with memcpy, so a and b are never dereferenced as a
   wider type. The builtin is used so this header needs no extra include.
   a and b must be char* lvalues and count an integer lvalue. */
#define swap_loop_unaligned(type, a, b, count)   \
  while (count >= sizeof(type)) {                \
    type chunk_a, chunk_b;                       \
    __builtin_memcpy(&chunk_a, a, sizeof(type)); \
    __builtin_memcpy(&chunk_b, b, sizeof(type)); \
    __builtin_memcpy(a, &chunk_b, sizeof(type)); \
    __builtin_memcpy(b, &chunk_a, sizeof(type)); \
    a += sizeof(type);                           \
    b += sizeof(type);                           \
    count -= sizeof(type);                       \
  }

/**
 * @brief Swap the contents of two elements in-place.
 *
 * As opposed to #memswap, this function does not require the parameters
 * to be aligned in any specific way: the vector paths use unaligned
 * load/store intrinsics and the scalar tail goes through memcpy.
 * Furthermore, register re-labeling only seems to work when the code is
 * compiled with @c -funroll-loops.
 *
 * @param void_a Pointer to the first element.
 * @param void_b Pointer to the second element.
 * @param bytes Size, in bytes, of the data pointed to by @c a and @c b.
 */
__attribute__((always_inline)) inline void memswap_unaligned(void *void_a,
                                                             void *void_b,
                                                             size_t bytes) {
  char *a = (char *)void_a, *b = (char *)void_b;
#ifdef __AVX512F__
  while (bytes >= sizeof(__m512i)) {
    register __m512i temp;
    temp = _mm512_loadu_si512((__m512i *)a);
    _mm512_storeu_si512((__m512i *)a, _mm512_loadu_si512((__m512i *)b));
    _mm512_storeu_si512((__m512i *)b, temp);
    a += sizeof(__m512i);
    b += sizeof(__m512i);
    bytes -= sizeof(__m512i);
  }
#endif
#ifdef __AVX__
  while (bytes >= sizeof(__m256i)) {
    register __m256i temp;
    temp = _mm256_loadu_si256((__m256i *)a);
    _mm256_storeu_si256((__m256i *)a, _mm256_loadu_si256((__m256i *)b));
    _mm256_storeu_si256((__m256i *)b, temp);
    a += sizeof(__m256i);
    b += sizeof(__m256i);
    bytes -= sizeof(__m256i);
  }
#endif
#ifdef __SSE2__
  while (bytes >= sizeof(__m128i)) {
    register __m128i temp;
    temp = _mm_loadu_si128((__m128i *)a);
    _mm_storeu_si128((__m128i *)a, _mm_loadu_si128((__m128i *)b));
    _mm_storeu_si128((__m128i *)b, temp);
    a += sizeof(__m128i);
    b += sizeof(__m128i);
    bytes -= sizeof(__m128i);
  }
#endif
#ifdef __ALTIVEC__
  /* Go through memcpy rather than dereferencing a vector pointer, so no
     assumption is made about unaligned vector load/store support. */
  swap_loop_unaligned(vector int, a, b, bytes);
#endif
  /* Mop up the remainder in progressively smaller chunks. */
  swap_loop_unaligned(size_t, a, b, bytes);
  swap_loop_unaligned(int, a, b, bytes);
  swap_loop_unaligned(short, a, b, bytes);
  swap_loop_unaligned(char, a, b, bytes);
}
"BoxSize", DOUBLE, boxSize); - readAttribute(h_grp, "NumPart_Total", UINT, numParticles); - readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord); + readAttribute(h_grp, "NumPart_Total", LONGLONG, numParticles); + readAttribute(h_grp, "NumPart_Total_HighWord", LONGLONG, + numParticles_highWord); for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) - N_total[ptype] = ((long long)numParticles[ptype]) + - ((long long)numParticles_highWord[ptype] << 32); + N_total[ptype] = + (numParticles[ptype]) + (numParticles_highWord[ptype] << 32); dim[0] = boxSize[0]; dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1]; dim[2] = (boxSize[2] < 0) ? boxSize[0] : boxSize[2]; - /* message("Found %d particles in a %speriodic box of size - * [%f %f %f].", */ - /* N_total, (periodic ? "": "non-"), dim[0], - * dim[1], dim[2]); */ + /* message("Found %lld particles in a %speriodic box of size [%f %f %f].", */ + /* N_total[0], (periodic ? "": "non-"), dim[0], */ + /* dim[1], dim[2]); */ /* Divide the particles among the tasks. 
*/ for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) { @@ -492,29 +497,38 @@ void read_ic_parallel(char* fileName, const struct UnitSystem* internal_units, units_conversion_factor(ic_units, internal_units, UNIT_CONV_LENGTH); /* Allocate memory to store SPH particles */ - *Ngas = N[0]; - if (posix_memalign((void*)parts, part_align, (*Ngas) * sizeof(struct part)) != - 0) - error("Error while allocating memory for particles"); - bzero(*parts, *Ngas * sizeof(struct part)); - - /* Allocate memory to store all particles */ - const size_t Ndm = N[1]; - *Ngparts = N[1] + N[0]; - if (posix_memalign((void*)gparts, gpart_align, - *Ngparts * sizeof(struct gpart)) != 0) - error( - "Error while allocating memory for gravity " - "particles"); - bzero(*gparts, *Ngparts * sizeof(struct gpart)); - - /* message("Allocated %8.2f MB for particles.", *N * - * sizeof(struct part) / + if (with_hydro) { + *Ngas = N[0]; + if (posix_memalign((void*)parts, part_align, + (*Ngas) * sizeof(struct part)) != 0) + error("Error while allocating memory for particles"); + bzero(*parts, *Ngas * sizeof(struct part)); + } + + /* Allocate memory to store star particles */ + if (with_stars) { + *Nstars = N[STAR]; + if (posix_memalign((void*)sparts, spart_align, + *Nstars * sizeof(struct spart)) != 0) + error("Error while allocating memory for star particles"); + bzero(*sparts, *Nstars * sizeof(struct spart)); + } + + /* Allocate memory to store gravity particles */ + if (with_gravity) { + Ndm = N[1]; + *Ngparts = (with_hydro ? N[GAS] : 0) + N[DM] + (with_stars ? 
N[STAR] : 0); + if (posix_memalign((void*)gparts, gpart_align, + *Ngparts * sizeof(struct gpart)) != 0) + error("Error while allocating memory for gravity particles"); + bzero(*gparts, *Ngparts * sizeof(struct gpart)); + } + + /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) / * (1024.*1024.)); */ /* message("BoxSize = %lf", dim[0]); */ - /* message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, - * *Ngparts); */ + /* message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, *Ngparts); */ /* Loop over all particle types */ for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) { @@ -539,13 +553,24 @@ void read_ic_parallel(char* fileName, const struct UnitSystem* internal_units, switch (ptype) { case GAS: - Nparticles = *Ngas; - hydro_read_particles(*parts, list, &num_fields); + if (with_hydro) { + Nparticles = *Ngas; + hydro_read_particles(*parts, list, &num_fields); + } break; case DM: - Nparticles = Ndm; - darkmatter_read_particles(*gparts, list, &num_fields); + if (with_gravity) { + Nparticles = Ndm; + darkmatter_read_particles(*gparts, list, &num_fields); + } + break; + + case STAR: + if (with_stars) { + Nparticles = *Nstars; + star_read_particles(*sparts, list, &num_fields); + } break; default: @@ -563,10 +588,15 @@ void read_ic_parallel(char* fileName, const struct UnitSystem* internal_units, } /* Prepare the DM particles */ - if (!dry_run) prepare_dm_gparts(*gparts, Ndm); + if (!dry_run && with_gravity) prepare_dm_gparts(*gparts, Ndm); - /* Now duplicate the hydro particle into gparts */ - if (!dry_run) duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm); + /* Duplicate the hydro particles into gparts */ + if (!dry_run && with_gravity && with_hydro) + duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm); + + /* Duplicate the star particles into gparts */ + if (!dry_run && with_gravity && with_stars) + duplicate_star_gparts(*sparts, *gparts, *Nstars, Ndm + *Ngas); /* message("Done Reading particles..."); */ @@ -609,17 +639,19 @@ void 
write_output_parallel(struct engine* e, const char* baseName, hid_t h_file = 0, h_grp = 0; const size_t Ngas = e->s->nr_parts; + const size_t Nstars = e->s->nr_sparts; const size_t Ntot = e->s->nr_gparts; int periodic = e->s->periodic; int numFiles = 1; struct part* parts = e->s->parts; struct gpart* gparts = e->s->gparts; struct gpart* dmparts = NULL; + struct spart* sparts = e->s->sparts; static int outputCount = 0; FILE* xmfFile = 0; /* Number of unassociated gparts */ - const size_t Ndm = Ntot > 0 ? Ntot - Ngas : 0; + const size_t Ndm = Ntot > 0 ? Ntot - (Ngas + Nstars) : 0; /* File name */ char fileName[FILENAME_BUFFER_SIZE]; @@ -642,16 +674,16 @@ void write_output_parallel(struct engine* e, const char* baseName, /* Compute offset in the file and total number of * particles */ - size_t N[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0}; + size_t N[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0, 0, Nstars, 0}; long long N_total[NUM_PARTICLE_TYPES] = {0}; long long offset[NUM_PARTICLE_TYPES] = {0}; - MPI_Exscan(&N, &offset, NUM_PARTICLE_TYPES, MPI_LONG_LONG, MPI_SUM, comm); + MPI_Exscan(&N, &offset, NUM_PARTICLE_TYPES, MPI_LONG_LONG_INT, MPI_SUM, comm); for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) N_total[ptype] = offset[ptype] + N[ptype]; /* The last rank now has the correct N_total. 
Let's * broadcast from there */ - MPI_Bcast(&N_total, 6, MPI_LONG_LONG, mpi_size - 1, comm); + MPI_Bcast(&N_total, 6, MPI_LONG_LONG_INT, mpi_size - 1, comm); /* Now everybody konws its offset and the total number of * particles of each @@ -816,9 +848,11 @@ void write_output_parallel(struct engine* e, const char* baseName, /* Write DM particles */ Nparticles = Ndm; darkmatter_write_particles(dmparts, list, &num_fields); + break; - /* Free temporary array */ - free(dmparts); + case STAR: + Nparticles = Nstars; + star_write_particles(sparts, list, &num_fields); break; default: @@ -832,7 +866,10 @@ void write_output_parallel(struct engine* e, const char* baseName, internal_units, snapshot_units); /* Free temporary array */ - free(dmparts); + if (dmparts) { + free(dmparts); + dmparts = 0; + } /* Close particle group */ H5Gclose(h_grp); diff --git a/src/parallel_io.h b/src/parallel_io.h index e5b12aa50c30b4d63ccc81835d2d8454e01b3889..e4cb9f5976bc0f5b55207a7422597a05feaa3d5e 100644 --- a/src/parallel_io.h +++ b/src/parallel_io.h @@ -36,9 +36,11 @@ void read_ic_parallel(char* fileName, const struct UnitSystem* internal_units, double dim[3], struct part** parts, struct gpart** gparts, - size_t* Ngas, size_t* Ngparts, int* periodic, - int* flag_entropy, int mpi_rank, int mpi_size, - MPI_Comm comm, MPI_Info info, int dry_run); + struct spart** sparts, size_t* Ngas, size_t* Ngparts, + size_t* Nsparts, int* periodic, int* flag_entropy, + int with_hydro, int with_gravity, int with_stars, + int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info, + int dry_run); void write_output_parallel(struct engine* e, const char* baseName, const struct UnitSystem* internal_units, diff --git a/src/part.c b/src/part.c index b00eaccaae0e86f7c4e8019a307f0bf455687b7c..ecc5ca977ae0716b395cdd61af97382e6603186e 100644 --- a/src/part.c +++ b/src/part.c @@ -36,7 +36,8 @@ * @param N The number of particles to re-link; * @param offset The offset of #part%s relative to the global parts list. 
*/ -void part_relink_gparts(struct part *parts, size_t N, ptrdiff_t offset) { +void part_relink_gparts_to_parts(struct part *parts, size_t N, + ptrdiff_t offset) { for (size_t k = 0; k < N; k++) { if (parts[k].gpart) { parts[k].gpart->id_or_neg_offset = -(k + offset); @@ -45,28 +46,194 @@ void part_relink_gparts(struct part *parts, size_t N, ptrdiff_t offset) { } /** - * @brief Re-link the #gpart%s associated with the list of #part%s. + * @brief Re-link the #gpart%s associated with the list of #spart%s. + * + * @param sparts The list of #spart. + * @param N The number of s-particles to re-link; + * @param offset The offset of #spart%s relative to the global sparts list. + */ +void part_relink_gparts_to_sparts(struct spart *sparts, size_t N, + ptrdiff_t offset) { + for (size_t k = 0; k < N; k++) { + if (sparts[k].gpart) { + sparts[k].gpart->id_or_neg_offset = -(k + offset); + } + } +} + +/** + * @brief Re-link the #part%s associated with the list of #gpart%s. * * @param gparts The list of #gpart. * @param N The number of particles to re-link; - * @param parts The global part array in which to find the #gpart offsets. + * @param parts The global #part array in which to find the #gpart offsets. */ -void part_relink_parts(struct gpart *gparts, size_t N, struct part *parts) { +void part_relink_parts_to_gparts(struct gpart *gparts, size_t N, + struct part *parts) { for (size_t k = 0; k < N; k++) { - if (gparts[k].id_or_neg_offset <= 0) { + if (gparts[k].type == swift_type_gas) { parts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; } } } +/** + * @brief Re-link the #spart%s associated with the list of #gpart%s. + * + * @param gparts The list of #gpart. + * @param N The number of particles to re-link; + * @param sparts The global #spart array in which to find the #gpart offsets. 
+ */ +void part_relink_sparts_to_gparts(struct gpart *gparts, size_t N, + struct spart *sparts) { + for (size_t k = 0; k < N; k++) { + if (gparts[k].type == swift_type_star) { + sparts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; + } + } +} + +/** + * @brief Verifies that the #gpart, #part and #spart are correctly linked + * together + * and that the particle positions match. + * + * This is a debugging function. + * + * @param parts The #part array. + * @param gparts The #gpart array. + * @param sparts The #spart array. + * @param nr_parts The number of #part in the array. + * @param nr_gparts The number of #gpart in the array. + * @param nr_sparts The number of #spart in the array. + * @param verbose Do we report verbosely in case of success ? + */ +void part_verify_links(struct part *parts, struct gpart *gparts, + struct spart *sparts, size_t nr_parts, size_t nr_gparts, + size_t nr_sparts, int verbose) { + + for (size_t k = 0; k < nr_gparts; ++k) { + + /* We have a DM particle */ + if (gparts[k].type == swift_type_dark_matter) { + + /* Check that it's not linked */ + if (gparts[k].id_or_neg_offset < 0) + error("DM gpart particle linked to something !"); + } + + /* We have a gas particle */ + else if (gparts[k].type == swift_type_gas) { + + /* Check that it is linked */ + if (gparts[k].id_or_neg_offset > 0) + error("Gas gpart not linked to anything !"); + + /* Find its link */ + const struct part *part = &parts[-gparts[k].id_or_neg_offset]; + + /* Check the reverse link */ + if (part->gpart != &gparts[k]) error("Linking problem !"); + + /* Check that the particles are at the same place */ + if (gparts[k].x[0] != part->x[0] || gparts[k].x[1] != part->x[1] || + gparts[k].x[2] != part->x[2]) + error( + "Linked particles are not at the same position !\n" + "gp->x=[%e %e %e] p->x=[%e %e %e] diff=[%e %e %e]", + gparts[k].x[0], gparts[k].x[1], gparts[k].x[2], part->x[0], + part->x[1], part->x[2], gparts[k].x[0] - part->x[0], + gparts[k].x[1] - part->x[1], 
gparts[k].x[2] - part->x[2]); + + /* Check that the particles are at the same time */ + if (gparts[k].time_bin != part->time_bin) + error("Linked particles are not at the same time !"); + } + + else if (gparts[k].type == swift_type_star) { + + /* Check that it is linked */ + if (gparts[k].id_or_neg_offset > 0) + error("Star gpart not linked to anything !"); + + /* Find its link */ + const struct spart *spart = &sparts[-gparts[k].id_or_neg_offset]; + + /* Check the reverse link */ + if (spart->gpart != &gparts[k]) error("Linking problem !"); + + /* Check that the particles are at the same place */ + if (gparts[k].x[0] != spart->x[0] || gparts[k].x[1] != spart->x[1] || + gparts[k].x[2] != spart->x[2]) + error( + "Linked particles are not at the same position !\n" + "gp->x=[%e %e %e] sp->x=[%e %e %e] diff=[%e %e %e]", + gparts[k].x[0], gparts[k].x[1], gparts[k].x[2], spart->x[0], + spart->x[1], spart->x[2], gparts[k].x[0] - spart->x[0], + gparts[k].x[1] - spart->x[1], gparts[k].x[2] - spart->x[2]); + + /* Check that the particles are at the same time */ + if (gparts[k].time_bin != spart->time_bin) + error("Linked particles are not at the same time !"); + } + } + + /* Now check that all parts are linked */ + for (size_t k = 0; k < nr_parts; ++k) { + + /* Ok, there is a link */ + if (parts[k].gpart != NULL) { + + /* Check the link */ + if (parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) { + error("Linking problem !"); + } + + /* Check that the particles are at the same place */ + if (parts[k].x[0] != parts[k].gpart->x[0] || + parts[k].x[1] != parts[k].gpart->x[1] || + parts[k].x[2] != parts[k].gpart->x[2]) + error("Linked particles are not at the same position !"); + + /* Check that the particles are at the same time */ + if (parts[k].time_bin != parts[k].gpart->time_bin) + error("Linked particles are not at the same time !"); + } + } + + /* Now check that all sparts are linked */ + for (size_t k = 0; k < nr_sparts; ++k) { + + /* Ok, there is a link */ + if 
(sparts[k].gpart != NULL) { + + /* Check the link */ + if (sparts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) { + error("Linking problem !"); + } + + /* Check that the particles are at the same place */ + if (sparts[k].x[0] != sparts[k].gpart->x[0] || + sparts[k].x[1] != sparts[k].gpart->x[1] || + sparts[k].x[2] != sparts[k].gpart->x[2]) + error("Linked particles are not at the same position !"); + + /* Check that the particles are at the same time */ + if (sparts[k].time_bin != sparts[k].gpart->time_bin) + error("Linked particles are not at the same time !"); + } + } + + if (verbose) message("All links OK"); +} + #ifdef WITH_MPI /* MPI data type for the particle transfers */ MPI_Datatype part_mpi_type; MPI_Datatype xpart_mpi_type; MPI_Datatype gpart_mpi_type; -#endif +MPI_Datatype spart_mpi_type; -#ifdef WITH_MPI /** * @brief Registers MPI particle types. */ @@ -93,5 +260,10 @@ void part_create_mpi_types() { MPI_Type_commit(&gpart_mpi_type) != MPI_SUCCESS) { error("Failed to create MPI type for gparts."); } + if (MPI_Type_contiguous(sizeof(struct spart) / sizeof(unsigned char), + MPI_BYTE, &spart_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&spart_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for sparts."); + } } #endif diff --git a/src/part.h b/src/part.h index 0bf4359f891619b0900f8aa9f17b2a2a71127579..4ed4b490964b59239faf170218cd099d225f5edd 100644 --- a/src/part.h +++ b/src/part.h @@ -32,10 +32,13 @@ /* Local headers. */ #include "align.h" +#include "part_type.h" +#include "timeline.h" /* Some constants. 
*/ #define part_align 128 #define xpart_align 128 +#define spart_align 128 #define gpart_align 128 /* Import the right hydro particle definition */ @@ -62,13 +65,27 @@ /* Import the right gravity particle definition */ #include "./gravity/Default/gravity_part.h" -void part_relink_gparts(struct part *parts, size_t N, ptrdiff_t offset); -void part_relink_parts(struct gpart *gparts, size_t N, struct part *parts); +/* Import the right star particle definition */ +#include "./stars/Default/star_part.h" + +void part_relink_gparts_to_parts(struct part *parts, size_t N, + ptrdiff_t offset); +void part_relink_gparts_to_sparts(struct spart *sparts, size_t N, + ptrdiff_t offset); +void part_relink_parts_to_gparts(struct gpart *gparts, size_t N, + struct part *parts); +void part_relink_sparts_to_gparts(struct gpart *gparts, size_t N, + struct spart *sparts); +void part_verify_links(struct part *parts, struct gpart *gparts, + struct spart *sparts, size_t nr_parts, size_t nr_gparts, + size_t nr_sparts, int verbose); + #ifdef WITH_MPI /* MPI data type for the particle transfers */ extern MPI_Datatype part_mpi_type; extern MPI_Datatype xpart_mpi_type; extern MPI_Datatype gpart_mpi_type; +extern MPI_Datatype spart_mpi_type; void part_create_mpi_types(); #endif diff --git a/src/part_type.h b/src/part_type.h new file mode 100644 index 0000000000000000000000000000000000000000..2c564d6908c8887e8fa8a5197a0a92ed85cbe5bb --- /dev/null +++ b/src/part_type.h @@ -0,0 +1,34 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_PART_TYPES_H +#define SWIFT_PART_TYPES_H + +/** + * @brief The different types of particles a #gpart can link to. + * + * Note we use the historical values from Gadget for these fields. + */ +enum part_type { + swift_type_gas = 0, + swift_type_dark_matter = 1, + swift_type_star = 4, + swift_type_black_hole = 5 +} __attribute__((packed)); + +#endif /* SWIFT_PART_TYPES_H */ diff --git a/src/partition.c b/src/partition.c index 90fbc53c9f5898b8fcb6133f1411e7ee1f0bcb95..3e21c2b51b20501631e080ffc3f9e7d8df5dab20 100644 --- a/src/partition.c +++ b/src/partition.c @@ -522,8 +522,9 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, /* Skip un-interesting tasks. */ if (t->type != task_type_self && t->type != task_type_pair && t->type != task_type_sub_self && t->type != task_type_sub_self && - t->type != task_type_ghost && t->type != task_type_kick && - t->type != task_type_init) + t->type != task_type_ghost && t->type != task_type_kick1 && + t->type != task_type_kick2 && t->type != task_type_timestep && + t->type != task_type_drift && t->type != task_type_init) continue; /* Get the task weight. */ @@ -554,7 +555,9 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, int cid = ci - cells; /* Different weights for different tasks. 
*/ - if (t->type == task_type_ghost || t->type == task_type_kick) { + if (t->type == task_type_ghost || t->type == task_type_kick1 || + t->type == task_type_kick2 || t->type == task_type_timestep || + t->type == task_type_drift) { /* Particle updates add only to vertex weight. */ if (taskvweights) weights_v[cid] += w; diff --git a/src/potential.h b/src/potential.h index c462806e206e0e0455bf7094708ab003b7ca9682..116ea8302e7f706cdb861540a89d562174d73408 100644 --- a/src/potential.h +++ b/src/potential.h @@ -34,8 +34,6 @@ #include "./potential/point_mass/potential.h" #elif defined(EXTERNAL_POTENTIAL_ISOTHERMAL) #include "./potential/isothermal/potential.h" -#elif defined(EXTERNAL_POTENTIAL_SOFTENED_ISOTHERMAL) -#include "./potential/softened_isothermal/potential.h" #elif defined(EXTERNAL_POTENTIAL_DISC_PATCH) #include "./potential/disc_patch/potential.h" #else diff --git a/src/potential/disc_patch/potential.h b/src/potential/disc_patch/potential.h index fe1df8796f046edded0c5b1779859a1c6fffffc0..400539a8d02d29a8d383bb1c523d064f733267c5 100644 --- a/src/potential/disc_patch/potential.h +++ b/src/potential/disc_patch/potential.h @@ -83,37 +83,37 @@ __attribute__((always_inline)) INLINE static float external_gravity_timestep( float dt = dt_dyn; /* absolute value of height above disc */ - const float dz = fabs(g->x[2] - potential->z_disc); + const float dz = fabsf(g->x[2] - potential->z_disc); - /* vertical cceleration */ + /* vertical acceleration */ const float z_accel = 2.f * M_PI * phys_const->const_newton_G * potential->surface_density * - tanh(dz / potential->scale_height); + tanhf(dz / potential->scale_height); /* demand that dt * velocity < fraction of scale height of disc */ float dt1 = FLT_MAX; - if (fabs(g->v_full[2]) > 0) { - dt1 = potential->scale_height / fabs(g->v_full[2]); + if (g->v_full[2] != 0.f) { + dt1 = potential->scale_height / fabsf(g->v_full[2]); if (dt1 < dt) dt = dt1; } /* demand that dt^2 * acceleration < fraction of scale height of disc */ 
float dt2 = FLT_MAX; - if (fabs(z_accel) > 0) { - dt2 = potential->scale_height / fabs(z_accel); - if (dt2 < dt * dt) dt = sqrt(dt2); + if (z_accel != 0.f) { + dt2 = potential->scale_height / fabsf(z_accel); + if (dt2 < dt * dt) dt = sqrtf(dt2); } - /* demand that dt^3 jerk < fraction of scale height of disc */ + /* demand that dt^3 * jerk < fraction of scale height of disc */ float dt3 = FLT_MAX; - if (abs(g->v_full[2]) > 0) { + if (g->v_full[2] != 0.f) { const float dz_accel_over_dt = 2.f * M_PI * phys_const->const_newton_G * potential->surface_density / - potential->scale_height / cosh(dz / potential->scale_height) / - cosh(dz / potential->scale_height) * fabs(g->v_full[2]); + potential->scale_height / coshf(dz / potential->scale_height) / + coshf(dz / potential->scale_height) * fabsf(g->v_full[2]); - dt3 = potential->scale_height / fabs(dz_accel_over_dt); - if (dt3 < dt * dt * dt) dt = pow(dt3, 1. / 3.); + dt3 = potential->scale_height / fabsf(dz_accel_over_dt); + if (dt3 < dt * dt * dt) dt = cbrtf(dt3); } return potential->timestep_mult * dt; @@ -123,7 +123,8 @@ __attribute__((always_inline)) INLINE static float external_gravity_timestep( * @brief Computes the gravitational acceleration along z due to a hydrostatic * disc * - * See Creasey, Theuns & Bower, 2013, MNRAS, Volume 429, Issue 3, p.1922-1948 + * See Creasey, Theuns & Bower, 2013, MNRAS, Volume 429, Issue 3, p.1922-1948, + * equation 17. * * @param time The current time in internal units. * @param potential The properties of the potential. @@ -144,7 +145,7 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration( /* Accelerations. 
Note that they are multiplied by G later on */ const float z_accel = reduction_factor * 2.f * M_PI * potential->surface_density * - tanh(fabs(dz) / potential->scale_height); + tanhf(fabsf(dz) / potential->scale_height); if (dz > 0) g->a_grav[2] -= z_accel; if (dz < 0) g->a_grav[2] += z_accel; @@ -153,26 +154,40 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration( /** * @brief Computes the gravitational potential energy of a particle in the * disc patch potential. - * Time evolving system so not sure how to do this - * Placeholder for now- just returns 0 * + * See Creasey, Theuns & Bower, 2013, MNRAS, Volume 429, Issue 3, p.1922-1948, + * equation 24. + * + * @param time The current time. * @param potential The #external_potential used in the run. * @param phys_const Physical constants in internal units. - * @param p Pointer to the particle data. + * @param gp Pointer to the particle data. */ - __attribute__((always_inline)) INLINE static float external_gravity_get_potential_energy( - const struct external_potential* potential, - const struct phys_const* const phys_const, const struct gpart* p) { + double time, const struct external_potential* potential, + const struct phys_const* const phys_const, const struct gpart* gp) { + + const float dz = gp->x[2] - potential->z_disc; + const float t_dyn = potential->dynamical_time; - return 0.f; + float reduction_factor = 1.f; + if (time < potential->growth_time * t_dyn) + reduction_factor = time / (potential->growth_time * t_dyn); + + /* Potential. Unlike the accelerations, Newton's G is applied right here */ + return reduction_factor * 2.f * M_PI * phys_const->const_newton_G * + potential->surface_density * potential->scale_height * + logf(coshf(dz / potential->scale_height)); } /** * @brief Initialises the external potential properties in the internal system * of units. * + * See Creasey, Theuns & Bower, 2013, MNRAS, Volume 429, Issue 3, p.1922-1948, + * equation 22. 
+ * * @param parameter_file The parsed parameter file * @param phys_const Physical constants in internal units * @param us The current internal system of units diff --git a/src/potential/isothermal/potential.h b/src/potential/isothermal/potential.h index a582dce17daba0ac9705ef4ae1fc6be9db19315a..9c07f3eb67528a003788ca94bd1e2e52dd985a2c 100644 --- a/src/potential/isothermal/potential.h +++ b/src/potential/isothermal/potential.h @@ -1,7 +1,8 @@ /******************************************************************************* * This file is part of SWIFT. - * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk) - * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * Copyright (c) 2016 Tom Theuns (tom.theuns@durham.ac.uk) + * Stefan Arridge (stefan.arridge@durham.ac.uk) + * Matthieu Schaller (matthieu.schaller@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -35,7 +36,8 @@ #include "units.h" /** - * @brief External Potential Properties - Isothermal sphere case + * @brief External Potential Properties - Isothermal sphere case with + * central softening */ struct external_potential { @@ -45,9 +47,14 @@ struct external_potential { /*! Rotation velocity */ double vrot; - /*! Square of vrot divided by G \f$ \frac{v_{rot}^2}{G} \f$ */ + /*! Square of vrot, the circular velocity which defines the isothermal + * potential devided by Newton's constant */ double vrot2_over_G; + /*! Square of the softening length. Acceleration tends to zero within this + * distance from the origin */ + double epsilon2; + /*! 
Time-step condition pre-factor */ double timestep_mult; }; @@ -70,17 +77,18 @@ __attribute__((always_inline)) INLINE static float external_gravity_timestep( const float dy = g->x[1] - potential->y; const float dz = g->x[2] - potential->z; - const float rinv2 = 1.f / (dx * dx + dy * dy + dz * dz); + const float r2_plus_epsilon2_inv = + 1.f / (dx * dx + dy * dy + dz * dz + potential->epsilon2); const float drdv = dx * (g->v_full[0]) + dy * (g->v_full[1]) + dz * (g->v_full[2]); const double vrot = potential->vrot; - const float dota_x = - vrot * vrot * rinv2 * (g->v_full[0] - 2.f * drdv * dx * rinv2); - const float dota_y = - vrot * vrot * rinv2 * (g->v_full[1] - 2.f * drdv * dy * rinv2); - const float dota_z = - vrot * vrot * rinv2 * (g->v_full[2] - 2.f * drdv * dz * rinv2); + const float dota_x = vrot * vrot * r2_plus_epsilon2_inv * + (g->v_full[0] - 2.f * drdv * dx * r2_plus_epsilon2_inv); + const float dota_y = vrot * vrot * r2_plus_epsilon2_inv * + (g->v_full[1] - 2.f * drdv * dy * r2_plus_epsilon2_inv); + const float dota_z = vrot * vrot * r2_plus_epsilon2_inv * + (g->v_full[2] - 2.f * drdv * dz * r2_plus_epsilon2_inv); const float dota_2 = dota_x * dota_x + dota_y * dota_y + dota_z * dota_z; const float a_2 = g->a_grav[0] * g->a_grav[0] + g->a_grav[1] * g->a_grav[1] + g->a_grav[2] * g->a_grav[2]; @@ -94,6 +102,10 @@ __attribute__((always_inline)) INLINE static float external_gravity_timestep( * Note that the accelerations are multiplied by Newton's G constant * later on. * + * a_x = -(v_rot^2 / G) * x / (r^2 + epsilon^2) + * a_y = -(v_rot^2 / G) * y / (r^2 + epsilon^2) + * a_z = -(v_rot^2 / G) * z / (r^2 + epsilon^2) + * * @param time The current time. * @param potential The #external_potential used in the run. * @param phys_const The physical constants in internal units. 
@@ -106,10 +118,10 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration( const float dx = g->x[0] - potential->x; const float dy = g->x[1] - potential->y; const float dz = g->x[2] - potential->z; + const float r2_plus_epsilon2_inv = + 1.f / (dx * dx + dy * dy + dz * dz + potential->epsilon2); - const float rinv2 = 1.f / (dx * dx + dy * dy + dz * dz); - - const double term = -potential->vrot2_over_G * rinv2; + const float term = -potential->vrot2_over_G * r2_plus_epsilon2_inv; g->a_grav[0] += term * dx; g->a_grav[1] += term * dy; @@ -120,21 +132,24 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration( * @brief Computes the gravitational potential energy of a particle in an * isothermal potential. * + * phi = -0.5 * vrot^2 * ln(r^2 + epsilon^2) + * + * @param time The current time (unused here). * @param potential The #external_potential used in the run. * @param phys_const Physical constants in internal units. * @param g Pointer to the particle data. 
*/ __attribute__((always_inline)) INLINE static float external_gravity_get_potential_energy( - const struct external_potential* potential, + double time, const struct external_potential* potential, const struct phys_const* const phys_const, const struct gpart* g) { const float dx = g->x[0] - potential->x; const float dy = g->x[1] - potential->y; const float dz = g->x[2] - potential->z; - return 0.5f * potential->vrot * potential->vrot * - logf(dx * dx + dy * dy * dz * dz); + return -0.5f * potential->vrot * potential->vrot * + logf(dx * dx + dy * dy + dz * dz + potential->epsilon2); } /** @@ -164,9 +179,11 @@ static INLINE void potential_init_backend( parser_get_param_double(parameter_file, "IsothermalPotential:vrot"); potential->timestep_mult = parser_get_param_float( parameter_file, "IsothermalPotential:timestep_mult"); - + const double epsilon = + parser_get_param_double(parameter_file, "IsothermalPotential:epsilon"); potential->vrot2_over_G = potential->vrot * potential->vrot / phys_const->const_newton_G; + potential->epsilon2 = epsilon * epsilon; } /** @@ -180,9 +197,9 @@ static INLINE void potential_print_backend( message( "External potential is 'Isothermal' with properties are (x,y,z) = (%e, " "%e, %e), vrot = %e " - "timestep multiplier = %e.", + "timestep multiplier = %e, epsilon = %e", potential->x, potential->y, potential->z, potential->vrot, - potential->timestep_mult); + potential->timestep_mult, sqrtf(potential->epsilon2)); } -#endif /* SWIFT_POTENTIAL_ISOTHERMAL_H */ +#endif /* SWIFT_ISOTHERMAL_H */ diff --git a/src/potential/none/potential.h b/src/potential/none/potential.h index 8248b64678e28e06b9df4aab375cde0b5ed5281b..cb6254b4a23b336637cb3c9f36a2dd01170eabad 100644 --- a/src/potential/none/potential.h +++ b/src/potential/none/potential.h @@ -71,13 +71,16 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration( /** * @brief Computes the gravitational potential energy due to nothing. * + * We return 0. 
+ * + * @param time The current time. * @param potential The #external_potential used in the run. * @param phys_const Physical constants in internal units. * @param g Pointer to the particle data. */ __attribute__((always_inline)) INLINE static float external_gravity_get_potential_energy( - const struct external_potential* potential, + double time, const struct external_potential* potential, const struct phys_const* const phys_const, const struct gpart* g) { return 0.f; diff --git a/src/potential/point_mass/potential.h b/src/potential/point_mass/potential.h index 5f3d1c27b85c4f1353481e6351fba47aff62d66f..81b51ab2009ad599d0201708d78c8c64cac991dc 100644 --- a/src/potential/point_mass/potential.h +++ b/src/potential/point_mass/potential.h @@ -120,13 +120,14 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration( * @brief Computes the gravitational potential energy of a particle in a point * mass potential. * + * @param time The current time (unused here). * @param potential The #external_potential used in the run. * @param phys_const Physical constants in internal units. * @param g Pointer to the particle data. */ __attribute__((always_inline)) INLINE static float external_gravity_get_potential_energy( - const struct external_potential* potential, + double time, const struct external_potential* potential, const struct phys_const* const phys_const, const struct gpart* g) { const float dx = g->x[0] - potential->x; diff --git a/src/potential/softened_isothermal/potential.h b/src/potential/softened_isothermal/potential.h deleted file mode 100644 index 24e59b12a5745728fb1189fbbfbc7cc3c06fbfa6..0000000000000000000000000000000000000000 --- a/src/potential/softened_isothermal/potential.h +++ /dev/null @@ -1,196 +0,0 @@ -/******************************************************************************* - * This file is part of SWIFT. 
- * Copyright (c) 2016 Stefan Arridge (stefan.arridge@durham.ac.uk) - * Matthieu Schaller (matthieu.schaller@durham.ac.uk) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - ******************************************************************************/ -#ifndef SWIFT_POTENTIAL_SOFTENED_ISOTHERMAL_H -#define SWIFT_POTENTIAL_SOFTENED_ISOTHERMAL_H - -/* Config parameters. */ -#include "../config.h" - -/* Some standard headers. */ -#include <math.h> - -/* Local includes. */ -#include "error.h" -#include "parser.h" -#include "part.h" -#include "physical_constants.h" -#include "space.h" -#include "units.h" - -/** - * @brief External Potential Properties - Softened Isothermal sphere case - */ -struct external_potential { - - /*! Position of the centre of potential */ - double x, y, z; - - /*! Rotation velocity */ - double vrot; - - /*! Square of vrot, the circular velocity which defines the isothermal - * potential */ - double vrot2_over_G; - - /*! Square of the softening length. Acceleration tends to zero within this - * distance from the origin */ - double epsilon2; - - /*! Time-step condition pre-factor */ - double timestep_mult; -}; - -/** - * @brief Computes the time-step due to the acceleration from an isothermal - * potential. - * - * @param time The current time. - * @param potential The #external_potential used in the run. 
- * @param phys_const The physical constants in internal units. - * @param g Pointer to the g-particle data. - */ -__attribute__((always_inline)) INLINE static float external_gravity_timestep( - double time, const struct external_potential* restrict potential, - const struct phys_const* restrict phys_const, - const struct gpart* restrict g) { - - const float dx = g->x[0] - potential->x; - const float dy = g->x[1] - potential->y; - const float dz = g->x[2] - potential->z; - - const float r2_plus_epsilon2_inv = - 1.f / (dx * dx + dy * dy + dz * dz + potential->epsilon2); - const float drdv = - dx * (g->v_full[0]) + dy * (g->v_full[1]) + dz * (g->v_full[2]); - const double vrot = potential->vrot; - - const float dota_x = vrot * vrot * r2_plus_epsilon2_inv * - (g->v_full[0] - 2.f * drdv * dx * r2_plus_epsilon2_inv); - const float dota_y = vrot * vrot * r2_plus_epsilon2_inv * - (g->v_full[1] - 2.f * drdv * dy * r2_plus_epsilon2_inv); - const float dota_z = vrot * vrot * r2_plus_epsilon2_inv * - (g->v_full[2] - 2.f * drdv * dz * r2_plus_epsilon2_inv); - const float dota_2 = dota_x * dota_x + dota_y * dota_y + dota_z * dota_z; - const float a_2 = g->a_grav[0] * g->a_grav[0] + g->a_grav[1] * g->a_grav[1] + - g->a_grav[2] * g->a_grav[2]; - - return potential->timestep_mult * sqrtf(a_2 / dota_2); -} - -/** - * @brief Computes the gravitational acceleration from an isothermal potential. - * - * Note that the accelerations are multiplied by Newton's G constant - * later on. - * - * a = v_rot^2 * (x,y,z) / (r^2 + epsilon^2) - * @param time The current time. - * @param potential The #external_potential used in the run. - * @param phys_const The physical constants in internal units. - * @param g Pointer to the g-particle data. 
- */ -__attribute__((always_inline)) INLINE static void external_gravity_acceleration( - double time, const struct external_potential* potential, - const struct phys_const* const phys_const, struct gpart* g) { - - const float dx = g->x[0] - potential->x; - const float dy = g->x[1] - potential->y; - const float dz = g->x[2] - potential->z; - const float r2_plus_epsilon2_inv = - 1.f / (dx * dx + dy * dy + dz * dz + potential->epsilon2); - - const double term = -potential->vrot2_over_G * r2_plus_epsilon2_inv; - - g->a_grav[0] += term * dx; - g->a_grav[1] += term * dy; - g->a_grav[2] += term * dz; -} - -/** - * @brief Computes the gravitational potential energy of a particle in an - * isothermal potential. - * - * @param potential The #external_potential used in the run. - * @param phys_const Physical constants in internal units. - * @param g Pointer to the particle data. - */ -__attribute__((always_inline)) INLINE static float -external_gravity_get_potential_energy( - const struct external_potential* potential, - const struct phys_const* const phys_const, const struct gpart* g) { - - const float dx = g->x[0] - potential->x; - const float dy = g->x[1] - potential->y; - const float dz = g->x[2] - potential->z; - - return 0.5f * potential->vrot * potential->vrot * - logf(dx * dx + dy * dy * dz * dz + potential->epsilon2); -} -/** - * @brief Initialises the external potential properties in the internal system - * of units. - * - * @param parameter_file The parsed parameter file - * @param phys_const Physical constants in internal units - * @param us The current internal system of units - * @param potential The external potential properties to initialize - */ -static INLINE void potential_init_backend( - const struct swift_params* parameter_file, - const struct phys_const* phys_const, const struct UnitSystem* us, - const struct space* s, struct external_potential* potential) { - - potential->x = s->dim[0] / 2. 
+ - parser_get_param_double( - parameter_file, "SoftenedIsothermalPotential:position_x"); - potential->y = s->dim[1] / 2. + - parser_get_param_double( - parameter_file, "SoftenedIsothermalPotential:position_y"); - potential->z = s->dim[2] / 2. + - parser_get_param_double( - parameter_file, "SoftenedIsothermalPotential:position_z"); - potential->vrot = parser_get_param_double(parameter_file, - "SoftenedIsothermalPotential:vrot"); - potential->timestep_mult = parser_get_param_float( - parameter_file, "SoftenedIsothermalPotential:timestep_mult"); - const double epsilon = parser_get_param_float( - parameter_file, "SoftenedIsothermalPotential:epsilon"); - potential->vrot2_over_G = - potential->vrot * potential->vrot / phys_const->const_newton_G; - potential->epsilon2 = epsilon * epsilon; -} - -/** - * @brief Prints the properties of the external potential to stdout. - * - * @param potential The external potential properties. - */ -static INLINE void potential_print_backend( - const struct external_potential* potential) { - - message( - "External potential is 'Isothermal' with properties are (x,y,z) = (%e, " - "%e, %e), vrot = %e " - "timestep multiplier = %e, epsilon = %e", - potential->x, potential->y, potential->z, potential->vrot, - potential->timestep_mult, sqrtf(potential->epsilon2)); -} - -#endif /* SWIFT_POTENTIAL_ISOTHERMAL_H */ diff --git a/src/proxy.c b/src/proxy.c index efe3a3eec108d44d5b9bf8b4718dc025464f8762..dd6faa3055cb17a0a3050d9e62d107d7489a4326 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -45,7 +45,6 @@ * * @param p The #proxy. */ - void proxy_cells_exch1(struct proxy *p) { #ifdef WITH_MPI @@ -65,8 +64,8 @@ void proxy_cells_exch1(struct proxy *p) { /* Allocate and fill the pcell buffer. 
*/ if (p->pcells_out != NULL) free(p->pcells_out); - if ((p->pcells_out = malloc(sizeof(struct pcell) * p->size_pcells_out)) == - NULL) + if (posix_memalign((void **)&p->pcells_out, SWIFT_STRUCT_ALIGNMENT, + sizeof(struct pcell) * p->size_pcells_out) != 0) error("Failed to allocate pcell_out buffer."); for (int ind = 0, k = 0; k < p->nr_cells_out; k++) { memcpy(&p->pcells_out[ind], p->cells_out[k]->pcell, @@ -102,8 +101,8 @@ void proxy_cells_exch2(struct proxy *p) { /* Re-allocate the pcell_in buffer. */ if (p->pcells_in != NULL) free(p->pcells_in); - if ((p->pcells_in = (struct pcell *)malloc(sizeof(struct pcell) * - p->size_pcells_in)) == NULL) + if (posix_memalign((void **)&p->pcells_in, SWIFT_STRUCT_ALIGNMENT, + sizeof(struct pcell) * p->size_pcells_in) != 0) error("Failed to allocate pcell_in buffer."); /* Receive the particle buffers. */ @@ -126,7 +125,6 @@ void proxy_cells_exch2(struct proxy *p) { * @param p The #proxy. * @param c The #cell. */ - void proxy_addcell_in(struct proxy *p, struct cell *c) { /* Check if the cell is already registered with the proxy. */ @@ -155,7 +153,6 @@ void proxy_addcell_in(struct proxy *p, struct cell *c) { * @param p The #proxy. * @param c The #cell. */ - void proxy_addcell_out(struct proxy *p, struct cell *c) { /* Check if the cell is already registered with the proxy. */ @@ -183,7 +180,6 @@ void proxy_addcell_out(struct proxy *p, struct cell *c) { * * @param p The #proxy. */ - void proxy_parts_exch1(struct proxy *p) { #ifdef WITH_MPI @@ -191,7 +187,8 @@ void proxy_parts_exch1(struct proxy *p) { /* Send the number of particles. 
*/ p->buff_out[0] = p->nr_parts_out; p->buff_out[1] = p->nr_gparts_out; - if (MPI_Isend(p->buff_out, 2, MPI_INT, p->nodeID, + p->buff_out[2] = p->nr_sparts_out; + if (MPI_Isend(p->buff_out, 3, MPI_INT, p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD, &p->req_parts_count_out) != MPI_SUCCESS) error("Failed to isend nr of parts."); @@ -218,13 +215,22 @@ void proxy_parts_exch1(struct proxy *p) { if (MPI_Isend(p->gparts_out, p->nr_gparts_out, gpart_mpi_type, p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_gparts, MPI_COMM_WORLD, &p->req_gparts_out) != MPI_SUCCESS) - error("Failed to isend part data."); + error("Failed to isend gpart data."); + // message( "isent gpart data (%i) to node %i." , p->nr_parts_out , + // p->nodeID ); fflush(stdout); + } + + if (p->nr_sparts_out > 0) { + if (MPI_Isend(p->sparts_out, p->nr_sparts_out, spart_mpi_type, p->nodeID, + p->mynodeID * proxy_tag_shift + proxy_tag_sparts, + MPI_COMM_WORLD, &p->req_sparts_out) != MPI_SUCCESS) + error("Failed to isend spart data."); // message( "isent gpart data (%i) to node %i." , p->nr_parts_out , // p->nodeID ); fflush(stdout); } /* Receive the number of particles. */ - if (MPI_Irecv(p->buff_in, 2, MPI_INT, p->nodeID, + if (MPI_Irecv(p->buff_in, 3, MPI_INT, p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD, &p->req_parts_count_in) != MPI_SUCCESS) error("Failed to irecv nr of parts."); @@ -241,8 +247,9 @@ void proxy_parts_exch2(struct proxy *p) { /* Unpack the incomming parts counts. */ p->nr_parts_in = p->buff_in[0]; p->nr_gparts_in = p->buff_in[1]; + p->nr_sparts_in = p->buff_in[2]; - /* Is there enough space in the buffer? */ + /* Is there enough space in the buffers? 
*/ if (p->nr_parts_in > p->size_parts_in) { do { p->size_parts_in *= proxy_buffgrow; @@ -264,6 +271,15 @@ void proxy_parts_exch2(struct proxy *p) { p->size_gparts_in)) == NULL) error("Failed to re-allocate gparts_in buffers."); } + if (p->nr_sparts_in > p->size_sparts_in) { + do { + p->size_sparts_in *= proxy_buffgrow; + } while (p->nr_sparts_in > p->size_sparts_in); + free(p->sparts_in); + if ((p->sparts_in = (struct spart *)malloc(sizeof(struct spart) * + p->size_sparts_in)) == NULL) + error("Failed to re-allocate sparts_in buffers."); + } /* Receive the particle buffers. */ if (p->nr_parts_in > 0) { @@ -285,6 +301,14 @@ void proxy_parts_exch2(struct proxy *p) { // message( "irecv gpart data (%i) from node %i." , p->nr_gparts_in , // p->nodeID ); fflush(stdout); } + if (p->nr_sparts_in > 0) { + if (MPI_Irecv(p->sparts_in, p->nr_sparts_in, spart_mpi_type, p->nodeID, + p->nodeID * proxy_tag_shift + proxy_tag_sparts, + MPI_COMM_WORLD, &p->req_sparts_in) != MPI_SUCCESS) + error("Failed to irecv spart data."); + // message( "irecv gpart data (%i) from node %i." , p->nr_gparts_in , + // p->nodeID ); fflush(stdout); + } #else error("SWIFT was not compiled with MPI support."); @@ -299,7 +323,6 @@ void proxy_parts_exch2(struct proxy *p) { * @param xparts Pointer to an array of #xpart to send. * @param N The number of parts. */ - void proxy_parts_load(struct proxy *p, const struct part *parts, const struct xpart *xparts, int N) { @@ -308,8 +331,8 @@ void proxy_parts_load(struct proxy *p, const struct part *parts, do { p->size_parts_out *= proxy_buffgrow; } while (p->nr_parts_out + N > p->size_parts_out); - struct part *tp; - struct xpart *txp; + struct part *tp = NULL; + struct xpart *txp = NULL; if ((tp = (struct part *)malloc(sizeof(struct part) * p->size_parts_out)) == NULL || (txp = (struct xpart *)malloc(sizeof(struct xpart) * @@ -332,13 +355,12 @@ void proxy_parts_load(struct proxy *p, const struct part *parts, } /** - * @brief Load parts onto a proxy for exchange. 
+ * @brief Load gparts onto a proxy for exchange. * * @param p The #proxy. * @param gparts Pointer to an array of #gpart to send. - * @param N The number of parts. + * @param N The number of gparts. */ - void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N) { /* Is there enough space in the buffer? */ @@ -362,6 +384,36 @@ void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N) { p->nr_gparts_out += N; } +/** + * @brief Load sparts onto a proxy for exchange. + * + * @param p The #proxy. + * @param sparts Pointer to an array of #spart to send. + * @param N The number of sparts. + */ +void proxy_sparts_load(struct proxy *p, const struct spart *sparts, int N) { + + /* Is there enough space in the buffer? */ + if (p->nr_sparts_out + N > p->size_sparts_out) { + do { + p->size_sparts_out *= proxy_buffgrow; + } while (p->nr_sparts_out + N > p->size_sparts_out); + struct spart *tp; + if ((tp = (struct spart *)malloc(sizeof(struct spart) * + p->size_sparts_out)) == NULL) + error("Failed to re-allocate sparts_out buffers."); + memcpy(tp, p->sparts_out, sizeof(struct spart) * p->nr_sparts_out); + free(p->sparts_out); + p->sparts_out = tp; + } + + /* Copy the sparts data to the buffer. */ + memcpy(&p->sparts_out[p->nr_sparts_out], sparts, sizeof(struct spart) * N); + + /* Increase the counter. */ + p->nr_sparts_out += N; +} + /** * @brief Initialize the given proxy. * @@ -369,7 +421,6 @@ void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N) { * @param mynodeID The node this proxy is running on. * @param nodeID The node with which this proxy will communicate. */ - void proxy_init(struct proxy *p, int mynodeID, int nodeID) { /* Set the nodeID. */ @@ -427,4 +478,20 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) { error("Failed to allocate gparts_out buffers."); } p->nr_gparts_out = 0; + + /* Allocate the spart send and receive buffers, if needed. 
*/ + if (p->sparts_in == NULL) { + p->size_sparts_in = proxy_buffinit; + if ((p->sparts_in = (struct spart *)malloc(sizeof(struct spart) * + p->size_sparts_in)) == NULL) + error("Failed to allocate sparts_in buffers."); + } + p->nr_sparts_in = 0; + if (p->sparts_out == NULL) { + p->size_sparts_out = proxy_buffinit; + if ((p->sparts_out = (struct spart *)malloc(sizeof(struct spart) * + p->size_sparts_out)) == NULL) + error("Failed to allocate sparts_out buffers."); + } + p->nr_sparts_out = 0; } diff --git a/src/proxy.h b/src/proxy.h index 5a747187e05a78a109ce4523ebb3c9d5fe2ad717..a245077193878bb669b474944965badceffcee80 100644 --- a/src/proxy.h +++ b/src/proxy.h @@ -33,7 +33,8 @@ #define proxy_tag_parts 1 #define proxy_tag_xparts 2 #define proxy_tag_gparts 3 -#define proxy_tag_cells 4 +#define proxy_tag_sparts 4 +#define proxy_tag_cells 5 /* Data structure for the proxy. */ struct proxy { @@ -55,13 +56,16 @@ struct proxy { struct part *parts_in, *parts_out; struct xpart *xparts_in, *xparts_out; struct gpart *gparts_in, *gparts_out; + struct spart *sparts_in, *sparts_out; int size_parts_in, size_parts_out; int nr_parts_in, nr_parts_out; int size_gparts_in, size_gparts_out; int nr_gparts_in, nr_gparts_out; + int size_sparts_in, size_sparts_out; + int nr_sparts_in, nr_sparts_out; /* Buffer to hold the incomming/outgoing particle counts. */ - int buff_out[2], buff_in[2]; + int buff_out[3], buff_in[3]; /* MPI request handles. 
*/ #ifdef WITH_MPI @@ -69,6 +73,7 @@ struct proxy { MPI_Request req_parts_out, req_parts_in; MPI_Request req_xparts_out, req_xparts_in; MPI_Request req_gparts_out, req_gparts_in; + MPI_Request req_sparts_out, req_sparts_in; MPI_Request req_cells_count_out, req_cells_count_in; MPI_Request req_cells_out, req_cells_in; #endif @@ -79,6 +84,7 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID); void proxy_parts_load(struct proxy *p, const struct part *parts, const struct xpart *xparts, int N); void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N); +void proxy_sparts_load(struct proxy *p, const struct spart *sparts, int N); void proxy_parts_exch1(struct proxy *p); void proxy_parts_exch2(struct proxy *p); void proxy_addcell_in(struct proxy *p, struct cell *c); diff --git a/src/queue.h b/src/queue.h index c0a2fb1da6e6e3cbea813a0ef53841084ab0f933..951a3e5a056d7ad0c3935f98341a0d93c805e3ad 100644 --- a/src/queue.h +++ b/src/queue.h @@ -30,6 +30,7 @@ #define queue_sizegrow 2 #define queue_search_window 8 #define queue_incoming_size 1024 +#define queue_struct_align 64 /* Counters. */ enum { @@ -57,7 +58,7 @@ struct queue { int *tid_incoming; volatile unsigned int first_incoming, last_incoming, count_incoming; -} __attribute__((aligned(64))); +} __attribute__((aligned(queue_struct_align))); /* Function prototypes. 
*/ struct task *queue_gettask(struct queue *q, const struct task *prev, diff --git a/src/runner.c b/src/runner.c index 2d6da4e4aedc9c40d1dade243e605e9aeda86dbe..64b03732b4f12319ff7713c82cba4546b3c48510 100644 --- a/src/runner.c +++ b/src/runner.c @@ -53,9 +53,11 @@ #include "hydro_properties.h" #include "kick.h" #include "minmax.h" +#include "runner_doiact_vec.h" #include "scheduler.h" #include "sourceterms.h" #include "space.h" +#include "stars.h" #include "task.h" #include "timers.h" #include "timestep.h" @@ -205,14 +207,17 @@ void runner_do_cooling(struct runner *r, struct cell *c, int timer) { struct part *restrict parts = c->parts; struct xpart *restrict xparts = c->xparts; const int count = c->count; - const int ti_current = r->e->ti_current; - const struct cooling_function_data *cooling_func = r->e->cooling_func; - const struct phys_const *constants = r->e->physical_constants; - const struct UnitSystem *us = r->e->internalUnits; - const double timeBase = r->e->timeBase; + const struct engine *e = r->e; + const struct cooling_function_data *cooling_func = e->cooling_func; + const struct phys_const *constants = e->physical_constants; + const struct UnitSystem *us = e->internalUnits; + const double timeBase = e->timeBase; TIMER_TIC; + /* Anything to do here? */ + if (!cell_is_active(c, e)) return; + /* Recurse? */ if (c->split) { for (int k = 0; k < 8; k++) @@ -226,11 +231,10 @@ void runner_do_cooling(struct runner *r, struct cell *c, int timer) { struct part *restrict p = &parts[i]; struct xpart *restrict xp = &xparts[i]; - /* Kick has already updated ti_end, so need to check ti_begin */ - if (p->ti_begin == ti_current) { - - const double dt = (p->ti_end - p->ti_begin) * timeBase; + if (part_is_active(p, e)) { + /* Let's cool ! 
*/ + const double dt = get_timestep(p->time_bin, timeBase); cooling_cool_part(constants, us, cooling_func, p, xp, dt); } } @@ -589,8 +593,6 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { struct xpart *restrict xparts = c->xparts; int redo, count = c->count; const struct engine *e = r->e; - const int ti_current = e->ti_current; - const double timeBase = e->timeBase; const float target_wcount = e->hydro_properties->target_neighbours; const float max_wcount = target_wcount + e->hydro_properties->delta_neighbours; @@ -672,7 +674,7 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { /* As of here, particle force variables will be set. */ /* Compute variables required for the force loop */ - hydro_prepare_force(p, xp, ti_current, timeBase); + hydro_prepare_force(p, xp); /* The particle force values are now set. Do _NOT_ try to read any particle density variables! */ @@ -733,8 +735,16 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { } } +#ifdef SWIFT_DEBUG_CHECKS + if (count) { + message("Smoothing length failed to converge on %i particles.", count); + + error("Aborting...."); + } +#else if (count) message("Smoothing length failed to converge on %i particles.", count); +#endif /* Be clean */ free(pid); @@ -744,15 +754,15 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { } /** - * @brief Drift particles and g-particles in a cell forward in time, - * unskipping any tasks associated with active cells. + * @brief Unskip any tasks associated with active cells. * * @param c The cell. * @param e The engine. - * @param drift whether to actually drift the particles, will not be - * necessary for non-local cells. */ -static void runner_do_drift(struct cell *c, struct engine *e, int drift) { +static void runner_do_unskip(struct cell *c, struct engine *e) { + + /* Ignore empty cells. */ + if (c->count == 0 && c->gcount == 0) return; /* Unskip any active tasks. 
*/ if (cell_is_active(c, e)) { @@ -760,239 +770,463 @@ static void runner_do_drift(struct cell *c, struct engine *e, int drift) { if (forcerebuild) atomic_inc(&e->forcerebuild); } - /* Do we really need to drift? */ - if (drift) { - if (!e->drift_all && !cell_is_drift_needed(c, e)) return; - } else { - - /* Not drifting, but may still need to recurse for task un-skipping. */ - if (c->split) { - for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { - struct cell *cp = c->progeny[k]; - runner_do_drift(cp, e, 0); - } + /* Recurse */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *cp = c->progeny[k]; + runner_do_unskip(cp, e); } } - return; } +} + +/** + * @brief Mapper function to unskip active tasks. + * + * @param map_data An array of #cell%s. + * @param num_elements Chunk size. + * @param extra_data Pointer to an #engine. + */ +void runner_do_unskip_mapper(void *map_data, int num_elements, + void *extra_data) { + + struct engine *e = (struct engine *)extra_data; + struct cell *cells = (struct cell *)map_data; + + for (int ind = 0; ind < num_elements; ind++) { + struct cell *c = &cells[ind]; + if (c != NULL) runner_do_unskip(c, e); + } +} +/** + * @brief Drift particles in real space. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_drift(struct runner *r, struct cell *c, int timer) { - /* Now, we can drift */ + TIMER_TIC; - /* Get some information first */ - const double timeBase = e->timeBase; - const int ti_old = c->ti_old; - const int ti_current = e->ti_current; - struct part *const parts = c->parts; - struct xpart *const xparts = c->xparts; - struct gpart *const gparts = c->gparts; + cell_drift(c, r->e); - /* Drift from the last time the cell was drifted to the current time */ - const double dt = (ti_current - ti_old) * timeBase; - float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f; + if (timer) TIMER_TOC(timer_drift); +} - /* No children? 
*/ - if (!c->split) { +/** + * @brief Mapper function to drift ALL particle and g-particles forward in time. + * + * @param map_data An array of #cell%s. + * @param num_elements Chunk size. + * @param extra_data Pointer to an #engine. + */ +void runner_do_drift_mapper(void *map_data, int num_elements, + void *extra_data) { - /* Check that we are actually going to move forward. */ - if (ti_current > ti_old) { + struct engine *e = (struct engine *)extra_data; + struct cell *cells = (struct cell *)map_data; - /* Loop over all the g-particles in the cell */ - const size_t nr_gparts = c->gcount; - for (size_t k = 0; k < nr_gparts; k++) { + for (int ind = 0; ind < num_elements; ind++) { + struct cell *c = &cells[ind]; + if (c != NULL && c->nodeID == e->nodeID) cell_drift(c, e); + } +} - /* Get a handle on the gpart. */ - struct gpart *const gp = &gparts[k]; +/** + * @brief Perform the first half-kick on all the active particles in a cell. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_kick1(struct runner *r, struct cell *c, int timer) { - /* Drift... */ - drift_gpart(gp, dt, timeBase, ti_old, ti_current); + const struct engine *e = r->e; + struct part *restrict parts = c->parts; + struct xpart *restrict xparts = c->xparts; + struct gpart *restrict gparts = c->gparts; + struct spart *restrict sparts = c->sparts; + const int count = c->count; + const int gcount = c->gcount; + const int scount = c->scount; + const integertime_t ti_current = e->ti_current; + const double timeBase = e->timeBase; - /* Compute (square of) motion since last cell construction */ - const float dx2 = gp->x_diff[0] * gp->x_diff[0] + - gp->x_diff[1] * gp->x_diff[1] + - gp->x_diff[2] * gp->x_diff[2]; - dx2_max = (dx2_max > dx2) ? dx2_max : dx2; - } + TIMER_TIC; + + /* Anything to do here? 
*/ + if (!cell_is_active(c, e)) return; - /* Loop over all the particles in the cell */ - const size_t nr_parts = c->count; - for (size_t k = 0; k < nr_parts; k++) { + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_kick1(r, c->progeny[k], 0); + } else { - /* Get a handle on the part. */ - struct part *const p = &parts[k]; - struct xpart *const xp = &xparts[k]; + /* Loop over the parts in this cell. */ + for (int k = 0; k < count; k++) { - /* Drift... */ - drift_part(p, xp, dt, timeBase, ti_old, ti_current); + /* Get a handle on the part. */ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; - /* Compute (square of) motion since last cell construction */ - const float dx2 = xp->x_diff[0] * xp->x_diff[0] + - xp->x_diff[1] * xp->x_diff[1] + - xp->x_diff[2] * xp->x_diff[2]; - dx2_max = (dx2_max > dx2) ? dx2_max : dx2; + /* If particle needs to be kicked */ + if (part_is_active(p, e)) { - /* Maximal smoothing length */ - h_max = (h_max > p->h) ? h_max : p->h; - } + const integertime_t ti_step = get_integer_timestep(p->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current, p->time_bin); - /* Now, get the maximal particle motion from its square */ - dx_max = sqrtf(dx2_max); +#ifdef SWIFT_DEBUG_CHECKS + const integertime_t ti_end = + get_integer_time_end(ti_current, p->time_bin); - } /* Check that we are actually going to move forward. */ + if (ti_end - ti_begin != ti_step) + error( + "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, " + "ti_step=%lld time_bin=%d ti_current=%lld", + ti_end, ti_begin, ti_step, p->time_bin, ti_current); +#endif - else { - /* ti_old == ti_current, just keep the current cell values. */ - h_max = c->h_max; - dx_max = c->dx_max; + /* do the kick */ + kick_part(p, xp, ti_begin, ti_begin + ti_step / 2, timeBase); + } } - } - /* Otherwise, aggregate data from children. */ - else { + /* Loop over the gparts in this cell. 
*/ + for (int k = 0; k < gcount; k++) { - /* Loop over the progeny and collect their data. */ - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) { - struct cell *cp = c->progeny[k]; + /* Get a handle on the part. */ + struct gpart *restrict gp = &gparts[k]; - /* Recurse. */ - runner_do_drift(cp, e, drift); - dx_max = max(dx_max, cp->dx_max); - h_max = max(h_max, cp->h_max); + /* If the g-particle has no counterpart and needs to be kicked */ + if (gp->type == swift_type_dark_matter && gpart_is_active(gp, e)) { + + const integertime_t ti_step = get_integer_timestep(gp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current, gp->time_bin); + +#ifdef SWIFT_DEBUG_CHECKS + const integertime_t ti_end = + get_integer_time_end(ti_current, gp->time_bin); + + if (ti_end - ti_begin != ti_step) error("Particle in wrong time-bin"); +#endif + + /* do the kick */ + kick_gpart(gp, ti_begin, ti_begin + ti_step / 2, timeBase); } - } + } - /* Store the values */ - c->h_max = h_max; - c->dx_max = dx_max; + /* Loop over the star particles in this cell. */ + for (int k = 0; k < scount; k++) { - /* Update the time of the last drift */ - c->ti_old = ti_current; -} + /* Get a handle on the s-part. */ + struct spart *restrict sp = &sparts[k]; -/** - * @brief Mapper function to drift particles and g-particles forward in time. - * - * @param map_data An array of #cell%s. - * @param num_elements Chunk size. - * @param extra_data Pointer to an #engine. 
- */ + /* If particle needs to be kicked */ + if (spart_is_active(sp, e)) { -void runner_do_drift_mapper(void *map_data, int num_elements, - void *extra_data) { + const integertime_t ti_step = get_integer_timestep(sp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current, sp->time_bin); - struct engine *e = (struct engine *)extra_data; - struct cell *cells = (struct cell *)map_data; +#ifdef SWIFT_DEBUG_CHECKS + const integertime_t ti_end = + get_integer_time_end(ti_current, sp->time_bin); - for (int ind = 0; ind < num_elements; ind++) { - struct cell *c = &cells[ind]; -#ifdef WITH_MPI - if (c != NULL) runner_do_drift(c, e, (c->nodeID == e->nodeID)); -#else - if (c != NULL) runner_do_drift(c, e, 1); + if (ti_end - ti_begin != ti_step) error("Particle in wrong time-bin"); #endif + + /* do the kick */ + kick_spart(sp, ti_begin, ti_begin + ti_step / 2, timeBase); + } + } } + + if (timer) TIMER_TOC(timer_kick1); } /** - * @brief Kick particles in momentum space and collect statistics (floating - * time-step case) + * @brief Perform the second half-kick on all the active particles in a cell. + * + * Also prepares particles to be drifted. * * @param r The runner thread. * @param c The cell. * @param timer Are we timing this ? */ -void runner_do_kick(struct runner *r, struct cell *c, int timer) { +void runner_do_kick2(struct runner *r, struct cell *c, int timer) { const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; const double timeBase = e->timeBase; const int count = c->count; const int gcount = c->gcount; + const int scount = c->scount; struct part *restrict parts = c->parts; struct xpart *restrict xparts = c->xparts; struct gpart *restrict gparts = c->gparts; - const double const_G = e->physical_constants->const_newton_G; + struct spart *restrict sparts = c->sparts; TIMER_TIC; /* Anything to do here? 
*/ - if (!cell_is_active(c, e)) { - c->updated = 0; - c->g_updated = 0; - return; - } + if (!cell_is_active(c, e)) return; - int updated = 0, g_updated = 0; - int ti_end_min = max_nr_timesteps, ti_end_max = 0; + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_kick2(r, c->progeny[k], 0); + } else { - /* No children? */ - if (!c->split) { + /* Loop over the particles in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part. */ + struct part *restrict p = &parts[k]; + struct xpart *restrict xp = &xparts[k]; + + /* If particle needs to be kicked */ + if (part_is_active(p, e)) { + + const integertime_t ti_step = get_integer_timestep(p->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current, p->time_bin); + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_begin + ti_step != ti_current) + error( + "Particle in wrong time-bin, ti_begin=%lld, ti_step=%lld " + "time_bin=%d ti_current=%lld", + ti_begin, ti_step, p->time_bin, ti_current); +#endif - /* Loop over the g-particles and kick the active ones. */ + /* Finish the time-step with a second half-kick */ + kick_part(p, xp, ti_begin + ti_step / 2, ti_begin + ti_step, timeBase); + + /* Prepare the values to be drifted */ + hydro_reset_predicted_values(p, xp); + } + } + + /* Loop over the g-particles in this cell. */ for (int k = 0; k < gcount; k++) { /* Get a handle on the part. 
*/ struct gpart *restrict gp = &gparts[k]; /* If the g-particle has no counterpart and needs to be kicked */ - if (gp->id_or_neg_offset > 0) { + if (gp->type == swift_type_dark_matter && gpart_is_active(gp, e)) { - if (gpart_is_active(gp, e)) { + const integertime_t ti_step = get_integer_timestep(gp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current, gp->time_bin); - /* First, finish the force calculation */ - gravity_end_force(gp, const_G); +#ifdef SWIFT_DEBUG_CHECKS + if (ti_begin + ti_step != ti_current) + error("Particle in wrong time-bin"); +#endif - /* Compute the next timestep */ - const int new_dti = get_gpart_timestep(gp, e); + /* Finish the time-step with a second half-kick */ + kick_gpart(gp, ti_begin + ti_step / 2, ti_begin + ti_step, timeBase); + } + } - /* Now we have a time step, proceed with the kick */ - kick_gpart(gp, new_dti, timeBase); + /* Loop over the particles in this cell. */ + for (int k = 0; k < scount; k++) { - /* Number of updated g-particles */ - g_updated++; - } + /* Get a handle on the part. */ + struct spart *restrict sp = &sparts[k]; - /* Minimal time for next end of time-step */ - ti_end_min = min(gp->ti_end, ti_end_min); - ti_end_max = max(gp->ti_end, ti_end_max); + /* If particle needs to be kicked */ + if (spart_is_active(sp, e)) { + + const integertime_t ti_step = get_integer_timestep(sp->time_bin); + const integertime_t ti_begin = + get_integer_time_begin(ti_current, sp->time_bin); + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_begin + ti_step != ti_current) + error("Particle in wrong time-bin"); +#endif + + /* Finish the time-step with a second half-kick */ + kick_spart(sp, ti_begin + ti_step / 2, ti_begin + ti_step, timeBase); + + /* Prepare the values to be drifted */ + star_reset_predicted_values(sp); } } + } + if (timer) TIMER_TOC(timer_kick2); +} - /* Now do the hydro ones... */ +/** + * @brief Computes the next time-step of all active particles in this cell + * and update the cell's statistics. 
+ * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_timestep(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + const integertime_t ti_current = e->ti_current; + const int count = c->count; + const int gcount = c->gcount; + const int scount = c->scount; + struct part *restrict parts = c->parts; + struct xpart *restrict xparts = c->xparts; + struct gpart *restrict gparts = c->gparts; + struct spart *restrict sparts = c->sparts; + + TIMER_TIC; + + int updated = 0, g_updated = 0, s_updated = 0; + integertime_t ti_end_min = max_nr_timesteps, ti_end_max = 0; + + /* No children? */ + if (!c->split) { - /* Loop over the particles and kick the active ones. */ + /* Loop over the particles in this cell. */ for (int k = 0; k < count; k++) { /* Get a handle on the part. */ struct part *restrict p = &parts[k]; struct xpart *restrict xp = &xparts[k]; - /* If particle needs to be kicked */ + /* If particle needs updating */ if (part_is_active(p, e)) { - /* First, finish the force loop */ - hydro_end_force(p); - if (p->gpart != NULL) gravity_end_force(p->gpart, const_G); +#ifdef SWIFT_DEBUG_CHECKS + /* Current end of time-step */ + const integertime_t ti_end = + get_integer_time_end(ti_current, p->time_bin); - /* Compute the next timestep (hydro condition) */ - const int new_dti = get_part_timestep(p, xp, e); + if (ti_end != ti_current) + error("Computing time-step of rogue particle."); +#endif - /* Now we have a time step, proceed with the kick */ - kick_part(p, xp, new_dti, timeBase); + /* Get new time-step */ + const integertime_t ti_new_step = get_part_timestep(p, xp, e); + + /* Update particle */ + p->time_bin = get_time_bin(ti_new_step); + if (p->gpart != NULL) p->gpart->time_bin = get_time_bin(ti_new_step); /* Number of updated particles */ updated++; if (p->gpart != NULL) g_updated++; + + /* What is the next sync-point ? 
*/ + ti_end_min = min(ti_current + ti_new_step, ti_end_min); + ti_end_max = max(ti_current + ti_new_step, ti_end_max); } - /* Minimal time for next end of time-step */ - ti_end_min = min(p->ti_end, ti_end_min); - ti_end_max = max(p->ti_end, ti_end_max); + else { /* part is inactive */ + + const integertime_t ti_end = + get_integer_time_end(ti_current, p->time_bin); + + /* What is the next sync-point ? */ + ti_end_min = min(ti_end, ti_end_min); + ti_end_max = max(ti_end, ti_end_max); + } } - } - /* Otherwise, aggregate data from children. */ - else { + /* Loop over the g-particles in this cell. */ + for (int k = 0; k < gcount; k++) { + + /* Get a handle on the part. */ + struct gpart *restrict gp = &gparts[k]; + + /* If the g-particle has no counterpart */ + if (gp->type == swift_type_dark_matter) { + + /* need to be updated ? */ + if (gpart_is_active(gp, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Current end of time-step */ + const integertime_t ti_end = + get_integer_time_end(ti_current, gp->time_bin); + + if (ti_end != ti_current) + error("Computing time-step of rogue particle."); +#endif + + /* Get new time-step */ + const integertime_t ti_new_step = get_gpart_timestep(gp, e); + + /* Update particle */ + gp->time_bin = get_time_bin(ti_new_step); + + /* Number of updated g-particles */ + g_updated++; + + /* What is the next sync-point ? */ + ti_end_min = min(ti_current + ti_new_step, ti_end_min); + ti_end_max = max(ti_current + ti_new_step, ti_end_max); + + } else { /* gpart is inactive */ + + const integertime_t ti_end = + get_integer_time_end(ti_current, gp->time_bin); + + /* What is the next sync-point ? */ + ti_end_min = min(ti_end, ti_end_min); + ti_end_max = max(ti_end, ti_end_max); + } + } + } + + /* Loop over the star particles in this cell. */ + for (int k = 0; k < scount; k++) { + + /* Get a handle on the part. */ + struct spart *restrict sp = &sparts[k]; + + /* need to be updated ? 
*/ + if (spart_is_active(sp, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Current end of time-step */ + const integertime_t ti_end = + get_integer_time_end(ti_current, sp->time_bin); + + if (ti_end != ti_current) + error("Computing time-step of rogue particle."); +#endif + /* Get new time-step */ + const integertime_t ti_new_step = get_spart_timestep(sp, e); + + /* Update particle */ + sp->time_bin = get_time_bin(ti_new_step); + sp->gpart->time_bin = get_time_bin(ti_new_step); + + /* Number of updated s-particles */ + s_updated++; + g_updated++; + + /* What is the next sync-point ? */ + ti_end_min = min(ti_current + ti_new_step, ti_end_min); + ti_end_max = max(ti_current + ti_new_step, ti_end_max); + + } else { /* star particle is inactive */ + + const integertime_t ti_end = + get_integer_time_end(ti_current, sp->time_bin); + + /* What is the next sync-point ? */ + ti_end_min = min(ti_end, ti_end_min); + ti_end_max = max(ti_end, ti_end_max); + } + } + } else { /* Loop over the progeny. */ for (int k = 0; k < 8; k++) @@ -1000,11 +1234,12 @@ void runner_do_kick(struct runner *r, struct cell *c, int timer) { struct cell *restrict cp = c->progeny[k]; /* Recurse */ - runner_do_kick(r, cp, 0); + runner_do_timestep(r, cp, 0); /* And aggregate */ updated += cp->updated; g_updated += cp->g_updated; + s_updated += cp->s_updated; ti_end_min = min(cp->ti_end_min, ti_end_min); ti_end_max = max(cp->ti_end_max, ti_end_max); } @@ -1013,31 +1248,105 @@ void runner_do_kick(struct runner *r, struct cell *c, int timer) { /* Store the values. */ c->updated = updated; c->g_updated = g_updated; + c->s_updated = s_updated; c->ti_end_min = ti_end_min; c->ti_end_max = ti_end_max; - if (timer) TIMER_TOC(timer_kick); + if (timer) TIMER_TOC(timer_timestep); +} + +/** + * @brief End the force calculation of all active particles in a cell + * by multiplying the acccelerations by the relevant constants + * + * @param r The #runner thread. + * @param c The #cell. + * @param timer Are we timing this ? 
+ */ +void runner_do_end_force(struct runner *r, struct cell *c, int timer) { + + const struct engine *e = r->e; + const int count = c->count; + const int gcount = c->gcount; + const int scount = c->scount; + struct part *restrict parts = c->parts; + struct gpart *restrict gparts = c->gparts; + struct spart *restrict sparts = c->sparts; + const double const_G = e->physical_constants->const_newton_G; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active(c, e)) return; + + /* Recurse? */ + if (c->split) { + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_end_force(r, c->progeny[k], 0); + } else { + + /* Loop over the gas particles in this cell. */ + for (int k = 0; k < count; k++) { + + /* Get a handle on the part. */ + struct part *restrict p = &parts[k]; + + if (part_is_active(p, e)) { + + /* First, finish the force loop */ + hydro_end_force(p); + if (p->gpart != NULL) gravity_end_force(p->gpart, const_G); + } + } + + /* Loop over the g-particles in this cell. */ + for (int k = 0; k < gcount; k++) { + + /* Get a handle on the gpart. */ + struct gpart *restrict gp = &gparts[k]; + + if (gp->type == swift_type_dark_matter) { + if (gpart_is_active(gp, e)) gravity_end_force(gp, const_G); + } + } + + /* Loop over the star particles in this cell. */ + for (int k = 0; k < scount; k++) { + + /* Get a handle on the part. */ + struct spart *restrict sp = &sparts[k]; + + if (spart_is_active(sp, e)) { + + /* First, finish the force loop */ + star_end_force(sp); + gravity_end_force(sp->gpart, const_G); + } + } + } + + if (timer) TIMER_TOC(timer_endforce); } /** - * @brief Construct the cell properties from the received particles + * @brief Construct the cell properties from the received #part. * * @param r The runner thread. * @param c The cell. * @param timer Are we timing this ? 
*/ -void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) { +void runner_do_recv_part(struct runner *r, struct cell *c, int timer) { + +#ifdef WITH_MPI const struct part *restrict parts = c->parts; - const struct gpart *restrict gparts = c->gparts; const size_t nr_parts = c->count; - const size_t nr_gparts = c->gcount; - // const int ti_current = r->e->ti_current; + const integertime_t ti_current = r->e->ti_current; TIMER_TIC; - int ti_end_min = max_nr_timesteps; - int ti_end_max = 0; + integertime_t ti_end_min = max_nr_timesteps; + integertime_t ti_end_max = 0; float h_max = 0.f; /* If this cell is a leaf, collect the particle data. */ @@ -1045,39 +1354,176 @@ void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) { /* Collect everything... */ for (size_t k = 0; k < nr_parts; k++) { - const int ti_end = parts[k].ti_end; - // if(ti_end < ti_current) error("Received invalid particle !"); + const integertime_t ti_end = + get_integer_time_end(ti_current, parts[k].time_bin); ti_end_min = min(ti_end_min, ti_end); ti_end_max = max(ti_end_max, ti_end); h_max = max(h_max, parts[k].h); + +#ifdef SWIFT_DEBUG_CHECKS + if (parts[k].ti_drift != ti_current) + error("Received un-drifted particle !"); +#endif } + } + + /* Otherwise, recurse and collect. */ + else { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + runner_do_recv_part(r, c->progeny[k], 0); + ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); + ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); + h_max = max(h_max, c->progeny[k]->h_max); + } + } + } + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_end_min < ti_current) + error( + "Received a cell at an incorrect time c->ti_end_min=%lld, " + "e->ti_current=%lld.", + ti_end_min, ti_current); +#endif + + /* ... and store. 
*/ + c->ti_end_min = ti_end_min; + c->ti_end_max = ti_end_max; + c->ti_old = ti_current; + c->h_max = h_max; + + if (timer) TIMER_TOC(timer_dorecv_part); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Construct the cell properties from the received #gpart. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { + +#ifdef WITH_MPI + + const struct gpart *restrict gparts = c->gparts; + const size_t nr_gparts = c->gcount; + const integertime_t ti_current = r->e->ti_current; + + TIMER_TIC; + + integertime_t ti_end_min = max_nr_timesteps; + integertime_t ti_end_max = 0; + + /* If this cell is a leaf, collect the particle data. */ + if (!c->split) { + + /* Collect everything... */ for (size_t k = 0; k < nr_gparts; k++) { - const int ti_end = gparts[k].ti_end; - // if(ti_end < ti_current) error("Received invalid particle !"); + const integertime_t ti_end = + get_integer_time_end(ti_current, gparts[k].time_bin); ti_end_min = min(ti_end_min, ti_end); ti_end_max = max(ti_end_max, ti_end); } + } + /* Otherwise, recurse and collect. */ + else { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + runner_do_recv_gpart(r, c->progeny[k], 0); + ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); + ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); + } + } + } + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_end_min < ti_current) + error( + "Received a cell at an incorrect time c->ti_end_min=%lld, " + "e->ti_current=%lld.", + ti_end_min, ti_current); +#endif + + /* ... and store. */ + c->ti_end_min = ti_end_min; + c->ti_end_max = ti_end_max; + c->ti_old = ti_current; + + if (timer) TIMER_TOC(timer_dorecv_gpart); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Construct the cell properties from the received #spart. + * + * @param r The runner thread. 
+ * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { + +#ifdef WITH_MPI + + const struct spart *restrict sparts = c->sparts; + const size_t nr_sparts = c->scount; + const integertime_t ti_current = r->e->ti_current; + + TIMER_TIC; + + integertime_t ti_end_min = max_nr_timesteps; + integertime_t ti_end_max = 0; + + /* If this cell is a leaf, collect the particle data. */ + if (!c->split) { + + /* Collect everything... */ + for (size_t k = 0; k < nr_sparts; k++) { + const integertime_t ti_end = + get_integer_time_end(ti_current, sparts[k].time_bin); + ti_end_min = min(ti_end_min, ti_end); + ti_end_max = max(ti_end_max, ti_end); + } } /* Otherwise, recurse and collect. */ else { for (int k = 0; k < 8; k++) { if (c->progeny[k] != NULL) { - runner_do_recv_cell(r, c->progeny[k], 0); + runner_do_recv_spart(r, c->progeny[k], 0); ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); - h_max = max(h_max, c->progeny[k]->h_max); } } } +#ifdef SWIFT_DEBUG_CHECKS + if (ti_end_min < ti_current) + error( + "Received a cell at an incorrect time c->ti_end_min=%lld, " + "e->ti_current=%lld.", + ti_end_min, ti_current); +#endif + /* ... and store. 
*/ c->ti_end_min = ti_end_min; c->ti_end_max = ti_end_max; - c->h_max = h_max; + c->ti_old = ti_current; + + if (timer) TIMER_TOC(timer_dorecv_spart); - if (timer) TIMER_TOC(timer_dorecv_cell); +#else + error("SWIFT was not compiled with MPI support."); +#endif } /** @@ -1125,11 +1571,20 @@ void *runner_main(void *data) { /* Check that we haven't scheduled an inactive task */ #ifdef SWIFT_DEBUG_CHECKS - if (cj == NULL) { /* self */ - if (!cell_is_active(ci, e) && t->type != task_type_sort) + t->ti_run = e->ti_current; +#ifndef WITH_MPI + if (ci == NULL && cj == NULL) { + + if (t->type != task_type_grav_gather_m && t->type != task_type_grav_fft) + error("Task not associated with cells!"); + + } else if (cj == NULL) { /* self */ + + if (!cell_is_active(ci, e) && t->type != task_type_sort && + t->type != task_type_send && t->type != task_type_recv) error( - "Task (type='%s/%s') should have been skipped ti_current=%d " - "c->ti_end_min=%d", + "Task (type='%s/%s') should have been skipped ti_current=%lld " + "c->ti_end_min=%lld", taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current, ci->ti_end_min); @@ -1137,25 +1592,34 @@ void *runner_main(void *data) { if (!cell_is_active(ci, e) && t->type == task_type_sort && t->flags == 0) error( - "Task (type='%s/%s') should have been skipped ti_current=%d " - "c->ti_end_min=%d t->flags=%d", + "Task (type='%s/%s') should have been skipped ti_current=%lld " + "c->ti_end_min=%lld t->flags=%d", taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current, ci->ti_end_min, t->flags); } else { /* pair */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) - error( - "Task (type='%s/%s') should have been skipped ti_current=%d " - "ci->ti_end_min=%d cj->ti_end_min=%d", - taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current, - ci->ti_end_min, cj->ti_end_min); + + if (t->type != task_type_send && t->type != task_type_recv) + error( + "Task (type='%s/%s') should have been skipped ti_current=%lld " + 
"ci->ti_end_min=%lld cj->ti_end_min=%lld", + taskID_names[t->type], subtaskID_names[t->subtype], + e->ti_current, ci->ti_end_min, cj->ti_end_min); } +#endif #endif /* Different types of tasks... */ switch (t->type) { case task_type_self: - if (t->subtype == task_subtype_density) runner_doself1_density(r, ci); + if (t->subtype == task_subtype_density) { +#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) + runner_doself1_density_vec(r, ci); +#else + runner_doself1_density(r, ci); +#endif + } #ifdef EXTRA_HYDRO_LOOP else if (t->subtype == task_subtype_gradient) runner_doself1_gradient(r, ci); @@ -1231,8 +1695,19 @@ void *runner_main(void *data) { runner_do_extra_ghost(r, ci, 1); break; #endif - case task_type_kick: - runner_do_kick(r, ci, 1); + case task_type_drift: + runner_do_drift(r, ci, 1); + break; + case task_type_kick1: + runner_do_kick1(r, ci, 1); + break; + case task_type_kick2: + if (!(e->policy & engine_policy_cooling)) + runner_do_end_force(r, ci, 1); + runner_do_kick2(r, ci, 1); + break; + case task_type_timestep: + runner_do_timestep(r, ci, 1); break; #ifdef WITH_MPI case task_type_send: @@ -1244,8 +1719,15 @@ void *runner_main(void *data) { if (t->subtype == task_subtype_tend) { cell_unpack_ti_ends(ci, t->buff); free(t->buff); + } else if (t->subtype == task_subtype_xv || + t->subtype == task_subtype_rho) { + runner_do_recv_part(r, ci, 1); + } else if (t->subtype == task_subtype_gpart) { + runner_do_recv_gpart(r, ci, 1); + } else if (t->subtype == task_subtype_spart) { + runner_do_recv_spart(r, ci, 1); } else { - runner_do_recv_cell(r, ci, 1); + error("Unknown/invalid task subtype (%d).", t->subtype); } break; #endif @@ -1261,6 +1743,7 @@ void *runner_main(void *data) { runner_do_grav_fft(r); break; case task_type_cooling: + if (e->policy & engine_policy_cooling) runner_do_end_force(r, ci, 1); runner_do_cooling(r, t->ci, 1); break; case task_type_sourceterms: diff --git a/src/runner.h b/src/runner.h index 
a8caf24248c99438f16729e2cac3e1031535f62b..53e78b00657385c7185e0730d421707c87ccf382 100644 --- a/src/runner.h +++ b/src/runner.h @@ -23,6 +23,8 @@ #ifndef SWIFT_RUNNER_H #define SWIFT_RUNNER_H +#include "cache.h" + extern const double runner_shift[13][3]; extern const char runner_flip[27]; @@ -45,17 +47,25 @@ struct runner { /*! The engine owing this runner. */ struct engine *e; + + /*! The particle cache of this runner. */ + struct cache par_cache; }; /* Function prototypes. */ void runner_do_ghost(struct runner *r, struct cell *c, int timer); void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer); void runner_do_sort(struct runner *r, struct cell *c, int flag, int clock); -void runner_do_kick(struct runner *r, struct cell *c, int timer); +void runner_do_drift(struct runner *r, struct cell *c, int timer); +void runner_do_kick1(struct runner *r, struct cell *c, int timer); +void runner_do_kick2(struct runner *r, struct cell *c, int timer); +void runner_do_end_force(struct runner *r, struct cell *c, int timer); void runner_do_init(struct runner *r, struct cell *c, int timer); void runner_do_cooling(struct runner *r, struct cell *c, int timer); void runner_do_grav_external(struct runner *r, struct cell *c, int timer); void *runner_main(void *data); +void runner_do_unskip_mapper(void *map_data, int num_elements, + void *extra_data); void runner_do_drift_mapper(void *map_data, int num_elements, void *extra_data); #endif /* SWIFT_RUNNER_H */ diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 6bc8f2da808cc2d953482b90e9441b833384bc75..6fa04018088a05ed0319489e88677c3ebcabd0f2 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -32,9 +32,18 @@ #define _DOPAIR2(f) PASTE(runner_dopair2, f) #define DOPAIR2 _DOPAIR2(FUNCTION) +#define _DOPAIR1_NOSORT(f) PASTE(runner_dopair1_nosort, f) +#define DOPAIR1_NOSORT _DOPAIR1_NOSORT(FUNCTION) + +#define _DOPAIR2_NOSORT(f) PASTE(runner_dopair2_nosort, f) +#define DOPAIR2_NOSORT 
_DOPAIR2_NOSORT(FUNCTION) + #define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f) #define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION) +#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f) +#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION) + #define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f) #define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION) @@ -98,6 +107,8 @@ #define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f) #define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION) +#include "runner_doiact_nosort.h" + /** * @brief Compute the interactions between a cell pair. * @@ -112,7 +123,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, error("Don't use in actual runs ! Slow code !"); -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -167,7 +178,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT(r2, dx, hi, pj->h, pi, pj); @@ -197,7 +208,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -213,7 +224,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { error("Don't use in actual runs ! Slow code !"); -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -256,7 +267,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { /* Hit or miss? 
*/ if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT(r2, dx, hi, pj->h, pi, pj); @@ -286,7 +297,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -315,7 +326,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, error("Don't use in actual runs ! Slow code !"); -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -365,7 +376,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -395,7 +406,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -422,7 +433,14 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, struct engine *e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_MPI + if (ci->nodeID != cj->nodeID) { + DOPAIR_SUBSET_NOSORT(r, ci, parts_i, ind, count, cj); + return; + } +#endif + +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -497,7 +515,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -562,7 +580,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Hit or miss? 
*/ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -593,7 +611,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, } /* loop over the parts in ci. */ } -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -616,7 +634,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, struct part *restrict parts, int *restrict ind, int count) { -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -656,7 +674,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 > 0.0f && r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -686,7 +704,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -707,7 +725,14 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { const struct engine *restrict e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_MPI + if (ci->nodeID != cj->nodeID) { + DOPAIR1_NOSORT(r, ci, cj); + return; + } +#endif + +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -721,10 +746,8 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { /* Anything to do here? 
*/ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; -#ifdef SWIFT_DEBUG_CHECKS - cell_is_drifted(ci, e); - cell_is_drifted(cj, e); -#endif + if (!cell_is_drifted(ci, e)) cell_drift(ci, e); + if (!cell_is_drifted(cj, e)) cell_drift(cj, e); /* Get the sort ID. */ double shift[3] = {0.0, 0.0, 0.0}; @@ -782,10 +805,18 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { r2 += dx[k] * dx[k]; } +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -844,10 +875,18 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { r2 += dx[k] * dx[k]; } +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? */ if (r2 < hjg2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hj, pi->h, pj, pi); @@ -877,7 +916,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. 
*/ if (icount > 0) for (int k = 0; k < icount; k++) @@ -898,7 +937,14 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { struct engine *restrict e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_MPI + if (ci->nodeID != cj->nodeID) { + DOPAIR2_NOSORT(r, ci, cj); + return; + } +#endif + +#ifdef WITH_OLD_VECTORIZATION int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); float hiq1[VEC_SIZE] __attribute__((aligned(16))); @@ -918,10 +964,8 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Anything to do here? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; -#ifdef SWIFT_DEBUG_CHECKS - cell_is_drifted(ci, e); - cell_is_drifted(cj, e); -#endif + if (!cell_is_drifted(ci, e)) error("Cell ci not drifted"); + if (!cell_is_drifted(cj, e)) error("Cell cj not drifted"); /* Get the shift ID. */ double shift[3] = {0.0, 0.0, 0.0}; @@ -1012,10 +1056,18 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { r2 += dx[k] * dx[k]; } +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hj, hi, pj, pi); @@ -1063,10 +1115,18 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { r2 += dx[k] * dx[k]; } +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION /* Does pj need to be updated too? 
*/ if (part_is_active(pj, e)) @@ -1156,10 +1216,18 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { r2 += dx[k] * dx[k]; } +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? */ if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, hj, pi, pj); @@ -1206,10 +1274,18 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { r2 += dx[k] * dx[k]; } +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? */ if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION /* Does pi need to be updated too? */ if (part_is_active(pi, e)) @@ -1267,7 +1343,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. 
*/ if (icount1 > 0) for (int k = 0; k < icount1; k++) @@ -1294,7 +1370,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { const struct engine *e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); float hiq1[VEC_SIZE] __attribute__((aligned(16))); @@ -1313,9 +1389,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { if (!cell_is_active(c, e)) return; -#ifdef SWIFT_DEBUG_CHECKS - cell_is_drifted(c, e); -#endif + if (!cell_is_drifted(c, e)) cell_drift(c, e); struct part *restrict parts = c->parts; const int count = c->count; @@ -1354,6 +1428,14 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { struct part *restrict pj = &parts[indt[pjd]]; const float hj = pj->h; +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Compute the pairwise distance. */ float r2 = 0.0f; float dx[3]; @@ -1365,7 +1447,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hj * hj * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hj, hi, pj, pi); @@ -1418,10 +1500,18 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { const int doj = (part_is_active(pj, e)) && (r2 < hj * hj * kernel_gamma2); +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? */ if (r2 < hig2 || doj) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION /* Which parts need to be updated? 
*/ if (r2 < hig2 && doj) @@ -1504,7 +1594,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { } /* loop over all particles. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount1 > 0) for (int k = 0; k < icount1; k++) @@ -1529,7 +1619,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { const struct engine *e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); float hiq1[VEC_SIZE] __attribute__((aligned(16))); @@ -1548,9 +1638,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { if (!cell_is_active(c, e)) return; -#ifdef SWIFT_DEBUG_CHECKS - cell_is_drifted(c, e); -#endif + if (!cell_is_drifted(c, e)) error("Cell is not drifted"); struct part *restrict parts = c->parts; const int count = c->count; @@ -1597,10 +1685,18 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { r2 += dx[k] * dx[k]; } +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? */ if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hj, hi, pj, pi); @@ -1651,10 +1747,18 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { r2 += dx[k] * dx[k]; } +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? */ if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION /* Does pj need to be updated too? 
*/ if (part_is_active(pj, e)) @@ -1712,7 +1816,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { } /* loop over all particles. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount1 > 0) for (int k = 0; k < icount1; k++) @@ -2007,8 +2111,14 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { } /* Otherwise, compute self-interaction. */ - else + else { +#if (DOSELF1 == runner_doself1_density) && defined(WITH_VECTORIZATION) && \ + defined(GADGET2_SPH) + runner_doself1_density_vec(r, ci); +#else DOSELF1(r, ci); +#endif + } if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF); } diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 59a5ae496680390c23458bde65b4bba321ffe7a1..9d2606ceb06fd6d32592010376e867a6ae582bf0 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -25,8 +25,6 @@ #include "gravity.h" #include "part.h" -#define ICHECK -1000 - /** * @brief Compute the recursive upward sweep, i.e. construct the * multipoles in a cell hierarchy. diff --git a/src/runner_doiact_nosort.h b/src/runner_doiact_nosort.h new file mode 100644 index 0000000000000000000000000000000000000000..d38f01c6955e2ee9848698d2b46d3f4a14ad0873 --- /dev/null +++ b/src/runner_doiact_nosort.h @@ -0,0 +1,305 @@ + +/** + * @brief Compute the interactions between a cell pair. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + */ +void DOPAIR1_NOSORT(struct runner *r, struct cell *ci, struct cell *cj) { + + const struct engine *e = r->e; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + + if (!cell_is_drifted(ci, e)) cell_drift(ci, e); + if (!cell_is_drifted(cj, e)) cell_drift(cj, e); + + /* Get the relative distance between the pairs, wrapping. 
*/ + double shift[3] = {0.0, 0.0, 0.0}; + space_getsid(e->s, &ci, &cj, shift); + + const int count_i = ci->count; + const int count_j = cj->count; + struct part *restrict parts_i = ci->parts; + struct part *restrict parts_j = cj->parts; + + /* Loop over the parts in ci. */ + for (int pid = 0; pid < count_i; pid++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pi = &parts_i[pid]; + if (!part_is_active(pi, e)) continue; + const float hi = pi->h; + + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hig2 = hi * hi * kernel_gamma2; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k] * dx[k]; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hig2) { + IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); + } + + } /* loop over the parts in cj. */ + + } /* loop over the parts in ci. */ + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pj = &parts_j[pjd]; + if (!part_is_active(pj, e)) continue; + const float hj = pj->h; + + double pjx[3]; + for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k]; + const float hjg2 = hj * hj * kernel_gamma2; + + /* Loop over the parts in ci. */ + for (int pid = 0; pid < count_i; pid++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pi = &parts_i[pid]; + + /* Compute the pairwise distance. 
*/ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pjx[k] - pi->x[k]; + r2 += dx[k] * dx[k]; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hjg2) { + IACT_NONSYM(r2, dx, hj, pi->h, pj, pi); + } + + } /* loop over the parts in ci. */ + + } /* loop over the parts in cj. */ + + TIMER_TOC(TIMER_DOPAIR); +} + +/** + * @brief Compute the interactions between a cell pair. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + */ +void DOPAIR2_NOSORT(struct runner *r, struct cell *ci, struct cell *cj) { + + const struct engine *e = r->e; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + + if (!cell_is_drifted(ci, e)) cell_drift(ci, e); + if (!cell_is_drifted(cj, e)) cell_drift(cj, e); + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + space_getsid(e->s, &ci, &cj, shift); + + const int count_i = ci->count; + const int count_j = cj->count; + struct part *restrict parts_i = ci->parts; + struct part *restrict parts_j = cj->parts; + + /* Loop over the parts in ci. */ + for (int pid = 0; pid < count_i; pid++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pi = &parts_i[pid]; + if (!part_is_active(pi, e)) continue; + const float hi = pi->h; + + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hig2 = hi * hi * kernel_gamma2; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. 
*/ + struct part *restrict pj = &parts_j[pjd]; + const float hjg2 = pj->h * pj->h * kernel_gamma2; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k] * dx[k]; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hig2 || r2 < hjg2) { + IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); + } + + } /* loop over the parts in cj. */ + + } /* loop over the parts in ci. */ + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pj = &parts_j[pjd]; + if (!part_is_active(pj, e)) continue; + const float hj = pj->h; + + double pjx[3]; + for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k]; + const float hjg2 = hj * hj * kernel_gamma2; + + /* Loop over the parts in ci. */ + for (int pid = 0; pid < count_i; pid++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pi = &parts_i[pid]; + const float hig2 = pi->h * pi->h * kernel_gamma2; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pjx[k] - pi->x[k]; + r2 += dx[k] * dx[k]; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hjg2 || r2 < hig2) { + IACT_NONSYM(r2, dx, hj, pi->h, pj, pi); + } + + } /* loop over the parts in ci. */ + + } /* loop over the parts in cj. 
*/ + + TIMER_TOC(TIMER_DOPAIR); +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * @param r The #runner. + * @param ci The first #cell. + * @param parts_i The #part to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param count The number of particles in @c ind. + * @param cj The second #cell. + */ +void DOPAIR_SUBSET_NOSORT(struct runner *r, struct cell *restrict ci, + struct part *restrict parts_i, int *restrict ind, + int count, struct cell *restrict cj) { + + struct engine *e = r->e; + + TIMER_TIC; + + const int count_j = cj->count; + struct part *restrict parts_j = cj->parts; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + /* Loop over the parts_i. */ + for (int pid = 0; pid < count; pid++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pi = &parts_i[ind[pid]]; + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hi = pi->h; + const float hig2 = hi * hi * kernel_gamma2; + + if (!part_is_active(pi, e)) + error("Trying to correct smoothing length of inactive particle !"); + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k] * dx[k]; + } + + /* Hit or miss? */ + if (r2 < hig2) { + + IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); + } + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. 
*/ + + TIMER_TOC(timer_dopair_subset); +} diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c new file mode 100644 index 0000000000000000000000000000000000000000..b91d288529c0706693d74b0c54d688ee0944aa29 --- /dev/null +++ b/src/runner_doiact_vec.c @@ -0,0 +1,874 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 James Willis (james.s.willis@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +#include "active.h" + +/* This object's header. */ +#include "runner_doiact_vec.h" + +#ifdef WITH_VECTORIZATION +/** + * @brief Compute the vector remainder interactions from the secondary cache. + * + * @param int_cache (return) secondary #cache of interactions between two + * particles. + * @param icount Interaction count. + * @param rhoSum (return) #vector holding the cumulative sum of the density + * update on pi. + * @param rho_dhSum (return) #vector holding the cumulative sum of the density + * gradient update on pi. + * @param wcountSum (return) #vector holding the cumulative sum of the wcount + * update on pi. 
+ * @param wcount_dhSum (return) #vector holding the cumulative sum of the wcount + * gradient update on pi. + * @param div_vSum (return) #vector holding the cumulative sum of the divergence + * update on pi. + * @param curlvxSum (return) #vector holding the cumulative sum of the curl of + * vx update on pi. + * @param curlvySum (return) #vector holding the cumulative sum of the curl of + * vy update on pi. + * @param curlvzSum (return) #vector holding the cumulative sum of the curl of + * vz update on pi. + * @param v_hi_inv #vector of 1/h for pi. + * @param v_vix #vector of x velocity of pi. + * @param v_viy #vector of y velocity of pi. + * @param v_viz #vector of z velocity of pi. + * @param icount_align (return) Interaction count after the remainder + * interactions have been performed, should be a multiple of the vector length. + */ +__attribute__((always_inline)) INLINE static void calcRemInteractions( + struct c2_cache *const int_cache, const int icount, vector *rhoSum, + vector *rho_dhSum, vector *wcountSum, vector *wcount_dhSum, + vector *div_vSum, vector *curlvxSum, vector *curlvySum, vector *curlvzSum, + vector v_hi_inv, vector v_vix, vector v_viy, vector v_viz, + int *icount_align) { + +#ifdef HAVE_AVX512_F + KNL_MASK_16 knl_mask, knl_mask2; +#endif + vector int_mask, int_mask2; + + /* Work out the number of remainder interactions and pad secondary cache. */ + *icount_align = icount; + int rem = icount % (NUM_VEC_PROC * VEC_SIZE); + if (rem != 0) { + int pad = (NUM_VEC_PROC * VEC_SIZE) - rem; + *icount_align += pad; + +/* Initialise masks to true. */ +#ifdef HAVE_AVX512_F + knl_mask = 0xFFFF; + knl_mask2 = 0xFFFF; + int_mask.m = vec_setint1(0xFFFFFFFF); + int_mask2.m = vec_setint1(0xFFFFFFFF); +#else + int_mask.m = vec_setint1(0xFFFFFFFF); + int_mask2.m = vec_setint1(0xFFFFFFFF); +#endif + /* Pad secondary cache so that there are no contributions in the interaction + * function. 
*/ + for (int i = icount; i < *icount_align; i++) { + int_cache->mq[i] = 0.f; + int_cache->r2q[i] = 1.f; + int_cache->dxq[i] = 0.f; + int_cache->dyq[i] = 0.f; + int_cache->dzq[i] = 0.f; + int_cache->vxq[i] = 0.f; + int_cache->vyq[i] = 0.f; + int_cache->vzq[i] = 0.f; + } + + /* Zero parts of mask that represent the padded values.*/ + if (pad < VEC_SIZE) { +#ifdef HAVE_AVX512_F + knl_mask2 = knl_mask2 >> pad; +#else + for (int i = VEC_SIZE - pad; i < VEC_SIZE; i++) int_mask2.i[i] = 0; +#endif + } else { +#ifdef HAVE_AVX512_F + knl_mask = knl_mask >> (VEC_SIZE - rem); + knl_mask2 = 0; +#else + for (int i = rem; i < VEC_SIZE; i++) int_mask.i[i] = 0; + int_mask2.v = vec_setzero(); +#endif + } + + /* Perform remainder interaction and remove remainder from aligned + * interaction count. */ + *icount_align = icount - rem; + runner_iact_nonsym_2_vec_density( + &int_cache->r2q[*icount_align], &int_cache->dxq[*icount_align], + &int_cache->dyq[*icount_align], &int_cache->dzq[*icount_align], + v_hi_inv, v_vix, v_viy, v_viz, &int_cache->vxq[*icount_align], + &int_cache->vyq[*icount_align], &int_cache->vzq[*icount_align], + &int_cache->mq[*icount_align], rhoSum, rho_dhSum, wcountSum, + wcount_dhSum, div_vSum, curlvxSum, curlvySum, curlvzSum, int_mask, + int_mask2, +#ifdef HAVE_AVX512_F + knl_mask, knl_mask2); +#else + 0, 0); +#endif + } +} + +/** + * @brief Left-packs the values needed by an interaction into the secondary + * cache (Supports AVX, AVX2 and AVX512 instruction sets). + * + * @param mask Contains which particles need to interact. + * @param pjd Index of the particle to store into. + * @param v_r2 #vector of the separation between two particles squared. + * @param v_dx #vector of the x separation between two particles. + * @param v_dy #vector of the y separation between two particles. + * @param v_dz #vector of the z separation between two particles. + * @param v_mj #vector of the mass of particle pj. + * @param v_vjx #vector of x velocity of pj. 
+ * @param v_vjy #vector of y velocity of pj. + * @param v_vjz #vector of z velocity of pj. + * @param cell_cache #cache of all particles in the cell. + * @param int_cache (return) secondary #cache of interactions between two + * particles. + * @param icount Interaction count. + * @param rhoSum #vector holding the cumulative sum of the density update on pi. + * @param rho_dhSum #vector holding the cumulative sum of the density gradient + * update on pi. + * @param wcountSum #vector holding the cumulative sum of the wcount update on + * pi. + * @param wcount_dhSum #vector holding the cumulative sum of the wcount gradient + * update on pi. + * @param div_vSum #vector holding the cumulative sum of the divergence update + * on pi. + * @param curlvxSum #vector holding the cumulative sum of the curl of vx update + * on pi. + * @param curlvySum #vector holding the cumulative sum of the curl of vy update + * on pi. + * @param curlvzSum #vector holding the cumulative sum of the curl of vz update + * on pi. + * @param v_hi_inv #vector of 1/h for pi. + * @param v_vix #vector of x velocity of pi. + * @param v_viy #vector of y velocity of pi. + * @param v_viz #vector of z velocity of pi. + */ +__attribute__((always_inline)) INLINE static void storeInteractions( + const int mask, const int pjd, vector *v_r2, vector *v_dx, vector *v_dy, + vector *v_dz, vector *v_mj, vector *v_vjx, vector *v_vjy, vector *v_vjz, + const struct cache *const cell_cache, struct c2_cache *const int_cache, + int *icount, vector *rhoSum, vector *rho_dhSum, vector *wcountSum, + vector *wcount_dhSum, vector *div_vSum, vector *curlvxSum, + vector *curlvySum, vector *curlvzSum, vector v_hi_inv, vector v_vix, + vector v_viy, vector v_viz) { + +/* Left-pack values needed into the secondary cache using the interaction mask. 
+ */ +#if defined(HAVE_AVX2) || defined(HAVE_AVX512_F) + int pack = 0; + +#ifdef HAVE_AVX512_F + pack += __builtin_popcount(mask); + VEC_LEFT_PACK(v_r2->v, mask, &int_cache->r2q[*icount]); + VEC_LEFT_PACK(v_dx->v, mask, &int_cache->dxq[*icount]); + VEC_LEFT_PACK(v_dy->v, mask, &int_cache->dyq[*icount]); + VEC_LEFT_PACK(v_dz->v, mask, &int_cache->dzq[*icount]); + VEC_LEFT_PACK(v_mj->v, mask, &int_cache->mq[*icount]); + VEC_LEFT_PACK(v_vjx->v, mask, &int_cache->vxq[*icount]); + VEC_LEFT_PACK(v_vjy->v, mask, &int_cache->vyq[*icount]); + VEC_LEFT_PACK(v_vjz->v, mask, &int_cache->vzq[*icount]); +#else + vector v_mask; + VEC_FORM_PACKED_MASK(mask, v_mask.m, pack); + + VEC_LEFT_PACK(v_r2->v, v_mask.m, &int_cache->r2q[*icount]); + VEC_LEFT_PACK(v_dx->v, v_mask.m, &int_cache->dxq[*icount]); + VEC_LEFT_PACK(v_dy->v, v_mask.m, &int_cache->dyq[*icount]); + VEC_LEFT_PACK(v_dz->v, v_mask.m, &int_cache->dzq[*icount]); + VEC_LEFT_PACK(v_mj->v, v_mask.m, &int_cache->mq[*icount]); + VEC_LEFT_PACK(v_vjx->v, v_mask.m, &int_cache->vxq[*icount]); + VEC_LEFT_PACK(v_vjy->v, v_mask.m, &int_cache->vyq[*icount]); + VEC_LEFT_PACK(v_vjz->v, v_mask.m, &int_cache->vzq[*icount]); + +#endif /* HAVE_AVX512_F */ + + (*icount) += pack; +#else + /* Quicker to do it serially in AVX rather than use intrinsics. */ + for (int bit_index = 0; bit_index < VEC_SIZE; bit_index++) { + if (mask & (1 << bit_index)) { + /* Add this interaction to the queue. 
*/ + int_cache->r2q[*icount] = v_r2->f[bit_index]; + int_cache->dxq[*icount] = v_dx->f[bit_index]; + int_cache->dyq[*icount] = v_dy->f[bit_index]; + int_cache->dzq[*icount] = v_dz->f[bit_index]; + int_cache->mq[*icount] = cell_cache->m[pjd + bit_index]; + int_cache->vxq[*icount] = cell_cache->vx[pjd + bit_index]; + int_cache->vyq[*icount] = cell_cache->vy[pjd + bit_index]; + int_cache->vzq[*icount] = cell_cache->vz[pjd + bit_index]; + + (*icount)++; + } + } + +#endif /* defined(HAVE_AVX2) || defined(HAVE_AVX512_F) */ + + /* Flush the c2 cache if it has reached capacity. */ + if (*icount >= (C2_CACHE_SIZE - (NUM_VEC_PROC * VEC_SIZE))) { + + int icount_align = *icount; + + /* Peform remainder interactions. */ + calcRemInteractions(int_cache, *icount, rhoSum, rho_dhSum, wcountSum, + wcount_dhSum, div_vSum, curlvxSum, curlvySum, curlvzSum, + v_hi_inv, v_vix, v_viy, v_viz, &icount_align); + + vector int_mask, int_mask2; + int_mask.m = vec_setint1(0xFFFFFFFF); + int_mask2.m = vec_setint1(0xFFFFFFFF); + + /* Perform interactions. */ + for (int pjd = 0; pjd < icount_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) { + runner_iact_nonsym_2_vec_density( + &int_cache->r2q[pjd], &int_cache->dxq[pjd], &int_cache->dyq[pjd], + &int_cache->dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz, + &int_cache->vxq[pjd], &int_cache->vyq[pjd], &int_cache->vzq[pjd], + &int_cache->mq[pjd], rhoSum, rho_dhSum, wcountSum, wcount_dhSum, + div_vSum, curlvxSum, curlvySum, curlvzSum, int_mask, int_mask2, 0, 0); + } + + /* Reset interaction count. */ + *icount = 0; + } +} +#endif /* WITH_VECTORIZATION */ + +/** + * @brief Compute the cell self-interaction (non-symmetric) using vector + * intrinsics with one particle pi at a time. + * + * @param r The #runner. + * @param c The #cell. 
+ */ +__attribute__((always_inline)) INLINE void runner_doself1_density_vec( + struct runner *r, struct cell *restrict c) { + +#ifdef WITH_VECTORIZATION + const struct engine *e = r->e; + int doi_mask; + struct part *restrict pi; + int count_align; + int num_vec_proc = NUM_VEC_PROC; + + struct part *restrict parts = c->parts; + const int count = c->count; + + vector v_hi, v_vix, v_viy, v_viz, v_hig2, v_r2; + + TIMER_TIC + + if (!cell_is_active(c, e)) return; + + if (!cell_is_drifted(c, e)) cell_drift(c, e); + + /* Get the particle cache from the runner and re-allocate + * the cache if it is not big enough for the cell. */ + struct cache *restrict cell_cache = &r->par_cache; + + if (cell_cache->count < count) { + cache_init(cell_cache, count); + } + + /* Read the particles from the cell and store them locally in the cache. */ + cache_read_particles(c, cell_cache); + + /* Create secondary cache to store particle interactions. */ + struct c2_cache int_cache; + int icount = 0, icount_align = 0; + + /* Loop over the particles in the cell. */ + for (int pid = 0; pid < count; pid++) { + + /* Get a pointer to the ith particle. */ + pi = &parts[pid]; + + /* Is the ith particle active? */ + if (!part_is_active(pi, e)) continue; + + vector pix, piy, piz; + + const float hi = cell_cache->h[pid]; + + /* Fill particle pi vectors. */ + pix.v = vec_set1(cell_cache->x[pid]); + piy.v = vec_set1(cell_cache->y[pid]); + piz.v = vec_set1(cell_cache->z[pid]); + v_hi.v = vec_set1(hi); + v_vix.v = vec_set1(cell_cache->vx[pid]); + v_viy.v = vec_set1(cell_cache->vy[pid]); + v_viz.v = vec_set1(cell_cache->vz[pid]); + + const float hig2 = hi * hi * kernel_gamma2; + v_hig2.v = vec_set1(hig2); + + /* Reset cumulative sums of update vectors. */ + vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum, + curlvySum, curlvzSum; + + /* Get the inverse of hi. 
*/ + vector v_hi_inv; + + v_hi_inv = vec_reciprocal(v_hi); + + rhoSum.v = vec_setzero(); + rho_dhSum.v = vec_setzero(); + wcountSum.v = vec_setzero(); + wcount_dhSum.v = vec_setzero(); + div_vSum.v = vec_setzero(); + curlvxSum.v = vec_setzero(); + curlvySum.v = vec_setzero(); + curlvzSum.v = vec_setzero(); + + /* Pad cache if there is a serial remainder. */ + count_align = count; + int rem = count % (num_vec_proc * VEC_SIZE); + if (rem != 0) { + int pad = (num_vec_proc * VEC_SIZE) - rem; + + count_align += pad; + /* Set positions to the same as particle pi so when the r2 > 0 mask is + * applied these extra contributions are masked out.*/ + for (int i = count; i < count_align; i++) { + cell_cache->x[i] = pix.f[0]; + cell_cache->y[i] = piy.f[0]; + cell_cache->z[i] = piz.f[0]; + } + } + + vector pjx, pjy, pjz; + vector pjvx, pjvy, pjvz, mj; + vector pjx2, pjy2, pjz2; + vector pjvx2, pjvy2, pjvz2, mj2; + + /* Find all of particle pi's interacions and store needed values in the + * secondary cache.*/ + for (int pjd = 0; pjd < count_align; pjd += (num_vec_proc * VEC_SIZE)) { + + /* Load 2 sets of vectors from the particle cache. */ + pjx.v = vec_load(&cell_cache->x[pjd]); + pjy.v = vec_load(&cell_cache->y[pjd]); + pjz.v = vec_load(&cell_cache->z[pjd]); + pjvx.v = vec_load(&cell_cache->vx[pjd]); + pjvy.v = vec_load(&cell_cache->vy[pjd]); + pjvz.v = vec_load(&cell_cache->vz[pjd]); + mj.v = vec_load(&cell_cache->m[pjd]); + + pjx2.v = vec_load(&cell_cache->x[pjd + VEC_SIZE]); + pjy2.v = vec_load(&cell_cache->y[pjd + VEC_SIZE]); + pjz2.v = vec_load(&cell_cache->z[pjd + VEC_SIZE]); + pjvx2.v = vec_load(&cell_cache->vx[pjd + VEC_SIZE]); + pjvy2.v = vec_load(&cell_cache->vy[pjd + VEC_SIZE]); + pjvz2.v = vec_load(&cell_cache->vz[pjd + VEC_SIZE]); + mj2.v = vec_load(&cell_cache->m[pjd + VEC_SIZE]); + + /* Compute the pairwise distance. 
*/ + vector v_dx_tmp, v_dy_tmp, v_dz_tmp; + vector v_dx_tmp2, v_dy_tmp2, v_dz_tmp2, v_r2_2; + + v_dx_tmp.v = vec_sub(pix.v, pjx.v); + v_dy_tmp.v = vec_sub(piy.v, pjy.v); + v_dz_tmp.v = vec_sub(piz.v, pjz.v); + v_dx_tmp2.v = vec_sub(pix.v, pjx2.v); + v_dy_tmp2.v = vec_sub(piy.v, pjy2.v); + v_dz_tmp2.v = vec_sub(piz.v, pjz2.v); + + v_r2.v = vec_mul(v_dx_tmp.v, v_dx_tmp.v); + v_r2.v = vec_fma(v_dy_tmp.v, v_dy_tmp.v, v_r2.v); + v_r2.v = vec_fma(v_dz_tmp.v, v_dz_tmp.v, v_r2.v); + v_r2_2.v = vec_mul(v_dx_tmp2.v, v_dx_tmp2.v); + v_r2_2.v = vec_fma(v_dy_tmp2.v, v_dy_tmp2.v, v_r2_2.v); + v_r2_2.v = vec_fma(v_dz_tmp2.v, v_dz_tmp2.v, v_r2_2.v); + +/* Form a mask from r2 < hig2 and r2 > 0.*/ +#ifdef HAVE_AVX512_F + // KNL_MASK_16 doi_mask, doi_mask_check, doi_mask2, doi_mask2_check; + KNL_MASK_16 doi_mask_check, doi_mask2, doi_mask2_check; + + doi_mask_check = vec_cmp_gt(v_r2.v, vec_setzero()); + doi_mask = vec_cmp_lt(v_r2.v, v_hig2.v); + + doi_mask2_check = vec_cmp_gt(v_r2_2.v, vec_setzero()); + doi_mask2 = vec_cmp_lt(v_r2_2.v, v_hig2.v); + + doi_mask = doi_mask & doi_mask_check; + doi_mask2 = doi_mask2 & doi_mask2_check; + +#else + vector v_doi_mask, v_doi_mask_check, v_doi_mask2, v_doi_mask2_check; + int doi_mask2; + + /* Form r2 > 0 mask and r2 < hig2 mask. */ + v_doi_mask_check.v = vec_cmp_gt(v_r2.v, vec_setzero()); + v_doi_mask.v = vec_cmp_lt(v_r2.v, v_hig2.v); + + /* Form r2 > 0 mask and r2 < hig2 mask. */ + v_doi_mask2_check.v = vec_cmp_gt(v_r2_2.v, vec_setzero()); + v_doi_mask2.v = vec_cmp_lt(v_r2_2.v, v_hig2.v); + + /* Combine two masks and form integer mask. */ + doi_mask = vec_cmp_result(vec_and(v_doi_mask.v, v_doi_mask_check.v)); + doi_mask2 = vec_cmp_result(vec_and(v_doi_mask2.v, v_doi_mask2_check.v)); +#endif /* HAVE_AVX512_F */ + + /* If there are any interactions left pack interaction values into c2 + * cache. 
*/ + if (doi_mask) { + storeInteractions(doi_mask, pjd, &v_r2, &v_dx_tmp, &v_dy_tmp, &v_dz_tmp, + &mj, &pjvx, &pjvy, &pjvz, cell_cache, &int_cache, + &icount, &rhoSum, &rho_dhSum, &wcountSum, + &wcount_dhSum, &div_vSum, &curlvxSum, &curlvySum, + &curlvzSum, v_hi_inv, v_vix, v_viy, v_viz); + } + if (doi_mask2) { + storeInteractions( + doi_mask2, pjd + VEC_SIZE, &v_r2_2, &v_dx_tmp2, &v_dy_tmp2, + &v_dz_tmp2, &mj2, &pjvx2, &pjvy2, &pjvz2, cell_cache, &int_cache, + &icount, &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum, &div_vSum, + &curlvxSum, &curlvySum, &curlvzSum, v_hi_inv, v_vix, v_viy, v_viz); + } + } + + /* Perform padded vector remainder interactions if any are present. */ + calcRemInteractions(&int_cache, icount, &rhoSum, &rho_dhSum, &wcountSum, + &wcount_dhSum, &div_vSum, &curlvxSum, &curlvySum, + &curlvzSum, v_hi_inv, v_vix, v_viy, v_viz, + &icount_align); + + /* Initialise masks to true in case remainder interactions have been + * performed. */ + vector int_mask, int_mask2; +#ifdef HAVE_AVX512_F + KNL_MASK_16 knl_mask = 0xFFFF; + KNL_MASK_16 knl_mask2 = 0xFFFF; + int_mask.m = vec_setint1(0xFFFFFFFF); + int_mask2.m = vec_setint1(0xFFFFFFFF); +#else + int_mask.m = vec_setint1(0xFFFFFFFF); + int_mask2.m = vec_setint1(0xFFFFFFFF); +#endif + + /* Perform interaction with 2 vectors. */ + for (int pjd = 0; pjd < icount_align; pjd += (num_vec_proc * VEC_SIZE)) { + runner_iact_nonsym_2_vec_density( + &int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd], + &int_cache.dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz, + &int_cache.vxq[pjd], &int_cache.vyq[pjd], &int_cache.vzq[pjd], + &int_cache.mq[pjd], &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum, + &div_vSum, &curlvxSum, &curlvySum, &curlvzSum, int_mask, int_mask2, +#ifdef HAVE_AVX512_F + knl_mask, knl_mask2); +#else + 0, 0); +#endif + } + + /* Perform horizontal adds on vector sums and store result in particle pi. 
+ */ + VEC_HADD(rhoSum, pi->rho); + VEC_HADD(rho_dhSum, pi->density.rho_dh); + VEC_HADD(wcountSum, pi->density.wcount); + VEC_HADD(wcount_dhSum, pi->density.wcount_dh); + VEC_HADD(div_vSum, pi->density.div_v); + VEC_HADD(curlvxSum, pi->density.rot_v[0]); + VEC_HADD(curlvySum, pi->density.rot_v[1]); + VEC_HADD(curlvzSum, pi->density.rot_v[2]); + + /* Reset interaction count. */ + icount = 0; + } /* loop over all particles. */ + + TIMER_TOC(timer_doself_density); +#endif /* WITH_VECTORIZATION */ +} + +/** + * @brief Compute the cell self-interaction (non-symmetric) using vector + * intrinsics with two particle pis at a time. + * + * CURRENTLY BROKEN DO NOT USE. + * + * @param r The #runner. + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE void runner_doself1_density_vec_2( + struct runner *r, struct cell *restrict c) { + +#ifdef WITH_VECTORIZATION + const struct engine *e = r->e; + int doi_mask; + int doi2_mask; + struct part *restrict pi; + struct part *restrict pi2; + int count_align; + + vector v_hi, v_vix, v_viy, v_viz, v_hig2, v_r2; + vector v_hi2, v_vix2, v_viy2, v_viz2, v_hig2_2, v2_r2; + + TIMER_TIC + + if (!cell_is_active(c, e)) return; + + if (!cell_is_drifted(c, e)) cell_drift(c, e); + + /* TODO: Need to find two active particles, not just one. */ + + struct part *restrict parts = c->parts; + const int count = c->count; + + /* Get the particle cache from the runner and re-allocate + * the cache if it is not big enough for the cell. */ + struct cache *restrict cell_cache = &r->par_cache; + + if (cell_cache->count < count) { + cache_init(cell_cache, count); + } + + /* Read the particles from the cell and store them locally in the cache. */ + cache_read_particles(c, &r->par_cache); + + /* Create two secondary caches. */ + int icount = 0, icount_align = 0; + struct c2_cache int_cache; + + int icount2 = 0, icount_align2 = 0; + struct c2_cache int_cache2; + + /* Loop over the particles in the cell. 
*/ + for (int pid = 0; pid < count; pid += 2) { + + /* Get a pointer to the ith particle and next i particle. */ + pi = &parts[pid]; + pi2 = &parts[pid + 1]; + + /* Is the ith particle active? */ + if (!part_is_active(pi, e)) continue; + + vector pix, piy, piz; + vector pix2, piy2, piz2; + + const float hi = cell_cache->h[pid]; + const float hi2 = cell_cache->h[pid + 1]; + + /* Fill pi position vector. */ + pix.v = vec_set1(cell_cache->x[pid]); + piy.v = vec_set1(cell_cache->y[pid]); + piz.v = vec_set1(cell_cache->z[pid]); + v_hi.v = vec_set1(hi); + v_vix.v = vec_set1(cell_cache->vx[pid]); + v_viy.v = vec_set1(cell_cache->vy[pid]); + v_viz.v = vec_set1(cell_cache->vz[pid]); + + pix2.v = vec_set1(cell_cache->x[pid + 1]); + piy2.v = vec_set1(cell_cache->y[pid + 1]); + piz2.v = vec_set1(cell_cache->z[pid + 1]); + v_hi2.v = vec_set1(hi2); + v_vix2.v = vec_set1(cell_cache->vx[pid + 1]); + v_viy2.v = vec_set1(cell_cache->vy[pid + 1]); + v_viz2.v = vec_set1(cell_cache->vz[pid + 1]); + + const float hig2 = hi * hi * kernel_gamma2; + const float hig2_2 = hi2 * hi2 * kernel_gamma2; + v_hig2.v = vec_set1(hig2); + v_hig2_2.v = vec_set1(hig2_2); + + vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum, + curlvySum, curlvzSum; + vector rhoSum2, rho_dhSum2, wcountSum2, wcount_dhSum2, div_vSum2, + curlvxSum2, curlvySum2, curlvzSum2; + + vector v_hi_inv, v_hi_inv2; + + v_hi_inv = vec_reciprocal(v_hi); + v_hi_inv2 = vec_reciprocal(v_hi2); + + rhoSum.v = vec_setzero(); + rho_dhSum.v = vec_setzero(); + wcountSum.v = vec_setzero(); + wcount_dhSum.v = vec_setzero(); + div_vSum.v = vec_setzero(); + curlvxSum.v = vec_setzero(); + curlvySum.v = vec_setzero(); + curlvzSum.v = vec_setzero(); + + rhoSum2.v = vec_setzero(); + rho_dhSum2.v = vec_setzero(); + wcountSum2.v = vec_setzero(); + wcount_dhSum2.v = vec_setzero(); + div_vSum2.v = vec_setzero(); + curlvxSum2.v = vec_setzero(); + curlvySum2.v = vec_setzero(); + curlvzSum2.v = vec_setzero(); + + /* Pad cache if there is a 
serial remainder. */ + count_align = count; + int rem = count % (NUM_VEC_PROC * VEC_SIZE); + if (rem != 0) { + int pad = (NUM_VEC_PROC * VEC_SIZE) - rem; + + count_align += pad; + /* Set positions to the same as particle pi so when the r2 > 0 mask is + * applied these extra contributions are masked out.*/ + for (int i = count; i < count_align; i++) { + cell_cache->x[i] = pix.f[0]; + cell_cache->y[i] = piy.f[0]; + cell_cache->z[i] = piz.f[0]; + } + } + + vector pjx, pjy, pjz; + vector pjvx, pjvy, pjvz, mj; + vector pjx2, pjy2, pjz2; + vector pjvx2, pjvy2, pjvz2, mj2; + + /* Find all of particle pi's interacions and store needed values in + * secondary cache.*/ + for (int pjd = 0; pjd < count_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) { + + /* Load 2 sets of vectors from the particle cache. */ + pjx.v = vec_load(&cell_cache->x[pjd]); + pjy.v = vec_load(&cell_cache->y[pjd]); + pjz.v = vec_load(&cell_cache->z[pjd]); + pjvx.v = vec_load(&cell_cache->vx[pjd]); + pjvy.v = vec_load(&cell_cache->vy[pjd]); + pjvz.v = vec_load(&cell_cache->vz[pjd]); + mj.v = vec_load(&cell_cache->m[pjd]); + + pjx2.v = vec_load(&cell_cache->x[pjd + VEC_SIZE]); + pjy2.v = vec_load(&cell_cache->y[pjd + VEC_SIZE]); + pjz2.v = vec_load(&cell_cache->z[pjd + VEC_SIZE]); + pjvx2.v = vec_load(&cell_cache->vx[pjd + VEC_SIZE]); + pjvy2.v = vec_load(&cell_cache->vy[pjd + VEC_SIZE]); + pjvz2.v = vec_load(&cell_cache->vz[pjd + VEC_SIZE]); + mj2.v = vec_load(&cell_cache->m[pjd + VEC_SIZE]); + + /* Compute the pairwise distance. 
*/ + vector v_dx_tmp, v_dy_tmp, v_dz_tmp; + vector v_dx_tmp2, v_dy_tmp2, v_dz_tmp2, v_r2_2; + vector v_dx2_tmp, v_dy2_tmp, v_dz2_tmp; + vector v_dx2_tmp2, v_dy2_tmp2, v_dz2_tmp2, v2_r2_2; + + v_dx_tmp.v = vec_sub(pix.v, pjx.v); + v_dy_tmp.v = vec_sub(piy.v, pjy.v); + v_dz_tmp.v = vec_sub(piz.v, pjz.v); + v_dx_tmp2.v = vec_sub(pix.v, pjx2.v); + v_dy_tmp2.v = vec_sub(piy.v, pjy2.v); + v_dz_tmp2.v = vec_sub(piz.v, pjz2.v); + + v_dx2_tmp.v = vec_sub(pix2.v, pjx.v); + v_dy2_tmp.v = vec_sub(piy2.v, pjy.v); + v_dz2_tmp.v = vec_sub(piz2.v, pjz.v); + v_dx2_tmp2.v = vec_sub(pix2.v, pjx2.v); + v_dy2_tmp2.v = vec_sub(piy2.v, pjy2.v); + v_dz2_tmp2.v = vec_sub(piz2.v, pjz2.v); + + v_r2.v = vec_mul(v_dx_tmp.v, v_dx_tmp.v); + v_r2.v = vec_fma(v_dy_tmp.v, v_dy_tmp.v, v_r2.v); + v_r2.v = vec_fma(v_dz_tmp.v, v_dz_tmp.v, v_r2.v); + v_r2_2.v = vec_mul(v_dx_tmp2.v, v_dx_tmp2.v); + v_r2_2.v = vec_fma(v_dy_tmp2.v, v_dy_tmp2.v, v_r2_2.v); + v_r2_2.v = vec_fma(v_dz_tmp2.v, v_dz_tmp2.v, v_r2_2.v); + + v2_r2.v = vec_mul(v_dx2_tmp.v, v_dx2_tmp.v); + v2_r2.v = vec_fma(v_dy2_tmp.v, v_dy2_tmp.v, v2_r2.v); + v2_r2.v = vec_fma(v_dz2_tmp.v, v_dz2_tmp.v, v2_r2.v); + v2_r2_2.v = vec_mul(v_dx2_tmp2.v, v_dx2_tmp2.v); + v2_r2_2.v = vec_fma(v_dy2_tmp2.v, v_dy2_tmp2.v, v2_r2_2.v); + v2_r2_2.v = vec_fma(v_dz2_tmp2.v, v_dz2_tmp2.v, v2_r2_2.v); + +/* Form a mask from r2 < hig2 and r2 > 0.*/ +#ifdef HAVE_AVX512_F + // KNL_MASK_16 doi_mask, doi_mask_check, doi_mask2, doi_mask2_check; + KNL_MASK_16 doi_mask_check, doi_mask2, doi_mask2_check; + KNL_MASK_16 doi2_mask_check, doi2_mask2, doi2_mask2_check; + + doi_mask_check = vec_cmp_gt(v_r2.v, vec_setzero()); + doi_mask = vec_cmp_lt(v_r2.v, v_hig2.v); + + doi2_mask_check = vec_cmp_gt(v2_r2.v, vec_setzero()); + doi2_mask = vec_cmp_lt(v2_r2.v, v_hig2_2.v); + + doi_mask2_check = vec_cmp_gt(v_r2_2.v, vec_setzero()); + doi_mask2 = vec_cmp_lt(v_r2_2.v, v_hig2.v); + + doi2_mask2_check = vec_cmp_gt(v2_r2_2.v, vec_setzero()); + doi2_mask2 = vec_cmp_lt(v2_r2_2.v, 
v_hig2_2.v); + + doi_mask = doi_mask & doi_mask_check; + doi_mask2 = doi_mask2 & doi_mask2_check; + + doi2_mask = doi2_mask & doi2_mask_check; + doi2_mask2 = doi2_mask2 & doi2_mask2_check; +#else + vector v_doi_mask, v_doi_mask_check, v_doi_mask2, v_doi_mask2_check; + int doi_mask2; + + vector v_doi2_mask, v_doi2_mask_check, v_doi2_mask2, v_doi2_mask2_check; + int doi2_mask2; + + v_doi_mask_check.v = vec_cmp_gt(v_r2.v, vec_setzero()); + v_doi_mask.v = vec_cmp_lt(v_r2.v, v_hig2.v); + + v_doi2_mask_check.v = vec_cmp_gt(v2_r2.v, vec_setzero()); + v_doi2_mask.v = vec_cmp_lt(v2_r2.v, v_hig2_2.v); + + v_doi_mask2_check.v = vec_cmp_gt(v_r2_2.v, vec_setzero()); + v_doi_mask2.v = vec_cmp_lt(v_r2_2.v, v_hig2.v); + + v_doi2_mask2_check.v = vec_cmp_gt(v2_r2_2.v, vec_setzero()); + v_doi2_mask2.v = vec_cmp_lt(v2_r2_2.v, v_hig2_2.v); + + doi_mask = vec_cmp_result(vec_and(v_doi_mask.v, v_doi_mask_check.v)); + doi_mask2 = vec_cmp_result(vec_and(v_doi_mask2.v, v_doi_mask2_check.v)); + doi2_mask = vec_cmp_result(vec_and(v_doi2_mask.v, v_doi2_mask_check.v)); + doi2_mask2 = + vec_cmp_result(vec_and(v_doi2_mask2.v, v_doi2_mask2_check.v)); +#endif /* HAVE_AVX512_F */ + + /* Hit or miss? */ + // if (doi_mask) { + storeInteractions(doi_mask, pjd, &v_r2, &v_dx_tmp, &v_dy_tmp, &v_dz_tmp, + &mj, &pjvx, &pjvy, &pjvz, cell_cache, &int_cache, + &icount, &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum, + &div_vSum, &curlvxSum, &curlvySum, &curlvzSum, v_hi_inv, + v_vix, v_viy, v_viz); + //} + // if (doi2_mask) { + storeInteractions( + doi2_mask, pjd, &v2_r2, &v_dx2_tmp, &v_dy2_tmp, &v_dz2_tmp, &mj, + &pjvx, &pjvy, &pjvz, cell_cache, &int_cache2, &icount2, &rhoSum2, + &rho_dhSum2, &wcountSum2, &wcount_dhSum2, &div_vSum2, &curlvxSum2, + &curlvySum2, &curlvzSum2, v_hi_inv2, v_vix2, v_viy2, v_viz2); + //} + /* Hit or miss? 
*/ + // if (doi_mask2) { + storeInteractions(doi_mask2, pjd + VEC_SIZE, &v_r2_2, &v_dx_tmp2, + &v_dy_tmp2, &v_dz_tmp2, &mj2, &pjvx2, &pjvy2, &pjvz2, + cell_cache, &int_cache, &icount, &rhoSum, &rho_dhSum, + &wcountSum, &wcount_dhSum, &div_vSum, &curlvxSum, + &curlvySum, &curlvzSum, v_hi_inv, v_vix, v_viy, v_viz); + //} + // if (doi2_mask2) { + storeInteractions(doi2_mask2, pjd + VEC_SIZE, &v2_r2_2, &v_dx2_tmp2, + &v_dy2_tmp2, &v_dz2_tmp2, &mj2, &pjvx2, &pjvy2, &pjvz2, + cell_cache, &int_cache2, &icount2, &rhoSum2, + &rho_dhSum2, &wcountSum2, &wcount_dhSum2, &div_vSum2, + &curlvxSum2, &curlvySum2, &curlvzSum2, v_hi_inv2, + v_vix2, v_viy2, v_viz2); + //} + } + + /* Perform padded vector remainder interactions if any are present. */ + calcRemInteractions(&int_cache, icount, &rhoSum, &rho_dhSum, &wcountSum, + &wcount_dhSum, &div_vSum, &curlvxSum, &curlvySum, + &curlvzSum, v_hi_inv, v_vix, v_viy, v_viz, + &icount_align); + + calcRemInteractions(&int_cache2, icount2, &rhoSum2, &rho_dhSum2, + &wcountSum2, &wcount_dhSum2, &div_vSum2, &curlvxSum2, + &curlvySum2, &curlvzSum2, v_hi_inv2, v_vix2, v_viy2, + v_viz2, &icount_align2); + + /* Initialise masks to true incase remainder interactions have been + * performed. */ + vector int_mask, int_mask2; + vector int2_mask, int2_mask2; +#ifdef HAVE_AVX512_F + KNL_MASK_16 knl_mask = 0xFFFF; + KNL_MASK_16 knl_mask2 = 0xFFFF; + int_mask.m = vec_setint1(0xFFFFFFFF); + int_mask2.m = vec_setint1(0xFFFFFFFF); + int2_mask.m = vec_setint1(0xFFFFFFFF); + int2_mask2.m = vec_setint1(0xFFFFFFFF); +#else + int_mask.m = vec_setint1(0xFFFFFFFF); + int_mask2.m = vec_setint1(0xFFFFFFFF); + + int2_mask.m = vec_setint1(0xFFFFFFFF); + int2_mask2.m = vec_setint1(0xFFFFFFFF); +#endif + + /* Perform interaction with 2 vectors. 
*/ + for (int pjd = 0; pjd < icount_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) { + runner_iact_nonsym_2_vec_density( + &int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd], + &int_cache.dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz, + &int_cache.vxq[pjd], &int_cache.vyq[pjd], &int_cache.vzq[pjd], + &int_cache.mq[pjd], &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum, + &div_vSum, &curlvxSum, &curlvySum, &curlvzSum, int_mask, int_mask2, +#ifdef HAVE_AVX512_F + knl_mask, knl_mask2); +#else + 0, 0); +#endif + } + + for (int pjd = 0; pjd < icount_align2; pjd += (NUM_VEC_PROC * VEC_SIZE)) { + runner_iact_nonsym_2_vec_density( + &int_cache2.r2q[pjd], &int_cache2.dxq[pjd], &int_cache2.dyq[pjd], + &int_cache2.dzq[pjd], v_hi_inv2, v_vix2, v_viy2, v_viz2, + &int_cache2.vxq[pjd], &int_cache2.vyq[pjd], &int_cache2.vzq[pjd], + &int_cache2.mq[pjd], &rhoSum2, &rho_dhSum2, &wcountSum2, + &wcount_dhSum2, &div_vSum2, &curlvxSum2, &curlvySum2, &curlvzSum2, + int2_mask, int2_mask2, +#ifdef HAVE_AVX512_F + knl_mask, knl_mask2); +#else + 0, 0); +#endif + } + /* Perform horizontal adds on vector sums and store result in particle pi. + */ + VEC_HADD(rhoSum, pi->rho); + VEC_HADD(rho_dhSum, pi->density.rho_dh); + VEC_HADD(wcountSum, pi->density.wcount); + VEC_HADD(wcount_dhSum, pi->density.wcount_dh); + VEC_HADD(div_vSum, pi->density.div_v); + VEC_HADD(curlvxSum, pi->density.rot_v[0]); + VEC_HADD(curlvySum, pi->density.rot_v[1]); + VEC_HADD(curlvzSum, pi->density.rot_v[2]); + + VEC_HADD(rhoSum2, pi2->rho); + VEC_HADD(rho_dhSum2, pi2->density.rho_dh); + VEC_HADD(wcountSum2, pi2->density.wcount); + VEC_HADD(wcount_dhSum2, pi2->density.wcount_dh); + VEC_HADD(div_vSum2, pi2->density.div_v); + VEC_HADD(curlvxSum2, pi2->density.rot_v[0]); + VEC_HADD(curlvySum2, pi2->density.rot_v[1]); + VEC_HADD(curlvzSum2, pi2->density.rot_v[2]); + + /* Reset interaction count. */ + icount = 0; + icount2 = 0; + } /* loop over all particles. 
*/ + + TIMER_TOC(timer_doself_density); +#endif /* WITH_VECTORIZATION */ +} diff --git a/src/runner_doiact_vec.h b/src/runner_doiact_vec.h new file mode 100644 index 0000000000000000000000000000000000000000..9bb24f12cedf03ec49a5a03f92d308f92d49aa54 --- /dev/null +++ b/src/runner_doiact_vec.h @@ -0,0 +1,39 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 James Willis (james.s.willis@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#ifndef SWIFT_RUNNER_VEC_H +#define SWIFT_RUNNER_VEC_H + +/* Config parameters. */ +#include "../config.h" + +/* Local headers */ +#include "cell.h" +#include "engine.h" +#include "hydro.h" +#include "part.h" +#include "runner.h" +#include "timers.h" +#include "vector.h" + +/* Function prototypes. 
*/ +void runner_doself1_density_vec(struct runner *r, struct cell *restrict c); +void runner_doself1_density_vec_2(struct runner *r, struct cell *restrict c); + +#endif /* SWIFT_RUNNER_VEC_H */ diff --git a/src/scheduler.c b/src/scheduler.c index 0d7c8c4754bac931c7886200176e3e9441c63c53..f98c1082afbf7ec029a7556e36eb9d18ed37bd0a 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -132,9 +132,14 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { /* Non-splittable task? */ if ((t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) || - ((t->type == task_type_kick) && t->ci->nodeID != s->nodeID) || + ((t->type == task_type_kick1) && t->ci->nodeID != s->nodeID) || + ((t->type == task_type_kick2) && t->ci->nodeID != s->nodeID) || + ((t->type == task_type_drift) && t->ci->nodeID != s->nodeID) || + ((t->type == task_type_timestep) && t->ci->nodeID != s->nodeID) || ((t->type == task_type_init) && t->ci->nodeID != s->nodeID)) { t->type = task_type_none; + t->subtype = task_subtype_none; + t->cj = NULL; t->skip = 1; break; } @@ -214,7 +219,7 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { /* Get the sort ID, use space_getsid and not t->flags to make sure we get ci and cj swapped if needed. */ double shift[3]; - int sid = space_getsid(s->space, &ci, &cj, shift); + const int sid = space_getsid(s->space, &ci, &cj, shift); /* Should this task be split-up? */ if (ci->split && cj->split && @@ -690,6 +695,12 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type, enum task_subtypes subtype, int flags, int wait, struct cell *ci, struct cell *cj, int tight) { +#ifdef SWIFT_DEBUG_CHECKS + if (ci == NULL && cj != NULL) + error("Added a task with ci==NULL and cj!=NULL type=%s/%s", + taskID_names[type], subtaskID_names[subtype]); +#endif + /* Get the next free task. 
*/ const int ind = atomic_inc(&s->tasks_next); @@ -782,7 +793,10 @@ void scheduler_set_unlocks(struct scheduler *s) { for (int i = 0; i < t->nr_unlock_tasks; i++) { for (int j = i + 1; j < t->nr_unlock_tasks; j++) { if (t->unlock_tasks[i] == t->unlock_tasks[j]) - error("duplicate unlock!"); + error("duplicate unlock! t->type=%s/%s unlocking type=%s/%s", + taskID_names[t->type], subtaskID_names[t->subtype], + taskID_names[t->unlock_tasks[i]->type], + subtaskID_names[t->unlock_tasks[i]->subtype]); } } } @@ -959,7 +973,16 @@ void scheduler_reweight(struct scheduler *s, int verbose) { case task_type_ghost: if (t->ci == t->ci->super) cost = wscale * t->ci->count; break; - case task_type_kick: + case task_type_drift: + cost = wscale * t->ci->count; + break; + case task_type_kick1: + cost = wscale * t->ci->count; + break; + case task_type_kick2: + cost = wscale * t->ci->count; + break; + case task_type_timestep: cost = wscale * t->ci->count; break; case task_type_init: @@ -1052,7 +1075,7 @@ void scheduler_start(struct scheduler *s) { /* Check we have not missed an active task */ #ifdef SWIFT_DEBUG_CHECKS - const int ti_current = s->space->e->ti_current; + const integertime_t ti_current = s->space->e->ti_current; if (ti_current > 0) { @@ -1062,13 +1085,24 @@ void scheduler_start(struct scheduler *s) { struct cell *ci = t->ci; struct cell *cj = t->cj; - if (cj == NULL) { /* self */ + if (t->type == task_type_none) continue; + + /* Don't check MPI stuff */ + if (t->type == task_type_send || t->type == task_type_recv) continue; + + if (ci == NULL && cj == NULL) { + + if (t->type != task_type_grav_gather_m && t->type != task_type_grav_fft) + error("Task not associated with cells!"); + + } else if (cj == NULL) { /* self */ if (ci->ti_end_min == ti_current && t->skip && - t->type != task_type_sort) + t->type != task_type_sort && t->type) error( - "Task (type='%s/%s') should not have been skipped ti_current=%d " - "c->ti_end_min=%d", + "Task (type='%s/%s') should not have been 
skipped " + "ti_current=%lld " + "c->ti_end_min=%lld", taskID_names[t->type], subtaskID_names[t->subtype], ti_current, ci->ti_end_min); @@ -1076,20 +1110,26 @@ void scheduler_start(struct scheduler *s) { if (ci->ti_end_min == ti_current && t->skip && t->type == task_type_sort && t->flags == 0) error( - "Task (type='%s/%s') should not have been skipped ti_current=%d " - "c->ti_end_min=%d t->flags=%d", + "Task (type='%s/%s') should not have been skipped " + "ti_current=%lld " + "c->ti_end_min=%lld t->flags=%d", taskID_names[t->type], subtaskID_names[t->subtype], ti_current, ci->ti_end_min, t->flags); } else { /* pair */ - if ((ci->ti_end_min == ti_current || cj->ti_end_min == ti_current) && - t->skip) - error( - "Task (type='%s/%s') should not have been skipped ti_current=%d " - "ci->ti_end_min=%d cj->ti_end_min=%d", - taskID_names[t->type], subtaskID_names[t->subtype], ti_current, - ci->ti_end_min, cj->ti_end_min); + if (t->skip) { + + /* Check that the pair is active if the local cell is active */ + if ((ci->ti_end_min == ti_current && ci->nodeID == engine_rank) || + (cj->ti_end_min == ti_current && cj->nodeID == engine_rank)) + error( + "Task (type='%s/%s') should not have been skipped " + "ti_current=%lld " + "ci->ti_end_min=%lld cj->ti_end_min=%lld", + taskID_names[t->type], subtaskID_names[t->subtype], ti_current, + ci->ti_end_min, cj->ti_end_min); + } } } } @@ -1137,7 +1177,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { /* Otherwise, look for a suitable queue. 
*/ else { #ifdef WITH_MPI - int err; + int err = MPI_SUCCESS; #endif /* Find the previous owner for each task type, and do @@ -1147,7 +1187,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { case task_type_sub_self: case task_type_sort: case task_type_ghost: - case task_type_kick: + case task_type_kick1: + case task_type_kick2: + case task_type_drift: + case task_type_timestep: case task_type_init: qid = t->ci->super->owner; break; @@ -1161,19 +1204,29 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { case task_type_recv: #ifdef WITH_MPI if (t->subtype == task_subtype_tend) { - t->buff = malloc(sizeof(int) * t->ci->pcell_size); - err = MPI_Irecv(t->buff, t->ci->pcell_size, MPI_INT, t->ci->nodeID, - t->flags, MPI_COMM_WORLD, &t->req); - } else { + t->buff = malloc(sizeof(integertime_t) * t->ci->pcell_size); + err = MPI_Irecv(t->buff, t->ci->pcell_size * sizeof(integertime_t), + MPI_BYTE, t->ci->nodeID, t->flags, MPI_COMM_WORLD, + &t->req); + } else if (t->subtype == task_subtype_xv || + t->subtype == task_subtype_rho) { err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type, t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + // message( "receiving %i parts with tag=%i from %i to %i." , + // t->ci->count , t->flags , t->ci->nodeID , s->nodeID ); + // fflush(stdout); + } else if (t->subtype == task_subtype_gpart) { + err = MPI_Irecv(t->ci->gparts, t->ci->gcount, gpart_mpi_type, + t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + } else if (t->subtype == task_subtype_spart) { + err = MPI_Irecv(t->ci->sparts, t->ci->scount, spart_mpi_type, + t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + } else { + error("Unknown communication sub-type"); } if (err != MPI_SUCCESS) { mpi_error(err, "Failed to emit irecv for particle data."); } - // message( "receiving %i parts with tag=%i from %i to %i." 
, - // t->ci->count , t->flags , t->ci->nodeID , s->nodeID ); - // fflush(stdout); qid = 1 % s->nr_queues; #else error("SWIFT was not compiled with MPI support."); @@ -1182,20 +1235,35 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { case task_type_send: #ifdef WITH_MPI if (t->subtype == task_subtype_tend) { - t->buff = malloc(sizeof(int) * t->ci->pcell_size); + t->buff = malloc(sizeof(integertime_t) * t->ci->pcell_size); cell_pack_ti_ends(t->ci, t->buff); - err = MPI_Isend(t->buff, t->ci->pcell_size, MPI_INT, t->cj->nodeID, - t->flags, MPI_COMM_WORLD, &t->req); - } else { + err = MPI_Isend(t->buff, t->ci->pcell_size * sizeof(integertime_t), + MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD, + &t->req); + } else if (t->subtype == task_subtype_xv || + t->subtype == task_subtype_rho) { +#ifdef SWIFT_DEBUG_CHECKS + for (int k = 0; k < t->ci->count; k++) + if (t->ci->parts[k].ti_drift != s->space->e->ti_current) + error("Sending un-drifted particle !"); +#endif err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type, t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + // message( "sending %i parts with tag=%i from %i to %i." , + // t->ci->count , t->flags , s->nodeID , t->cj->nodeID ); + // fflush(stdout); + } else if (t->subtype == task_subtype_gpart) { + err = MPI_Isend(t->ci->gparts, t->ci->gcount, gpart_mpi_type, + t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + } else if (t->subtype == task_subtype_spart) { + err = MPI_Isend(t->ci->sparts, t->ci->scount, spart_mpi_type, + t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + } else { + error("Unknown communication sub-type"); } if (err != MPI_SUCCESS) { mpi_error(err, "Failed to emit isend for particle data."); } - // message( "sending %i parts with tag=%i from %i to %i." 
, - // t->ci->count , t->flags , s->nodeID , t->cj->nodeID ); - // fflush(stdout); qid = 0; #else error("SWIFT was not compiled with MPI support."); @@ -1408,8 +1476,8 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks, lock_init(&s->lock); /* Allocate the queues. */ - if ((s->queues = (struct queue *)malloc(sizeof(struct queue) * nr_queues)) == - NULL) + if (posix_memalign((void **)&s->queues, queue_struct_align, + sizeof(struct queue) * nr_queues) != 0) error("Failed to allocate queues."); /* Initialize each queue. */ diff --git a/src/serial_io.c b/src/serial_io.c index b9ad0fbaa856a889d3f84bb42013282f3640fd5e..eaf5541992981463213db9685290fa9f624a4130 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -46,6 +46,7 @@ #include "io_properties.h" #include "kernel_hydro.h" #include "part.h" +#include "stars_io.h" #include "units.h" /*----------------------------------------------------------------------------- @@ -397,11 +398,16 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile, * @param dim (output) The dimension of the volume read from the file. * @param parts (output) The array of #part (gas particles) read from the file. * @param gparts (output) The array of #gpart read from the file. + * @param sparts (output) Array of #spart particles. * @param Ngas (output) The number of #part read from the file on that node. * @param Ngparts (output) The number of #gpart read from the file on that node. + * @param Nstars (output) The number of #spart read from the file on that node. * @param periodic (output) 1 if the volume is periodic, 0 if not. * @param flag_entropy (output) 1 if the ICs contained Entropy in the * InternalEnergy field + * @param with_hydro Are we reading gas particles ? + * @param with_gravity Are we reading/creating #gpart arrays ? + * @param with_stars Are we reading star particles ? 
* @param mpi_rank The MPI rank of this node * @param mpi_size The number of MPI ranks * @param comm The MPI communicator @@ -418,19 +424,23 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile, */ void read_ic_serial(char* fileName, const struct UnitSystem* internal_units, double dim[3], struct part** parts, struct gpart** gparts, - size_t* Ngas, size_t* Ngparts, int* periodic, - int* flag_entropy, int mpi_rank, int mpi_size, - MPI_Comm comm, MPI_Info info, int dry_run) { + struct spart** sparts, size_t* Ngas, size_t* Ngparts, + size_t* Nstars, int* periodic, int* flag_entropy, + int with_hydro, int with_gravity, int with_stars, + int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info, + int dry_run) { + hid_t h_file = 0, h_grp = 0; /* GADGET has only cubic boxes (in cosmological mode) */ double boxSize[3] = {0.0, -1.0, -1.0}; /* GADGET has 6 particle types. We only keep the type 0 & 1 for now*/ - int numParticles[NUM_PARTICLE_TYPES] = {0}; - int numParticles_highWord[NUM_PARTICLE_TYPES] = {0}; + long long numParticles[NUM_PARTICLE_TYPES] = {0}; + long long numParticles_highWord[NUM_PARTICLE_TYPES] = {0}; size_t N[NUM_PARTICLE_TYPES] = {0}; long long N_total[NUM_PARTICLE_TYPES] = {0}; long long offset[NUM_PARTICLE_TYPES] = {0}; int dimension = 3; /* Assume 3D if nothing is specified */ + size_t Ndm = 0; struct UnitSystem* ic_units = malloc(sizeof(struct UnitSystem)); /* First read some information about the content */ @@ -472,12 +482,13 @@ void read_ic_serial(char* fileName, const struct UnitSystem* internal_units, readAttribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy_temp); *flag_entropy = flag_entropy_temp[0]; readAttribute(h_grp, "BoxSize", DOUBLE, boxSize); - readAttribute(h_grp, "NumPart_Total", UINT, numParticles); - readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord); + readAttribute(h_grp, "NumPart_Total", LONGLONG, numParticles); + readAttribute(h_grp, "NumPart_Total_HighWord", LONGLONG, + 
numParticles_highWord); for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) - N_total[ptype] = ((long long)numParticles[ptype]) + - ((long long)numParticles_highWord[ptype] << 32); + N_total[ptype] = + (numParticles[ptype]) + (numParticles_highWord[ptype] << 32); dim[0] = boxSize[0]; dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1]; @@ -536,7 +547,7 @@ void read_ic_serial(char* fileName, const struct UnitSystem* internal_units, /* Now need to broadcast that information to all ranks. */ MPI_Bcast(flag_entropy, 1, MPI_INT, 0, comm); MPI_Bcast(periodic, 1, MPI_INT, 0, comm); - MPI_Bcast(&N_total, NUM_PARTICLE_TYPES, MPI_LONG_LONG, 0, comm); + MPI_Bcast(&N_total, NUM_PARTICLE_TYPES, MPI_LONG_LONG_INT, 0, comm); MPI_Bcast(dim, 3, MPI_DOUBLE, 0, comm); MPI_Bcast(ic_units, sizeof(struct UnitSystem), MPI_BYTE, 0, comm); @@ -547,19 +558,32 @@ void read_ic_serial(char* fileName, const struct UnitSystem* internal_units, } /* Allocate memory to store SPH particles */ - *Ngas = N[0]; - if (posix_memalign((void*)parts, part_align, (*Ngas) * sizeof(struct part)) != - 0) - error("Error while allocating memory for particles"); - bzero(*parts, *Ngas * sizeof(struct part)); - - /* Allocate memory to store all particles */ - const size_t Ndm = N[1]; - *Ngparts = N[1] + N[0]; - if (posix_memalign((void*)gparts, gpart_align, - *Ngparts * sizeof(struct gpart)) != 0) - error("Error while allocating memory for gravity particles"); - bzero(*gparts, *Ngparts * sizeof(struct gpart)); + if (with_hydro) { + *Ngas = N[0]; + if (posix_memalign((void*)parts, part_align, *Ngas * sizeof(struct part)) != + 0) + error("Error while allocating memory for SPH particles"); + bzero(*parts, *Ngas * sizeof(struct part)); + } + + /* Allocate memory to store star particles */ + if (with_stars) { + *Nstars = N[STAR]; + if (posix_memalign((void*)sparts, spart_align, + *Nstars * sizeof(struct spart)) != 0) + error("Error while allocating memory for star particles"); + bzero(*sparts, *Nstars * 
sizeof(struct spart)); + } + + /* Allocate memory to store all gravity particles */ + if (with_gravity) { + Ndm = N[1]; + *Ngparts = (with_hydro ? N[GAS] : 0) + N[DM] + (with_stars ? N[STAR] : 0); + if (posix_memalign((void*)gparts, gpart_align, + *Ngparts * sizeof(struct gpart)) != 0) + error("Error while allocating memory for gravity particles"); + bzero(*gparts, *Ngparts * sizeof(struct gpart)); + } /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) / */ /* (1024.*1024.)); */ @@ -602,13 +626,24 @@ void read_ic_serial(char* fileName, const struct UnitSystem* internal_units, switch (ptype) { case GAS: - Nparticles = *Ngas; - hydro_read_particles(*parts, list, &num_fields); + if (with_hydro) { + Nparticles = *Ngas; + hydro_read_particles(*parts, list, &num_fields); + } break; case DM: - Nparticles = Ndm; - darkmatter_read_particles(*gparts, list, &num_fields); + if (with_gravity) { + Nparticles = Ndm; + darkmatter_read_particles(*gparts, list, &num_fields); + } + break; + + case STAR: + if (with_stars) { + Nparticles = *Nstars; + star_read_particles(*sparts, list, &num_fields); + } break; default: @@ -634,16 +669,21 @@ void read_ic_serial(char* fileName, const struct UnitSystem* internal_units, MPI_Barrier(comm); } - /* Clean up */ - free(ic_units); - /* Prepare the DM particles */ - if (!dry_run) prepare_dm_gparts(*gparts, Ndm); + if (!dry_run && with_gravity) prepare_dm_gparts(*gparts, Ndm); - /* Now duplicate the hydro particle into gparts */ - if (!dry_run) duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm); + /* Duplicate the hydro particles into gparts */ + if (!dry_run && with_gravity && with_hydro) + duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm); + + /* Duplicate the star particles into gparts */ + if (!dry_run && with_gravity && with_stars) + duplicate_star_gparts(*sparts, *gparts, *Nstars, Ndm + *Ngas); /* message("Done Reading particles..."); */ + + /* Clean up */ + free(ic_units); } /** @@ -673,17 +713,19 @@ void 
write_output_serial(struct engine* e, const char* baseName, hid_t h_file = 0, h_grp = 0; const size_t Ngas = e->s->nr_parts; + const size_t Nstars = e->s->nr_sparts; const size_t Ntot = e->s->nr_gparts; int periodic = e->s->periodic; int numFiles = 1; struct part* parts = e->s->parts; struct gpart* gparts = e->s->gparts; struct gpart* dmparts = NULL; + struct spart* sparts = e->s->sparts; static int outputCount = 0; FILE* xmfFile = 0; /* Number of unassociated gparts */ - const size_t Ndm = Ntot > 0 ? Ntot - Ngas : 0; + const size_t Ndm = Ntot > 0 ? Ntot - (Ngas + Nstars) : 0; /* File name */ char fileName[FILENAME_BUFFER_SIZE]; @@ -691,15 +733,15 @@ void write_output_serial(struct engine* e, const char* baseName, outputCount); /* Compute offset in the file and total number of particles */ - size_t N[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0}; + size_t N[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0, 0, Nstars, 0}; long long N_total[NUM_PARTICLE_TYPES] = {0}; long long offset[NUM_PARTICLE_TYPES] = {0}; - MPI_Exscan(&N, &offset, NUM_PARTICLE_TYPES, MPI_LONG_LONG, MPI_SUM, comm); + MPI_Exscan(&N, &offset, NUM_PARTICLE_TYPES, MPI_LONG_LONG_INT, MPI_SUM, comm); for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) N_total[ptype] = offset[ptype] + N[ptype]; /* The last rank now has the correct N_total. 
Let's broadcast from there */ - MPI_Bcast(&N_total, 6, MPI_LONG_LONG, mpi_size - 1, comm); + MPI_Bcast(&N_total, 6, MPI_LONG_LONG_INT, mpi_size - 1, comm); /* Now everybody konws its offset and the total number of particles of each * type */ @@ -909,7 +951,11 @@ void write_output_serial(struct engine* e, const char* baseName, /* Write DM particles */ Nparticles = Ndm; darkmatter_write_particles(dmparts, list, &num_fields); + break; + case STAR: + Nparticles = Nstars; + star_write_particles(sparts, list, &num_fields); break; default: @@ -923,7 +969,10 @@ void write_output_serial(struct engine* e, const char* baseName, internal_units, snapshot_units); /* Free temporary array */ - free(dmparts); + if (dmparts) { + free(dmparts); + dmparts = 0; + } /* Close particle group */ H5Gclose(h_grp); diff --git a/src/serial_io.h b/src/serial_io.h index a2226e5cd9848ff2515b15111af43ccc67275a28..94dd68b93626411ec7dc314d783d80c9e0e967b6 100644 --- a/src/serial_io.h +++ b/src/serial_io.h @@ -36,9 +36,11 @@ void read_ic_serial(char* fileName, const struct UnitSystem* internal_units, double dim[3], struct part** parts, struct gpart** gparts, - size_t* Ngas, size_t* Ngparts, int* periodic, - int* flag_entropy, int mpi_rank, int mpi_size, - MPI_Comm comm, MPI_Info info, int dry_run); + struct spart** sparts, size_t* Ngas, size_t* Ngparts, + size_t* Nstars, int* periodic, int* flag_entropy, + int with_hydro, int with_gravity, int with_stars, + int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info, + int dry_run); void write_output_serial(struct engine* e, const char* baseName, const struct UnitSystem* internal_units, diff --git a/src/single_io.c b/src/single_io.c index ceeba4eb80a47c3feed7e898deb5e1fe7e427c0c..b279f22086833bc689919f41a8904232e234a394 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -45,6 +45,7 @@ #include "io_properties.h" #include "kernel_hydro.h" #include "part.h" +#include "stars_io.h" #include "units.h" 
/*----------------------------------------------------------------------------- @@ -312,14 +313,18 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile, * @param fileName The file to read. * @param internal_units The system units used internally * @param dim (output) The dimension of the volume. - * @param parts (output) Array of Gas particles. + * @param parts (output) Array of #part particles. * @param gparts (output) Array of #gpart particles. + * @param sparts (output) Array of #spart particles. * @param Ngas (output) number of Gas particles read. * @param Ngparts (output) The number of #gpart read. + * @param Nstars (output) The number of #spart read. * @param periodic (output) 1 if the volume is periodic, 0 if not. * @param flag_entropy (output) 1 if the ICs contained Entropy in the - * InternalEnergy - * field + * InternalEnergy field + * @param with_hydro Are we reading gas particles ? + * @param with_gravity Are we reading/creating #gpart arrays ? + * @param with_stars Are we reading star particles ? * @param dry_run If 1, don't read the particle. Only allocates the arrays. 
* * Opens the HDF5 file fileName and reads the particles contained @@ -332,8 +337,10 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile, */ void read_ic_single(char* fileName, const struct UnitSystem* internal_units, double dim[3], struct part** parts, struct gpart** gparts, - size_t* Ngas, size_t* Ngparts, int* periodic, - int* flag_entropy, int dry_run) { + struct spart** sparts, size_t* Ngas, size_t* Ngparts, + size_t* Nstars, int* periodic, int* flag_entropy, + int with_hydro, int with_gravity, int with_stars, + int dry_run) { hid_t h_file = 0, h_grp = 0; /* GADGET has only cubic boxes (in cosmological mode) */ @@ -343,7 +350,7 @@ void read_ic_single(char* fileName, const struct UnitSystem* internal_units, int numParticles_highWord[NUM_PARTICLE_TYPES] = {0}; size_t N[NUM_PARTICLE_TYPES] = {0}; int dimension = 3; /* Assume 3D if nothing is specified */ - size_t Ndm; + size_t Ndm = 0; /* Open file */ /* message("Opening file '%s' as IC.", fileName); */ @@ -439,19 +446,32 @@ void read_ic_single(char* fileName, const struct UnitSystem* internal_units, units_conversion_factor(ic_units, internal_units, UNIT_CONV_LENGTH); /* Allocate memory to store SPH particles */ - *Ngas = N[0]; - if (posix_memalign((void*)parts, part_align, *Ngas * sizeof(struct part)) != - 0) - error("Error while allocating memory for SPH particles"); - bzero(*parts, *Ngas * sizeof(struct part)); - - /* Allocate memory to store all particles */ - Ndm = N[1]; - *Ngparts = N[1] + N[0]; - if (posix_memalign((void*)gparts, gpart_align, - *Ngparts * sizeof(struct gpart)) != 0) - error("Error while allocating memory for gravity particles"); - bzero(*gparts, *Ngparts * sizeof(struct gpart)); + if (with_hydro) { + *Ngas = N[GAS]; + if (posix_memalign((void*)parts, part_align, *Ngas * sizeof(struct part)) != + 0) + error("Error while allocating memory for SPH particles"); + bzero(*parts, *Ngas * sizeof(struct part)); + } + + /* Allocate memory to store star particles */ + if 
(with_stars) { + *Nstars = N[STAR]; + if (posix_memalign((void*)sparts, spart_align, + *Nstars * sizeof(struct spart)) != 0) + error("Error while allocating memory for star particles"); + bzero(*sparts, *Nstars * sizeof(struct spart)); + } + + /* Allocate memory to store all gravity particles */ + if (with_gravity) { + Ndm = N[DM]; + *Ngparts = (with_hydro ? N[GAS] : 0) + N[DM] + (with_stars ? N[STAR] : 0); + if (posix_memalign((void*)gparts, gpart_align, + *Ngparts * sizeof(struct gpart)) != 0) + error("Error while allocating memory for gravity particles"); + bzero(*gparts, *Ngparts * sizeof(struct gpart)); + } /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) / * (1024.*1024.)); */ @@ -482,13 +502,24 @@ void read_ic_single(char* fileName, const struct UnitSystem* internal_units, switch (ptype) { case GAS: - Nparticles = *Ngas; - hydro_read_particles(*parts, list, &num_fields); + if (with_hydro) { + Nparticles = *Ngas; + hydro_read_particles(*parts, list, &num_fields); + } break; case DM: - Nparticles = Ndm; - darkmatter_read_particles(*gparts, list, &num_fields); + if (with_gravity) { + Nparticles = Ndm; + darkmatter_read_particles(*gparts, list, &num_fields); + } + break; + + case STAR: + if (with_stars) { + Nparticles = *Nstars; + star_read_particles(*sparts, list, &num_fields); + } break; default: @@ -505,10 +536,15 @@ void read_ic_single(char* fileName, const struct UnitSystem* internal_units, } /* Prepare the DM particles */ - if (!dry_run) prepare_dm_gparts(*gparts, Ndm); + if (!dry_run && with_gravity) prepare_dm_gparts(*gparts, Ndm); + + /* Duplicate the hydro particles into gparts */ + if (!dry_run && with_gravity && with_hydro) + duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm); - /* Now duplicate the hydro particle into gparts */ - if (!dry_run) duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm); + /* Duplicate the star particles into gparts */ + if (!dry_run && with_gravity && with_stars) + duplicate_star_gparts(*sparts, 
*gparts, *Nstars, Ndm + *Ngas); /* message("Done Reading particles..."); */ @@ -541,18 +577,20 @@ void write_output_single(struct engine* e, const char* baseName, hid_t h_file = 0, h_grp = 0; const size_t Ngas = e->s->nr_parts; + const size_t Nstars = e->s->nr_sparts; const size_t Ntot = e->s->nr_gparts; int periodic = e->s->periodic; int numFiles = 1; struct part* parts = e->s->parts; struct gpart* gparts = e->s->gparts; struct gpart* dmparts = NULL; + struct spart* sparts = e->s->sparts; static int outputCount = 0; /* Number of unassociated gparts */ - const size_t Ndm = Ntot > 0 ? Ntot - Ngas : 0; + const size_t Ndm = Ntot > 0 ? Ntot - (Ngas + Nstars) : 0; - long long N_total[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0}; + long long N_total[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0, 0, Nstars, 0}; /* File name */ char fileName[FILENAME_BUFFER_SIZE]; @@ -729,6 +767,11 @@ void write_output_single(struct engine* e, const char* baseName, darkmatter_write_particles(dmparts, list, &num_fields); break; + case STAR: + N = Nstars; + star_write_particles(sparts, list, &num_fields); + break; + default: error("Particle Type %d not yet supported. 
Aborting", ptype); } @@ -739,7 +782,10 @@ void write_output_single(struct engine* e, const char* baseName, internal_units, snapshot_units); /* Free temporary array */ - free(dmparts); + if (dmparts) { + free(dmparts); + dmparts = NULL; + } /* Close particle group */ H5Gclose(h_grp); diff --git a/src/single_io.h b/src/single_io.h index 51a30a7bc6af7f3aaf5708a3d2df14982e026e3e..bc803b262f70f72ea93090d56112f5a70737c840 100644 --- a/src/single_io.h +++ b/src/single_io.h @@ -31,7 +31,9 @@ void read_ic_single(char* fileName, const struct UnitSystem* internal_units, double dim[3], struct part** parts, struct gpart** gparts, - size_t* Ngas, size_t* Ndm, int* periodic, int* flag_entropy, + struct spart** sparts, size_t* Ngas, size_t* Ndm, + size_t* Nstars, int* periodic, int* flag_entropy, + int with_hydro, int with_gravity, int with_stars, int dry_run); void write_output_single(struct engine* e, const char* baseName, diff --git a/src/space.c b/src/space.c index 6e6a0768ff6a3a2982fd23edd84d61ac9afd5515..802dc30d1bcd44d4cce46b2a803afade07f5d685 100644 --- a/src/space.c +++ b/src/space.c @@ -52,6 +52,7 @@ #include "memswap.h" #include "minmax.h" #include "runner.h" +#include "stars.h" #include "threadpool.h" #include "tools.h" @@ -107,6 +108,7 @@ struct parallel_sort { struct part *parts; struct gpart *gparts; struct xpart *xparts; + struct spart *sparts; int *ind; struct qstack *stack; unsigned int stack_size; @@ -173,6 +175,8 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj, * * @param s The #space. * @param c The #cell to recycle. + * @param rec_begin Pointer to the start of the list of cells to recycle. + * @param rec_end Pointer to the end of the list of cells to recycle. 
*/ void space_rebuild_recycle_rec(struct space *s, struct cell *c, struct cell **rec_begin, struct cell **rec_end) { @@ -208,10 +212,14 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->sorted = 0; c->count = 0; c->gcount = 0; + c->scount = 0; c->init = NULL; c->extra_ghost = NULL; c->ghost = NULL; - c->kick = NULL; + c->kick1 = NULL; + c->kick2 = NULL; + c->timestep = NULL; + c->drift = NULL; c->cooling = NULL; c->sourceterms = NULL; c->super = c; @@ -243,7 +251,7 @@ void space_regrid(struct space *s, int verbose) { const size_t nr_parts = s->nr_parts; const ticks tic = getticks(); - const int ti_current = (s->e != NULL) ? s->e->ti_current : 0; + const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0; /* Run through the cells and get the current h_max. */ // tic = getticks(); @@ -343,6 +351,12 @@ void space_regrid(struct space *s, int verbose) { if (s->cells_top == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2]) { +/* Be verbose about this. */ +#ifdef SWIFT_DEBUG_CHECKS + message("re)griding space cdim=(%d %d %d)", cdim[0], cdim[1], cdim[2]); + fflush(stdout); +#endif + /* Free the old cells, if they were allocated. */ if (s->cells_top != NULL) { threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper, @@ -386,6 +400,7 @@ void space_regrid(struct space *s, int verbose) { c->depth = 0; c->count = 0; c->gcount = 0; + c->scount = 0; c->super = c; c->ti_old = ti_current; lock_init(&c->lock); @@ -459,16 +474,20 @@ void space_rebuild(struct space *s, int verbose) { const ticks tic = getticks(); - /* Be verbose about this. */ - // message("re)building space..."); fflush(stdout); +/* Be verbose about this. */ +#ifdef SWIFT_DEBUG_CHECKS + if (s->e->nodeID == 0 || verbose) message("re)building space"); + fflush(stdout); +#endif /* Re-grid if necessary, or just re-set the cell data. 
*/ space_regrid(s, verbose); size_t nr_parts = s->nr_parts; size_t nr_gparts = s->nr_gparts; + size_t nr_sparts = s->nr_sparts; struct cell *restrict cells_top = s->cells_top; - const int ti_current = (s->e != NULL) ? s->e->ti_current : 0; + const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0; /* Run through the particles and get their cell index. Allocates an index that is larger than the number of particles to avoid @@ -487,25 +506,37 @@ void space_rebuild(struct space *s, int verbose) { if (s->size_gparts > 0) space_gparts_get_cell_index(s, gind, cells_top, verbose); + /* Run through the star particles and get their cell index. */ + const size_t sind_size = s->size_sparts + 100; + int *sind; + if ((sind = (int *)malloc(sizeof(int) * sind_size)) == NULL) + error("Failed to allocate temporary s-particle indices."); + if (s->size_sparts > 0) + space_sparts_get_cell_index(s, sind, cells_top, verbose); + #ifdef WITH_MPI + const int local_nodeID = s->e->nodeID; /* Move non-local parts to the end of the list. */ - const int local_nodeID = s->e->nodeID; for (size_t k = 0; k < nr_parts;) { if (cells_top[ind[k]].nodeID != local_nodeID) { nr_parts -= 1; + /* Swap the particle */ const struct part tp = s->parts[k]; s->parts[k] = s->parts[nr_parts]; s->parts[nr_parts] = tp; + /* Swap the link with the gpart */ if (s->parts[k].gpart != NULL) { s->parts[k].gpart->id_or_neg_offset = -k; } if (s->parts[nr_parts].gpart != NULL) { s->parts[nr_parts].gpart->id_or_neg_offset = -nr_parts; } + /* Swap the xpart */ const struct xpart txp = s->xparts[k]; s->xparts[k] = s->xparts[nr_parts]; s->xparts[nr_parts] = txp; + /* Swap the index */ const int t = ind[k]; ind[k] = ind[nr_parts]; ind[nr_parts] = t; @@ -529,20 +560,67 @@ void space_rebuild(struct space *s, int verbose) { } #endif + /* Move non-local sparts to the end of the list. 
*/ + for (size_t k = 0; k < nr_sparts;) { + if (cells_top[sind[k]].nodeID != local_nodeID) { + nr_sparts -= 1; + /* Swap the particle */ + const struct spart tp = s->sparts[k]; + s->sparts[k] = s->sparts[nr_sparts]; + s->sparts[nr_sparts] = tp; + /* Swap the link with the gpart */ + if (s->sparts[k].gpart != NULL) { + s->sparts[k].gpart->id_or_neg_offset = -k; + } + if (s->sparts[nr_sparts].gpart != NULL) { + s->sparts[nr_sparts].gpart->id_or_neg_offset = -nr_sparts; + } + /* Swap the index */ + const int t = sind[k]; + sind[k] = sind[nr_sparts]; + sind[nr_sparts] = t; + } else { + /* Increment when not exchanging otherwise we need to retest "k".*/ + k++; + } + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that all sparts are in the correct place (untested). */ + for (size_t k = 0; k < nr_sparts; k++) { + if (cells_top[sind[k]].nodeID != local_nodeID) { + error("Failed to move all non-local sparts to send list"); + } + } + for (size_t k = nr_sparts; k < s->nr_sparts; k++) { + if (cells_top[sind[k]].nodeID == local_nodeID) { + error("Failed to remove local sparts from send list"); + } + } +#endif + /* Move non-local gparts to the end of the list. 
*/ for (size_t k = 0; k < nr_gparts;) { if (cells_top[gind[k]].nodeID != local_nodeID) { nr_gparts -= 1; + /* Swap the particle */ const struct gpart tp = s->gparts[k]; s->gparts[k] = s->gparts[nr_gparts]; s->gparts[nr_gparts] = tp; - if (s->gparts[k].id_or_neg_offset <= 0) { + /* Swap the link with part/spart */ + if (s->gparts[k].type == swift_type_gas) { s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; + } else if (s->gparts[k].type == swift_type_star) { + s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; } - if (s->gparts[nr_gparts].id_or_neg_offset <= 0) { + if (s->gparts[nr_gparts].type == swift_type_gas) { s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = &s->gparts[nr_gparts]; + } else if (s->gparts[nr_gparts].type == swift_type_star) { + s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = + &s->gparts[nr_gparts]; } + /* Swap the index */ const int t = gind[k]; gind[k] = gind[nr_gparts]; gind[nr_gparts] = t; @@ -570,14 +648,17 @@ void space_rebuild(struct space *s, int verbose) { the parts arrays. */ size_t nr_parts_exchanged = s->nr_parts - nr_parts; size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts; + size_t nr_sparts_exchanged = s->nr_sparts - nr_sparts; engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged, - nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged); + nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged, + nr_sparts, &sind[nr_sparts], &nr_sparts_exchanged); /* Set the new particle counts. */ s->nr_parts = nr_parts + nr_parts_exchanged; s->nr_gparts = nr_gparts + nr_gparts_exchanged; + s->nr_sparts = nr_sparts + nr_sparts_exchanged; - /* Re-allocate the index array if needed.. */ + /* Re-allocate the index array for the parts if needed.. 
*/ if (s->nr_parts + 1 > ind_size) { int *ind_new; if ((ind_new = (int *)malloc(sizeof(int) * (s->nr_parts + 1))) == NULL) @@ -587,10 +668,20 @@ void space_rebuild(struct space *s, int verbose) { ind = ind_new; } + /* Re-allocate the index array for the sparts if needed.. */ + if (s->nr_sparts + 1 > sind_size) { + int *sind_new; + if ((sind_new = (int *)malloc(sizeof(int) * (s->nr_sparts + 1))) == NULL) + error("Failed to allocate temporary s-particle indices."); + memcpy(sind_new, sind, sizeof(int) * nr_sparts); + free(sind); + sind = sind_new; + } + const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]}; - /* Assign each particle to its cell. */ + /* Assign each received part to its cell. */ for (size_t k = nr_parts; k < s->nr_parts; k++) { const struct part *const p = &s->parts[k]; ind[k] = @@ -603,28 +694,81 @@ void space_rebuild(struct space *s, int verbose) { } nr_parts = s->nr_parts; + /* Assign each received spart to its cell. */ + for (size_t k = nr_sparts; k < s->nr_sparts; k++) { + const struct spart *const sp = &s->sparts[k]; + sind[k] = + cell_getid(cdim, sp->x[0] * ih[0], sp->x[1] * ih[1], sp->x[2] * ih[2]); +#ifdef SWIFT_DEBUG_CHECKS + if (cells_top[sind[k]].nodeID != local_nodeID) + error("Received s-part that does not belong to me (nodeID=%i).", + cells_top[sind[k]].nodeID); +#endif + } + nr_sparts = s->nr_sparts; + #endif /* WITH_MPI */ /* Sort the parts according to their cells. */ if (nr_parts > 0) space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose); - /* Re-link the gparts. */ - if (nr_parts > 0 && nr_gparts > 0) part_relink_gparts(s->parts, nr_parts, 0); +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that the part have been sorted correctly. 
*/ + for (size_t k = 0; k < nr_parts; k++) { + const struct part *p = &s->parts[k]; + + /* New cell index */ + const int new_ind = + cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1], + p->x[2] * s->iwidth[2]); + + /* New cell of this part */ + const struct cell *c = &s->cells_top[new_ind]; + + if (ind[k] != new_ind) + error("part's new cell index not matching sorted index."); + + if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->width[0] || + p->x[1] < c->loc[1] || p->x[1] > c->loc[1] + c->width[1] || + p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2]) + error("part not sorted into the right top-level cell!"); + } +#endif + + /* Sort the sparts according to their cells. */ + if (nr_sparts > 0) + space_sparts_sort(s, sind, nr_sparts, 0, s->nr_cells - 1, verbose); #ifdef SWIFT_DEBUG_CHECKS - /* Verify space_sort_struct. */ - for (size_t k = 1; k < nr_parts; k++) { - if (ind[k - 1] > ind[k]) { - error("Sort failed!"); - } else if (ind[k] != cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0], - s->parts[k].x[1] * s->iwidth[1], - s->parts[k].x[2] * s->iwidth[2])) { - error("Incorrect indices!"); - } + /* Verify that the spart have been sorted correctly. */ + for (size_t k = 0; k < nr_sparts; k++) { + const struct spart *sp = &s->sparts[k]; + + /* New cell index */ + const int new_sind = + cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1], + sp->x[2] * s->iwidth[2]); + + /* New cell of this spart */ + const struct cell *c = &s->cells_top[new_sind]; + + if (sind[k] != new_sind) + error("spart's new cell index not matching sorted index."); + + if (sp->x[0] < c->loc[0] || sp->x[0] > c->loc[0] + c->width[0] || + sp->x[1] < c->loc[1] || sp->x[1] > c->loc[1] + c->width[1] || + sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2]) + error("spart not sorted into the right top-level cell!"); } #endif + /* Re-link the gparts to their (s-)particles. 
*/ + if (nr_parts > 0 && nr_gparts > 0) + part_relink_gparts_to_parts(s->parts, nr_parts, 0); + if (nr_sparts > 0 && nr_gparts > 0) + part_relink_gparts_to_sparts(s->sparts, nr_sparts, 0); + /* Extract the cell counts from the sorted indices. */ size_t last_index = 0; ind[nr_parts] = s->nr_cells; // sentinel. @@ -635,12 +779,23 @@ void space_rebuild(struct space *s, int verbose) { } } + /* Extract the cell counts from the sorted indices. */ + size_t last_sindex = 0; + sind[nr_sparts] = s->nr_cells; // sentinel. + for (size_t k = 0; k < nr_sparts; k++) { + if (sind[k] < sind[k + 1]) { + cells_top[sind[k]].scount = k - last_sindex + 1; + last_sindex = k + 1; + } + } + /* We no longer need the indices as of here. */ free(ind); + free(sind); #ifdef WITH_MPI - /* Re-allocate the index array if needed.. */ + /* Re-allocate the index array for the gparts if needed.. */ if (s->nr_gparts + 1 > gind_size) { int *gind_new; if ((gind_new = (int *)malloc(sizeof(int) * (s->nr_gparts + 1))) == NULL) @@ -650,7 +805,7 @@ void space_rebuild(struct space *s, int verbose) { gind = gind_new; } - /* Assign each particle to its cell. */ + /* Assign each received gpart to its cell. */ for (size_t k = nr_gparts; k < s->nr_gparts; k++) { const struct gpart *const p = &s->gparts[k]; gind[k] = @@ -658,21 +813,48 @@ void space_rebuild(struct space *s, int verbose) { #ifdef SWIFT_DEBUG_CHECKS if (cells_top[gind[k]].nodeID != s->e->nodeID) - error("Received part that does not belong to me (nodeID=%i).", + error("Received g-part that does not belong to me (nodeID=%i).", cells_top[gind[k]].nodeID); #endif } nr_gparts = s->nr_gparts; -#endif +#endif /* WITH_MPI */ /* Sort the gparts according to their cells. */ if (nr_gparts > 0) space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose); +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that the gpart have been sorted correctly. 
*/ + for (size_t k = 0; k < nr_gparts; k++) { + const struct gpart *gp = &s->gparts[k]; + + /* New cell index */ + const int new_gind = + cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1], + gp->x[2] * s->iwidth[2]); + + /* New cell of this gpart */ + const struct cell *c = &s->cells_top[new_gind]; + + if (gind[k] != new_gind) + error("gpart's new cell index not matching sorted index."); + + if (gp->x[0] < c->loc[0] || gp->x[0] > c->loc[0] + c->width[0] || + gp->x[1] < c->loc[1] || gp->x[1] > c->loc[1] + c->width[1] || + gp->x[2] < c->loc[2] || gp->x[2] > c->loc[2] + c->width[2]) + error("gpart not sorted into the right top-level cell!"); + } +#endif + /* Re-link the parts. */ if (nr_parts > 0 && nr_gparts > 0) - part_relink_parts(s->gparts, nr_gparts, s->parts); + part_relink_parts_to_gparts(s->gparts, nr_gparts, s->parts); + + /* Re-link the sparts. */ + if (nr_sparts > 0 && nr_gparts > 0) + part_relink_sparts_to_gparts(s->gparts, nr_gparts, s->sparts); /* Extract the cell counts from the sorted indices. */ size_t last_gindex = 0; @@ -689,26 +871,8 @@ void space_rebuild(struct space *s, int verbose) { #ifdef SWIFT_DEBUG_CHECKS /* Verify that the links are correct */ - for (size_t k = 0; k < nr_gparts; ++k) { - - if (s->gparts[k].id_or_neg_offset < 0) { - - const struct part *part = &s->parts[-s->gparts[k].id_or_neg_offset]; - - if (part->gpart != &s->gparts[k]) error("Linking problem !"); - - if (s->gparts[k].x[0] != part->x[0] || s->gparts[k].x[1] != part->x[1] || - s->gparts[k].x[2] != part->x[2]) - error("Linked particles are not at the same position !"); - } - } - for (size_t k = 0; k < nr_parts; ++k) { - - if (s->parts[k].gpart != NULL && - s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) { - error("Linking problem !"); - } - } + part_verify_links(s->parts, s->gparts, s->sparts, nr_parts, nr_gparts, + nr_sparts, verbose); #endif /* Hook the cells up to the parts. 
*/ @@ -716,15 +880,18 @@ void space_rebuild(struct space *s, int verbose) { struct part *finger = s->parts; struct xpart *xfinger = s->xparts; struct gpart *gfinger = s->gparts; + struct spart *sfinger = s->sparts; for (int k = 0; k < s->nr_cells; k++) { struct cell *restrict c = &cells_top[k]; c->ti_old = ti_current; c->parts = finger; c->xparts = xfinger; c->gparts = gfinger; + c->sparts = sfinger; finger = &finger[c->count]; xfinger = &xfinger[c->count]; gfinger = &gfinger[c->gcount]; + sfinger = &sfinger[c->scount]; } // message( "hooking up cells took %.3f %s." , // clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -830,6 +997,13 @@ void space_parts_get_cell_index_mapper(void *map_data, int nr_parts, cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z); ind[k] = index; +#ifdef SWIFT_DEBUG_CHECKS + if (pos_x > dim_x || pos_y > dim_y || pos_z > pos_z || pos_x < 0. || + pos_y < 0. || pos_z < 0.) + error("Particle outside of simulation box. p->x=[%e %e %e]", pos_x, pos_y, + pos_z); +#endif + /* Update the position */ p->x[0] = pos_x; p->x[1] = pos_y; @@ -889,8 +1063,58 @@ void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts, } /** - * @brief Computes the cell index of all the particles and update the cell - * count. + * @brief #threadpool mapper function to compute the s-particle cell indices. + * + * @param map_data Pointer towards the s-particles. + * @param nr_sparts The number of s-particles to treat. 
+ * @param extra_data Pointers to the space and index list + */ +void space_sparts_get_cell_index_mapper(void *map_data, int nr_sparts, + void *extra_data) { + + /* Unpack the data */ + struct spart *restrict sparts = (struct spart *)map_data; + struct index_data *data = (struct index_data *)extra_data; + struct space *s = data->s; + int *const ind = data->ind + (ptrdiff_t)(sparts - s->sparts); + + /* Get some constants */ + const double dim_x = s->dim[0]; + const double dim_y = s->dim[1]; + const double dim_z = s->dim[2]; + const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; + const double ih_x = s->iwidth[0]; + const double ih_y = s->iwidth[1]; + const double ih_z = s->iwidth[2]; + + for (int k = 0; k < nr_sparts; k++) { + + /* Get the particle */ + struct spart *restrict sp = &sparts[k]; + + const double old_pos_x = sp->x[0]; + const double old_pos_y = sp->x[1]; + const double old_pos_z = sp->x[2]; + + /* Put it back into the simulation volume */ + const double pos_x = box_wrap(old_pos_x, 0.0, dim_x); + const double pos_y = box_wrap(old_pos_y, 0.0, dim_y); + const double pos_z = box_wrap(old_pos_z, 0.0, dim_z); + + /* Get its cell index */ + const int index = + cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z); + ind[k] = index; + + /* Update the position */ + sp->x[0] = pos_x; + sp->x[1] = pos_y; + sp->x[2] = pos_z; + } +} + +/** + * @brief Computes the cell index of all the particles. * * @param s The #space. * @param ind The array of indices to fill. @@ -917,8 +1141,7 @@ void space_parts_get_cell_index(struct space *s, int *ind, struct cell *cells, } /** - * @brief Computes the cell index of all the g-particles and update the cell - * gcount. + * @brief Computes the cell index of all the g-particles. * * @param s The #space. * @param gind The array of indices to fill. 
@@ -944,6 +1167,33 @@ void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells, clocks_getunit()); } +/** + * @brief Computes the cell index of all the s-particles. + * + * @param s The #space. + * @param sind The array of indices to fill. + * @param cells The array of #cell to update. + * @param verbose Are we talkative ? + */ +void space_sparts_get_cell_index(struct space *s, int *sind, struct cell *cells, + int verbose) { + + const ticks tic = getticks(); + + /* Pack the extra information */ + struct index_data data; + data.s = s; + data.cells = cells; + data.ind = sind; + + threadpool_map(&s->e->threadpool, space_sparts_get_cell_index_mapper, + s->sparts, s->nr_sparts, sizeof(struct spart), 1000, &data); + + if (verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + /** * @brief Sort the particles and condensed particles according to the given * indices. @@ -993,7 +1243,7 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, if (ind[i - 1] > ind[i]) error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1, ind[i - 1], i, ind[i], min, max); - message("Sorting succeeded."); + if (s->e->nodeID == 0 || verbose) message("Sorting succeeded."); #endif /* Clean up. */ @@ -1055,18 +1305,207 @@ void space_parts_sort_mapper(void *map_data, int num_elements, #ifdef SWIFT_DEBUG_CHECKS /* Verify space_sort_struct. 
*/ - for (int k = i; k <= jj; k++) - if (ind[k] > pivot) { - message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", - k, ind[k], pivot, i, j); - error("Partition failed (<=pivot)."); + if (i != j) { + for (int k = i; k <= jj; k++) { + if (ind[k] > pivot) { + message( + "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", k, + ind[k], pivot, i, j); + error("Partition failed (<=pivot)."); + } } - for (int k = jj + 1; k <= j; k++) - if (ind[k] <= pivot) { - message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", - k, ind[k], pivot, i, j); - error("Partition failed (>pivot)."); + for (int k = jj + 1; k <= j; k++) { + if (ind[k] <= pivot) { + message( + "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", k, + ind[k], pivot, i, j); + error("Partition failed (>pivot)."); + } } + } +#endif + + /* Split-off largest interval. */ + if (jj - i > j - jj + 1) { + + /* Recurse on the left? */ + if (jj > i && pivot > min) { + qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size; + while (sort_struct->stack[qid].ready) + ; + sort_struct->stack[qid].i = i; + sort_struct->stack[qid].j = jj; + sort_struct->stack[qid].min = min; + sort_struct->stack[qid].max = pivot; + if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size) + error("Qstack overflow."); + sort_struct->stack[qid].ready = 1; + } + + /* Recurse on the right? */ + if (jj + 1 < j && pivot + 1 < max) { + i = jj + 1; + min = pivot + 1; + } else + break; + + } else { + + /* Recurse on the right? */ + if (pivot + 1 < max) { + qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size; + while (sort_struct->stack[qid].ready) + ; + sort_struct->stack[qid].i = jj + 1; + sort_struct->stack[qid].j = j; + sort_struct->stack[qid].min = pivot + 1; + sort_struct->stack[qid].max = max; + if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size) + error("Qstack overflow."); + sort_struct->stack[qid].ready = 1; + } + + /* Recurse on the left? 
*/ + if (jj > i && pivot > min) { + j = jj; + max = pivot; + } else + break; + } + + } /* loop over sub-intervals. */ + + atomic_dec(&sort_struct->waiting); + + } /* main loop. */ +} + +/** + * @brief Sort the s-particles according to the given indices. + * + * @param s The #space. + * @param ind The indices with respect to which the #spart are sorted. + * @param N The number of #spart to sort. + * @param min Lowest index. + * @param max Highest index. + * @param verbose Are we talkative ? + */ +void space_sparts_sort(struct space *s, int *ind, size_t N, int min, int max, + int verbose) { + + const ticks tic = getticks(); + + /* Populate a parallel_sort structure with the input data */ + struct parallel_sort sort_struct; + sort_struct.sparts = s->sparts; + sort_struct.ind = ind; + sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads; + if ((sort_struct.stack = + malloc(sizeof(struct qstack) * sort_struct.stack_size)) == NULL) + error("Failed to allocate sorting stack."); + for (unsigned int i = 0; i < sort_struct.stack_size; i++) + sort_struct.stack[i].ready = 0; + + /* Add the first interval. */ + sort_struct.stack[0].i = 0; + sort_struct.stack[0].j = N - 1; + sort_struct.stack[0].min = min; + sort_struct.stack[0].max = max; + sort_struct.stack[0].ready = 1; + sort_struct.first = 0; + sort_struct.last = 1; + sort_struct.waiting = 1; + + /* Launch the sorting tasks with a stride of zero such that the same + map data is passed to each thread. */ + threadpool_map(&s->e->threadpool, space_sparts_sort_mapper, &sort_struct, + s->e->threadpool.num_threads, 0, 1, NULL); + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify that the indices are now in non-decreasing order. */ + for (size_t i = 1; i < N; i++) + if (ind[i - 1] > ind[i]) + error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1, + ind[i - 1], i, ind[i], min, max); + if (s->e->nodeID == 0 || verbose) message("Sorting succeeded."); +#endif + + /* Clean up. 
*/ + free(sort_struct.stack); + + if (verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + +void space_sparts_sort_mapper(void *map_data, int num_elements, + void *extra_data) { + + /* Unpack the mapping data. */ + struct parallel_sort *sort_struct = (struct parallel_sort *)map_data; + + /* Pointers to the sorting data. */ + int *ind = sort_struct->ind; + struct spart *sparts = sort_struct->sparts; + + /* Main loop. */ + while (sort_struct->waiting) { + + /* Grab an interval off the queue. */ + int qid = atomic_inc(&sort_struct->first) % sort_struct->stack_size; + + /* Wait for the entry to be ready, or for the sorting to be done. */ + while (!sort_struct->stack[qid].ready) + if (!sort_struct->waiting) return; + + /* Get the stack entry. */ + ptrdiff_t i = sort_struct->stack[qid].i; + ptrdiff_t j = sort_struct->stack[qid].j; + int min = sort_struct->stack[qid].min; + int max = sort_struct->stack[qid].max; + sort_struct->stack[qid].ready = 0; + + /* Loop over sub-intervals. */ + while (1) { + + /* Pick the pivot as the midpoint of [min, max]. */ + const int pivot = (min + max) / 2; + /* message("Working on interval [%i,%i] with min=%i, max=%i, pivot=%i.", + i, j, min, max, pivot); */ + + /* One pass of QuickSort's partitioning. */ + ptrdiff_t ii = i; + ptrdiff_t jj = j; + while (ii < jj) { + while (ii <= j && ind[ii] <= pivot) ii++; + while (jj >= i && ind[jj] > pivot) jj--; + if (ii < jj) { + memswap(&ind[ii], &ind[jj], sizeof(int)); + memswap(&sparts[ii], &sparts[jj], sizeof(struct spart)); + } + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Verify space_sort_struct. 
*/ + if (i != j) { + for (int k = i; k <= jj; k++) { + if (ind[k] > pivot) { + message( + "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li " + "min=%i max=%i.", + k, ind[k], pivot, i, j, min, max); + error("Partition failed (<=pivot)."); + } + } + for (int k = jj + 1; k <= j; k++) { + if (ind[k] <= pivot) { + message( + "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", k, + ind[k], pivot, i, j); + error("Partition failed (>pivot)."); + } + } + } #endif /* Split-off largest interval. */ @@ -1171,7 +1610,7 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max, if (ind[i - 1] > ind[i]) error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1, ind[i - 1], i, ind[i], min, max); - message("Sorting succeeded."); + if (s->e->nodeID == 0 || verbose) message("Sorting succeeded."); #endif /* Clean up. */ @@ -1231,18 +1670,24 @@ void space_gparts_sort_mapper(void *map_data, int num_elements, #ifdef SWIFT_DEBUG_CHECKS /* Verify space_sort_struct. */ - for (int k = i; k <= jj; k++) - if (ind[k] > pivot) { - message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", - k, ind[k], pivot, i, j); - error("Partition failed (<=pivot)."); + if (i != j) { + for (int k = i; k <= jj; k++) { + if (ind[k] > pivot) { + message( + "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", k, + ind[k], pivot, i, j); + error("Partition failed (<=pivot)."); + } } - for (int k = jj + 1; k <= j; k++) - if (ind[k] <= pivot) { - message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", - k, ind[k], pivot, i, j); - error("Partition failed (>pivot)."); + for (int k = jj + 1; k <= j; k++) { + if (ind[k] <= pivot) { + message( + "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.", k, + ind[k], pivot, i, j); + error("Partition failed (>pivot)."); + } } + } #endif /* Split-off largest interval. 
*/ @@ -1456,27 +1901,63 @@ void space_map_cells_pre(struct space *s, int full, * @param s The #space in which the cell lives. * @param c The #cell to split recursively. * @param buff A buffer for particle sorting, should be of size at least - * max(c->count, c->gount) or @c NULL. + * c->count or @c NULL. + * @param sbuff A buffer for particle sorting, should be of size at least + * c->scount or @c NULL. + * @param gbuff A buffer for particle sorting, should be of size at least + * c->gcount or @c NULL. */ -void space_split_recursive(struct space *s, struct cell *c, int *buff) { +void space_split_recursive(struct space *s, struct cell *c, + struct cell_buff *buff, struct cell_buff *sbuff, + struct cell_buff *gbuff) { const int count = c->count; const int gcount = c->gcount; + const int scount = c->scount; const int depth = c->depth; int maxdepth = 0; float h_max = 0.0f; - int ti_end_min = max_nr_timesteps, ti_end_max = 0; - struct cell *temp; + integertime_t ti_end_min = max_nr_timesteps, ti_end_max = 0; struct part *parts = c->parts; struct gpart *gparts = c->gparts; + struct spart *sparts = c->sparts; struct xpart *xparts = c->xparts; struct engine *e = s->e; /* If the buff is NULL, allocate it, and remember to free it. 
*/ - const int allocate_buffer = (buff == NULL); - if (allocate_buffer && - (buff = (int *)malloc(sizeof(int) * max(count, gcount))) == NULL) - error("Failed to allocate temporary indices."); + const int allocate_buffer = (buff == NULL && gbuff == NULL && sbuff == NULL); + if (allocate_buffer) { + if (count > 0) { + if (posix_memalign((void *)&buff, SWIFT_STRUCT_ALIGNMENT, + sizeof(struct cell_buff) * count) != 0) + error("Failed to allocate temporary indices."); + for (int k = 0; k < count; k++) { + buff[k].x[0] = parts[k].x[0]; + buff[k].x[1] = parts[k].x[1]; + buff[k].x[2] = parts[k].x[2]; + } + } + if (gcount > 0) { + if (posix_memalign((void *)&gbuff, SWIFT_STRUCT_ALIGNMENT, + sizeof(struct cell_buff) * gcount) != 0) + error("Failed to allocate temporary indices."); + for (int k = 0; k < gcount; k++) { + gbuff[k].x[0] = gparts[k].x[0]; + gbuff[k].x[1] = gparts[k].x[1]; + gbuff[k].x[2] = gparts[k].x[2]; + } + } + if (scount > 0) { + if (posix_memalign((void *)&sbuff, SWIFT_STRUCT_ALIGNMENT, + sizeof(struct cell_buff) * scount) != 0) + error("Failed to allocate temporary indices."); + for (int k = 0; k < scount; k++) { + sbuff[k].x[0] = sparts[k].x[0]; + sbuff[k].x[1] = sparts[k].x[1]; + sbuff[k].x[2] = sparts[k].x[2]; + } + } + } /* Check the depth. */ while (depth > (maxdepth = s->maxdepth)) { @@ -1490,47 +1971,57 @@ void space_split_recursive(struct space *s, struct cell *c, int *buff) { } /* Split or let it be? */ - if (count > space_splitsize || gcount > space_splitsize) { + if (count > space_splitsize || gcount > space_splitsize || + scount > space_splitsize) { /* No longer just a leaf. */ c->split = 1; /* Create the cell's progeny. 
*/ + space_getcells(s, 8, c->progeny); for (int k = 0; k < 8; k++) { - temp = space_getcell(s); - temp->count = 0; - temp->gcount = 0; - temp->ti_old = e->ti_current; - temp->loc[0] = c->loc[0]; - temp->loc[1] = c->loc[1]; - temp->loc[2] = c->loc[2]; - temp->width[0] = c->width[0] / 2; - temp->width[1] = c->width[1] / 2; - temp->width[2] = c->width[2] / 2; - temp->dmin = c->dmin / 2; - if (k & 4) temp->loc[0] += temp->width[0]; - if (k & 2) temp->loc[1] += temp->width[1]; - if (k & 1) temp->loc[2] += temp->width[2]; - temp->depth = c->depth + 1; - temp->split = 0; - temp->h_max = 0.0; - temp->dx_max = 0.f; - temp->nodeID = c->nodeID; - temp->parent = c; - temp->super = NULL; - c->progeny[k] = temp; + struct cell *cp = c->progeny[k]; + cp->count = 0; + cp->gcount = 0; + cp->scount = 0; + cp->ti_old = c->ti_old; + cp->loc[0] = c->loc[0]; + cp->loc[1] = c->loc[1]; + cp->loc[2] = c->loc[2]; + cp->width[0] = c->width[0] / 2; + cp->width[1] = c->width[1] / 2; + cp->width[2] = c->width[2] / 2; + cp->dmin = c->dmin / 2; + if (k & 4) cp->loc[0] += cp->width[0]; + if (k & 2) cp->loc[1] += cp->width[1]; + if (k & 1) cp->loc[2] += cp->width[2]; + cp->depth = c->depth + 1; + cp->split = 0; + cp->h_max = 0.0; + cp->dx_max = 0.f; + cp->nodeID = c->nodeID; + cp->parent = c; + cp->super = NULL; } /* Split the cell data. */ - cell_split(c, c->parts - s->parts, buff); + cell_split(c, c->parts - s->parts, c->sparts - s->sparts, buff, sbuff, + gbuff); /* Remove any progeny with zero parts. 
*/ + struct cell_buff *progeny_buff = buff, *progeny_gbuff = gbuff, + *progeny_sbuff = sbuff; for (int k = 0; k < 8; k++) - if (c->progeny[k]->count == 0 && c->progeny[k]->gcount == 0) { + if (c->progeny[k]->count == 0 && c->progeny[k]->gcount == 0 && + c->progeny[k]->scount == 0) { space_recycle(s, c->progeny[k]); c->progeny[k] = NULL; } else { - space_split_recursive(s, c->progeny[k], buff); + space_split_recursive(s, c->progeny[k], progeny_buff, progeny_sbuff, + progeny_gbuff); + progeny_buff += c->progeny[k]->count; + progeny_gbuff += c->progeny[k]->gcount; + progeny_sbuff += c->progeny[k]->scount; h_max = max(h_max, c->progeny[k]->h_max); ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); @@ -1553,7 +2044,8 @@ void space_split_recursive(struct space *s, struct cell *c, int *buff) { struct part *p = &parts[k]; struct xpart *xp = &xparts[k]; const float h = p->h; - const int ti_end = p->ti_end; + const integertime_t ti_end = + get_integer_time_end(e->ti_current, p->time_bin); xp->x_diff[0] = 0.f; xp->x_diff[1] = 0.f; xp->x_diff[2] = 0.f; @@ -1563,13 +2055,21 @@ void space_split_recursive(struct space *s, struct cell *c, int *buff) { } for (int k = 0; k < gcount; k++) { struct gpart *gp = &gparts[k]; - const int ti_end = gp->ti_end; + const integertime_t ti_end = + get_integer_time_end(e->ti_current, gp->time_bin); gp->x_diff[0] = 0.f; gp->x_diff[1] = 0.f; gp->x_diff[2] = 0.f; if (ti_end < ti_end_min) ti_end_min = ti_end; if (ti_end > ti_end_max) ti_end_max = ti_end; } + for (int k = 0; k < scount; k++) { + struct spart *sp = &sparts[k]; + const integertime_t ti_end = + get_integer_time_end(e->ti_current, sp->time_bin); + if (ti_end < ti_end_min) ti_end_min = ti_end; + if (ti_end > ti_end_max) ti_end_max = ti_end; + } } /* Set the values for this cell. 
*/ @@ -1582,6 +2082,9 @@ void space_split_recursive(struct space *s, struct cell *c, int *buff) { if (s->nr_parts > 0) c->owner = ((c->parts - s->parts) % s->nr_parts) * s->nr_queues / s->nr_parts; + else if (s->nr_sparts > 0) + c->owner = + ((c->sparts - s->sparts) % s->nr_sparts) * s->nr_queues / s->nr_sparts; else if (s->nr_gparts > 0) c->owner = ((c->gparts - s->gparts) % s->nr_gparts) * s->nr_queues / s->nr_gparts; @@ -1589,7 +2092,11 @@ void space_split_recursive(struct space *s, struct cell *c, int *buff) { c->owner = 0; /* Ok, there is really nothing on this rank... */ /* Clean up. */ - if (allocate_buffer) free(buff); + if (allocate_buffer) { + if (buff != NULL) free(buff); + if (gbuff != NULL) free(gbuff); + if (sbuff != NULL) free(sbuff); + } } /** @@ -1608,7 +2115,7 @@ void space_split_mapper(void *map_data, int num_cells, void *extra_data) { for (int ind = 0; ind < num_cells; ind++) { struct cell *c = &cells_top[ind]; - space_split_recursive(s, c, NULL); + space_split_recursive(s, c, NULL, NULL, NULL); } #ifdef SWIFT_DEBUG_CHECKS @@ -1654,7 +2161,7 @@ void space_recycle(struct space *s, struct cell *c) { * @param s The #space. * @param list_begin Pointer to the first #cell in the linked list of * cells joined by their @c next pointers. - * @param list_begin Pointer to the last #cell in the linked list of + * @param list_end Pointer to the last #cell in the linked list of * cells joined by their @c next pointers. It is assumed that this * cell's @c next pointer is @c NULL. */ @@ -1695,39 +2202,46 @@ void space_recycle_list(struct space *s, struct cell *list_begin, * If we have no cells, allocate a new chunk of memory and pick one from there. * * @param s The #space. + * @param nr_cells Number of #cell to pick up. + * @param cells Array of @c nr_cells #cell pointers in which to store the + * new cells. */ -struct cell *space_getcell(struct space *s) { +void space_getcells(struct space *s, int nr_cells, struct cell **cells) { /* Lock the space. 
*/ lock_lock(&s->lock); - /* Is the buffer empty? */ - if (s->cells_sub == NULL) { - if (posix_memalign((void *)&s->cells_sub, cell_align, - space_cellallocchunk * sizeof(struct cell)) != 0) - error("Failed to allocate more cells."); + /* For each requested cell... */ + for (int j = 0; j < nr_cells; j++) { - /* Constructed a linked list */ - for (int k = 0; k < space_cellallocchunk - 1; k++) - s->cells_sub[k].next = &s->cells_sub[k + 1]; - s->cells_sub[space_cellallocchunk - 1].next = NULL; - } + /* Is the buffer empty? */ + if (s->cells_sub == NULL) { + if (posix_memalign((void *)&s->cells_sub, cell_align, + space_cellallocchunk * sizeof(struct cell)) != 0) + error("Failed to allocate more cells."); - /* Pick off the next cell. */ - struct cell *c = s->cells_sub; - s->cells_sub = c->next; - s->tot_cells += 1; + /* Constructed a linked list */ + for (int k = 0; k < space_cellallocchunk - 1; k++) + s->cells_sub[k].next = &s->cells_sub[k + 1]; + s->cells_sub[space_cellallocchunk - 1].next = NULL; + } + + /* Pick off the next cell. */ + cells[j] = s->cells_sub; + s->cells_sub = cells[j]->next; + s->tot_cells += 1; + } /* Unlock the space. */ lock_unlock_blind(&s->lock); /* Init some things in the cell we just got. 
*/ - bzero(c, sizeof(struct cell)); - c->nodeID = -1; - if (lock_init(&c->lock) != 0 || lock_init(&c->glock) != 0) - error("Failed to initialize cell spinlocks."); - - return c; + for (int j = 0; j < nr_cells; j++) { + bzero(cells[j], sizeof(struct cell)); + cells[j]->nodeID = -1; + if (lock_init(&cells[j]->lock) != 0 || lock_init(&cells[j]->glock) != 0) + error("Failed to initialize cell spinlocks."); + } } /** @@ -1754,6 +2268,11 @@ void space_init_parts(struct space *s) { #endif hydro_first_init_part(&p[i], &xp[i]); + +#ifdef SWIFT_DEBUG_CHECKS + p->ti_drift = 0; + p->ti_kick = 0; +#endif } } @@ -1800,6 +2319,32 @@ void space_init_gparts(struct space *s) { } } +/** + * @brief Initialises all the s-particles by setting them into a valid state + * + * Calls star_first_init_spart() on all the particles + */ +void space_init_sparts(struct space *s) { + + const size_t nr_sparts = s->nr_sparts; + struct spart *restrict sp = s->sparts; + + for (size_t i = 0; i < nr_sparts; ++i) { + +#ifdef HYDRO_DIMENSION_2D + sp[i].x[2] = 0.f; + sp[i].v[2] = 0.f; +#endif + +#ifdef HYDRO_DIMENSION_1D + sp[i].x[1] = sp[i].x[2] = 0.f; + sp[i].v[1] = sp[i].v[2] = 0.f; +#endif + + star_first_init_spart(&sp[i]); + } +} + /** * @brief Split the space into cells given the array of particles. * @@ -1808,8 +2353,10 @@ void space_init_gparts(struct space *s) { * @param dim Spatial dimensions of the domain. * @param parts Array of Gas particles. * @param gparts Array of Gravity particles. + * @param sparts Array of star particles. * @param Npart The number of Gas particles in the space. * @param Ngpart The number of Gravity particles in the space. + * @param Nspart The number of star particles in the space. * @param periodic flag whether the domain is periodic or not. * @param gravity flag whether we are doing gravity or not. * @param verbose Print messages to stdout or not. 
@@ -1822,8 +2369,9 @@ void space_init_gparts(struct space *s) { */ void space_init(struct space *s, const struct swift_params *params, double dim[3], struct part *parts, struct gpart *gparts, - size_t Npart, size_t Ngpart, int periodic, int gravity, - int verbose, int dry_run) { + struct spart *sparts, size_t Npart, size_t Ngpart, + size_t Nspart, int periodic, int gravity, int verbose, + int dry_run) { /* Clean-up everything */ bzero(s, sizeof(struct space)); @@ -1841,6 +2389,9 @@ void space_init(struct space *s, const struct swift_params *params, s->nr_gparts = Ngpart; s->size_gparts = Ngpart; s->gparts = gparts; + s->nr_sparts = Nspart; + s->size_sparts = Nspart; + s->sparts = sparts; s->nr_queues = 1; /* Temporary value until engine construction */ /* Decide on the minimal top-level cell size */ @@ -1900,6 +2451,11 @@ void space_init(struct space *s, const struct swift_params *params, gparts[k].x[1] += shift[1]; gparts[k].x[2] += shift[2]; } + for (size_t k = 0; k < Nspart; k++) { + sparts[k].x[0] += shift[0]; + sparts[k].x[1] += shift[1]; + sparts[k].x[2] += shift[2]; + } } if (!dry_run) { @@ -1931,9 +2487,23 @@ void space_init(struct space *s, const struct swift_params *params, if (gparts[k].x[j] < 0 || gparts[k].x[j] >= dim[j]) error("Not all g-particles are within the specified domain."); } + + /* Same for the sparts */ + if (periodic) { + for (size_t k = 0; k < Nspart; k++) + for (int j = 0; j < 3; j++) { + while (sparts[k].x[j] < 0) sparts[k].x[j] += dim[j]; + while (sparts[k].x[j] >= dim[j]) sparts[k].x[j] -= dim[j]; + } + } else { + for (size_t k = 0; k < Nspart; k++) + for (int j = 0; j < 3; j++) + if (sparts[k].x[j] < 0 || sparts[k].x[j] >= dim[j]) + error("Not all s-particles are within the specified domain."); + } } - /* Allocate the extra parts array. */ + /* Allocate the extra parts array for the gas particles. 
*/ if (Npart > 0) { if (posix_memalign((void *)&s->xparts, xpart_align, Npart * sizeof(struct xpart)) != 0) @@ -1945,6 +2515,7 @@ void space_init(struct space *s, const struct swift_params *params, space_init_parts(s); space_init_xparts(s); space_init_gparts(s); + space_init_sparts(s); /* Init the space lock. */ if (lock_init(&s->lock) != 0) error("Failed to create space spin-lock."); @@ -1974,12 +2545,25 @@ void space_link_cleanup(struct space *s) { * @param s The #space to check. * @param ti_current The (integer) time. */ -void space_check_drift_point(struct space *s, int ti_current) { +void space_check_drift_point(struct space *s, integertime_t ti_current) { /* Recursively check all cells */ space_map_cells_pre(s, 1, cell_check_drift_point, &ti_current); } +/** + * @brief Checks that all particles and local cells have a non-zero time-step. + */ +void space_check_timesteps(struct space *s) { +#ifdef SWIFT_DEBUG_CHECKS + + for (int i = 0; i < s->nr_cells; ++i) { + cell_check_timesteps(&s->cells_top[i]); + } + +#endif +} + /** * @brief Frees up the memory allocated for this #space */ @@ -1990,4 +2574,5 @@ void space_clean(struct space *s) { free(s->parts); free(s->xparts); free(s->gparts); + free(s->sparts); } diff --git a/src/space.h b/src/space.h index 4aea2a07560865c8d8a474f069b370748e12e65e..a25149e8fe6971b24856a2a60cae23747fbc56ac 100644 --- a/src/space.h +++ b/src/space.h @@ -108,6 +108,9 @@ struct space { /*! The total number of g-parts in the space. */ size_t nr_gparts, size_gparts; + /*! The total number of s-parts in the space. */ + size_t nr_sparts, size_sparts; + /*! The particle data (cells have pointers to this). */ struct part *parts; @@ -117,6 +120,9 @@ struct space { /*! The g-particle data (cells have pointers to this). */ struct gpart *gparts; + /*! The s-particle data (cells have pointers to this). */ + struct spart *sparts; + /*! General-purpose lock for this space.
*/ swift_lock_type lock; @@ -139,6 +145,10 @@ struct space { struct gpart *gparts_foreign; size_t nr_gparts_foreign, size_gparts_foreign; + /*! Buffers for s-parts that we will receive from foreign cells. */ + struct spart *sparts_foreign; + size_t nr_sparts_foreign, size_sparts_foreign; + #endif }; @@ -147,13 +157,16 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, int verbose); void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max, int verbose); -struct cell *space_getcell(struct space *s); +void space_sparts_sort(struct space *s, int *ind, size_t N, int min, int max, + int verbose); +void space_getcells(struct space *s, int nr_cells, struct cell **cells); int space_getsid(struct space *s, struct cell **ci, struct cell **cj, double *shift); void space_init(struct space *s, const struct swift_params *params, double dim[3], struct part *parts, struct gpart *gparts, - size_t Npart, size_t Ngpart, int periodic, int gravity, - int verbose, int dry_run); + struct spart *sparts, size_t Npart, size_t Ngpart, + size_t Nspart, int periodic, int gravity, int verbose, + int dry_run); void space_sanitize(struct space *s); void space_map_cells_pre(struct space *s, int full, void (*fun)(struct cell *c, void *data), void *data); @@ -169,6 +182,8 @@ void space_parts_sort_mapper(void *map_data, int num_elements, void *extra_data); void space_gparts_sort_mapper(void *map_data, int num_elements, void *extra_data); +void space_sparts_sort_mapper(void *map_data, int num_elements, + void *extra_data); void space_rebuild(struct space *s, int verbose); void space_recycle(struct space *s, struct cell *c); void space_recycle_list(struct space *s, struct cell *list_begin, @@ -180,12 +195,17 @@ void space_parts_get_cell_index(struct space *s, int *ind, struct cell *cells, int verbose); void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells, int verbose); +void space_sparts_get_cell_index(struct space *s, int
*sind, struct cell *cells, + int verbose); void space_do_parts_sort(); void space_do_gparts_sort(); +void space_do_sparts_sort(); void space_init_parts(struct space *s); void space_init_gparts(struct space *s); +void space_init_sparts(struct space *s); void space_link_cleanup(struct space *s); -void space_check_drift_point(struct space *s, int ti_current); +void space_check_drift_point(struct space *s, integertime_t ti_current); +void space_check_timesteps(struct space *s); void space_clean(struct space *s); #endif /* SWIFT_SPACE_H */ diff --git a/src/stars.h b/src/stars.h new file mode 100644 index 0000000000000000000000000000000000000000..ade47ff57298c13bf205e991548945576a802293 --- /dev/null +++ b/src/stars.h @@ -0,0 +1,30 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_STAR_H +#define SWIFT_STAR_H + +/* Config parameters.
*/ +#include "../config.h" + +/* So far only one model here */ +/* Straight-forward import */ +#include "./stars/Default/star.h" +#include "./stars/Default/star_iact.h" + +#endif diff --git a/src/stars/Default/star.h b/src/stars/Default/star.h new file mode 100644 index 0000000000000000000000000000000000000000..61ae4aeb5c51e18e39c3f4c6855d7c6ddfe05abb --- /dev/null +++ b/src/stars/Default/star.h @@ -0,0 +1,86 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_DEFAULT_STAR_H +#define SWIFT_DEFAULT_STAR_H + +#include <float.h> +#include "minmax.h" + +/** + * @brief Computes the gravity time-step of a given star particle. + * + * @param sp Pointer to the s-particle data. + */ +__attribute__((always_inline)) INLINE static float star_compute_timestep( + const struct spart* const sp) { + + return FLT_MAX; +} + +/** + * @brief Initialises the s-particles for the first time + * + * This function is called only once just after the ICs have been + * read in to do some conversions.
+ * + * @param sp The particle to act upon + */ +__attribute__((always_inline)) INLINE static void star_first_init_spart( + struct spart* sp) { + + sp->time_bin = 0; +} + +/** + * @brief Prepares a s-particle for its interactions + * + * @param sp The particle to act upon + */ +__attribute__((always_inline)) INLINE static void star_init_spart( + struct spart* sp) {} + +/** + * @brief Sets the values to be predicted in the drifts to their values at a + * kick time + * + * @param sp The particle. + */ +__attribute__((always_inline)) INLINE static void star_reset_predicted_values( + struct spart* restrict sp) {} + +/** + * @brief Finishes the calculation of (non-gravity) forces acting on stars + * + * Multiplies the forces and accelerations by the appropriate constants + * + * @param sp The particle to act upon + */ +__attribute__((always_inline)) INLINE static void star_end_force( + struct spart* sp) {} + +/** + * @brief Kick the additional variables + * + * @param sp The particle to act upon + * @param dt The time-step for this kick + */ +__attribute__((always_inline)) INLINE static void star_kick_extra( + struct spart* sp, float dt) {} + +#endif /* SWIFT_DEFAULT_STAR_H */ diff --git a/src/stars/Default/star_debug.h b/src/stars/Default/star_debug.h new file mode 100644 index 0000000000000000000000000000000000000000..d940afac2eb67c97481f48a4bda6fa56085166d5 --- /dev/null +++ b/src/stars/Default/star_debug.h @@ -0,0 +1,31 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_DEFAULT_STAR_DEBUG_H +#define SWIFT_DEFAULT_STAR_DEBUG_H + +__attribute__((always_inline)) INLINE static void star_debug_particle( + const struct spart* p) { + printf( + "x=[%.3e,%.3e,%.3e], " + "v=[%.3e,%.3e,%.3e] p->mass=%.3e \n time_bin=%d\n", + p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], + p->mass, p->time_bin); +} + +#endif /* SWIFT_DEFAULT_STAR_DEBUG_H */ diff --git a/src/stars/Default/star_iact.h b/src/stars/Default/star_iact.h new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/stars/Default/star_io.h b/src/stars/Default/star_io.h new file mode 100644 index 0000000000000000000000000000000000000000..96bbdce6d83dc241d05e7dd1754f476dc0b8e5f9 --- /dev/null +++ b/src/stars/Default/star_io.h @@ -0,0 +1,72 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_DEFAULT_STAR_IO_H +#define SWIFT_DEFAULT_STAR_IO_H + +#include "io_properties.h" + +/** + * @brief Specifies which s-particle fields to read from a dataset + * + * @param sparts The s-particle array. + * @param list The list of i/o properties to read. + * @param num_fields The number of i/o fields to read. + */ +void star_read_particles(struct spart* sparts, struct io_props* list, + int* num_fields) { + + /* Say how much we want to read */ + *num_fields = 4; + + /* List what we want to read */ + list[0] = io_make_input_field("Coordinates", DOUBLE, 3, COMPULSORY, + UNIT_CONV_LENGTH, sparts, x); + list[1] = io_make_input_field("Velocities", FLOAT, 3, COMPULSORY, + UNIT_CONV_SPEED, sparts, v); + list[2] = io_make_input_field("Masses", FLOAT, 1, COMPULSORY, UNIT_CONV_MASS, + sparts, mass); + list[3] = io_make_input_field("ParticleIDs", LONGLONG, 1, COMPULSORY, + UNIT_CONV_NO_UNITS, sparts, id); +} + +/** + * @brief Specifies which s-particle fields to write to a dataset + * + * @param sparts The s-particle array. + * @param list The list of i/o properties to write. + * @param num_fields The number of i/o fields to write. 
+ */ +void star_write_particles(struct spart* sparts, struct io_props* list, + int* num_fields) { + + /* Say how much we want to write */ + *num_fields = 4; + + /* List what we want to write */ + list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, + sparts, x); + list[1] = + io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, sparts, v); + list[2] = + io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, sparts, mass); + list[3] = io_make_output_field("ParticleIDs", LONGLONG, 1, UNIT_CONV_NO_UNITS, + sparts, id); +} + +#endif /* SWIFT_DEFAULT_STAR_IO_H */ diff --git a/src/stars/Default/star_part.h b/src/stars/Default/star_part.h new file mode 100644 index 0000000000000000000000000000000000000000..e958e3d68bc58855a4f57f24d876cfaf73362bd6 --- /dev/null +++ b/src/stars/Default/star_part.h @@ -0,0 +1,52 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_DEFAULT_STAR_PART_H +#define SWIFT_DEFAULT_STAR_PART_H + +/* Some standard headers. */ +#include <stdlib.h> + +/** + * @brief Particle fields for the star particles.
+ * + * All quantities related to gravity are stored in the associated #gpart. + */ +struct spart { + + /*! Particle ID. */ + long long id; + + /*! Pointer to corresponding gravity part. */ + struct gpart* gpart; + + /*! Particle position. */ + double x[3]; + + /*! Particle velocity. */ + float v[3]; + + /*! Star mass */ + float mass; + + /*! Particle time bin */ + timebin_t time_bin; + +} SWIFT_STRUCT_ALIGN; + +#endif /* SWIFT_DEFAULT_STAR_PART_H */ diff --git a/src/stars_io.h b/src/stars_io.h new file mode 100644 index 0000000000000000000000000000000000000000..18a13ec19163008f1c8e9f64cf544ddf812db655 --- /dev/null +++ b/src/stars_io.h @@ -0,0 +1,26 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * + ******************************************************************************/ +#ifndef SWIFT_STAR_IO_H +#define SWIFT_STAR_IO_H + +#include "./const.h" + +#include "./stars/Default/star_io.h" + +#endif /* SWIFT_STAR_IO_H */ diff --git a/src/statistics.c b/src/statistics.c index 7a567a447a7514634435823e03bec5e4ac157d4e..297d88c1f25c1b5b42be8edcd1282fd437964894 100644 --- a/src/statistics.c +++ b/src/statistics.c @@ -104,8 +104,9 @@ void stats_collect_part_mapper(void *map_data, int nr_parts, void *extra_data) { const struct part *restrict parts = (struct part *)map_data; const struct xpart *restrict xparts = s->xparts + (ptrdiff_t)(parts - s->parts); - const int ti_current = s->e->ti_current; + const integertime_t ti_current = s->e->ti_current; const double timeBase = s->e->timeBase; + const double time = s->e->time; struct statistics *const global_stats = data->stats; /* Required for external potential energy */ @@ -124,20 +125,27 @@ void stats_collect_part_mapper(void *map_data, int nr_parts, void *extra_data) { const struct xpart *xp = &xparts[k]; const struct gpart *gp = (p->gpart != NULL) ? 
gp = p->gpart : NULL; - /* Get useful variables */ - const float dt = (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase; - const double x[3] = {p->x[0], p->x[1], p->x[2]}; + /* Get useful time variables */ + const integertime_t ti_begin = + get_integer_time_begin(ti_current, p->time_bin); + const integertime_t ti_step = get_integer_timestep(p->time_bin); + const float dt = (ti_current - (ti_begin + ti_step / 2)) * timeBase; + + /* Get the total acceleration */ float a_tot[3] = {p->a_hydro[0], p->a_hydro[1], p->a_hydro[2]}; if (gp != NULL) { a_tot[0] += gp->a_grav[0]; a_tot[1] += gp->a_grav[1]; a_tot[2] += gp->a_grav[2]; } + + /* Extrapolate velocities to current time */ const float v[3] = {xp->v_full[0] + a_tot[0] * dt, xp->v_full[1] + a_tot[1] * dt, xp->v_full[2] + a_tot[2] * dt}; const float m = hydro_get_mass(p); + const double x[3] = {p->x[0], p->x[1], p->x[2]}; /* Collect mass */ stats.mass += m; @@ -154,15 +162,14 @@ void stats_collect_part_mapper(void *map_data, int nr_parts, void *extra_data) { /* Collect energies. 
*/ stats.E_kin += 0.5f * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]); + stats.E_int += m * hydro_get_internal_energy(p); + stats.E_rad += cooling_get_radiated_energy(xp); stats.E_pot_self += 0.f; if (gp != NULL) - stats.E_pot_ext += - m * external_gravity_get_potential_energy(potential, phys_const, gp); - stats.E_int += m * hydro_get_internal_energy(p, dt); - stats.E_rad += cooling_get_radiated_energy(xp); - + stats.E_pot_ext += m * external_gravity_get_potential_energy( + time, potential, phys_const, gp); /* Collect entropy */ - stats.entropy += m * hydro_get_entropy(p, dt); + stats.entropy += m * hydro_get_entropy(p); } /* Now write back to memory */ @@ -184,8 +191,9 @@ void stats_collect_gpart_mapper(void *map_data, int nr_gparts, const struct index_data *data = (struct index_data *)extra_data; const struct space *s = data->s; const struct gpart *restrict gparts = (struct gpart *)map_data; - const int ti_current = s->e->ti_current; + const integertime_t ti_current = s->e->ti_current; const double timeBase = s->e->timeBase; + const double time = s->e->time; struct statistics *const global_stats = data->stats; /* Required for external potential energy */ @@ -206,13 +214,18 @@ void stats_collect_gpart_mapper(void *map_data, int nr_gparts, if (gp->id_or_neg_offset < 0) continue; /* Get useful variables */ - const float dt = (ti_current - (gp->ti_begin + gp->ti_end) / 2) * timeBase; - const double x[3] = {gp->x[0], gp->x[1], gp->x[2]}; + const integertime_t ti_begin = + get_integer_time_begin(ti_current, gp->time_bin); + const integertime_t ti_step = get_integer_timestep(gp->time_bin); + const float dt = (ti_current - (ti_begin + ti_step / 2)) * timeBase; + + /* Extrapolate velocities */ const float v[3] = {gp->v_full[0] + gp->a_grav[0] * dt, gp->v_full[1] + gp->a_grav[1] * dt, gp->v_full[2] + gp->a_grav[2] * dt}; const float m = gp->mass; + const double x[3] = {gp->x[0], gp->x[1], gp->x[2]}; /* Collect mass */ stats.mass += m; @@ -230,8 +243,8 @@ void 
stats_collect_gpart_mapper(void *map_data, int nr_gparts, /* Collect energies. */ stats.E_kin += 0.5f * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]); stats.E_pot_self += 0.f; - stats.E_pot_ext += - m * external_gravity_get_potential_energy(potential, phys_const, gp); + stats.E_pot_ext += m * external_gravity_get_potential_energy( + time, potential, phys_const, gp); } /* Now write back to memory */ diff --git a/src/swift.h b/src/swift.h index 2928c263525f57a7ee999b50547aa374b456f556..c08a4f3209d9eea0fe02ad9112179a0ed7ccae1e 100644 --- a/src/swift.h +++ b/src/swift.h @@ -23,7 +23,9 @@ #include "../config.h" /* Local headers. */ +#include "active.h" #include "atomic.h" +#include "cache.h" #include "cell.h" #include "clocks.h" #include "const.h" @@ -53,6 +55,7 @@ #include "sourceterms.h" #include "space.h" #include "task.h" +#include "timeline.h" #include "timers.h" #include "tools.h" #include "units.h" diff --git a/src/task.c b/src/task.c index ea97fdd1bb930d005889fa7c73a3f2cb7b5f054a..b05d782af305b25bf95b25279c6abc2e1f4037c2 100644 --- a/src/task.c +++ b/src/task.c @@ -48,13 +48,15 @@ /* Task type names. */ const char *taskID_names[task_type_count] = { - "none", "sort", "self", "pair", "sub_self", - "sub_pair", "init", "ghost", "extra_ghost", "kick", - "send", "recv", "grav_gather_m", "grav_fft", "grav_mm", - "grav_up", "cooling", "sourceterms"}; + "none", "sort", "self", "pair", "sub_self", + "sub_pair", "init", "ghost", "extra_ghost", "drift", + "kick1", "kick2", "timestep", "send", "recv", + "grav_gather_m", "grav_fft", "grav_mm", "grav_up", "cooling", + "sourceterms"}; const char *subtaskID_names[task_subtype_count] = { - "none", "density", "gradient", "force", "grav", "external_grav", "tend"}; + "none", "density", "gradient", "force", "grav", "external_grav", + "tend", "xv", "rho", "gpart", "spart"}; /** * @brief Computes the overlap between the parts array of two given cells. 
@@ -147,9 +149,12 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( break; case task_type_init: - case task_type_kick: + case task_type_kick1: + case task_type_kick2: + case task_type_timestep: case task_type_send: case task_type_recv: + case task_type_drift: if (t->ci->count > 0 && t->ci->gcount > 0) return task_action_all; else if (t->ci->count > 0) @@ -260,6 +265,11 @@ void task_unlock(struct task *t) { /* Act based on task type. */ switch (type) { + case task_type_drift: + cell_unlocktree(ci); + cell_gunlocktree(ci); + break; + case task_type_sort: cell_unlocktree(ci); break; @@ -327,6 +337,15 @@ int task_lock(struct task *t) { #endif break; + case task_type_drift: + if (ci->hold || ci->ghold) return 0; + if (cell_locktree(ci) != 0) return 0; + if (cell_glocktree(ci) != 0) { + cell_unlocktree(ci); + return 0; + } + break; + case task_type_sort: if (cell_locktree(ci) != 0) return 0; break; diff --git a/src/task.h b/src/task.h index c9425fdd137e2c1708dbd05436d1db685bdd3bfd..f2733318a34421fa39f3130f9e76f1ed09246d55 100644 --- a/src/task.h +++ b/src/task.h @@ -45,7 +45,10 @@ enum task_types { task_type_init, task_type_ghost, task_type_extra_ghost, - task_type_kick, + task_type_drift, + task_type_kick1, + task_type_kick2, + task_type_timestep, task_type_send, task_type_recv, task_type_grav_gather_m, @@ -68,6 +71,10 @@ enum task_subtypes { task_subtype_grav, task_subtype_external_grav, task_subtype_tend, + task_subtype_xv, + task_subtype_rho, + task_subtype_gpart, + task_subtype_spart, task_subtype_count } __attribute__((packed)); @@ -157,6 +164,10 @@ struct task { ticks tic, toc; #endif +#ifdef SWIFT_DEBUG_CHECKS + int ti_run; +#endif + } SWIFT_STRUCT_ALIGN; /* Function prototypes. 
*/ diff --git a/src/threadpool.c b/src/threadpool.c index 35e5f2139de0689d9761d0d8f19030a076329cba..c11fd8121bb02f36fce1796d79a7eb55a38102c4 100644 --- a/src/threadpool.c +++ b/src/threadpool.c @@ -90,7 +90,7 @@ void threadpool_init(struct threadpool *tp, int num_threads) { /* Initialize the thread counters. */ tp->num_threads = num_threads; tp->num_threads_waiting = 0; - + /* If there is only a single thread, do nothing more as of here as we will just do work in the (blocked) calling thread. */ if (num_threads == 1) return; @@ -147,7 +147,7 @@ void threadpool_init(struct threadpool *tp, int num_threads) { void threadpool_map(struct threadpool *tp, threadpool_map_function map_function, void *map_data, size_t N, int stride, int chunk, void *extra_data) { - + /* If we just have a single thread, call the map function directly. */ if (tp->num_threads == 1) { map_function(map_data, N, extra_data); diff --git a/src/threadpool.h b/src/threadpool.h index 76aa0c119610c4d540e117f046b286095a9c676d..f9c7eeffb700adc579ec05902193b888cdd6363d 100644 --- a/src/threadpool.h +++ b/src/threadpool.h @@ -32,9 +32,6 @@ typedef void (*threadpool_map_function)(void *map_data, int num_elements, /* Data of a threadpool. */ struct threadpool { - /* Number of threads in this pool. */ - int num_threads; - /* The threads themselves. */ pthread_t *threads; @@ -48,6 +45,9 @@ struct threadpool { map_data_chunk; volatile threadpool_map_function map_function; + /* Number of threads in this pool. */ + int num_threads; + /* Counter for the number of threads that are done. */ volatile int num_threads_waiting, num_threads_running; }; diff --git a/src/timeline.h b/src/timeline.h new file mode 100644 index 0000000000000000000000000000000000000000..c73b2432b219a8ab0254d21c59102841557a57b9 --- /dev/null +++ b/src/timeline.h @@ -0,0 +1,122 @@ +/******************************************************************************* + * This file is part of SWIFT. 
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_TIMELINE_H +#define SWIFT_TIMELINE_H + +/* Config parameters. */ +#include "../config.h" + +/* Local headers. */ +#include "inline.h" +#include "intrinsics.h" + +#include <math.h> + +typedef long long integertime_t; +typedef char timebin_t; + +/*! The number of time bins */ +#define num_time_bins 56 + +/*! The maximal number of timesteps in a simulation */ +#define max_nr_timesteps (1LL << (num_time_bins + 1)) + +/** + * @brief Returns the integer time interval corresponding to a time bin + * + * @param bin The time bin of interest. + */ +static INLINE integertime_t get_integer_timestep(timebin_t bin) { + + if (bin <= 0) return 0; + return 1LL << (bin + 1); +} + +/** + * @brief Returns the time bin corresponding to a given time_step size. + * + * Assumes that integertime_t is a 64-bit (signed) long long. + */ +static INLINE timebin_t get_time_bin(integertime_t time_step) { + + /* ((int) log_2(time_step)) - 1 */ + return (timebin_t)(62 - intrinsics_clzll(time_step)); +} + +/** + * @brief Returns the physical time interval corresponding to a time bin. + * + * @param bin The time bin of interest.
+ * @param timeBase the minimal time-step size of the simulation. + */ +static INLINE double get_timestep(timebin_t bin, double timeBase) { + + return get_integer_timestep(bin) * timeBase; +} + +/** + * @brief Returns the integer time corresponding to the start of the time-step + * given by a time-bin. + * + * @param ti_current The current time on the integer time line. + * @param bin The time bin of interest. + */ +static INLINE integertime_t get_integer_time_begin(integertime_t ti_current, + timebin_t bin) { + + const integertime_t dti = get_integer_timestep(bin); + if (dti == 0) + return 0; + else + return dti * ((ti_current - 1) / dti); +} + +/** + * @brief Returns the integer time corresponding to the end of the time-step + * given by a time-bin. + * + * @param ti_current The current time on the integer time line. + * @param bin The time bin of interest. + */ +static INLINE integertime_t get_integer_time_end(integertime_t ti_current, + timebin_t bin) { + + const integertime_t dti = get_integer_timestep(bin); + if (dti == 0) return 0; + /* Integer round-up to a multiple of dti; doubles are inexact above 2^53. */ + const integertime_t mod = ti_current % dti; + return ti_current - mod + ((mod > 0) ? dti : 0); +} + +/** + * @brief Returns the highest active time bin at a given point on the time line. + * + * @param time The current point on the time line.
+ */ +static INLINE timebin_t get_max_active_bin(integertime_t time) { + + if (time == 0) return num_time_bins; + + timebin_t bin = 1; + while (!((1LL << (bin + 1)) & time)) ++bin; + + return bin; +} + +#endif /* SWIFT_TIMELINE_H */ diff --git a/src/timers.h b/src/timers.h index bc877d4094425a4948290d2c7c099f49cbd44280..50f630e7fc355808596967d8d7d887583674d24a 100644 --- a/src/timers.h +++ b/src/timers.h @@ -33,7 +33,10 @@ enum { timer_prepare, timer_init, timer_drift, - timer_kick, + timer_kick1, + timer_kick2, + timer_timestep, + timer_endforce, timer_dosort, timer_doself_density, timer_doself_gradient, @@ -57,7 +60,9 @@ enum { timer_dopair_subset, timer_do_ghost, timer_do_extra_ghost, - timer_dorecv_cell, + timer_dorecv_part, + timer_dorecv_gpart, + timer_dorecv_spart, timer_gettask, timer_qget, timer_qsteal, diff --git a/src/timestep.h b/src/timestep.h index db52911ec1e8fbf31f35e8877e0a7ae7ba5ee478..432f0fd2c4eb713e11272546cfe84e8f6c342cbd 100644 --- a/src/timestep.h +++ b/src/timestep.h @@ -23,39 +23,41 @@ #include "../config.h" /* Local headers. */ -#include "const.h" #include "cooling.h" #include "debug.h" +#include "timeline.h" + /** * @brief Compute a valid integer time-step form a given time-step * * @param new_dt The time-step to convert. - * @param ti_begin The (integer) start of the previous time-step. - * @param ti_end The (integer) end of the previous time-step. + * @param old_bin The old time bin. + * @param ti_current The current time on the integer time-line. * @param timeBase_inv The inverse of the system's minimal time-step. 
*/ -__attribute__((always_inline)) INLINE static int get_integer_timestep( - float new_dt, int ti_begin, int ti_end, double timeBase_inv) { +__attribute__((always_inline)) INLINE static integertime_t +make_integer_timestep(float new_dt, timebin_t old_bin, integertime_t ti_current, + double timeBase_inv) { /* Convert to integer time */ - int new_dti = (int)(new_dt * timeBase_inv); + integertime_t new_dti = (integertime_t)(new_dt * timeBase_inv); - /* Recover the current timestep */ - const int current_dti = ti_end - ti_begin; + /* Current time-step */ + integertime_t current_dti = get_integer_timestep(old_bin); + integertime_t ti_end = get_integer_time_end(ti_current, old_bin); /* Limit timestep increase */ - if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti); + if (old_bin > 0) new_dti = min(new_dti, 2 * current_dti); /* Put this timestep on the time line */ - int dti_timeline = max_nr_timesteps; - while (new_dti < dti_timeline) dti_timeline /= 2; + integertime_t dti_timeline = max_nr_timesteps; + while (new_dti < dti_timeline) dti_timeline /= 2LL; new_dti = dti_timeline; /* Make sure we are allowed to increase the timestep size */ if (new_dti > current_dti) { if ((max_nr_timesteps - ti_end) % new_dti > 0) new_dti = current_dti; } - return new_dti; } @@ -65,25 +67,26 @@ __attribute__((always_inline)) INLINE static int get_integer_timestep( * @param gp The #gpart. * @param e The #engine (used to get some constants). 
*/ -__attribute__((always_inline)) INLINE static int get_gpart_timestep( +__attribute__((always_inline)) INLINE static integertime_t get_gpart_timestep( const struct gpart *restrict gp, const struct engine *restrict e) { - const float new_dt_external = external_gravity_timestep( - e->time, e->external_potential, e->physical_constants, gp); + float new_dt = FLT_MAX; - /* const float new_dt_self = */ - /* gravity_compute_timestep_self(e->physical_constants, gp); */ - const float new_dt_self = FLT_MAX; // MATTHIEU + if (e->policy & engine_policy_external_gravity) + new_dt = + min(new_dt, external_gravity_timestep(e->time, e->external_potential, + e->physical_constants, gp)); - float new_dt = min(new_dt_external, new_dt_self); + if (e->policy & engine_policy_self_gravity) + new_dt = min(new_dt, gravity_compute_timestep_self(gp)); /* Limit timestep within the allowed range */ new_dt = min(new_dt, e->dt_max); new_dt = max(new_dt, e->dt_min); /* Convert to integer time */ - const int new_dti = - get_integer_timestep(new_dt, gp->ti_begin, gp->ti_end, e->timeBase_inv); + const integertime_t new_dti = make_integer_timestep( + new_dt, gp->time_bin, e->ti_current, e->timeBase_inv); return new_dti; } @@ -95,7 +98,7 @@ __attribute__((always_inline)) INLINE static int get_gpart_timestep( * @param xp The #xpart partner of p. * @param e The #engine (used to get some constants). 
*/ -__attribute__((always_inline)) INLINE static int get_part_timestep( +__attribute__((always_inline)) INLINE static integertime_t get_part_timestep( const struct part *restrict p, const struct xpart *restrict xp, const struct engine *restrict e) { @@ -112,14 +115,13 @@ __attribute__((always_inline)) INLINE static int get_part_timestep( float new_dt_grav = FLT_MAX; if (p->gpart != NULL) { - const float new_dt_external = external_gravity_timestep( - e->time, e->external_potential, e->physical_constants, p->gpart); - - /* const float new_dt_self = */ - /* gravity_compute_timestep_self(e->physical_constants, p->gpart); */ - const float new_dt_self = FLT_MAX; // MATTHIEU + if (e->policy & engine_policy_external_gravity) + new_dt_grav = min(new_dt_grav, external_gravity_timestep( + e->time, e->external_potential, + e->physical_constants, p->gpart)); - new_dt_grav = min(new_dt_external, new_dt_self); + if (e->policy & engine_policy_self_gravity) + new_dt_grav = min(new_dt_grav, gravity_compute_timestep_self(p->gpart)); } /* Final time-step is minimum of hydro and gravity */ @@ -138,8 +140,38 @@ __attribute__((always_inline)) INLINE static int get_part_timestep( new_dt = max(new_dt, e->dt_min); /* Convert to integer time */ - const int new_dti = - get_integer_timestep(new_dt, p->ti_begin, p->ti_end, e->timeBase_inv); + const integertime_t new_dti = make_integer_timestep( + new_dt, p->time_bin, e->ti_current, e->timeBase_inv); + + return new_dti; +} + +/** + * @brief Compute the new (integer) time-step of a given #spart + * + * @param sp The #spart. + * @param e The #engine (used to get some constants). 
+ */ +__attribute__((always_inline)) INLINE static integertime_t get_spart_timestep( + const struct spart *restrict sp, const struct engine *restrict e) { + + float new_dt = star_compute_timestep(sp); + + if (e->policy & engine_policy_external_gravity) + new_dt = min(new_dt, + external_gravity_timestep(e->time, e->external_potential, + e->physical_constants, sp->gpart)); + + if (e->policy & engine_policy_self_gravity) + new_dt = min(new_dt, gravity_compute_timestep_self(sp->gpart)); + + /* Limit timestep within the allowed range */ + new_dt = min(new_dt, e->dt_max); + new_dt = max(new_dt, e->dt_min); + + /* Convert to integer time */ + const integertime_t new_dti = make_integer_timestep( + new_dt, sp->time_bin, e->ti_current, e->timeBase_inv); return new_dti; } diff --git a/src/tools.c b/src/tools.c index e526bb1b838f6d97b72eadb4070f3f2a94938c04..ab11d1f5930cf5319aaf6424f1559f144718e154 100644 --- a/src/tools.c +++ b/src/tools.c @@ -558,7 +558,181 @@ void shuffle_particles(struct part *parts, const int count) { } /** - * @brief Computes the forces between all g-particles using the N^2 algorithm + * @brief Compares two values based on their relative difference: |a - b|/|a + + * b| + * + * @param a Value a + * @param b Value b + * @param threshold The limit on the relative difference between the two values + * @param absDiff Absolute difference: |a - b| + * @param absSum Absolute sum: |a + b| + * @param relDiff Relative difference: |a - b|/|a + b| + * + * @return 1 if difference found, 0 otherwise + */ +int compare_values(double a, double b, double threshold, double *absDiff, + double *absSum, double *relDiff) { + + int result = 0; + *absDiff = 0.0, *absSum = 0.0, *relDiff = 0.0; + + *absDiff = fabs(a - b); + *absSum = fabs(a + b); + if (*absSum > 0.f) { + *relDiff = *absDiff / *absSum; + } + + if (*relDiff > threshold) { + result = 1; + } + + return result; +} + +/** + * @brief Compares two particles' properties using the relative difference and a + * threshold. 
+ * + * @param a Particle A + * @param b Particle B + * @param threshold The limit on the relative difference between the two values + * + * @return 1 if difference found, 0 otherwise + */ +int compare_particles(struct part a, struct part b, double threshold) { + +#ifdef GADGET2_SPH + + int result = 0; + double absDiff = 0.0, absSum = 0.0, relDiff = 0.0; + + for (int k = 0; k < 3; k++) { + if (compare_values(a.x[k], b.x[k], threshold, &absDiff, &absSum, + &relDiff)) { + message( + "Relative difference (%e) larger than tolerance (%e) for x[%d] of " + "particle %lld.", + relDiff, threshold, k, a.id); + message("a = %e, b = %e", a.x[k], b.x[k]); + result = 1; + } + } + for (int k = 0; k < 3; k++) { + if (compare_values(a.v[k], b.v[k], threshold, &absDiff, &absSum, + &relDiff)) { + message( + "Relative difference (%e) larger than tolerance (%e) for v[%d] of " + "particle %lld.", + relDiff, threshold, k, a.id); + message("a = %e, b = %e", a.v[k], b.v[k]); + result = 1; + } + } + for (int k = 0; k < 3; k++) { + if (compare_values(a.a_hydro[k], b.a_hydro[k], threshold, &absDiff, &absSum, + &relDiff)) { + message( + "Relative difference (%e) larger than tolerance (%e) for a_hydro[%d] " + "of particle %lld.", + relDiff, threshold, k, a.id); + message("a = %e, b = %e", a.a_hydro[k], b.a_hydro[k]); + result = 1; + } + } + if (compare_values(a.rho, b.rho, threshold, &absDiff, &absSum, &relDiff)) { + message( + "Relative difference (%e) larger than tolerance (%e) for rho of " + "particle %lld.", + relDiff, threshold, a.id); + message("a = %e, b = %e", a.rho, b.rho); + result = 1; + } + if (compare_values(a.density.rho_dh, b.density.rho_dh, threshold, &absDiff, + &absSum, &relDiff)) { + message( + "Relative difference (%e) larger than tolerance (%e) for rho_dh of " + "particle %lld.", + relDiff, threshold, a.id); + message("a = %e, b = %e", a.density.rho_dh, b.density.rho_dh); + result = 1; + } + if (compare_values(a.density.wcount, b.density.wcount, threshold, &absDiff, + 
&absSum, &relDiff)) { + message( + "Relative difference (%e) larger than tolerance (%e) for wcount of " + "particle %lld.", + relDiff, threshold, a.id); + message("a = %e, b = %e", a.density.wcount, b.density.wcount); + result = 1; + } + if (compare_values(a.density.wcount_dh, b.density.wcount_dh, threshold, + &absDiff, &absSum, &relDiff)) { + message( + "Relative difference (%e) larger than tolerance (%e) for wcount_dh of " + "particle %lld.", + relDiff, threshold, a.id); + message("a = %e, b = %e", a.density.wcount_dh, b.density.wcount_dh); + result = 1; + } + if (compare_values(a.force.h_dt, b.force.h_dt, threshold, &absDiff, &absSum, + &relDiff)) { + message( + "Relative difference (%e) larger than tolerance (%e) for h_dt of " + "particle %lld.", + relDiff, threshold, a.id); + message("a = %e, b = %e", a.force.h_dt, b.force.h_dt); + result = 1; + } + if (compare_values(a.force.v_sig, b.force.v_sig, threshold, &absDiff, &absSum, + &relDiff)) { + message( + "Relative difference (%e) larger than tolerance (%e) for v_sig of " + "particle %lld.", + relDiff, threshold, a.id); + message("a = %e, b = %e", a.force.v_sig, b.force.v_sig); + result = 1; + } + if (compare_values(a.entropy_dt, b.entropy_dt, threshold, &absDiff, &absSum, + &relDiff)) { + message( + "Relative difference (%e) larger than tolerance (%e) for entropy_dt of " + "particle %lld.", + relDiff, threshold, a.id); + message("a = %e, b = %e", a.entropy_dt, b.entropy_dt); + result = 1; + } + if (compare_values(a.density.div_v, b.density.div_v, threshold, &absDiff, + &absSum, &relDiff)) { + message( + "Relative difference (%e) larger than tolerance (%e) for div_v of " + "particle %lld.", + relDiff, threshold, a.id); + message("a = %e, b = %e", a.density.div_v, b.density.div_v); + result = 1; + } + for (int k = 0; k < 3; k++) { + if (compare_values(a.density.rot_v[k], b.density.rot_v[k], threshold, + &absDiff, &absSum, &relDiff)) { + message( + "Relative difference (%e) larger than tolerance (%e) for 
rot_v[%d] " + "of particle %lld.", + relDiff, threshold, k, a.id); + message("a = %e, b = %e", a.density.rot_v[k], b.density.rot_v[k]); + result = 1; + } + } + + return result; + +#else + + error("Function not supported for this flavour of SPH"); + return 0; + +#endif +} + +/** @brief Computes the forces between all g-particles using the N^2 algorithm * * Overwrites the accelerations of the gparts with the values. * Do not use for actual runs. diff --git a/src/tools.h b/src/tools.h index 43ddd946c3e8cdf53139bb917135dffd8a8acd12..ece3078dce7cc8ab4b15538a1e5d9a990d81b36d 100644 --- a/src/tools.h +++ b/src/tools.h @@ -47,4 +47,8 @@ void shuffle_particles(struct part *parts, const int count); void gravity_n2(struct gpart *gparts, const int gcount, const struct phys_const *constants, float rlr); +int compare_values(double a, double b, double threshold, double *absDiff, + double *absSum, double *relDiff); +int compare_particles(struct part a, struct part b, double threshold); + #endif /* SWIFT_TOOL_H */ diff --git a/src/vector.h b/src/vector.h index 53869fd2594227d3332d7435f47cdff7cded224b..5e7c978ce6c3df9b1fbc47be2a43ee76c85a352a 100644 --- a/src/vector.h +++ b/src/vector.h @@ -46,19 +46,33 @@ #define VEC_FLOAT __m512 #define VEC_DBL __m512d #define VEC_INT __m512i +#define KNL_MASK_16 __mmask16 #define vec_load(a) _mm512_load_ps(a) +#define vec_store(a, addr) _mm512_store_ps(addr, a) +#define vec_setzero() _mm512_setzero_ps() +#define vec_setintzero() _mm512_setzero_epi32() #define vec_set1(a) _mm512_set1_ps(a) +#define vec_setint1(a) _mm512_set1_epi32(a) #define vec_set(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \ _mm512_set_ps(p, o, n, m, l, k, j, i, h, g, f, e, d, c, b, a) #define vec_dbl_set(a, b, c, d, e, f, g, h) \ _mm512_set_pd(h, g, f, e, d, c, b, a) +#define vec_add(a, b) _mm512_add_ps(a, b) +#define vec_sub(a, b) _mm512_sub_ps(a, b) +#define vec_mul(a, b) _mm512_mul_ps(a, b) +#define vec_fma(a, b, c) _mm512_fmadd_ps(a, b, c) #define vec_sqrt(a) 
_mm512_sqrt_ps(a) -#define vec_rcp(a) _mm512_rcp_ps(a) -#define vec_rsqrt(a) _mm512_rsqrt_ps(a) +#define vec_rcp(a) _mm512_rcp14_ps(a) +#define vec_rsqrt(a) _mm512_rsqrt14_ps(a) #define vec_ftoi(a) _mm512_cvttps_epi32(a) #define vec_fmin(a, b) _mm512_min_ps(a, b) #define vec_fmax(a, b) _mm512_max_ps(a, b) #define vec_fabs(a) _mm512_andnot_ps(_mm512_set1_ps(-0.f), a) +#define vec_floor(a) _mm512_floor_ps(a) +#define vec_cmp_gt(a, b) _mm512_cmp_ps_mask(a, b, _CMP_GT_OQ) +#define vec_cmp_lt(a, b) _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ) +#define vec_cmp_lte(a, b) _mm512_cmp_ps_mask(a, b, _CMP_LE_OQ) +#define vec_and(a, b) _mm512_and_ps(a, b) #define vec_todbl_lo(a) _mm512_cvtps_pd(_mm512_extract128_ps(a, 0)) #define vec_todbl_hi(a) _mm512_cvtps_pd(_mm512_extract128_ps(a, 1)) #define vec_dbl_tofloat(a, b) _mm512_insertf128(_mm512_castps128_ps512(a), b, 1) @@ -86,15 +100,28 @@ .f[6] = a, .f[7] = a, .f[8] = a, .f[9] = a, .f[10] = a, .f[11] = a, \ .f[12] = a, .f[13] = a, .f[14] = a, .f[15] = a \ } +#define VEC_HADD(a, b) b += _mm512_reduce_add_ps(a.v) +#define VEC_FORM_PACKED_MASK(mask, v_mask, pack) \ + pack += __builtin_popcount(mask); +#define VEC_LEFT_PACK(a, mask, result) \ + _mm512_mask_compressstoreu_ps(result, mask, a) #elif defined(HAVE_AVX) #define VEC_SIZE 8 #define VEC_FLOAT __m256 #define VEC_DBL __m256d #define VEC_INT __m256i #define vec_load(a) _mm256_load_ps(a) +#define vec_store(a, addr) _mm256_store_ps(addr, a) +#define vec_unaligned_store(a, addr) _mm256_storeu_ps(addr, a) +#define vec_setzero() _mm256_setzero_ps() +#define vec_setintzero() _mm256_setzero_si256() #define vec_set1(a) _mm256_set1_ps(a) +#define vec_setint1(a) _mm256_set1_epi32(a) #define vec_set(a, b, c, d, e, f, g, h) _mm256_set_ps(h, g, f, e, d, c, b, a) #define vec_dbl_set(a, b, c, d) _mm256_set_pd(d, c, b, a) +#define vec_add(a, b) _mm256_add_ps(a, b) +#define vec_sub(a, b) _mm256_sub_ps(a, b) +#define vec_mul(a, b) _mm256_mul_ps(a, b) #define vec_sqrt(a) _mm256_sqrt_ps(a) #define 
vec_rcp(a) _mm256_rcp_ps(a) #define vec_rsqrt(a) _mm256_rsqrt_ps(a) @@ -102,6 +129,12 @@ #define vec_fmin(a, b) _mm256_min_ps(a, b) #define vec_fmax(a, b) _mm256_max_ps(a, b) #define vec_fabs(a) _mm256_andnot_ps(_mm256_set1_ps(-0.f), a) +#define vec_floor(a) _mm256_floor_ps(a) +#define vec_cmp_lt(a, b) _mm256_cmp_ps(a, b, _CMP_LT_OQ) +#define vec_cmp_gt(a, b) _mm256_cmp_ps(a, b, _CMP_GT_OQ) +#define vec_cmp_lte(a, b) _mm256_cmp_ps(a, b, _CMP_LE_OQ) +#define vec_cmp_result(a) _mm256_movemask_ps(a) +#define vec_and(a, b) _mm256_and_ps(a, b) #define vec_todbl_lo(a) _mm256_cvtps_pd(_mm256_extract128_ps(a, 0)) #define vec_todbl_hi(a) _mm256_cvtps_pd(_mm256_extract128_ps(a, 1)) #define vec_dbl_tofloat(a, b) _mm256_insertf128(_mm256_castps128_ps256(a), b, 1) @@ -118,9 +151,63 @@ .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a, .f[4] = a, .f[5] = a, \ .f[6] = a, .f[7] = a \ } +#define VEC_HADD(a, b) \ + a.v = _mm256_hadd_ps(a.v, a.v); \ + a.v = _mm256_hadd_ps(a.v, a.v); \ + b += a.f[0] + a.f[4]; +#define VEC_GET_LOW(a) _mm256_castps256_ps128(a) +#define VEC_GET_HIGH(a) _mm256_extractf128_ps(a, 1) #ifdef HAVE_AVX2 +#define vec_fma(a, b, c) _mm256_fmadd_ps(a, b, c) +#define identity_indices 0x0706050403020100 #define VEC_HAVE_GATHER #define vec_gather(base, offsets) _mm256_i32gather_ps(base, offsets.m, 1) +#define VEC_FORM_PACKED_MASK(mask, v_mask, pack) \ + { \ + unsigned long expanded_mask = _pdep_u64(mask, 0x0101010101010101); \ + expanded_mask *= 0xFF; \ + unsigned long wanted_indices = _pext_u64(identity_indices, expanded_mask); \ + __m128i bytevec = _mm_cvtsi64_si128(wanted_indices); \ + v_mask = _mm256_cvtepu8_epi32(bytevec); \ + pack += __builtin_popcount(mask); \ + } +#define VEC_LEFT_PACK(a, mask, result) \ + vec_unaligned_store(_mm256_permutevar8x32_ps(a, mask), result) +#endif +#ifndef vec_fma +#define vec_fma(a, b, c) vec_add(vec_mul(a, b), c) +#endif +#ifndef VEC_FORM_PACKED_MASK +#define VEC_FORM_PACKED_MASK(mask, v_mask, pack) \ + { \ + for (int i = 0; i < 
VEC_SIZE; i++) \ + if ((mask & (1 << i))) v_mask.i[pack++] = i; \ + } +#define VEC_FORM_PACKED_MASK_2(mask, v_mask, pack, mask2, v_mask2, pack2) \ + { \ + for (int i = 0; i < VEC_SIZE; i++) { \ + if ((mask & (1 << i))) v_mask.i[pack++] = i; \ + if ((mask2 & (1 << i))) v_mask2.i[pack2++] = i; \ + } \ + } +#endif +#ifndef VEC_LEFT_PACK +#define VEC_LEFT_PACK(a, mask, result) \ + { \ + __m256 t1 = _mm256_castps128_ps256(_mm256_extractf128_ps(a, 1)); \ + __m256 t2 = _mm256_insertf128_ps(t1, _mm256_castps256_ps128(a), 1); \ + __m256 r0 = _mm256_permutevar_ps(a, mask); \ + __m256 r1 = _mm256_permutevar_ps(t2, mask); \ + __m128i k1 = _mm_slli_epi32( \ + (__m128i)(_mm_xor_si128((__m128i)VEC_GET_HIGH((__m256)mask), \ + (__m128i)_mm_set1_epi32(4))), \ + 29); \ + __m128i k0 = _mm_slli_epi32((__m128i)(VEC_GET_LOW((__m256)mask)), 29); \ + __m256 kk = \ + _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_castsi128_ps(k0)), \ + _mm_castsi128_ps(k1), 1); \ + *((__m256 *)(result)) = _mm256_blendv_ps(r0, r1, kk); \ + } #endif #elif defined(HAVE_SSE2) #define VEC_SIZE 4 @@ -128,9 +215,16 @@ #define VEC_DBL __m128d #define VEC_INT __m128i #define vec_load(a) _mm_load_ps(a) +#define vec_store(a, addr) _mm_store_ps(addr, a) +#define vec_setzero() _mm_setzero_ps() +#define vec_setintzero() _mm_setzero_si256() #define vec_set1(a) _mm_set1_ps(a) +#define vec_setint1(a) _mm_set1_epi32(a) #define vec_set(a, b, c, d) _mm_set_ps(d, c, b, a) #define vec_dbl_set(a, b) _mm_set_pd(b, a) +#define vec_add(a, b) _mm_add_ps(a, b) +#define vec_sub(a, b) _mm_sub_ps(a, b) +#define vec_mul(a, b) _mm_mul_ps(a, b) #define vec_sqrt(a) _mm_sqrt_ps(a) #define vec_rcp(a) _mm_rcp_ps(a) #define vec_rsqrt(a) _mm_rsqrt_ps(a) @@ -138,6 +232,12 @@ #define vec_fmin(a, b) _mm_min_ps(a, b) #define vec_fmax(a, b) _mm_max_ps(a, b) #define vec_fabs(a) _mm_andnot_ps(_mm_set1_ps(-0.f), a) +#define vec_floor(a) _mm_floor_ps(a) +#define vec_cmp_gt(a, b) _mm_cmpgt_ps(a, b) +#define vec_cmp_lt(a, b) _mm_cmplt_ps(a, b) +#define 
vec_cmp_lte(a, b) _mm_cmp_ps(a, b, _CMP_LE_OQ) +#define vec_cmp_result(a) _mm_movemask_ps(a) +#define vec_and(a, b) _mm_and_ps(a, b) #define vec_todbl_lo(a) _mm_cvtps_pd(a) #define vec_todbl_hi(a) _mm_cvtps_pd(_mm_movehl_ps(a, a)) #define vec_dbl_tofloat(a, b) _mm_movelh_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b)) @@ -151,6 +251,12 @@ #define vec_dbl_fmax(a, b) _mm_max_pd(a, b) #define FILL_VEC(a) \ { .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a } +#define VEC_HADD(a, b) \ + a.v = _mm_hadd_ps(a.v, a.v); \ + b += a.f[0] + a.f[1]; +#ifndef vec_fma +#define vec_fma(a, b, c) vec_add(vec_mul(a, b), c) +#endif #else #define VEC_SIZE 4 #endif @@ -165,6 +271,45 @@ typedef union { int i[VEC_SIZE]; } vector; +/** + * @brief Calculates the inverse ($1/x$) of a vector using intrinsics and a + * Newton iteration to obtain the correct level of accuracy. + * + * @param x #vector to be inverted. + * @return x_inv #vector inverted x. + */ +__attribute__((always_inline)) INLINE vector vec_reciprocal(vector x) { + + vector x_inv; + + x_inv.v = vec_rcp(x.v); + x_inv.v = vec_sub(x_inv.v, + vec_mul(x_inv.v, (vec_fma(x.v, x_inv.v, vec_set1(-1.0f))))); + + return x_inv; +} + +/** + * @brief Calculates the inverse and square root (\f$1/\sqrt{x}\f$) of a vector + * using intrinsics and a Newton iteration to obtain the correct level of + * accuracy. + * + * @param x #vector to be inverted. + * @return x_inv #vector inverted x. + */ +__attribute__((always_inline)) INLINE vector vec_reciprocal_sqrt(vector x) { + + vector x_inv; + + x_inv.v = vec_rsqrt(x.v); + x_inv.v = vec_sub( + x_inv.v, + vec_mul(vec_mul(vec_set1(0.5f), x_inv.v), + (vec_fma(x.v, vec_mul(x_inv.v, x_inv.v), vec_set1(-1.0f))))); + + return x_inv; +} + #else /* Needed for cache alignment. 
*/ #define VEC_SIZE 16 diff --git a/tests/Makefile.am b/tests/Makefile.am index 136b7ad231947574a5459298e7fb85902028a3f4..0db5c2544433012dcd7f451f535391aa81b1f802 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -25,15 +25,15 @@ TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetr testPair.sh testPairPerturbed.sh test27cells.sh test27cellsPerturbed.sh \ testParser.sh testSPHStep test125cells.sh testKernelGrav testFFT \ testAdiabaticIndex testRiemannExact testRiemannTRRS testRiemannHLLC \ - testMatrixInversion testThreadpool + testMatrixInversion testThreadpool testDump testLogger # List of test programs to compile check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ testSPHStep testPair test27cells test125cells testParser \ testKernel testKernelGrav testFFT testInteractions testMaths \ - testSymmetry testThreadpool \ + testSymmetry testThreadpool benchmarkInteractions \ testAdiabaticIndex testRiemannExact testRiemannTRRS \ - testRiemannHLLC testMatrixInversion + testRiemannHLLC testMatrixInversion testDump testLogger # Sources for the individual programs testGreetings_SOURCES = testGreetings.c @@ -66,6 +66,8 @@ testFFT_SOURCES = testFFT.c testInteractions_SOURCES = testInteractions.c +benchmarkInteractions_SOURCES = benchmarkInteractions.c + testAdiabaticIndex_SOURCES = testAdiabaticIndex.c testRiemannExact_SOURCES = testRiemannExact.c @@ -78,6 +80,10 @@ testMatrixInversion_SOURCES = testMatrixInversion.c testThreadpool_SOURCES = testThreadpool.c +testDump_SOURCES = testDump.c + +testLogger_SOURCES = testLogger.c + # Files necessary for distribution EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \ test27cells.sh test27cellsPerturbed.sh testParser.sh \ diff --git a/tests/benchmarkInteractions.c b/tests/benchmarkInteractions.c new file mode 100644 index 0000000000000000000000000000000000000000..6d6d345bee743d28fb4bdda911bd4bcc4c78205f --- /dev/null +++ 
b/tests/benchmarkInteractions.c @@ -0,0 +1,500 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#include <fenv.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "swift.h" + +#define array_align sizeof(float) * VEC_SIZE +#define ACC_THRESHOLD 1e-5 + +#ifdef NONSYM_DENSITY +#define IACT runner_iact_nonsym_density +#define IACT_VEC runner_iact_nonsym_2_vec_density +#define IACT_NAME "test_nonsym_density" +#endif + +#ifdef SYM_DENSITY +#define IACT runner_iact_density +#define IACT_VEC runner_iact_vec_density +#define IACT_NAME "test_sym_density" +#endif + +#ifdef NONSYM_FORCE +#define IACT runner_iact_nonsym_force +#define IACT_VEC runner_iact_nonsym_vec_force +#define IACT_NAME "test_nonsym_force" +#endif + +#ifdef SYM_FORCE +#define IACT runner_iact_force +#define IACT_VEC runner_iact_vec_force +#define IACT_NAME "test_sym_force" +#endif + +#ifndef IACT +#define IACT runner_iact_nonsym_density +#define IACT_VEC runner_iact_nonsym_2_vec_density +#define IACT_NAME "test_nonsym_density" +#endif + +/** + * @brief Constructs an array of 
particles in a valid state prior to + * a IACT_NONSYM and IACT_NONSYM_VEC call. + * + * @param count No. of particles to create + * @param offset The position of the particle offset from (0,0,0). + * @param spacing Particle spacing. + * @param h The smoothing length of the particles in units of the inter-particle + *separation. + * @param partId The running counter of IDs. + */ +struct part *make_particles(size_t count, double *offset, double spacing, + double h, long long *partId) { + + struct part *particles; + if (posix_memalign((void **)&particles, part_align, + count * sizeof(struct part)) != 0) { + error("couldn't allocate particles, no. of particles: %d", (int)count); + } + bzero(particles, count * sizeof(struct part)); + + /* Construct the particles */ + struct part *p; + + /* Set test particle at centre of unit sphere. */ + p = &particles[0]; + + /* Place the test particle at the centre of a unit sphere. */ + p->x[0] = 0.0f; + p->x[1] = 0.0f; + p->x[2] = 0.0f; + + p->h = h; + p->id = ++(*partId); + p->mass = 1.0f; + + /* Place rest of particles around the test particle + * with random position within a unit sphere. */ + for (size_t i = 1; i < count; ++i) { + p = &particles[i]; + + /* Randomise positions within a unit sphere. */ + p->x[0] = random_uniform(-1.0, 1.0); + p->x[1] = random_uniform(-1.0, 1.0); + p->x[2] = random_uniform(-1.0, 1.0); + + /* Randomise velocities. */ + p->v[0] = random_uniform(-0.05, 0.05); + p->v[1] = random_uniform(-0.05, 0.05); + p->v[2] = random_uniform(-0.05, 0.05); + + p->h = h; + p->id = ++(*partId); + p->mass = 1.0f; + } + return particles; +} + +/** + * @brief Populates particle properties needed for the force calculation. 
+ */ +void prepare_force(struct part *parts, size_t count) { + + struct part *p; + for (size_t i = 0; i < count; ++i) { + p = &parts[i]; + p->rho = i + 1; + p->force.balsara = random_uniform(0.0, 1.0); + p->force.P_over_rho2 = i + 1; + p->force.soundspeed = random_uniform(2.0, 3.0); + p->force.v_sig = 0.0f; + p->force.h_dt = 0.0f; + } +} + +/** + * @brief Dumps all particle information to a file + */ +void dump_indv_particle_fields(char *fileName, struct part *p) { + + FILE *file = fopen(fileName, "a"); + + fprintf(file, + "%6llu %10f %10f %10f %10f %10f %10f %10e %10e %10e %13e %13e %13e " + "%13e %13e %13e %13e " + "%13e %13e %13e\n", + p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], + p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->rho, + p->density.rho_dh, p->density.wcount, p->density.wcount_dh, + p->force.h_dt, p->force.v_sig, +#if defined(MINIMAL_SPH) + 0., 0., 0., 0. +#else + p->density.div_v, p->density.rot_v[0], p->density.rot_v[1], + p->density.rot_v[2] +#endif + ); + fclose(file); +} + +/** + * @brief Creates a header for the output file + */ +void write_header(char *fileName) { + + FILE *file = fopen(fileName, "w"); + /* Write header */ + fprintf(file, + "# %4s %10s %10s %10s %10s %10s %10s %10s %10s %10s %13s %13s %13s " + "%13s %13s %13s %13s" + "%13s %13s %13s %13s\n", + "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "a_x", "a_y", + "a_z", "rho", "rho_dh", "wcount", "wcount_dh", "dh/dt", "v_sig", + "div_v", "curl_vx", "curl_vy", "curl_vz", "dS/dt"); + fprintf(file, "\n# PARTICLES BEFORE INTERACTION:\n"); + fclose(file); +} + +/** + * @brief Compares the vectorised result against + * the serial result of the interaction. + * + * @param serial_test_part Particle that has been updated serially + * @param serial_parts Particle array that has been interacted serially + * @param vec_test_part Particle that has been updated using vectors + * @param vec_parts Particle array to be interacted using vectors + * @param count No. 
of particles that have been interacted + * + * @return Non-zero value if difference found, 0 otherwise + */ +int check_results(struct part serial_test_part, struct part *serial_parts, + struct part vec_test_part, struct part *vec_parts, + int count) { + int result = 0; + result += compare_particles(serial_test_part, vec_test_part, ACC_THRESHOLD); + + for (int i = 0; i < count; i++) + result += compare_particles(serial_parts[i], vec_parts[i], ACC_THRESHOLD); + + return result; +} + +/* + * @brief Calls the serial and vectorised version of the non-symmetrical density + * interaction. + * + * @param test_part Particle that will be updated + * @param parts Particle array to be interacted + * @param count No. of particles to be interacted + * @param serial_inter_func Serial interaction function to be called + * @param vec_inter_func Vectorised interaction function to be called + * @param runs No. of times to call interactions + * + */ +void test_interactions(struct part test_part, struct part *parts, size_t count, + char *filePrefix, int runs) { + + ticks serial_time = 0; +#ifdef WITH_VECTORIZATION + ticks vec_time = 0; +#endif + + FILE *file; + char serial_filename[200] = ""; + char vec_filename[200] = ""; + + strcpy(serial_filename, filePrefix); + strcpy(vec_filename, filePrefix); + sprintf(serial_filename + strlen(serial_filename), "_serial.dat"); + sprintf(vec_filename + strlen(vec_filename), "_vec.dat"); + + write_header(serial_filename); + write_header(vec_filename); + + struct part pi_serial, pi_vec; + struct part pj_serial[count], pj_vec[count]; + + float r2[count] __attribute__((aligned(array_align))); + float dx[3 * count] __attribute__((aligned(array_align))); + +#ifdef WITH_VECTORIZATION + struct part *piq[count], *pjq[count]; + for (size_t k = 0; k < count; k++) { + piq[k] = NULL; + pjq[k] = NULL; + } + + float r2q[count] __attribute__((aligned(array_align))); + float hiq[count] __attribute__((aligned(array_align))); + float dxq[count] 
__attribute__((aligned(array_align))); + + float dyq[count] __attribute__((aligned(array_align))); + float dzq[count] __attribute__((aligned(array_align))); + float mjq[count] __attribute__((aligned(array_align))); + float vixq[count] __attribute__((aligned(array_align))); + float viyq[count] __attribute__((aligned(array_align))); + float vizq[count] __attribute__((aligned(array_align))); + float vjxq[count] __attribute__((aligned(array_align))); + float vjyq[count] __attribute__((aligned(array_align))); + float vjzq[count] __attribute__((aligned(array_align))); +#endif + + /* Call serial interaction a set number of times. */ + for (int k = 0; k < runs; k++) { + /* Reset particle to initial setup */ + pi_serial = test_part; + for (size_t i = 0; i < count; i++) pj_serial[i] = parts[i]; + + /* Only dump data on first run. */ + if (k == 0) { + /* Dump state of particles before serial interaction. */ + dump_indv_particle_fields(serial_filename, &pi_serial); + for (size_t i = 0; i < count; i++) + dump_indv_particle_fields(serial_filename, &pj_serial[i]); + } + + /* Perform serial interaction */ + for (size_t i = 0; i < count; i++) { + /* Compute the pairwise distance. */ + r2[i] = 0.0f; + for (int k = 0; k < 3; k++) { + int ind = (3 * i) + k; + dx[ind] = pi_serial.x[k] - pj_serial[i].x[k]; + r2[i] += dx[ind] * dx[ind]; + } + } + + const ticks tic = getticks(); +/* Perform serial interaction */ +#ifdef __ICC +#pragma novector +#endif + for (size_t i = 0; i < count; i++) { + IACT(r2[i], &(dx[3 * i]), pi_serial.h, pj_serial[i].h, &pi_serial, + &pj_serial[i]); + } + serial_time += getticks() - tic; + } + + file = fopen(serial_filename, "a"); + fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n"); + fclose(file); + + /* Dump result of serial interaction. */ + dump_indv_particle_fields(serial_filename, &pi_serial); + for (size_t i = 0; i < count; i++) + dump_indv_particle_fields(serial_filename, &pj_serial[i]); + + /* Call vector interaction a set number of times. 
*/ + for (int k = 0; k < runs; k++) { + /* Reset particle to initial setup */ + pi_vec = test_part; + for (size_t i = 0; i < count; i++) pj_vec[i] = parts[i]; + + /* Setup arrays for vector interaction. */ + for (size_t i = 0; i < count; i++) { + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pi_vec.x[k] - pj_vec[i].x[k]; + r2 += dx[k] * dx[k]; + } + +#ifdef WITH_VECTORIZATION + r2q[i] = r2; + dxq[i] = dx[0]; + hiq[i] = pi_vec.h; + piq[i] = &pi_vec; + pjq[i] = &pj_vec[i]; + + dyq[i] = dx[1]; + dzq[i] = dx[2]; + mjq[i] = pj_vec[i].mass; + vixq[i] = pi_vec.v[0]; + viyq[i] = pi_vec.v[1]; + vizq[i] = pi_vec.v[2]; + vjxq[i] = pj_vec[i].v[0]; + vjyq[i] = pj_vec[i].v[1]; + vjzq[i] = pj_vec[i].v[2]; +#endif + } + + /* Only dump data on first run. */ + if (k == 0) { +#ifdef WITH_VECTORIZATION + /* Dump state of particles before vector interaction. */ + dump_indv_particle_fields(vec_filename, piq[0]); + for (size_t i = 0; i < count; i++) + dump_indv_particle_fields(vec_filename, pjq[i]); +#endif + } + +/* Perform vector interaction. 
*/ +#ifdef WITH_VECTORIZATION + vector hi_vec, hi_inv_vec, vix_vec, viy_vec, viz_vec, mask, mask2; + vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum, + curlvySum, curlvzSum; + + rhoSum.v = vec_set1(0.f); + rho_dhSum.v = vec_set1(0.f); + wcountSum.v = vec_set1(0.f); + wcount_dhSum.v = vec_set1(0.f); + div_vSum.v = vec_set1(0.f); + curlvxSum.v = vec_set1(0.f); + curlvySum.v = vec_set1(0.f); + curlvzSum.v = vec_set1(0.f); + + hi_vec.v = vec_load(&hiq[0]); + vix_vec.v = vec_load(&vixq[0]); + viy_vec.v = vec_load(&viyq[0]); + viz_vec.v = vec_load(&vizq[0]); + + hi_inv_vec = vec_reciprocal(hi_vec); + mask.m = vec_setint1(0xFFFFFFFF); + mask2.m = vec_setint1(0xFFFFFFFF); + +#ifdef HAVE_AVX512_F + KNL_MASK_16 knl_mask, knl_mask2; + knl_mask = 0xFFFF; + knl_mask2 = 0xFFFF; +#endif + + const ticks vec_tic = getticks(); + + for (size_t i = 0; i < count; i += 2 * VEC_SIZE) { + + IACT_VEC(&(r2q[i]), &(dxq[i]), &(dyq[i]), &(dzq[i]), (hi_inv_vec), + (vix_vec), (viy_vec), (viz_vec), &(vjxq[i]), &(vjyq[i]), + &(vjzq[i]), &(mjq[i]), &rhoSum, &rho_dhSum, &wcountSum, + &wcount_dhSum, &div_vSum, &curlvxSum, &curlvySum, &curlvzSum, + mask, mask2, +#ifdef HAVE_AVX512_F + knl_mask, knl_mask2); +#else + 0, 0); +#endif + } + + VEC_HADD(rhoSum, piq[0]->rho); + VEC_HADD(rho_dhSum, piq[0]->density.rho_dh); + VEC_HADD(wcountSum, piq[0]->density.wcount); + VEC_HADD(wcount_dhSum, piq[0]->density.wcount_dh); + VEC_HADD(div_vSum, piq[0]->density.div_v); + VEC_HADD(curlvxSum, piq[0]->density.rot_v[0]); + VEC_HADD(curlvySum, piq[0]->density.rot_v[1]); + VEC_HADD(curlvzSum, piq[0]->density.rot_v[2]); + + vec_time += getticks() - vec_tic; +#endif + } + + file = fopen(vec_filename, "a"); + fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n"); + fclose(file); + +#ifdef WITH_VECTORIZATION + /* Dump result of serial interaction. 
*/ + dump_indv_particle_fields(vec_filename, piq[0]); + for (size_t i = 0; i < count; i++) + dump_indv_particle_fields(vec_filename, pjq[i]); +#endif + +#ifdef WITH_VECTORIZATION + /* Check serial results against the vectorised results. */ + if (check_results(pi_serial, pj_serial, pi_vec, pj_vec, count)) + message("Differences found..."); +#endif + + message("The serial interactions took : %15lli ticks.", + serial_time / runs); +#ifdef WITH_VECTORIZATION + message("The vectorised interactions took : %15lli ticks.", vec_time / runs); + message("Speed up: %15fx.", (double)(serial_time) / vec_time); +#endif +} + +/* And go... */ +int main(int argc, char *argv[]) { + size_t runs = 10000; + double h = 1.0, spacing = 0.5; + double offset[3] = {0.0, 0.0, 0.0}; + size_t count = 256; + + /* Get some randomness going */ + srand(0); + + char c; + while ((c = getopt(argc, argv, "h:s:n:r:")) != -1) { + switch (c) { + case 'h': + sscanf(optarg, "%lf", &h); + break; + case 's': + sscanf(optarg, "%lf", &spacing); + case 'n': + sscanf(optarg, "%zu", &count); + break; + case 'r': + sscanf(optarg, "%zu", &runs); + break; + case '?': + error("Unknown option."); + break; + } + } + + if (h < 0 || spacing < 0) { + printf( + "\nUsage: %s [OPTIONS...]\n" + "\nGenerates a particle array with equal particle separation." + "\nThese are then interacted using runner_iact_density and " + "runner_iact_vec_density." + "\n\nOptions:" + "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>" + "\n-s SPACING=0.5 - Spacing between particles" + "\n-n NUMBER=9 - No. of particles", + argv[0]); + exit(1); + } + + /* Correct count so that VEC_SIZE of particles interact with the test + * particle. 
*/ + count = count - (count % VEC_SIZE) + 1; + + /* Build the infrastructure */ + static long long partId = 0; + struct part test_particle; + struct part *particles = make_particles(count, offset, spacing, h, &partId); + +#if defined(NONSYM_FORCE) || defined(SYM_FORCE) + prepare_force(particles, count); +#endif + + test_particle = particles[0]; + /* Call the non-sym density test. */ + message("Testing %s interaction...", IACT_NAME); + test_interactions(test_particle, &particles[1], count - 1, IACT_NAME, runs); + + return 0; +} diff --git a/tests/test125cells.c b/tests/test125cells.c index 3ae80d952f78e8f50235cf38af493501c1c97634..91b1cf6dc3b321643aae1f4eec6bd3d7abb48350 100644 --- a/tests/test125cells.c +++ b/tests/test125cells.c @@ -272,8 +272,7 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, hydro_first_init_part(part, xpart); part->id = ++(*partId); - part->ti_begin = 0; - part->ti_end = 1; + part->time_bin = 1; #if defined(GIZMO_SPH) part->geometry.volume = part->conserved.mass / density; @@ -292,6 +291,11 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, part->conserved.mass; #endif +#ifdef SWIFT_DEBUG_CHECKS + part->ti_drift = 8; + part->ti_kick = 8; +#endif + ++part; ++xpart; } @@ -311,9 +315,9 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, cell->loc[1] = offset[1]; cell->loc[2] = offset[2]; - cell->ti_old = 1; - cell->ti_end_min = 1; - cell->ti_end_max = 1; + cell->ti_old = 8; + cell->ti_end_min = 8; + cell->ti_end_max = 8; // shuffle_particles(cell->parts, cell->count); @@ -364,10 +368,10 @@ void dump_particle_fields(char *fileName, struct cell *main_cell, #else main_cell->parts[pid].density.div_v, #endif - hydro_get_entropy(&main_cell->parts[pid], 0.f), - hydro_get_internal_energy(&main_cell->parts[pid], 0.f), - hydro_get_pressure(&main_cell->parts[pid], 0.f), - hydro_get_soundspeed(&main_cell->parts[pid], 0.f), + 
hydro_get_entropy(&main_cell->parts[pid]), + hydro_get_internal_energy(&main_cell->parts[pid]), + hydro_get_pressure(&main_cell->parts[pid]), + hydro_get_soundspeed(&main_cell->parts[pid]), main_cell->parts[pid].a_hydro[0], main_cell->parts[pid].a_hydro[1], main_cell->parts[pid].a_hydro[2], main_cell->parts[pid].force.h_dt, #if defined(GADGET2_SPH) @@ -527,7 +531,7 @@ int main(int argc, char *argv[]) { engine.physical_constants = &prog_const; engine.s = &space; engine.time = 0.1f; - engine.ti_current = 1; + engine.ti_current = 8; struct runner runner; runner.e = &engine; @@ -572,6 +576,12 @@ int main(int argc, char *argv[]) { const ticks tic = getticks(); + /* Start with a gentle kick */ + // runner_do_kick1(&runner, main_cell, 0); + + /* And a gentle drift */ + // runner_do_drift(&runner, main_cell, 0); + /* First, sort stuff */ for (int j = 0; j < 125; ++j) runner_do_sort(&runner, cells[j], 0x1FFF, 0); @@ -640,7 +650,8 @@ int main(int argc, char *argv[]) { #endif /* Finally, give a gentle kick */ - runner_do_kick(&runner, main_cell, 0); + runner_do_end_force(&runner, main_cell, 0); + // runner_do_kick2(&runner, main_cell, 0); const ticks toc = getticks(); time += toc - tic; @@ -663,6 +674,12 @@ int main(int argc, char *argv[]) { const ticks tic = getticks(); + /* Kick the central cell */ + // runner_do_kick1(&runner, main_cell, 0); + + /* And drift it */ + runner_do_drift(&runner, main_cell, 0); + /* Initialise the particles */ for (int j = 0; j < 125; ++j) runner_do_init(&runner, cells[j], 0); @@ -728,7 +745,8 @@ int main(int argc, char *argv[]) { #endif /* Finally, give a gentle kick */ - runner_do_kick(&runner, main_cell, 0); + runner_do_end_force(&runner, main_cell, 0); + // runner_do_kick2(&runner, main_cell, 0); const ticks toc = getticks(); diff --git a/tests/test27cells.c b/tests/test27cells.c index f58b4dc410637f3d91369dab1b442de0b7044c08..929a148d1f5730b63de79e9a1ab7e25f1ca7311e 100644 --- a/tests/test27cells.c +++ b/tests/test27cells.c @@ -30,6 +30,18 
@@ /* Local headers. */ #include "swift.h" +#define ACC_THRESHOLD 1e-5 + +#if defined(WITH_VECTORIZATION) +#define DOSELF1 runner_doself1_density_vec +#define DOSELF1_NAME "runner_doself1_density_vec" +#endif + +#ifndef DOSELF1 +#define DOSELF1 runner_doself1_density +#define DOSELF1_NAME "runner_doself1_density" +#endif + enum velocity_types { velocity_zero, velocity_random, @@ -116,8 +128,13 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, part->entropy_one_over_gamma = 1.f; #endif - part->ti_begin = 0; - part->ti_end = 1; + part->time_bin = 1; + +#ifdef SWIFT_DEBUG_CHECKS + part->ti_drift = 8; + part->ti_kick = 8; +#endif + ++part; } } @@ -135,8 +152,9 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->loc[1] = offset[1]; cell->loc[2] = offset[2]; - cell->ti_end_min = 1; - cell->ti_end_max = 1; + cell->ti_old = 8; + cell->ti_end_min = 8; + cell->ti_end_max = 8; shuffle_particles(cell->parts, cell->count); @@ -254,15 +272,40 @@ void dump_particle_fields(char *fileName, struct cell *main_cell, fclose(file); } +/** + * @brief Compares the vectorised result against + * the serial result of the interaction. + * + * @param serial_parts Particle array that has been interacted serially + * @param vec_parts Particle array to be interacted using vectors + * @param count No. of particles that have been interacted + * @param threshold Level of accuracy needed + * + * @return Non-zero value if difference found, 0 otherwise + */ +int check_results(struct part *serial_parts, struct part *vec_parts, int count, + double threshold) { + int result = 0; + + for (int i = 0; i < count; i++) + result += compare_particles(serial_parts[i], vec_parts[i], threshold); + + return result; +} + /* Just a forward declaration... 
*/ void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj); void runner_doself1_density(struct runner *r, struct cell *ci); +void runner_doself1_density_vec(struct runner *r, struct cell *ci); /* And go... */ int main(int argc, char *argv[]) { + + engine_pin(); size_t runs = 0, particles = 0; double h = 1.23485, size = 1., rho = 1.; double perturbation = 0.; + double threshold = ACC_THRESHOLD; char outputFileNameExtension[200] = ""; char outputFileName[200] = ""; enum velocity_types vel = velocity_zero; @@ -278,7 +321,7 @@ int main(int argc, char *argv[]) { srand(0); char c; - while ((c = getopt(argc, argv, "m:s:h:n:r:t:d:f:v:")) != -1) { + while ((c = getopt(argc, argv, "m:s:h:n:r:t:d:f:v:a:")) != -1) { switch (c) { case 'h': sscanf(optarg, "%lf", &h); @@ -304,6 +347,9 @@ int main(int argc, char *argv[]) { case 'v': sscanf(optarg, "%d", (int *)&vel); break; + case 'a': + sscanf(optarg, "%lf", &threshold); + break; case '?': error("Unknown option."); break; @@ -329,6 +375,8 @@ int main(int argc, char *argv[]) { } /* Help users... 
*/ + message("Function called: %s", DOSELF1_NAME); + message("Vector size: %d", VEC_SIZE); message("Adiabatic index: ga = %f", hydro_gamma); message("Hydro implementation: %s", SPH_IMPLEMENTATION); message("Smoothing length: h = %f", h * size); @@ -347,7 +395,7 @@ int main(int argc, char *argv[]) { struct engine engine; engine.s = &space; engine.time = 0.1f; - engine.ti_current = 1; + engine.ti_current = 8; struct runner runner; runner.e = &engine; @@ -371,6 +419,9 @@ int main(int argc, char *argv[]) { /* Store the main cell for future use */ main_cell = cells[13]; + ticks timings[27]; + for (int i = 0; i < 27; i++) timings[i] = 0; + ticks time = 0; for (size_t i = 0; i < runs; ++i) { /* Zero the fields */ @@ -381,12 +432,30 @@ int main(int argc, char *argv[]) { #if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION)) /* Run all the pairs */ - for (int j = 0; j < 27; ++j) - if (cells[j] != main_cell) + for (int j = 0; j < 27; ++j) { + if (cells[j] != main_cell) { + const ticks sub_tic = getticks(); + runner_dopair1_density(&runner, main_cell, cells[j]); - /* And now the self-interaction */ - runner_doself1_density(&runner, main_cell); + const ticks sub_toc = getticks(); + timings[j] += sub_toc - sub_tic; + } + } + +/* And now the self-interaction */ +#ifdef WITH_VECTORIZATION + runner.par_cache.count = 0; + cache_init(&runner.par_cache, 512); +#endif + + const ticks self_tic = getticks(); + + DOSELF1(&runner, main_cell); + + const ticks self_toc = getticks(); + + timings[13] += self_toc - self_tic; #endif @@ -404,8 +473,26 @@ int main(int argc, char *argv[]) { } } + /* Store the vectorised particle results. 
*/ + struct part vec_parts[main_cell->count]; + for (int i = 0; i < main_cell->count; i++) vec_parts[i] = main_cell->parts[i]; + /* Output timing */ - message("SWIFT calculation took : %15lli ticks.", time / runs); + ticks corner_time = timings[0] + timings[2] + timings[6] + timings[8] + + timings[18] + timings[20] + timings[24] + timings[26]; + + ticks edge_time = timings[1] + timings[3] + timings[5] + timings[7] + + timings[9] + timings[11] + timings[15] + timings[17] + + timings[19] + timings[21] + timings[23] + timings[25]; + + ticks face_time = timings[4] + timings[10] + timings[12] + timings[14] + + timings[16] + timings[22]; + + message("Corner calculations took : %15lli ticks.", corner_time / runs); + message("Edge calculations took : %15lli ticks.", edge_time / runs); + message("Face calculations took : %15lli ticks.", face_time / runs); + message("Self calculations took : %15lli ticks.", timings[13] / runs); + message("SWIFT calculation took : %15lli ticks.", time / runs); /* Now perform a brute-force version for accuracy tests */ @@ -434,6 +521,10 @@ int main(int argc, char *argv[]) { sprintf(outputFileName, "brute_force_27_%s.dat", outputFileNameExtension); dump_particle_fields(outputFileName, main_cell, cells); + /* Check serial results against the vectorised results. 
*/ + if (check_results(main_cell->parts, vec_parts, main_cell->count, threshold)) + message("Differences found..."); + /* Output timing */ message("Brute force calculation took : %15lli ticks.", toc - tic); diff --git a/tests/test27cells.sh.in b/tests/test27cells.sh.in index bf9cfeaf9a70790a321fa7ec4c63983d8cfd866c..07b6b92a82cee2bbe9c593f8f62e750d4406f84e 100755 --- a/tests/test27cells.sh.in +++ b/tests/test27cells.sh.in @@ -6,7 +6,7 @@ do rm -f brute_force_27_standard.dat swift_dopair_27_standard.dat - ./test27cells -n 6 -r 1 -d 0 -f standard -v $v + ./test27cells -n 6 -r 1 -d 0 -f standard -v $v -a 1e-4 if [ -e brute_force_27_standard.dat ] then diff --git a/tests/test27cellsPerturbed.sh.in b/tests/test27cellsPerturbed.sh.in index 3cdaf79ab17e705ec69a0b646949cc5a71109796..30498594b659101216b51dfea2346fa9230dbc97 100755 --- a/tests/test27cellsPerturbed.sh.in +++ b/tests/test27cellsPerturbed.sh.in @@ -6,7 +6,7 @@ do rm -f brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat - ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v + ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v -a 5e-4 if [ -e brute_force_27_perturbed.dat ] then diff --git a/tests/testDump.c b/tests/testDump.c new file mode 100644 index 0000000000000000000000000000000000000000..ab74a1b1f022761efedf5258a20c525fcef47bd6 --- /dev/null +++ b/tests/testDump.c @@ -0,0 +1,84 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +/* This object's header. */ +#include "../src/dump.h" + +/* Local headers. */ +#include "../src/threadpool.h" + +/** + * @brief Threadpool mapper that writes one 7-byte record to the dump. + * + * Requests 7 bytes from the dump with dump_get() and fills them with the + * record index (returned offset / 7) as six zero-padded digits followed by a + * newline. + * + * @param map_data Unused. + * @param num_elements Unused. + * @param extra_data Pointer to the #dump to write into. + */ +void dump_mapper(void *map_data, int num_elements, void *extra_data) { + struct dump *d = (struct dump *)extra_data; + size_t offset; + char *out_string = dump_get(d, 7, &offset); + char out_buff[8]; + /* offset is a size_t, so use the unsigned %zu conversion. */ + snprintf(out_buff, sizeof(out_buff), "%06zu\n", offset / 7); + /* Copy the 7 payload bytes only; the terminating NUL is not stored. */ + memcpy(out_string, out_buff, 7); +} + +int main(int argc, char *argv[]) { + + /* Some constants. */ + const int num_threads = 4; + const char *filename = "/tmp/dump_test.out"; + const int num_runs = 20; + const int chunk_size = 1000; + + /* Prepare a threadpool to write to the dump. */ + struct threadpool t; + threadpool_init(&t, num_threads); + + /* Prepare a dump. */ + struct dump d; + dump_init(&d, filename, 1024); + + /* Dump numbers in chunks. */ + for (int run = 0; run < num_runs; run++) { + + /* Ensure capacity. */ + dump_ensure(&d, 7 * chunk_size); + + /* Dump a few numbers. */ + printf("dumping %i chunks...\n", chunk_size); + fflush(stdout); + threadpool_map(&t, dump_mapper, NULL, chunk_size, 0, 1, &d); + } + + /* Sync the file, not necessary before dump_close, but just to test this. */ + dump_sync(&d); + + /* Finalize the dump.
*/ + dump_close(&d); + + /* Return a happy number. */ + return 0; +} diff --git a/tests/testInteractions.c b/tests/testInteractions.c index d14c840ec77819bbef5750b897c72139f4d7b2b4..4ce7fe40554d24551750629fa47c0bee7acdb6da 100644 --- a/tests/testInteractions.c +++ b/tests/testInteractions.c @@ -30,6 +30,9 @@ int main() { return 0; } #include <unistd.h> #include "swift.h" +#define array_align sizeof(float) * VEC_SIZE +#define ACC_THRESHOLD 1e-5 + /* Typdef function pointers for serial and vectorised versions of the * interaction functions. */ typedef void (*serial_interaction)(float, float *, float, float, struct part *, @@ -48,8 +51,8 @@ typedef void (*vec_interaction)(float *, float *, float *, float *, *separation. * @param partId The running counter of IDs. */ -struct part *make_particles(int count, double *offset, double spacing, double h, - long long *partId) { +struct part *make_particles(size_t count, double *offset, double spacing, + double h, long long *partId) { struct part *particles; if (posix_memalign((void **)&particles, part_align, @@ -60,11 +63,28 @@ struct part *make_particles(int count, double *offset, double spacing, double h, /* Construct the particles */ struct part *p; - for (size_t i = 0; i < VEC_SIZE + 1; ++i) { + + /* Set test particle at centre of unit sphere. */ + p = &particles[0]; + + /* Place the test particle at the centre of a unit sphere. */ + p->x[0] = 0.0f; + p->x[1] = 0.0f; + p->x[2] = 0.0f; + + p->h = h; + p->id = ++(*partId); + p->mass = 1.0f; + + /* Place rest of particles around the test particle + * with random position within a unit sphere. */ + for (size_t i = 1; i < count; ++i) { p = &particles[i]; - p->x[0] = offset[0] + spacing * i; - p->x[1] = offset[1] + spacing * i; - p->x[2] = offset[2] + spacing * i; + + /* Randomise positions within a unit sphere. */ + p->x[0] = random_uniform(-1.0, 1.0); + p->x[1] = random_uniform(-1.0, 1.0); + p->x[2] = random_uniform(-1.0, 1.0); /* Randomise velocities. 
*/ p->v[0] = random_uniform(-0.05, 0.05); @@ -81,20 +101,17 @@ struct part *make_particles(int count, double *offset, double spacing, double h, /** * @brief Populates particle properties needed for the force calculation. */ -void prepare_force(struct part *parts) { +void prepare_force(struct part *parts, size_t count) { struct part *p; - for (size_t i = 0; i < VEC_SIZE + 1; ++i) { + for (size_t i = 0; i < count; ++i) { p = &parts[i]; p->rho = i + 1; -#if defined(GADGET2_SPH) - p->force.balsara = i + 1; - p->force.P_over_rho2 = i + 1; -#elif defined(DEFAULT_SPH) - p->force.balsara = i + 1; + p->force.balsara = random_uniform(0.0, 1.0); p->force.P_over_rho2 = i + 1; -#else -#endif + p->force.soundspeed = random_uniform(2.0, 3.0); + p->force.v_sig = 0.0f; + p->force.h_dt = 0.0f; } } @@ -106,7 +123,7 @@ void dump_indv_particle_fields(char *fileName, struct part *p) { FILE *file = fopen(fileName, "a"); fprintf(file, - "%6llu %10f %10f %10f %10f %10f %10f %10f %10f %10f %13e %13e %13e " + "%6llu %10f %10f %10f %10f %10f %10f %10e %10e %10e %13e %13e %13e " "%13e %13e %13e %13e " "%13e %13e %13e %10f\n", p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], @@ -120,7 +137,8 @@ void dump_indv_particle_fields(char *fileName, struct part *p) { p->density.div_v, p->density.rot_v[0], p->density.rot_v[1], p->density.rot_v[2], 0. #else - 0., 0., 0., 0., 0. + p->density.div_v, p->density.rot_v[0], p->density.rot_v[1], + p->density.rot_v[2] #endif ); fclose(file); @@ -140,24 +158,52 @@ void write_header(char *fileName) { "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "a_x", "a_y", "a_z", "rho", "rho_dh", "wcount", "wcount_dh", "dh/dt", "v_sig", "div_v", "curl_vx", "curl_vy", "curl_vz", "dS/dt"); - fprintf(file, "\nPARTICLES BEFORE INTERACTION:\n"); + fprintf(file, "\n# PARTICLES BEFORE INTERACTION:\n"); fclose(file); } /** - * @brief Calls the serial and vectorised version of the non-symmetrical density - * interaction. 
+ * @brief Compares the vectorised result against + * the serial result of the interaction. + * + * @param serial_test_part Particle that has been updated serially + * @param serial_parts Particle array that has been interacted serially + * @param vec_test_part Particle that has been updated using vectors + * @param vec_parts Particle array to be interacted using vectors + * @param count No. of particles that have been interacted + * + * @return Non-zero value if difference found, 0 otherwise + */ +int check_results(struct part serial_test_part, struct part *serial_parts, + struct part vec_test_part, struct part *vec_parts, + int count) { + int result = 0; + result += compare_particles(serial_test_part, vec_test_part, ACC_THRESHOLD); + + for (int i = 0; i < count; i++) + result += compare_particles(serial_parts[i], vec_parts[i], ACC_THRESHOLD); + + return result; +} + +/* + * @brief Calls the serial and vectorised version of an interaction + * function given by the function pointers. * + * @param test_part Particle that will be updated * @param parts Particle array to be interacted * @param count No. of particles to be interacted + * @param serial_inter_func Serial interaction function to be called + * @param vec_inter_func Vectorised interaction function to be called + * @param runs No. of times to call interactions * */ -void test_interactions(struct part *parts, int count, +void test_interactions(struct part test_part, struct part *parts, size_t count, serial_interaction serial_inter_func, - vec_interaction vec_inter_func, char *filePrefix) { + vec_interaction vec_inter_func, char *filePrefix, + size_t runs) { - /* Use the first particle in the array as the one that gets updated. 
*/ - struct part pi = parts[0]; + ticks serial_time = 0, vec_time = 0; FILE *file; char serial_filename[200] = ""; @@ -171,98 +217,148 @@ void test_interactions(struct part *parts, int count, write_header(serial_filename); write_header(vec_filename); - /* Dump state of particles before serial interaction. */ - dump_indv_particle_fields(serial_filename, &pi); - for (int i = 1; i < count; i++) - dump_indv_particle_fields(serial_filename, &parts[i]); - - /* Make copy of pi to be used in vectorised version. */ - struct part pi_vec = pi; - struct part pj_vec[VEC_SIZE]; - for (int i = 0; i < VEC_SIZE; i++) pj_vec[i] = parts[i + 1]; - - float r2q[VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE))); - float hiq[VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE))); - float hjq[VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE))); - float dxq[3 * VEC_SIZE] __attribute__((aligned(sizeof(float) * VEC_SIZE))); - struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; - - /* Perform serial interaction */ - for (int i = 1; i < count; i++) { - /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pi.x[k] - parts[i].x[k]; - r2 += dx[k] * dx[k]; + /* Test particle at the center of a unit sphere. */ + struct part pi_serial, pi_vec; + + /* Remaining particles in the sphere that will interact with test particle. */ + struct part pj_serial[count], pj_vec[count]; + + /* Stores the separation, smoothing length and pointers to particles + * needed for the vectorised interaction. */ + float r2q[count] __attribute__((aligned(array_align))); + float hiq[count] __attribute__((aligned(array_align))); + float hjq[count] __attribute__((aligned(array_align))); + float dxq[3 * count] __attribute__((aligned(array_align))); + struct part *piq[count], *pjq[count]; + + /* Call serial interaction a set number of times. 
*/ + for (size_t k = 0; k < runs; k++) { + /* Reset particle to initial setup */ + pi_serial = test_part; + for (size_t i = 0; i < count; i++) pj_serial[i] = parts[i]; + + /* Only dump data on first run. */ + if (k == 0) { + /* Dump state of particles before serial interaction. */ + dump_indv_particle_fields(serial_filename, &pi_serial); + for (size_t i = 0; i < count; i++) + dump_indv_particle_fields(serial_filename, &pj_serial[i]); } - serial_inter_func(r2, dx, pi.h, parts[i].h, &pi, &parts[i]); + /* Perform serial interaction */ + for (size_t i = 0; i < count; i++) { + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (size_t k = 0; k < 3; k++) { + dx[k] = pi_serial.x[k] - pj_serial[i].x[k]; + r2 += dx[k] * dx[k]; + } + + const ticks tic = getticks(); + + serial_inter_func(r2, dx, pi_serial.h, pj_serial[i].h, &pi_serial, + &pj_serial[i]); + + serial_time += getticks() - tic; + } } file = fopen(serial_filename, "a"); - fprintf(file, "\nPARTICLES AFTER INTERACTION:\n"); + fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n"); fclose(file); /* Dump result of serial interaction. */ - dump_indv_particle_fields(serial_filename, &pi); - for (int i = 1; i < count; i++) - dump_indv_particle_fields(serial_filename, &parts[i]); - - /* Setup arrays for vector interaction. */ - for (int i = 0; i < VEC_SIZE; i++) { - /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pi_vec.x[k] - pj_vec[i].x[k]; - r2 += dx[k] * dx[k]; + dump_indv_particle_fields(serial_filename, &pi_serial); + for (size_t i = 0; i < count; i++) + dump_indv_particle_fields(serial_filename, &pj_serial[i]); + + /* Call vector interaction a set number of times. */ + for (size_t k = 0; k < runs; k++) { + /* Reset particle to initial setup */ + pi_vec = test_part; + for (size_t i = 0; i < count; i++) pj_vec[i] = parts[i]; + + /* Setup arrays for vector interaction. 
*/ + for (size_t i = 0; i < count; i++) { + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (size_t k = 0; k < 3; k++) { + dx[k] = pi_vec.x[k] - pj_vec[i].x[k]; + r2 += dx[k] * dx[k]; + } + + r2q[i] = r2; + dxq[3 * i + 0] = dx[0]; + dxq[3 * i + 1] = dx[1]; + dxq[3 * i + 2] = dx[2]; + hiq[i] = pi_vec.h; + hjq[i] = pj_vec[i].h; + piq[i] = &pi_vec; + pjq[i] = &pj_vec[i]; } - r2q[i] = r2; - dxq[3 * i + 0] = dx[0]; - dxq[3 * i + 1] = dx[1]; - dxq[3 * i + 2] = dx[2]; - hiq[i] = pi_vec.h; - hjq[i] = pj_vec[i].h; - piq[i] = &pi_vec; - pjq[i] = &pj_vec[i]; - } - /* Dump state of particles before vector interaction. */ - dump_indv_particle_fields(vec_filename, piq[0]); - for (size_t i = 0; i < VEC_SIZE; i++) - dump_indv_particle_fields(vec_filename, pjq[i]); + /* Only dump data on first run. */ + if (k == 0) { + /* Dump state of particles before vector interaction. */ + dump_indv_particle_fields(vec_filename, piq[0]); + for (size_t i = 0; i < count; i++) + dump_indv_particle_fields(vec_filename, pjq[i]); + } - /* Perform vector interaction. */ - vec_inter_func(r2q, dxq, hiq, hjq, piq, pjq); + const ticks vec_tic = getticks(); + + /* Perform vector interaction. */ + for (size_t i = 0; i < count; i += VEC_SIZE) { + vec_inter_func(&(r2q[i]), &(dxq[3 * i]), &(hiq[i]), &(hjq[i]), &(piq[i]), + &(pjq[i])); + } + + vec_time += getticks() - vec_tic; + } file = fopen(vec_filename, "a"); - fprintf(file, "\nPARTICLES AFTER INTERACTION:\n"); + fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n"); fclose(file); - /* Dump result of serial interaction. */ + /* Dump result of vector interaction. */ dump_indv_particle_fields(vec_filename, piq[0]); - for (size_t i = 0; i < VEC_SIZE; i++) + for (size_t i = 0; i < count; i++) dump_indv_particle_fields(vec_filename, pjq[i]); + + /* Check serial results against the vectorised results. 
*/ + if (check_results(pi_serial, pj_serial, pi_vec, pj_vec, count)) + message("Differences found..."); + + message("The serial interactions took : %15lli ticks.", + serial_time / runs); + message("The vectorised interactions took : %15lli ticks.", vec_time / runs); } /* And go... */ int main(int argc, char *argv[]) { - double h = 1.2348, spacing = 0.5; + size_t runs = 10000; + double h = 1.0, spacing = 0.5; double offset[3] = {0.0, 0.0, 0.0}; - int count = VEC_SIZE + 1; + size_t count = 256; /* Get some randomness going */ srand(0); char c; - while ((c = getopt(argc, argv, "s:h:")) != -1) { + while ((c = getopt(argc, argv, "h:s:n:r:")) != -1) { switch (c) { case 'h': sscanf(optarg, "%lf", &h); break; case 's': sscanf(optarg, "%lf", &spacing); + case 'n': + sscanf(optarg, "%zu", &count); + break; + case 'r': + sscanf(optarg, "%zu", &runs); break; case '?': error("Unknown option."); @@ -278,26 +374,35 @@ int main(int argc, char *argv[]) { "runner_iact_vec_density." "\n\nOptions:" "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>" - "\n-s spacing - Spacing between particles", + "\n-s SPACING=0.5 - Spacing between particles" + "\n-n NUMBER=9 - No. of particles", argv[0]); exit(1); } + /* Correct count so that VEC_SIZE of particles interact with the test + * particle. */ + count = count - (count % VEC_SIZE) + 1; + /* Build the infrastructure */ static long long partId = 0; + struct part density_test_particle, force_test_particle; struct part *density_particles = make_particles(count, offset, spacing, h, &partId); struct part *force_particles = make_particles(count, offset, spacing, h, &partId); - prepare_force(force_particles); + prepare_force(force_particles, count); /* Define which interactions to call */ serial_interaction serial_inter_func = &runner_iact_nonsym_density; vec_interaction vec_inter_func = &runner_iact_nonsym_vec_density; + density_test_particle = density_particles[0]; /* Call the non-sym density test. 
*/ - test_interactions(density_particles, count, serial_inter_func, vec_inter_func, - "test_nonsym_density"); + message("Testing non-symmetrical density interaction..."); + test_interactions(density_test_particle, &density_particles[1], count - 1, + serial_inter_func, vec_inter_func, "test_nonsym_density", + runs); density_particles = make_particles(count, offset, spacing, h, &partId); @@ -305,28 +410,36 @@ int main(int argc, char *argv[]) { serial_inter_func = &runner_iact_density; vec_inter_func = &runner_iact_vec_density; + density_test_particle = density_particles[0]; /* Call the symmetrical density test. */ - test_interactions(density_particles, count, serial_inter_func, vec_inter_func, - "test_sym_density"); + message("Testing symmetrical density interaction..."); + test_interactions(density_test_particle, &density_particles[1], count - 1, + serial_inter_func, vec_inter_func, "test_sym_density", + runs); /* Re-assign function pointers. */ serial_inter_func = &runner_iact_nonsym_force; vec_inter_func = &runner_iact_nonsym_vec_force; + force_test_particle = force_particles[0]; /* Call the test non-sym force test. */ - test_interactions(force_particles, count, serial_inter_func, vec_inter_func, - "test_nonsym_force"); + message("Testing non-symmetrical force interaction..."); + test_interactions(force_test_particle, &force_particles[1], count - 1, + serial_inter_func, vec_inter_func, "test_nonsym_force", + runs); force_particles = make_particles(count, offset, spacing, h, &partId); - prepare_force(force_particles); + prepare_force(force_particles, count); /* Re-assign function pointers. */ serial_inter_func = &runner_iact_force; vec_inter_func = &runner_iact_vec_force; + force_test_particle = force_particles[0]; /* Call the test symmetrical force test. 
*/ - test_interactions(force_particles, count, serial_inter_func, vec_inter_func, - "test_sym_force"); + message("Testing symmetrical force interaction..."); + test_interactions(force_test_particle, &force_particles[1], count - 1, + serial_inter_func, vec_inter_func, "test_sym_force", runs); return 0; } diff --git a/tests/testLogger.c b/tests/testLogger.c new file mode 100644 index 0000000000000000000000000000000000000000..ec3b33b6a9e38741e41b4678681e7afe9b9a7950 --- /dev/null +++ b/tests/testLogger.c @@ -0,0 +1,247 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2017 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +/* This object's header. */ +#include "../src/logger.h" + +/* Local headers. */ +#include "../src/dump.h" +#include "../src/part.h" + +void test_log_parts(struct dump *d) { + + /* Write several copies of a part to the dump. */ + struct part p; + bzero(&p, sizeof(struct part)); + p.x[0] = 1.0; + p.v[0] = 0.1; + + /* Start with an offset at the end of the dump. 
*/ + size_t offset = d->count; + + /* Write the full part. */ + logger_log_part(&p, logger_mask_x | logger_mask_v | logger_mask_a | + logger_mask_u | logger_mask_h | logger_mask_rho | + logger_mask_consts, + &offset, d); + printf("Wrote part at offset %#016zx.\n", offset); + + /* Write only the position. */ + p.x[0] = 2.0; + logger_log_part(&p, logger_mask_x, &offset, d); + printf("Wrote part at offset %#016zx.\n", offset); + + /* Write the position and velocity. */ + p.x[0] = 3.0; + p.v[0] = 0.3; + logger_log_part(&p, logger_mask_x | logger_mask_v, &offset, d); + printf("Wrote part at offset %#016zx.\n", offset); + + /* Recover the last part from the dump. */ + bzero(&p, sizeof(struct part)); + size_t offset_old = offset; + int mask = logger_read_part(&p, &offset, d->data); + printf( + "Recovered part at offset %#016zx with mask %#04x: p.x[0]=%e, " + "p.v[0]=%e.\n", + offset_old, mask, p.x[0], p.v[0]); + if (p.x[0] != 3.0 || p.v[0] != 0.3f) { + printf("FAIL: could not read position and velocity of stored particle.\n"); + abort(); + } + + /* Recover the second part from the dump (only position). */ + bzero(&p, sizeof(struct part)); + offset_old = offset; + mask = logger_read_part(&p, &offset, d->data); + printf( + "Recovered part at offset %#016zx with mask %#04x: p.x[0]=%e, " + "p.v[0]=%e.\n", + offset_old, mask, p.x[0], p.v[0]); + if (p.x[0] != 2.0 || p.v[0] != 0.0) { + printf("FAIL: could not read position and velocity of stored particle.\n"); + abort(); + } + + /* Recover the first part from the dump. 
*/ + bzero(&p, sizeof(struct part)); + offset_old = offset; + mask = logger_read_part(&p, &offset, d->data); + printf( + "Recovered part at offset %#016zx with mask %#04x: p.x[0]=%e, " + "p.v[0]=%e.\n", + offset_old, mask, p.x[0], p.v[0]); + if (p.x[0] != 1.0 || p.v[0] != 0.1f) { + printf("FAIL: could not read position and velocity of stored particle.\n"); + abort(); + } +} + +void test_log_gparts(struct dump *d) { + + /* Write several copies of a part to the dump. */ + struct gpart p; + bzero(&p, sizeof(struct gpart)); + p.x[0] = 1.0; + p.v_full[0] = 0.1; + + /* Start with an offset at the end of the dump. */ + size_t offset = d->count; + + /* Write the full part. */ + logger_log_gpart(&p, logger_mask_x | logger_mask_v | logger_mask_a | + logger_mask_h | logger_mask_consts, + &offset, d); + printf("Wrote gpart at offset %#016zx.\n", offset); + + /* Write only the position. */ + p.x[0] = 2.0; + logger_log_gpart(&p, logger_mask_x, &offset, d); + printf("Wrote gpart at offset %#016zx.\n", offset); + + /* Write the position and velocity. */ + p.x[0] = 3.0; + p.v_full[0] = 0.3; + logger_log_gpart(&p, logger_mask_x | logger_mask_v, &offset, d); + printf("Wrote gpart at offset %#016zx.\n", offset); + + /* Recover the last part from the dump. */ + bzero(&p, sizeof(struct gpart)); + size_t offset_old = offset; + int mask = logger_read_gpart(&p, &offset, d->data); + printf( + "Recovered gpart at offset %#016zx with mask %#04x: p.x[0]=%e, " + "p.v[0]=%e.\n", + offset_old, mask, p.x[0], p.v_full[0]); + if (p.x[0] != 3.0 || p.v_full[0] != 0.3f) { + printf("FAIL: could not read position and velocity of stored gpart.\n"); + abort(); + } + + /* Recover the second part from the dump. 
*/ + bzero(&p, sizeof(struct gpart)); + offset_old = offset; + mask = logger_read_gpart(&p, &offset, d->data); + printf( + "Recovered gpart at offset %#016zx with mask %#04x: p.x[0]=%e, " + "p.v[0]=%e.\n", + offset_old, mask, p.x[0], p.v_full[0]); + if (p.x[0] != 2.0 || p.v_full[0] != 0.0) { + printf("FAIL: could not read position and velocity of stored gpart.\n"); + abort(); + } + + /* Recover the first part from the dump. */ + bzero(&p, sizeof(struct gpart)); + offset_old = offset; + mask = logger_read_gpart(&p, &offset, d->data); + printf( + "Recovered gpart at offset %#016zx with mask %#04x: p.x[0]=%e, " + "p.v[0]=%e.\n", + offset_old, mask, p.x[0], p.v_full[0]); + if (p.x[0] != 1.0 || p.v_full[0] != 0.1f) { + printf("FAIL: could not read position and velocity of stored gpart.\n"); + abort(); + } +} + +void test_log_timestamps(struct dump *d) { + + /* The timestamp to log. */ + unsigned long long int t = 10; + + /* Start with an offset at the end of the dump. */ + size_t offset = d->count; + + /* Log three consecutive timestamps. */ + logger_log_timestamp(t, &offset, d); + printf("Logged timestamp %020llu at offset %#016zx.\n", t, offset); + t += 10; + logger_log_timestamp(t, &offset, d); + printf("Logged timestamp %020llu at offset %#016zx.\n", t, offset); + t += 10; + logger_log_timestamp(t, &offset, d); + printf("Logged timestamp %020llu at offset %#016zx.\n", t, offset); + + /* Recover the three timestamps. 
*/ + size_t offset_old = offset; + t = 0; + int mask = logger_read_timestamp(&t, &offset, d->data); + printf("Recovered timestamp %020llu at offset %#016zx with mask %#04x.\n", t, + offset_old, mask); + if (t != 30) { + printf("FAIL: could not recover correct timestamp.\n"); + abort(); + } + + offset_old = offset; + t = 0; + mask = logger_read_timestamp(&t, &offset, d->data); + printf("Recovered timestamp %020llu at offset %#016zx with mask %#04x.\n", t, + offset_old, mask); + if (t != 20) { + printf("FAIL: could not recover correct timestamp.\n"); + abort(); + } + + offset_old = offset; + t = 0; + mask = logger_read_timestamp(&t, &offset, d->data); + printf("Recovered timestamp %020llu at offset %#016zx with mask %#04x.\n", t, + offset_old, mask); + if (t != 10) { + printf("FAIL: could not recover correct timestamp.\n"); + abort(); + } +} + +int main(int argc, char *argv[]) { + + /* Some constants. */ + const char *filename = "/tmp/dump_test.out"; + + /* Prepare a dump. */ + struct dump d; + dump_init(&d, filename, 1024 * 1024); + + /* Test writing/reading parts. */ + test_log_parts(&d); + + /* Test writing/reading gparts. */ + test_log_gparts(&d); + + /* Test writing/reading timestamps. */ + test_log_timestamps(&d); + + /* Finalize the dump. */ + dump_close(&d); + + /* Return a happy number. 
*/ + printf("PASS\n"); + return 0; +} diff --git a/tests/testPair.c b/tests/testPair.c index 8b272b866431db3bfe36239222cd87d669961ae7..8b23cc419a661f4d50ea53948302729784a129f9 100644 --- a/tests/testPair.c +++ b/tests/testPair.c @@ -68,8 +68,13 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, #else part->mass = density * volume / count; #endif - part->ti_begin = 0; - part->ti_end = 1; + part->time_bin = 1; + +#ifdef SWIFT_DEBUG_CHECKS + part->ti_drift = 8; + part->ti_kick = 8; +#endif + ++part; } } @@ -87,8 +92,9 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->loc[1] = offset[1]; cell->loc[2] = offset[2]; - cell->ti_end_min = 1; - cell->ti_end_max = 1; + cell->ti_old = 8; + cell->ti_end_min = 8; + cell->ti_end_max = 8; shuffle_particles(cell->parts, cell->count); @@ -245,7 +251,7 @@ int main(int argc, char *argv[]) { engine.s = &space; engine.time = 0.1f; - engine.ti_current = 1; + engine.ti_current = 8; runner.e = &engine; volume = particles * particles * particles; diff --git a/tests/testReading.c b/tests/testReading.c index 2ef32a5ef11c7e24a379ce5131df9cbea153fa7c..cbf25bf880c988bec95a91d5e141bf7554a97fe7 100644 --- a/tests/testReading.c +++ b/tests/testReading.c @@ -25,13 +25,14 @@ int main() { - size_t Ngas = 0, Ngpart = 0; + size_t Ngas = 0, Ngpart = 0, Nspart = 0; int periodic = -1; int flag_entropy_ICs = -1; int i, j, k; double dim[3]; struct part *parts = NULL; struct gpart *gparts = NULL; + struct spart *sparts = NULL; /* Default unit system */ struct UnitSystem us; @@ -43,8 +44,8 @@ int main() { const double rho = 2.; /* Read data */ - read_ic_single("input.hdf5", &us, dim, &parts, &gparts, &Ngas, &Ngpart, - &periodic, &flag_entropy_ICs, 0); + read_ic_single("input.hdf5", &us, dim, &parts, &gparts, &sparts, &Ngas, + &Ngpart, &Nspart, &periodic, &flag_entropy_ICs, 1, 1, 0, 0); /* Check global properties read are correct */ assert(dim[0] == boxSize); diff --git a/tests/testRiemannExact.c 
b/tests/testRiemannExact.c index 1943820339ba2ac06d194a17d2d450157ded1a31..82b12449f1b199133de5a74fe7b68b5c386c9cf5 100644 --- a/tests/testRiemannExact.c +++ b/tests/testRiemannExact.c @@ -281,11 +281,11 @@ void check_riemann_symmetry() { check_value(Whalf1[3], Whalf2[3], "V[2] solution"); check_value(Whalf1[4], Whalf2[4], "Pressure solution"); } else { - message( - "Solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == " - "[%.3e,%.3e,%.3e,%.3e,%.3e]\n", - Whalf1[0], Whalf1[1], Whalf1[2], Whalf1[3], Whalf1[4], Whalf2[0], - Whalf2[1], Whalf2[2], Whalf2[3], Whalf2[4]); + /* message( */ + /* "Solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == " */ + /* "[%.3e,%.3e,%.3e,%.3e,%.3e]\n", */ + /* Whalf1[0], Whalf1[1], Whalf1[2], Whalf1[3], Whalf1[4], Whalf2[0], */ + /* Whalf2[1], Whalf2[2], Whalf2[3], Whalf2[4]); */ } vij[0] = random_uniform(-10.0f, 10.0f); @@ -314,11 +314,11 @@ void check_riemann_symmetry() { check_value(totflux1[3], totflux2[3], "Momentum[2] flux"); check_value(totflux1[4], totflux2[4], "Energy flux"); } else { - message( - "Flux solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == " - "[%.3e,%.3e,%.3e,%.3e,%.3e]\n", - totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4], - totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]); + /* message( */ + /* "Flux solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == " */ + /* "[%.3e,%.3e,%.3e,%.3e,%.3e]\n", */ + /* totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4], */ + /* totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]); */ } } diff --git a/tests/testRiemannHLLC.c b/tests/testRiemannHLLC.c index 4cf883b68efbcfd795d0b7894adb9e7265b14d14..6bdf1192a6da8482d562895027d761f73ecc71de 100644 --- a/tests/testRiemannHLLC.c +++ b/tests/testRiemannHLLC.c @@ -75,11 +75,11 @@ void check_riemann_symmetry() { totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]); error("Asymmetry in flux solution!"); } else { - message( - "Flux solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == " - 
"[%.3e,%.3e,%.3e,%.3e,%.3e]\n", - totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4], - totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]); + /* message( */ + /* "Flux solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == " */ + /* "[%.3e,%.3e,%.3e,%.3e,%.3e]\n", */ + /* totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4], */ + /* totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]); */ } } diff --git a/tests/testRiemannTRRS.c b/tests/testRiemannTRRS.c index 18ecbdce9173f43674a63b21231322cb01620d29..4a0eac0be23581e175d2c0e599b786fd4508b14a 100644 --- a/tests/testRiemannTRRS.c +++ b/tests/testRiemannTRRS.c @@ -274,11 +274,11 @@ void check_riemann_symmetry() { Whalf2[1], Whalf2[2], Whalf2[3], Whalf2[4]); error("Asymmetry in solution!"); } else { - message( - "Solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == " - "[%.3e,%.3e,%.3e,%.3e,%.3e]\n", - Whalf1[0], Whalf1[1], Whalf1[2], Whalf1[3], Whalf1[4], Whalf2[0], - Whalf2[1], Whalf2[2], Whalf2[3], Whalf2[4]); + /* message( */ + /* "Solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == " */ + /* "[%.3e,%.3e,%.3e,%.3e,%.3e]\n", */ + /* Whalf1[0], Whalf1[1], Whalf1[2], Whalf1[3], Whalf1[4], Whalf2[0], */ + /* Whalf2[1], Whalf2[2], Whalf2[3], Whalf2[4]); */ } vij[0] = random_uniform(-10.0f, 10.0f); @@ -300,11 +300,11 @@ void check_riemann_symmetry() { totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]); error("Asymmetry in solution!"); } else { - message( - "Solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == " - "[%.3e,%.3e,%.3e,%.3e,%.3e]\n", - totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4], - totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]); + /* message( */ + /* "Solver symmetric: [%.3e,%.3e,%.3e,%.3e,%.3e] == " */ + /* "[%.3e,%.3e,%.3e,%.3e,%.3e]\n", */ + /* totflux1[0], totflux1[1], totflux1[2], totflux1[3], totflux1[4], */ + /* totflux2[0], totflux2[1], totflux2[2], totflux2[3], totflux2[4]); */ } } diff --git a/tests/testSPHStep.c 
b/tests/testSPHStep.c index ff2ec841b27bd5ca6190517bc39f4da0c28fbc0c..0c7ae1d0d8855371b8f8f9fbf51c7c63b3221aaa 100644 --- a/tests/testSPHStep.c +++ b/tests/testSPHStep.c @@ -61,8 +61,7 @@ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) { offset[2] * cellSize + z * cellSize / N + cellSize / (2 * N); part->h = h; part->id = x * N * N + y * N + z + id_offset; - part->ti_begin = 0; - part->ti_end = 1; + part->time_bin = 1; ++part; } } diff --git a/tests/testTimeIntegration.c b/tests/testTimeIntegration.c index f39adaee902ac3460b01857c002659b8bb2101f4..42a3d224f43d580e512119edc55051bd22719a3b 100644 --- a/tests/testTimeIntegration.c +++ b/tests/testTimeIntegration.c @@ -115,7 +115,7 @@ int main() { c.parts[0].a_hydro[1] = -(G * M_sun * c.parts[0].x[1] / r * r * r); /* Kick... */ - runner_do_kick(&run, &c, 0); + runner_do_kick2(&run, &c, 0); } /* Clean-up */ diff --git a/tests/tolerance_27_normal.dat b/tests/tolerance_27_normal.dat index 71acaa89be231d02fc33e47c96a7bacf623bbf48..9c7ca10414507746b41e453d75426a072f989d2e 100644 --- a/tests/tolerance_27_normal.dat +++ b/tests/tolerance_27_normal.dat @@ -1,3 +1,3 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 2e-5 2e-3 2e-6 2e-6 2e-6 2e-6 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 1e-5 1e-4 2e-5 2e-5 2e-5 2e-5 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 4e-5 2e-4 2e-3 8e-6 6e-6 6e-6 6e-6 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-4 1e-4 1e-4 2e-4 1e-4 1e-4 1e-4 diff --git a/tests/tolerance_27_perturbed.dat b/tests/tolerance_27_perturbed.dat index 45293cbaa223b5887f3b0ce05cd9430d0db7440b..53de4ec7632039a56a3757488881e890296e3ac8 100644 --- a/tests/tolerance_27_perturbed.dat +++ b/tests/tolerance_27_perturbed.dat @@ -1,3 +1,3 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-6 1e-5 2.1e-5 2e-3 2.1e-6 2e-6 2e-6 2e-6 - 0 1e-6 
1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 3e-3 1e-5 1e-4 2e-5 4e-4 4e-4 4e-4 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-6 1e-4 5e-5 2e-3 3.1e-6 3e-6 3e-6 3e-6 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-2 1e-5 1e-4 2e-5 2e-3 2e-3 2e-3