diff --git a/.gitignore b/.gitignore index 596f144ed1e790d03510c5bdec851adb3fcecc3c..ba0028d163dae64143b0c43b465152e357d042b0 100644 --- a/.gitignore +++ b/.gitignore @@ -78,6 +78,7 @@ tests/testRiemannHLLC tests/testMatrixInversion tests/testDump tests/testLogger +tests/benchmarkInteractions theory/latex/swift.pdf theory/SPH/Kernels/kernels.pdf diff --git a/examples/CoolingHaloWithSpin/cooling_halo.yml b/examples/CoolingHaloWithSpin/cooling_halo.yml index 2e5f09b9cae199090f29ae2341c232218f8099d0..fc5094f9f5dcae62bb936d2b5510f41e3c70504e 100644 --- a/examples/CoolingHaloWithSpin/cooling_halo.yml +++ b/examples/CoolingHaloWithSpin/cooling_halo.yml @@ -9,8 +9,8 @@ InternalUnitSystem: # Parameters governing the time integration TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). - time_end: 10. # The end time of the simulation (in internal units). - dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). + time_end: 10. # The end time of the simulation (in internal units). + dt_min: 1e-5 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-1 # The maximal time-step size of the simulation (in internal units). # Parameters governing the conserved quantities statistics @@ -48,4 +48,4 @@ LambdaCooling: minimum_temperature: 1.0e4 # Minimal temperature (Kelvin) mean_molecular_weight: 0.59 # Mean molecular weight hydrogen_mass_abundance: 0.75 # Hydrogen mass abundance (dimensionless) - cooling_tstep_mult: 1.0 # Dimensionless pre-factor for the time-step condition + cooling_tstep_mult: 0.1 # Dimensionless pre-factor for the time-step condition diff --git a/examples/CoolingHaloWithSpin/density_profile.py b/examples/CoolingHaloWithSpin/density_profile.py index ea282328e5b75530a128eab2dec5f065e46cf819..fb88ddd6aea71603a6f6fcb36b13771106737e6a 100644 --- a/examples/CoolingHaloWithSpin/density_profile.py +++ b/examples/CoolingHaloWithSpin/density_profile.py @@ -28,7 +28,7 @@ unit_mass_cgs = float(params.attrs["InternalUnitSystem:UnitMass_in_cgs"]) unit_length_cgs = float(params.attrs["InternalUnitSystem:UnitLength_in_cgs"]) unit_velocity_cgs = float(params.attrs["InternalUnitSystem:UnitVelocity_in_cgs"]) unit_time_cgs = unit_length_cgs / unit_velocity_cgs -v_c = float(params.attrs["SoftenedIsothermalPotential:vrot"]) +v_c = float(params.attrs["IsothermalPotential:vrot"]) v_c_cgs = v_c * unit_velocity_cgs lambda_cgs = float(params.attrs["LambdaCooling:lambda_cgs"]) X_H = float(params.attrs["LambdaCooling:hydrogen_mass_abundance"]) @@ -101,18 +101,18 @@ for i in range(n_snaps): rho_0 = density[0] rho_analytic_init = rho_0 * (radial_bin_mids/r_0)**(-2) - plt.plot(radial_bin_mids,density/rho_analytic_init,'ko',label = "Average density of shell") - #plt.plot(t,rho_analytic,label = "Initial analytic density profile" + plt.plot(radial_bin_mids,density,'ko',label = "Average density of shell") + plt.plot(radial_bin_mids,rho_analytic_init,label = "Initial analytic density profile") plt.xlabel(r"$r / r_{vir}$") - plt.ylabel(r"$\rho / \rho_{init})$") + plt.ylabel(r"$(\rho / \rho_{init})$") plt.title(r"$\mathrm{Time}= %.3g \, s \, , \, %d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(snap_time_cgs,N,v_c)) #plt.ylim((1.e-2,1.e1)) - plt.plot((r_cool_over_r_vir,r_cool_over_r_vir),(0,20),'r',label = "Cooling radius") + plt.plot((r_cool_over_r_vir,r_cool_over_r_vir),(1.0e-4,1.0e4),'r',label = "Cooling radius") plt.xlim((radial_bin_mids[0],max_r)) - plt.ylim((0,20)) - plt.plot((0,max_r),(1,1)) + plt.ylim((1.0e-4,1.0e4)) + #plt.plot((0,max_r),(1,1)) #plt.xscale('log') - #plt.yscale('log') + plt.yscale('log') plt.legend(loc = "upper right") plot_filename = "density_profile_%03d.png" %i plt.savefig(plot_filename,format = "png") diff --git a/examples/CoolingHaloWithSpin/internal_energy_profile.py b/examples/CoolingHaloWithSpin/internal_energy_profile.py index a3e470cc24a939c9bc915371e927d9bd39196bff..5f71d69ca7a978de242559f84ec390faa86a27f0 100644 --- a/examples/CoolingHaloWithSpin/internal_energy_profile.py +++ b/examples/CoolingHaloWithSpin/internal_energy_profile.py @@ -46,7 +46,7 @@ unit_mass_cgs = float(params.attrs["InternalUnitSystem:UnitMass_in_cgs"]) unit_length_cgs = float(params.attrs["InternalUnitSystem:UnitLength_in_cgs"]) unit_velocity_cgs = float(params.attrs["InternalUnitSystem:UnitVelocity_in_cgs"]) unit_time_cgs = unit_length_cgs / unit_velocity_cgs -v_c = float(params.attrs["SoftenedIsothermalPotential:vrot"]) +v_c = float(params.attrs["IsothermalPotential:vrot"]) v_c_cgs = v_c * unit_velocity_cgs lambda_cgs = float(params.attrs["LambdaCooling:lambda_cgs"]) X_H = float(params.attrs["LambdaCooling:hydrogen_mass_abundance"]) diff --git a/examples/CoolingHaloWithSpin/makeIC.py b/examples/CoolingHaloWithSpin/makeIC.py index 8970fbaa70578532a4f41bab7a096d8fa3565d26..a6d57868ad7542498b27007a5c3ef9234b9feb84 100644 --- a/examples/CoolingHaloWithSpin/makeIC.py +++ b/examples/CoolingHaloWithSpin/makeIC.py @@ -36,6 +36,7 @@ h = 0.67777 # hubble parameter gamma = 5./3. eta = 1.2349 spin_lambda = 0.05 #spin parameter +f_b = 0.2 #baryon fraction # First set unit velocity and then the circular velocity parameter for the isothermal potential const_unit_velocity_in_cgs = 1.e5 #kms^-1 @@ -99,6 +100,8 @@ grp.attrs["PeriodicBoundariesOn"] = periodic # set seed for random number np.random.seed(1234) +gas_mass = f_b * np.sqrt(3.) / 2. #virial mass of halo is 1, virial radius is 1, enclosed mass scales with r +gas_particle_mass = gas_mass / float(N) # Positions # r^(-2) distribution corresponds to uniform distribution in radius @@ -164,12 +167,12 @@ N = x_coords.size print "Number of particles in the box = " , N #make the coords and radius arrays again -coords_2 = np.zeros((N,3)) -coords_2[:,0] = x_coords -coords_2[:,1] = y_coords -coords_2[:,2] = z_coords +coords= np.zeros((N,3)) +coords[:,0] = x_coords +coords[:,1] = y_coords +coords[:,2] = z_coords -radius = np.sqrt((coords_2[:,0]-boxSize/2.)**2 + (coords_2[:,1]-boxSize/2.)**2 + (coords_2[:,2]-boxSize/2.)**2) +radius = np.sqrt((coords[:,0]-boxSize/2.)**2 + (coords[:,1]-boxSize/2.)**2 + (coords[:,2]-boxSize/2.)**2) #now give particle's velocities v = np.zeros((N,3)) @@ -184,7 +187,7 @@ print "J =", J omega = np.zeros((N,3)) for i in range(N): omega[i,2] = 3.*J / radius[i] - v[i,:] = np.cross(omega[i,:],(coords_2[i,:]-boxSize/2.)) + v[i,:] = np.cross(omega[i,:],(coords[i,:]-boxSize/2.)) # Header grp = file.create_group("/Header") @@ -202,16 +205,15 @@ grp.attrs["Dimension"] = 3 grp = file.create_group("/PartType0") ds = grp.create_dataset('Coordinates', (N, 3), 'd') -ds[()] = coords_2 -coords_2 = np.zeros(1) +ds[()] = coords +coords = np.zeros(1) ds = grp.create_dataset('Velocities', (N, 3), 'f') ds[()] = v v = np.zeros(1) # All particles of equal mass -mass = 1. / N -m = np.full((N,),mass) +m = np.full((N,),gas_particle_mass) ds = grp.create_dataset('Masses', (N, ), 'f') ds[()] = m m = np.zeros(1) diff --git a/examples/CoolingHaloWithSpin/velocity_profile.py b/examples/CoolingHaloWithSpin/velocity_profile.py index d64d255b18482bc26578f21f46199aa3540ae7b5..07df8e1b0751307513c30a5b128773b193c3a9cd 100644 --- a/examples/CoolingHaloWithSpin/velocity_profile.py +++ b/examples/CoolingHaloWithSpin/velocity_profile.py @@ -46,7 +46,7 @@ unit_mass_cgs = float(params.attrs["InternalUnitSystem:UnitMass_in_cgs"]) unit_length_cgs = float(params.attrs["InternalUnitSystem:UnitLength_in_cgs"]) unit_velocity_cgs = float(params.attrs["InternalUnitSystem:UnitVelocity_in_cgs"]) unit_time_cgs = unit_length_cgs / unit_velocity_cgs -v_c = float(params.attrs["SoftenedIsothermalPotential:vrot"]) +v_c = float(params.attrs["IsothermalPotential:vrot"]) v_c_cgs = v_c * unit_velocity_cgs header = f["Header"] N = header.attrs["NumPart_Total"][0] diff --git a/examples/EAGLE_100/README b/examples/EAGLE_100/README new file mode 100644 index 0000000000000000000000000000000000000000..e3af3c0e1281f8e9ba9e0aae3fa6dd8475359a47 --- /dev/null +++ b/examples/EAGLE_100/README @@ -0,0 +1,16 @@ +ICs extracted from the EAGLE suite of simulations. + +WARNING: The ICs are 217GB in size. They contain ~3.4G DM particles, +~3.2G gas particles and ~170M star particles + +The particle distribution here is the snapshot 27 (z=0.1) of the 100Mpc +Ref-model. h- and a- factors from the original Gadget code have been +corrected for. Variables not used in a pure hydro & gravity code have +been removed. +Everything is ready to be run without cosmological integration. + +The particle load of the main EAGLE simulation can be reproduced by +running these ICs on 4096 cores. + +MD5 checksum of the ICs: +2301ea73e14207b541bbb04163c5269e EAGLE_ICs_100.hdf5 diff --git a/examples/EAGLE_100/eagle_100.yml b/examples/EAGLE_100/eagle_100.yml new file mode 100644 index 0000000000000000000000000000000000000000..a9b83b81f085e66b36d115c5265b66d6093ffdfb --- /dev/null +++ b/examples/EAGLE_100/eagle_100.yml @@ -0,0 +1,35 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.989e43 # 10^10 M_sun in grams + UnitLength_in_cgs: 3.085678e24 # Mpc in centimeters + UnitVelocity_in_cgs: 1e5 # km/s in centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 1e-2 # The end time of the simulation (in internal units). + dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-4 # The maximal time-step size of the simulation (in internal units). + +# Parameters governing the snapshots +Snapshots: + basename: eagle # Common part of the name of output files + time_first: 0. # Time of the first output (in internal units) + delta_time: 1e-3 # Time difference between consecutive outputs (in internal units) + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1e-2 # Time between statistics output + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + delta_neighbours: 0.1 # The tolerance for the targetted number of neighbours. + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./EAGLE_ICs_100.hdf5 # The file to read + diff --git a/examples/EAGLE_100/getIC.sh b/examples/EAGLE_100/getIC.sh new file mode 100755 index 0000000000000000000000000000000000000000..227df3f9f79d294cd8ccbfd3b72b02dfbea2ebd6 --- /dev/null +++ b/examples/EAGLE_100/getIC.sh @@ -0,0 +1,2 @@ +#!/bin/bash +wget http://virgodb.cosma.dur.ac.uk/swift-webstorage/ICs/EAGLE_ICs_100.hdf5 diff --git a/examples/EAGLE_100/run.sh b/examples/EAGLE_100/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..6ef47d5d98172cc8a318242923ede37332bd5590 --- /dev/null +++ b/examples/EAGLE_100/run.sh @@ -0,0 +1,11 @@ +#!/bin/bash + + # Generate the initial conditions if they are not present. +if [ ! -e EAGLE_ICs_100.hdf5 ] +then + echo "Fetching initial conditions for the EAGLE 100Mpc example..." + ./getIC.sh +fi + +../swift -s -t 16 eagle_100.yml 2>&1 | tee output.log + diff --git a/examples/ExternalPointMass/energy_plot.py b/examples/ExternalPointMass/energy_plot.py new file mode 100644 index 0000000000000000000000000000000000000000..a75fcb835d33b3695170aab822092556f12db7d1 --- /dev/null +++ b/examples/ExternalPointMass/energy_plot.py @@ -0,0 +1,121 @@ +import matplotlib +matplotlib.use("Agg") +from pylab import * +import h5py + +# Plot parameters +params = {'axes.labelsize': 10, +'axes.titlesize': 10, +'font.size': 12, +'legend.fontsize': 12, +'xtick.labelsize': 10, +'ytick.labelsize': 10, +'text.usetex': True, + 'figure.figsize' : (3.15,3.15), +'figure.subplot.left' : 0.145, +'figure.subplot.right' : 0.99, +'figure.subplot.bottom' : 0.11, +'figure.subplot.top' : 0.99, +'figure.subplot.wspace' : 0.15, +'figure.subplot.hspace' : 0.12, +'lines.markersize' : 6, +'lines.linewidth' : 3., +'text.latex.unicode': True +} +rcParams.update(params) +rc('font',**{'family':'sans-serif','sans-serif':['Times']}) + + +import numpy as np +import h5py as h5 +import sys + +# File containing the total energy +stats_filename = "./energy.txt" + +# First snapshot +snap_filename = "pointMass_000.hdf5" +f = h5.File(snap_filename,'r') + +# Read the units parameters from the snapshot +units = f["InternalCodeUnits"] +unit_mass = units.attrs["Unit mass in cgs (U_M)"] +unit_length = units.attrs["Unit length in cgs (U_L)"] +unit_time = units.attrs["Unit time in cgs (U_t)"] + +G = 6.67408e-8 * unit_mass * unit_time**2 / unit_length**3 + +# Read the header +header = f["Header"] +box_size = float(header.attrs["BoxSize"][0]) + +# Read the properties of the potential +parameters = f["Parameters"] +mass = float(parameters.attrs["PointMassPotential:mass"]) +centre = [box_size/2, box_size/2, box_size/2] +f.close() + +# Read the statistics summary +file_energy = np.loadtxt("energy.txt") +time_stats = file_energy[:,0] +E_kin_stats = file_energy[:,3] +E_pot_stats = file_energy[:,5] +E_tot_stats = E_kin_stats + E_pot_stats + +# Read the snapshots +time_snap = np.zeros(402) +E_kin_snap = np.zeros(402) +E_pot_snap = np.zeros(402) +E_tot_snap = np.zeros(402) +Lz_snap = np.zeros(402) + +# Read all the particles from the snapshots +for i in range(402): + snap_filename = "pointMass_%0.3d.hdf5"%i + f = h5.File(snap_filename,'r') + + pos_x = f["PartType1/Coordinates"][:,0] + pos_y = f["PartType1/Coordinates"][:,1] + pos_z = f["PartType1/Coordinates"][:,2] + vel_x = f["PartType1/Velocities"][:,0] + vel_y = f["PartType1/Velocities"][:,1] + vel_z = f["PartType1/Velocities"][:,2] + m = f["/PartType1/Masses"][:] + + r = np.sqrt((pos_x[:] - centre[0])**2 + (pos_y[:] - centre[1])**2 + (pos_z[:] - centre[2])**2) + Lz = (pos_x[:] - centre[0]) * vel_y[:] - (pos_y[:] - centre[1]) * vel_x[:] + + time_snap[i] = f["Header"].attrs["Time"] + E_kin_snap[i] = np.sum(0.5 * m * (vel_x[:]**2 + vel_y[:]**2 + vel_z[:]**2)) + E_pot_snap[i] = np.sum(-mass * m * G / r) + E_tot_snap[i] = E_kin_snap[i] + E_pot_snap[i] + Lz_snap[i] = np.sum(Lz) + +# Plot energy evolution +figure() +plot(time_stats, E_kin_stats, "r-", lw=0.5, label="Kinetic energy") +plot(time_stats, E_pot_stats, "g-", lw=0.5, label="Potential energy") +plot(time_stats, E_tot_stats, "k-", lw=0.5, label="Total energy") + +plot(time_snap[::10], E_kin_snap[::10], "rD", lw=0.5, ms=2) +plot(time_snap[::10], E_pot_snap[::10], "gD", lw=0.5, ms=2) +plot(time_snap[::10], E_tot_snap[::10], "kD", lw=0.5, ms=2) + +legend(loc="center right", fontsize=8, frameon=False, handlelength=3, ncol=1) +xlabel("${\\rm{Time}}$", labelpad=0) +ylabel("${\\rm{Energy}}$",labelpad=0) +xlim(0, 8) + +savefig("energy.png", dpi=200) + +# Plot angular momentum evolution +figure() +plot(time_snap, Lz_snap, "k-", lw=0.5, ms=2) + +xlabel("${\\rm{Time}}$", labelpad=0) +ylabel("${\\rm{Angular~momentum}}$",labelpad=0) +xlim(0, 8) + +savefig("angular_momentum.png", dpi=200) + + diff --git a/examples/ExternalPointMass/externalPointMass.yml b/examples/ExternalPointMass/externalPointMass.yml index 621a66bbc39838ac8d3d8a8a3992b2a7be3157a8..20b5bb3aa613d553d8c401e968d8ebfc0572e610 100644 --- a/examples/ExternalPointMass/externalPointMass.yml +++ b/examples/ExternalPointMass/externalPointMass.yml @@ -9,7 +9,7 @@ InternalUnitSystem: # Parameters governing the time integration TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). - time_end: 1. # The end time of the simulation (in internal units). + time_end: 8. # The end time of the simulation (in internal units). dt_min: 1e-6 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). @@ -31,7 +31,7 @@ SPH: # Parameters related to the initial conditions InitialConditions: - file_name: Sphere.hdf5 # The file to read + file_name: PointMass.hdf5 # The file to read shift_x: 50. # A shift to apply to all particles read from the ICs (in internal units). shift_y: 50. shift_z: 50. diff --git a/examples/ExternalPointMass/makeIC.py b/examples/ExternalPointMass/makeIC.py index 326183398933c88d7348e72e00343064b3e3a64c..ba415daf9e03058239599cc08039fc89e0929393 100644 --- a/examples/ExternalPointMass/makeIC.py +++ b/examples/ExternalPointMass/makeIC.py @@ -24,10 +24,10 @@ import numpy import math import random -# Generates a random distriution of particles, for motion in an external potnetial centred at (0,0,0) +# Generates a random distriution of particles, for motion in an external potential centred at (0,0,0) # physical constants in cgs -NEWTON_GRAVITY_CGS = 6.672e-8 +NEWTON_GRAVITY_CGS = 6.67408e-8 SOLAR_MASS_IN_CGS = 1.9885e33 PARSEC_IN_CGS = 3.0856776e18 @@ -39,34 +39,28 @@ const_unit_velocity_in_cgs = (1e5) print "UnitMass_in_cgs: ", const_unit_mass_in_cgs print "UnitLength_in_cgs: ", const_unit_length_in_cgs print "UnitVelocity_in_cgs: ", const_unit_velocity_in_cgs +print "UnitTime_in_cgs: ", const_unit_length_in_cgs / const_unit_velocity_in_cgs # derived units const_unit_time_in_cgs = (const_unit_length_in_cgs / const_unit_velocity_in_cgs) const_G = ((NEWTON_GRAVITY_CGS*const_unit_mass_in_cgs*const_unit_time_in_cgs*const_unit_time_in_cgs/(const_unit_length_in_cgs*const_unit_length_in_cgs*const_unit_length_in_cgs))) -print 'G=', const_G +print '---------------------' +print 'G in internal units: ', const_G # Parameters -periodic= 1 # 1 For periodic box -boxSize = 100. # -Radius = boxSize / 4. # maximum radius of particles -G = const_G -Mass = 1e10 +periodic = 1 # 1 For periodic box +boxSize = 100. # +max_radius = boxSize / 4. # maximum radius of particles +Mass = 1e10 +print "Mass at the centre: ", Mass -N = int(sys.argv[1]) # Number of particles -L = N**(1./3.) +numPart = int(sys.argv[1]) # Number of particles +mass = 1. -# these are not used but necessary for I/O -rho = 2. # Density -P = 1. # Pressure -gamma = 5./3. # Gas adiabatic index -fileName = "Sphere.hdf5" +fileName = "PointMass.hdf5" -#--------------------------------------------------- -numPart = N -mass = 1 -internalEnergy = P / ((gamma - 1.)*rho) #-------------------------------------------------- @@ -98,25 +92,26 @@ grp.attrs["Unit current in cgs (U_I)"] = 1. grp.attrs["Unit temperature in cgs (U_T)"] = 1. #Particle group -#grp0 = file.create_group("/PartType0") grp1 = file.create_group("/PartType1") + #generate particle positions -radius = Radius * (numpy.random.rand(N))**(1./3.) -ctheta = -1. + 2 * numpy.random.rand(N) -stheta = numpy.sqrt(1.-ctheta**2) -phi = 2 * math.pi * numpy.random.rand(N) +radius = max_radius * (numpy.random.rand(numPart))**(1./3.) +print '---------------------' +print 'Radius: minimum = ',min(radius) +print 'Radius: maximum = ',max(radius) +radius = numpy.sort(radius) r = numpy.zeros((numPart, 3)) -# r[:,0] = radius * stheta * numpy.cos(phi) -# r[:,1] = radius * stheta * numpy.sin(phi) -# r[:,2] = radius * ctheta r[:,0] = radius -# -speed = numpy.sqrt(G * Mass / radius) -v = numpy.zeros((numPart, 3)) + +#generate particle velocities +speed = numpy.sqrt(const_G * Mass / radius) omega = speed / radius period = 2.*math.pi/omega -print 'period = minimum = ',min(period), ' maximum = ',max(period) +print '---------------------' +print 'Period: minimum = ',min(period) +print 'Period: maximum = ',max(period) +v = numpy.zeros((numPart, 3)) v[:,0] = -omega * r[:,1] v[:,1] = omega * r[:,0] @@ -129,17 +124,6 @@ ds = grp1.create_dataset('Masses', (numPart,), 'f') ds[()] = m m = numpy.zeros(1) -h = numpy.full((numPart, ), 1.1255 * boxSize / L) -ds = grp1.create_dataset('SmoothingLength', (numPart,), 'f') -ds[()] = h -h = numpy.zeros(1) - -u = numpy.full((numPart, ), internalEnergy) -ds = grp1.create_dataset('InternalEnergy', (numPart,), 'f') -ds[()] = u -u = numpy.zeros(1) - - ids = 1 + numpy.linspace(0, numPart, numPart, endpoint=False) ds = grp1.create_dataset('ParticleIDs', (numPart, ), 'L') ds[()] = ids diff --git a/examples/ExternalPointMass/run.sh b/examples/ExternalPointMass/run.sh index 4ac513f09cb8ac8dcefc256a68478e215b8bc320..e074c384c4e002a161c7d8258e9068663204099f 100755 --- a/examples/ExternalPointMass/run.sh +++ b/examples/ExternalPointMass/run.sh @@ -1,10 +1,13 @@ #!/bin/bash # Generate the initial conditions if they are not present. -if [ ! -e Sphere.hdf5 ] +if [ ! -e PointMass.hdf5 ] then echo "Generating initial conditions for the point mass potential box example..." python makeIC.py 10000 fi +rm -rf pointMass_*.hdf5 ../swift -g -t 1 externalPointMass.yml 2>&1 | tee output.log + +python energy_plot.py diff --git a/examples/ExternalPointMass/test.pro b/examples/ExternalPointMass/test.pro deleted file mode 100644 index 21c10e9d27daa45b085c6a659ba3cf7260f017fb..0000000000000000000000000000000000000000 --- a/examples/ExternalPointMass/test.pro +++ /dev/null @@ -1,65 +0,0 @@ -; -; test energy / angular momentum conservation of test problem -; -@physunits - -indir = '/gpfs/data/tt/Codes/Swift-git/swiftsim/examples/' -basefile = 'output_' -nfiles = 657 -nfollow = 100 ; number of particles to follow -eout = fltarr(nfollow, nfiles) -ekin = fltarr(nfollow, nfiles) -epot = fltarr(nfollow, nfiles) -tout = fltarr(nfiles) -; set properties of potential -uL = 1e3 * phys.pc ; unit of length -uM = phys.msun ; unit of mass -uV = 1d5 ; unit of velocity - -; derived units -constG = 10.^(alog10(phys.g)+alog10(uM)-2d0*alog10(uV)-alog10(uL)) ; -pcentre = [50.,50.,50.] * 1d3 * pc / uL -mextern = 1d10 * msun / uM -; -; -; -ifile = 0 -for ifile=0,nfiles-1 do begin -;for ifile=0,3 do begin - inf = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5' - time = h5ra(inf, 'Header','Time') - p = h5rd(inf,'PartType1/Coordinates') - v = h5rd(inf,'PartType1/Velocities') - id = h5rd(inf,'PartType1/ParticleIDs') - indx = sort(id) -; - id = id[indx] - for ic=0,2 do begin - tmp = reform(p[ic,*]) & p[ic,*] = tmp[indx] - tmp = reform(v[ic,*]) & v[ic,*] = tmp[indx] - endfor -; calculate energy - dd = size(p,/dimen) & npart = dd[1] - ener = fltarr(npart) - dr = fltarr(npart) & dv = dr - for ic=0,2 do dr[*] = dr[*] + (p[ic,*]-pcentre[ic])^2 - for ic=0,2 do dv[*] = dv[*] + v[ic,*]^2 - dr = sqrt(dr) -; print,'time = ',time,p[0,0],v[0,0],id[0] - ek = 0.5 * dv - ep = - constG * mextern / dr - ener = ek + ep - tout(ifile) = time - eout(*,ifile) = ener[0:nfollow-1] - ekin(*,ifile) = ek[0:nfollow-1] - epot(*,ifile) = ep[0:nfollow-1] -endfor - -; calculate relative energy change -de = 0.0 * eout -for ifile=1, nfiles -1 do de[*,ifile] = (eout[*,ifile]-eout[*,0])/eout[*,0] - - -end - - diff --git a/examples/Feedback/feedback.pro b/examples/Feedback/feedback.pro deleted file mode 100644 index 02d616fc82f0aeb7011d022d13db9d1d1030e89c..0000000000000000000000000000000000000000 --- a/examples/Feedback/feedback.pro +++ /dev/null @@ -1,24 +0,0 @@ -base = 'Feedback' -inf = 'Feedback_005.hdf5' - -blast = [5.650488e-01, 5.004371e-01, 5.010494e-01] ; location of blast -pos = h5rd(inf,'PartType0/Coordinates') -vel = h5rd(inf,'PartType0/Velocities') -rho = h5rd(inf,'PartType0/Density') -utherm = h5rd(inf,'PartType0/InternalEnergy') - -; shift to centre -for ic=0,2 do pos[ic,*] = pos[ic,*] - blast[ic] - -;; distance from centre -dist = fltarr(n_elements(rho)) -for ic=0,2 do dist = dist + pos[ic,*]^2 -dist = sqrt(dist) - -; radial velocity -vr = fltarr(n_elements(rho)) -for ic=0,2 do vr = vr + pos[ic,*]*vel[ic,*] -vr = vr / dist - -; -end diff --git a/examples/Feedback/feedback.yml b/examples/Feedback/feedback.yml deleted file mode 100644 index de4f7abef1ef538a97a5e38c72b4db5ce2647976..0000000000000000000000000000000000000000 --- a/examples/Feedback/feedback.yml +++ /dev/null @@ -1,43 +0,0 @@ -# Define the system of units to use internally. -InternalUnitSystem: - UnitMass_in_cgs: 1 # Grams - UnitLength_in_cgs: 1 # Centimeters - UnitVelocity_in_cgs: 1 # Centimeters per second - UnitCurrent_in_cgs: 1 # Amperes - UnitTemp_in_cgs: 1 # Kelvin - -# Parameters governing the time integration -TimeIntegration: - time_begin: 0. # The starting time of the simulation (in internal units). - time_end: 5e-2 # The end time of the simulation (in internal units). - dt_min: 1e-7 # The minimal time-step size of the simulation (in internal units). - dt_max: 1e-4 # The maximal time-step size of the simulation (in internal units). - -# Parameters governing the snapshots -Snapshots: - basename: Feedback # Common part of the name of output files - time_first: 0. # Time of the first output (in internal units) - delta_time: 1e-2 # Time difference between consecutive outputs (in internal units) - -# Parameters governing the conserved quantities statistics -Statistics: - delta_time: 1e-3 # Time between statistics output - -# Parameters for the hydrodynamics scheme -SPH: - resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). - delta_neighbours: 0.1 # The tolerance for the targetted number of neighbours. - CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. - -# Parameters related to the initial conditions -InitialConditions: - file_name: ./Feedback.hdf5 # The file to read - -# Parameters for feedback - -SN: - time: 0.001 # time the SN explodes (internal units) - energy: 1.0 # energy of the explosion (internal units) - x: 0.5 # x-position of explostion (internal units) - y: 0.5 # y-position of explostion (internal units) - z: 0.5 # z-position of explostion (internal units) diff --git a/examples/Feedback/makeIC.py b/examples/Feedback/makeIC.py deleted file mode 100644 index bd1081a9c275616038f5fa4e3eb943c36cb4c3eb..0000000000000000000000000000000000000000 --- a/examples/Feedback/makeIC.py +++ /dev/null @@ -1,109 +0,0 @@ -############################################################################### - # This file is part of SWIFT. - # Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk), - # Matthieu Schaller (matthieu.schaller@durham.ac.uk) - # 2016 Tom Theuns (tom.theuns@durham.ac.uk) - # - # This program is free software: you can redistribute it and/or modify - # it under the terms of the GNU Lesser General Public License as published - # by the Free Software Foundation, either version 3 of the License, or - # (at your option) any later version. - # - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. - # - # You should have received a copy of the GNU Lesser General Public License - # along with this program. If not, see <http://www.gnu.org/licenses/>. - # - ############################################################################## - -import h5py -import sys -from numpy import * - -# Generates a swift IC file containing a cartesian distribution of particles -# at a constant density and pressure in a cubic box - -# Parameters -periodic= 1 # 1 For periodic box -boxSize = 1. -L = int(sys.argv[1]) # Number of particles along one axis -rho = 1. # Density -P = 1.e-6 # Pressure -gamma = 5./3. # Gas adiabatic index -eta = 1.2349 # 48 ngbs with cubic spline kernel -fileName = "Feedback.hdf5" - -#--------------------------------------------------- -numPart = L**3 -mass = boxSize**3 * rho / numPart -internalEnergy = P / ((gamma - 1.)*rho) - -#-------------------------------------------------- - -#File -file = h5py.File(fileName, 'w') - -# Header -grp = file.create_group("/Header") -grp.attrs["BoxSize"] = boxSize -grp.attrs["NumPart_Total"] = [numPart, 0, 0, 0, 0, 0] -grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0] -grp.attrs["NumPart_ThisFile"] = [numPart, 0, 0, 0, 0, 0] -grp.attrs["Time"] = 0.0 -grp.attrs["NumFilesPerSnapshot"] = 1 -grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] -grp.attrs["Flag_Entropy_ICs"] = 0 - -#Runtime parameters -grp = file.create_group("/RuntimePars") -grp.attrs["PeriodicBoundariesOn"] = periodic - -#Units -grp = file.create_group("/Units") -grp.attrs["Unit length in cgs (U_L)"] = 1. -grp.attrs["Unit mass in cgs (U_M)"] = 1. -grp.attrs["Unit time in cgs (U_t)"] = 1. -grp.attrs["Unit current in cgs (U_I)"] = 1. -grp.attrs["Unit temperature in cgs (U_T)"] = 1. - -#Particle group -grp = file.create_group("/PartType0") - -v = zeros((numPart, 3)) -ds = grp.create_dataset('Velocities', (numPart, 3), 'f') -ds[()] = v -v = zeros(1) - -m = full((numPart, 1), mass) -ds = grp.create_dataset('Masses', (numPart,1), 'f') -ds[()] = m -m = zeros(1) - -h = full((numPart, 1), eta * boxSize / L) -ds = grp.create_dataset('SmoothingLength', (numPart,1), 'f') -ds[()] = h -h = zeros(1) - -u = full((numPart, 1), internalEnergy) -ds = grp.create_dataset('InternalEnergy', (numPart,1), 'f') -ds[()] = u -u = zeros(1) - - -ids = linspace(0, numPart, numPart, endpoint=False).reshape((numPart,1)) -ds = grp.create_dataset('ParticleIDs', (numPart, 1), 'L') -ds[()] = ids + 1 -x = ids % L; -y = ((ids - x) / L) % L; -z = (ids - x - L * y) / L**2; -coords = zeros((numPart, 3)) -coords[:,0] = z[:,0] * boxSize / L + boxSize / (2*L) -coords[:,1] = y[:,0] * boxSize / L + boxSize / (2*L) -coords[:,2] = x[:,0] * boxSize / L + boxSize / (2*L) -ds = grp.create_dataset('Coordinates', (numPart, 3), 'd') -ds[()] = coords - -file.close() diff --git a/examples/IsothermalPotential/energy_plot.py b/examples/IsothermalPotential/energy_plot.py new file mode 100644 index 0000000000000000000000000000000000000000..0afa6fa93fa2a992e6ddeab3c9d33538c0b41de3 --- /dev/null +++ b/examples/IsothermalPotential/energy_plot.py @@ -0,0 +1,120 @@ +import matplotlib +matplotlib.use("Agg") +from pylab import * +import h5py + +# Plot parameters +params = {'axes.labelsize': 10, +'axes.titlesize': 10, +'font.size': 12, +'legend.fontsize': 12, +'xtick.labelsize': 10, +'ytick.labelsize': 10, +'text.usetex': True, + 'figure.figsize' : (3.15,3.15), +'figure.subplot.left' : 0.145, +'figure.subplot.right' : 0.99, +'figure.subplot.bottom' : 0.11, +'figure.subplot.top' : 0.99, +'figure.subplot.wspace' : 0.15, +'figure.subplot.hspace' : 0.12, +'lines.markersize' : 6, +'lines.linewidth' : 3., +'text.latex.unicode': True +} +rcParams.update(params) +rc('font',**{'family':'sans-serif','sans-serif':['Times']}) + + +import numpy as np +import h5py as h5 +import sys + +# File containing the total energy +stats_filename = "./energy.txt" + +# First snapshot +snap_filename = "Isothermal_000.hdf5" +f = h5.File(snap_filename,'r') + +# Read the units parameters from the snapshot +units = f["InternalCodeUnits"] +unit_mass = units.attrs["Unit mass in cgs (U_M)"] +unit_length = units.attrs["Unit length in cgs (U_L)"] +unit_time = units.attrs["Unit time in cgs (U_t)"] + +# Read the header +header = f["Header"] +box_size = float(header.attrs["BoxSize"][0]) + +# Read the properties of the potential +parameters = f["Parameters"] +R200 = 100 +Vrot = float(parameters.attrs["IsothermalPotential:vrot"]) +centre = [box_size/2, box_size/2, box_size/2] +f.close() + +# Read the statistics summary +file_energy = np.loadtxt("energy.txt") +time_stats = file_energy[:,0] +E_kin_stats = file_energy[:,3] +E_pot_stats = file_energy[:,5] +E_tot_stats = E_kin_stats + E_pot_stats + +# Read the snapshots +time_snap = np.zeros(402) +E_kin_snap = np.zeros(402) +E_pot_snap = np.zeros(402) +E_tot_snap = np.zeros(402) +Lz_snap = np.zeros(402) + +# Read all the particles from the snapshots +for i in range(402): + snap_filename = "Isothermal_%0.3d.hdf5"%i + f = h5.File(snap_filename,'r') + + pos_x = f["PartType1/Coordinates"][:,0] + pos_y = f["PartType1/Coordinates"][:,1] + pos_z = f["PartType1/Coordinates"][:,2] + vel_x = f["PartType1/Velocities"][:,0] + vel_y = f["PartType1/Velocities"][:,1] + vel_z = f["PartType1/Velocities"][:,2] + mass = f["/PartType1/Masses"][:] + + r = np.sqrt((pos_x[:] - centre[0])**2 + (pos_y[:] - centre[1])**2 + (pos_z[:] - centre[2])**2) + Lz = (pos_x[:] - centre[0]) * vel_y[:] - (pos_y[:] - centre[1]) * vel_x[:] + + time_snap[i] = f["Header"].attrs["Time"] + E_kin_snap[i] = np.sum(0.5 * mass * (vel_x[:]**2 + vel_y[:]**2 + vel_z[:]**2)) + E_pot_snap[i] = np.sum(-mass * Vrot**2 * log(r)) + E_tot_snap[i] = E_kin_snap[i] + E_pot_snap[i] + Lz_snap[i] = np.sum(Lz) + +# Plot energy evolution +figure() +plot(time_stats, E_kin_stats, "r-", lw=0.5, label="Kinetic energy") +plot(time_stats, E_pot_stats, "g-", lw=0.5, label="Potential energy") +plot(time_stats, E_tot_stats, "k-", lw=0.5, label="Total energy") + +plot(time_snap[::10], E_kin_snap[::10], "rD", lw=0.5, ms=2) +plot(time_snap[::10], E_pot_snap[::10], "gD", lw=0.5, ms=2) +plot(time_snap[::10], E_tot_snap[::10], "kD", lw=0.5, ms=2) + +legend(loc="center right", fontsize=8, frameon=False, handlelength=3, ncol=1) +xlabel("${\\rm{Time}}$", labelpad=0) +ylabel("${\\rm{Energy}}$",labelpad=0) +xlim(0, 8) + +savefig("energy.png", dpi=200) + +# Plot angular momentum evolution +figure() +plot(time_snap, Lz_snap, "k-", lw=0.5, ms=2) + +xlabel("${\\rm{Time}}$", labelpad=0) +ylabel("${\\rm{Angular~momentum}}$",labelpad=0) +xlim(0, 8) + +savefig("angular_momentum.png", dpi=200) + + diff --git a/examples/IsothermalPotential/isothermal.yml b/examples/IsothermalPotential/isothermal.yml index 0de99779f07591a5b71be11b75bc56ec741ddaed..8d9ec3875e405d95a89b3486bca5fd3465a3e20d 100644 --- a/examples/IsothermalPotential/isothermal.yml +++ b/examples/IsothermalPotential/isothermal.yml @@ -15,7 +15,7 @@ TimeIntegration: # Parameters governing the conserved quantities statistics Statistics: - delta_time: 1e-2 # Time between statistics output + delta_time: 1e-3 # Time between statistics output # Parameters governing the snapshots Snapshots: @@ -23,25 +23,18 @@ Snapshots: time_first: 0. # Time of the first output (in internal units) delta_time: 0.02 # Time difference between consecutive outputs (in internal units) -# Parameters for the hydrodynamics scheme -SPH: - resolution_eta: 1.2349 # Target smoothing length in units of the mean inter-particle separation (1.2349 == 48Ngbs with the cubic spline kernel). - delta_neighbours: 1. # The tolerance for the targetted number of neighbours. - CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. - max_smoothing_length: 40. # Maximal smoothing length allowed (in internal units). - # Parameters related to the initial conditions InitialConditions: file_name: Isothermal.hdf5 # The file to read - shift_x: 100. # A shift to apply to all particles read from the ICs (in internal units). - shift_y: 100. - shift_z: 100. + shift_x: 200. # Shift all particles to be in the potential + shift_y: 200. + shift_z: 200. # External potential parameters IsothermalPotential: - position_x: 100. # location of centre of isothermal potential in internal units - position_y: 100. - position_z: 100. + position_x: 0. # location of centre of isothermal potential in internal units + position_y: 0. + position_z: 0. vrot: 200. # rotation speed of isothermal potential in internal units - timestep_mult: 0.03 # controls time step - + timestep_mult: 0.01 # controls time step + epsilon: 0. # No softening at the centre of the halo diff --git a/examples/IsothermalPotential/makeIC.py b/examples/IsothermalPotential/makeIC.py index 976119f0a312c5acc81fab943ba3cf5769102269..7d1c5361f9a255365517226e49c55a8a50c4d6ce 100644 --- a/examples/IsothermalPotential/makeIC.py +++ b/examples/IsothermalPotential/makeIC.py @@ -30,10 +30,10 @@ import random # all particles move in the xy plane, and start at y=0 # physical constants in cgs -NEWTON_GRAVITY_CGS = 6.672e-8 +NEWTON_GRAVITY_CGS = 6.67408e-8 SOLAR_MASS_IN_CGS = 1.9885e33 PARSEC_IN_CGS = 3.0856776e18 -PROTON_MASS_IN_CGS = 1.6726231e24 +PROTON_MASS_IN_CGS = 1.672621898e24 YEAR_IN_CGS = 3.154e+7 # choice of units @@ -66,17 +66,12 @@ N = int(sys.argv[1]) # Number of particles icirc = int(sys.argv[2]) # if = 0, all particles are on circular orbits, if = 1, Lz/Lcirc uniform in ]0,1[ L = N**(1./3.) -# these are not used but necessary for I/O -rho = 2. # Density -P = 1. # Pressure -gamma = 5./3. # Gas adiabatic index fileName = "Isothermal.hdf5" #--------------------------------------------------- numPart = N mass = 1 -internalEnergy = P / ((gamma - 1.)*rho) #-------------------------------------------------- @@ -111,7 +106,6 @@ grp.attrs["PeriodicBoundariesOn"] = periodic numpy.random.seed(1234) #Particle group -#grp0 = file.create_group("/PartType0") grp1 = file.create_group("/PartType1") #generate particle positions radius = Radius * (numpy.random.rand(N))**(1./3.) @@ -119,10 +113,8 @@ ctheta = -1. + 2 * numpy.random.rand(N) stheta = numpy.sqrt(1.-ctheta**2) phi = 2 * math.pi * numpy.random.rand(N) r = numpy.zeros((numPart, 3)) -#r[:,0] = radius * stheta * numpy.cos(phi) -#r[:,1] = radius * stheta * numpy.sin(phi) -#r[:,2] = radius * ctheta r[:,0] = radius + # speed = vrot v = numpy.zeros((numPart, 3)) @@ -146,17 +138,6 @@ ds = grp1.create_dataset('Masses', (numPart,), 'f') ds[()] = m m = numpy.zeros(1) -h = numpy.full((numPart, ), 1.1255 * boxSize / L, dtype='f') -ds = grp1.create_dataset('SmoothingLength', (numPart,), 'f') -ds[()] = h -h = numpy.zeros(1) - -u = numpy.full((numPart, ), internalEnergy, dtype='f') -ds = grp1.create_dataset('InternalEnergy', (numPart,), 'f') -ds[()] = u -u = numpy.zeros(1) - - ids = 1 + numpy.linspace(0, numPart, numPart, endpoint=False, dtype='L') ds = grp1.create_dataset('ParticleIDs', (numPart, ), 'L') ds[()] = ids diff --git a/examples/IsothermalPotential/run.sh b/examples/IsothermalPotential/run.sh index 28a3cc0910f986f84bcd603091543643356f1c4a..976fbddc01cf7a3dcbb114d437ddb8f03b4d54bd 100755 --- a/examples/IsothermalPotential/run.sh +++ b/examples/IsothermalPotential/run.sh @@ -7,4 +7,7 @@ then python makeIC.py 1000 1 fi +rm -rf Isothermal_*.hdf5 ../swift -g -t 1 isothermal.yml 2>&1 | tee output.log + +python energy_plot.py diff --git a/examples/IsothermalPotential/test.pro b/examples/IsothermalPotential/test.pro deleted file mode 100644 index edfa50121d2e5adb7e039f3c38d6d4c0b4d5e34f..0000000000000000000000000000000000000000 --- a/examples/IsothermalPotential/test.pro +++ /dev/null @@ -1,168 +0,0 @@ -; -; test energy / angular momentum conservation of test problem -; - -iplot = 1 ; if iplot = 1, make plot of E/Lz conservation, else, simply compare final and initial energy - -; set physical constants -@physunits - -indir = './' -basefile = 'Isothermal_' - -; set properties of potential -uL = 1e3 * phys.pc ; unit of length -uM = phys.msun ; unit of mass -uV = 1d5 ; unit of velocity -vrot = 200. ; km/s -r200 = 100. ; virial radius - -; derived units -constG = 10.^(alog10(phys.g)+alog10(uM)-2d0*alog10(uV)-alog10(uL)) ; -pcentre = [100.,100.,100.] * 1d3 * pc / uL - -; -infile = indir + basefile + '*' -spawn,'ls -1 '+infile,res -nfiles = n_elements(res) - - - -; choose: calculate change of energy and Lz, comparing first and last -; snapshots for all particles, or do so for a subset - -; compare all -ifile = 0 -inf = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5' -id = h5rd(inf,'PartType1/ParticleIDs') -nfollow = n_elements(id) - -; follow a subset -nfollow = 500 ; number of particles to follow - -; -if (iplot eq 1) then begin - nskip = 1 - nsave = nfiles -endif else begin - nskip = nfiles - 2 - nsave = 2 -endelse - -; -lout = fltarr(nfollow, nsave) ; Lz -xout = fltarr(nfollow, nsave) ; x -yout = fltarr(nfollow, nsave) ; y -zout = fltarr(nfollow, nsave) ; z -eout = fltarr(nfollow, nsave) ; energies -ekin = fltarr(nfollow, nsave) -epot = fltarr(nfollow, nsave) -tout = fltarr(nsave) - - - -ifile = 0 -isave = 0 -for ifile=0,nfiles-1,nskip do begin - inf = indir + basefile + strtrim(string(ifile,'(i3.3)'),1) + '.hdf5' - time = h5ra(inf, 'Header','Time') - p = h5rd(inf,'PartType1/Coordinates') - v = h5rd(inf,'PartType1/Velocities') - id = h5rd(inf,'PartType1/ParticleIDs') - indx = sort(id) -; - id = id[indx] - for ic=0,2 do begin - tmp = reform(p[ic,*]) & p[ic,*] = tmp[indx] - tmp = reform(v[ic,*]) & v[ic,*] = tmp[indx] - endfor - - -; calculate energy - dd = size(p,/dimen) & npart = dd[1] - ener = fltarr(npart) - dr = fltarr(npart) & dv = dr - for ic=0,2 do dr[*] = dr[*] + (p[ic,*]-pcentre[ic])^2 - for ic=0,2 do dv[*] = dv[*] + v[ic,*]^2 - xout[*,isave] = p[0,0:nfollow-1]-pcentre[0] - yout[*,isave] = p[1,0:nfollow-1]-pcentre[1] - zout[*,isave] = p[2,0:nfollow-1]-pcentre[2] - Lz = (p[0,*]-pcentre[0]) * v[1,*] - (p[1,*]-pcentre[1]) * v[0,*] - dr = sqrt(dr) -; print,'time = ',time,p[0,0],v[0,0],id[0] - ek = 0.5 * dv -; ep = - constG * mextern / dr - ep = -vrot*vrot * (1 + alog(r200/dr)) - ener = ek + ep - tout(isave) = time - lout[*,isave] = lz[0:nfollow-1] - eout(*,isave) = ener[0:nfollow-1] - ekin(*,isave) = ek[0:nfollow-1] - epot(*,isave) = ep[0:nfollow-1] - -; write some output -; print,' time= ',time,' e= ',eout[0],' Lz= ',lz[0],format='(%a %f %a -; %f)' - print,format='('' time= '',f7.1,'' E= '',f9.2,'' Lz= '',e9.2)', time,eout[0],lz[0] - isave = isave + 1 - -endfor -x0 = reform(xout[0,*]) -y0 = reform(xout[1,*]) -z0 = reform(xout[2,*]) - -; calculate relative energy change -de = 0.0 * eout -dl = 0.0 * lout -nsave = isave -for ifile=1, nsave-1 do de[*,ifile] = (eout[*,ifile]-eout[*,0])/eout[*,0] -for ifile=1, nsave-1 do dl[*,ifile] = (lout[*,ifile] - lout[*,0])/lout[*,0] - - -; calculate statistics of energy changes -print,' relatve energy change: (per cent) ',minmax(de) * 100. -print,' relative Lz change: (per cent) ',minmax(dl) * 100. - -; plot enery and Lz conservation for some particles -if(iplot eq 1) then begin -; plot results on energy conservation for some particles - nplot = min(10, nfollow) - win,0 - xr = [min(tout), max(tout)] - yr = [-2,2]*1d-2 ; in percent - plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/nodata,xtitle='time',ytitle='dE/E, dL/L (%)' - for i=0,nplot-1 do oplot,tout,de[i,*] - for i=0,nplot-1 do oplot,tout,dl[i,*],color=red - legend,['dE/E','dL/L'],linestyle=[0,0],color=[black,red],box=0,/bottom,/left - screen_to_png,'e-time.png' - -; plot orbits of those particles - win,2 - xr = [-100,100] - yr = xr - plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/iso,/nodata,xtitle='x',ytitle='y' - color = floor(findgen(nplot)*255/float(nplot)) - for i=0,nplot-1 do oplot,xout[i,*],yout[i,*],color=color(i) - screen_to_png,'orbit.png' - -; plot radial position of these particles - win,4 - xr = [min(tout), max(tout)] - yr = [0,80] - plot,[0],[0],xr=xr,yr=yr,/xs,/ys,/nodata,xtitle='t',ytitle='r' - color = floor(findgen(nplot)*255/float(nplot)) -for i=0,nplot-1 do begin dr = sqrt(reform(xout[i,*])^2 + reform(yout[i,*])^2) & oplot,tout,dr,color=color[i] & endfor - screen_to_png,'r-time.png' - -; make histogram of energy changes at end - win,6 - ohist,de,x,y,-0.05,0.05,0.001 - plot,x,y,psym=10,xtitle='de (%)' - screen_to_png,'de-hist.png' - - -endif - -end - - diff --git a/examples/Makefile.am b/examples/Makefile.am index 28a4629bdb401c0736379a2fe14a3a5f19caf650..dd13fb7eb4b82fbbfbb1ae450e20d01b13f2a455 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -63,11 +63,11 @@ EXTRA_DIST = BigCosmoVolume/makeIC.py \ EAGLE_12/eagle_12.yml EAGLE_12/getIC.sh EAGLE_12/README EAGLE_12/run.sh \ EAGLE_25/eagle_25.yml EAGLE_25/getIC.sh EAGLE_25/README EAGLE_25/run.sh \ EAGLE_50/eagle_50.yml EAGLE_50/getIC.sh EAGLE_50/README EAGLE_50/run.sh \ - ExternalPointMass/externalPointMass.yml ExternalPointMass/makeIC.py ExternalPointMass/run.sh ExternalPointMass/test.pro \ + ExternalPointMass/externalPointMass.yml ExternalPointMass/makeIC.py ExternalPointMass/run.sh ExternalPointMass/energy_plot.py \ GreshoVortex_2D/getGlass.sh GreshoVortex_2D/gresho.yml GreshoVortex_2D/makeIC.py GreshoVortex_2D/plotSolution.py GreshoVortex_2D/run.sh \ HydrostaticHalo/README HydrostaticHalo/hydrostatic.yml HydrostaticHalo/makeIC.py HydrostaticHalo/run.sh \ HydrostaticHalo/density_profile.py HydrostaticHalo/velocity_profile.py HydrostaticHalo/internal_energy_profile.py HydrostaticHalo/test_energy_conservation.py \ - IsothermalPotential/README IsothermalPotential/run.sh IsothermalPotential/test.pro IsothermalPotential/isothermal.yml IsothermalPotential/makeIC.py \ + IsothermalPotential/README IsothermalPotential/run.sh IsothermalPotential/energy_plot.py IsothermalPotential/isothermal.yml IsothermalPotential/makeIC.py \ KelvinHelmholtz_2D/kelvinHelmholtz.yml KelvinHelmholtz_2D/makeIC.py KelvinHelmholtz_2D/plotSolution.py KelvinHelmholtz_2D/run.sh \ MultiTypes/makeIC.py MultiTypes/multiTypes.yml MultiTypes/run.sh \ PerturbedBox_2D/makeIC.py PerturbedBox_2D/perturbedPlane.yml \ diff --git a/examples/main.c b/examples/main.c index 50f444e692c8ee9cfd3453cd7b3c9b1e4749a2d3..4d511ce8bc4ad35afee4f86655cbabeda3eebc0a 100644 --- a/examples/main.c +++ b/examples/main.c @@ -290,7 +290,8 @@ int main(int argc, char *argv[]) { /* Do we have debugging checks ? */ #ifdef SWIFT_DEBUG_CHECKS - message("WARNING: Debugging checks activated. Code will be slower !"); + if (myrank == 0) + message("WARNING: Debugging checks activated. Code will be slower !"); #endif /* Do we choke on FP-exceptions ? */ @@ -359,7 +360,7 @@ int main(int argc, char *argv[]) { /* Initialise the hydro properties */ struct hydro_props hydro_properties; - hydro_props_init(&hydro_properties, params); + if (with_hydro) hydro_props_init(&hydro_properties, params); /* Read particles and space information from (GADGET) ICs */ char ICfileName[200] = ""; diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py index 6295c81a5f2fdb1e726cdf0a8fb43713004800f1..978448b3cd049c6ff31a92c7255851390ccc700c 100755 --- a/examples/plot_tasks.py +++ b/examples/plot_tasks.py @@ -55,40 +55,44 @@ PLOT_PARAMS = {"axes.labelsize": 10, pl.rcParams.update(PLOT_PARAMS) # Tasks and subtypes. Indexed as in tasks.h. -TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init", "ghost", - "extra_ghost", "kick", "send", "recv", - "grav_gather_m", "grav_fft", "grav_mm", "grav_up", - "grav_external", "cooling", "count"] - -TASKCOLOURS = {"none": "black", - "sort": "lightblue", - "self": "greenyellow", - "pair": "navy", - "sub_self": "greenyellow", - "sub_pair": "navy", - "init": "indigo", - "ghost": "cyan", - "extra_ghost": "cyan", - "kick": "green", - "send": "yellow", - "recv": "magenta", - "grav_gather_m": "mediumorchid", - "grav_fft": "mediumnightblue", - "grav_mm": "mediumturquoise", - "grav_up": "mediumvioletred", - "grav_external": "darkred", - "cooling": "darkblue", - "count": "powerblue"} - -SUBTYPES = ["none", "density", "gradient", "force", "grav", "tend", "count"] - -SUBCOLOURS = {"none": "black", - "density": "red", - "gradient": "powerblue", - "force": "blue", - "grav": "indigo", - "tend": "grey", - "count": "black"} +TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", + "init", "ghost", "extra_ghost", "drift", "kick1", "kick2", + "timestep", "send", "recv", "grav_gather_m", "grav_fft", + "grav_mm", "grav_up", "cooling", "sourceterms", "count"] +SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav", + "tend", "xv", "rho", "gpart", "count"] + +# Task/subtypes of interest. +FULLTYPES = ["self/force", "self/density", "sub_self/force", + "sub_self/density", "pair/force", "pair/density", "sub_pair/force", + "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho", + "recv/tend", "send/tend"] + +# Get a number of colours for the various types. +colours = ["black", "gray", "rosybrown", "firebrick", "red", "darksalmon", + "sienna", "sandybrown", "bisque", "tan", "moccasin", "gold", "darkkhaki", + "lightgoldenrodyellow", "olivedrab", "chartreuse", "darksage", "lightgreen", + "green", "mediumseagreen", "mediumaquamarine", "mediumturquoise", "darkslategrey", + "cyan", "cadetblue", "skyblue", "dodgerblue", "slategray", "darkblue", + "slateblue", "blueviolet", "mediumorchid", "purple", "magenta", "hotpink", + "pink"] +maxcolours = len(colours) + +# Set colours of task/subtype. +TASKCOLOURS = {} +ncolours = 0 +for task in TASKTYPES: + TASKCOLOURS[task] = colours[ncolours] + ncolours = (ncolours + 1) % maxcolours + +SUBCOLOURS = {} +for task in SUBTYPES: + SUBCOLOURS[task] = colours[ncolours] + ncolours = (ncolours + 1) % maxcolours + +for task in FULLTYPES: + SUBCOLOURS[task] = colours[ncolours] + ncolours = (ncolours + 1) % maxcolours # Show docs if help is requested. if len( sys.argv ) == 2 and ( sys.argv[1][0:2] == "-h" or sys.argv[1][0:3] == "--h" ): @@ -149,39 +153,26 @@ num_lines = pl.size(data) / 10 for line in range(num_lines): thread = int(data[line,0]) tasks[thread].append({}) - tasks[thread][-1]["type"] = TASKTYPES[int(data[line,1])] - tasks[thread][-1]["subtype"] = SUBTYPES[int(data[line,2])] + tasktype = TASKTYPES[int(data[line,1])] + subtype = SUBTYPES[int(data[line,2])] + tasks[thread][-1]["type"] = tasktype + tasks[thread][-1]["subtype"] = subtype tic = int(data[line,4]) / CPU_CLOCK * 1000 toc = int(data[line,5]) / CPU_CLOCK * 1000 tasks[thread][-1]["tic"] = tic tasks[thread][-1]["toc"] = toc tasks[thread][-1]["t"] = (toc + tic)/ 2 + if "self" in tasktype or "pair" in tasktype: + fulltype = tasktype + "/" + subtype + if fulltype in SUBCOLOURS: + tasks[thread][-1]["colour"] = SUBCOLOURS[fulltype] + else: + tasks[thread][-1]["colour"] = SUBCOLOURS[subtype] + else: + tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype] -combtasks = {} -combtasks[-1] = [] -for i in range(nthread): - combtasks[i] = [] - for thread in range(nthread): tasks[thread] = sorted(tasks[thread], key=lambda l: l["t"]) - lasttype = "" - types = [] - for task in tasks[thread]: - if task["type"] not in types: - types.append(task["type"]) - if lasttype == "" or not lasttype == task["type"]: - combtasks[thread].append({}) - combtasks[thread][-1]["type"] = task["type"] - combtasks[thread][-1]["subtype"] = task["subtype"] - combtasks[thread][-1]["tic"] = task["tic"] - combtasks[thread][-1]["toc"] = task["toc"] - if task["type"] == "self" or task["type"] == "pair" or task["type"] == "sub": - combtasks[thread][-1]["colour"] = SUBCOLOURS[task["subtype"]] - else: - combtasks[thread][-1]["colour"] = TASKCOLOURS[task["type"]] - lasttype = task["type"] - else: - combtasks[thread][-1]["toc"] = task["toc"] typesseen = [] fig = pl.figure() @@ -192,11 +183,11 @@ tictoc = np.zeros(2) for i in range(nthread): # Collect ranges and colours into arrays. - tictocs = np.zeros(len(combtasks[i])*2) - colours = np.empty(len(combtasks[i])*2, dtype='object') + tictocs = np.zeros(len(tasks[i])*2) + colours = np.empty(len(tasks[i])*2, dtype='object') coloursseen = [] j = 0 - for task in combtasks[i]: + for task in tasks[i]: tictocs[j] = task["tic"] tictocs[j+1] = task["toc"] colours[j] = task["colour"] diff --git a/examples/plot_tasks_MPI.py b/examples/plot_tasks_MPI.py index 734918b8cbf388ef8f1a064e014cfd28775edde2..c95bfa1fd2d087cc907b57201c1a1397cbeb1460 100755 --- a/examples/plot_tasks_MPI.py +++ b/examples/plot_tasks_MPI.py @@ -63,40 +63,44 @@ PLOT_PARAMS = {"axes.labelsize": 10, pl.rcParams.update(PLOT_PARAMS) # Tasks and subtypes. Indexed as in tasks.h. -TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init", - "ghost", "extra_ghost", "kick", "send", "recv", - "grav_gather_m", "grav_fft", "grav_mm", "grav_up", - "grav_external", "cooling", "count"] - -TASKCOLOURS = {"none": "black", - "sort": "lightblue", - "self": "greenyellow", - "pair": "navy", - "sub_self": "greenyellow", - "sub_pair": "navy", - "init": "indigo", - "ghost": "cyan", - "extra_ghost": "cyan", - "kick": "green", - "send": "yellow", - "recv": "magenta", - "grav_gather_m": "mediumorchid", - "grav_fft": "mediumnightblue", - "grav_mm": "mediumturquoise", - "grav_up": "mediumvioletred", - "grav_external": "darkred", - "cooling": "darkblue", - "count": "powerblue"} - -SUBTYPES = ["none", "density", "gradient", "force", "grav", "tend", "count"] - -SUBCOLOURS = {"none": "black", - "density": "red", - "gradient": "powerblue", - "force": "blue", - "grav": "indigo", - "tend": "grey", - "count": "black"} +TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", + "init", "ghost", "extra_ghost", "drift", "kick1", "kick2", + "timestep", "send", "recv", "grav_gather_m", "grav_fft", + "grav_mm", "grav_up", "cooling", "sourceterms", "count"] +SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav", + "tend", "xv", "rho", "gpart", "count"] + +# Task/subtypes of interest. +FULLTYPES = ["self/force", "self/density", "sub_self/force", + "sub_self/density", "pair/force", "pair/density", "sub_pair/force", + "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho", + "recv/tend", "send/tend"] + +# Get a number of colours for the various types. +colours = ["black", "gray", "rosybrown", "firebrick", "red", "darksalmon", + "sienna", "sandybrown", "bisque", "tan", "moccasin", "gold", "darkkhaki", + "lightgoldenrodyellow", "olivedrab", "chartreuse", "darksage", "lightgreen", + "green", "mediumseagreen", "mediumaquamarine", "mediumturquoise", "darkslategrey", + "cyan", "cadetblue", "skyblue", "dodgerblue", "slategray", "darkblue", + "slateblue", "blueviolet", "mediumorchid", "purple", "magenta", "hotpink", + "pink"] +maxcolours = len(colours) + +# Set colours of task/subtype. +TASKCOLOURS = {} +ncolours = 0 +for task in TASKTYPES: + TASKCOLOURS[task] = colours[ncolours] + ncolours = (ncolours + 1) % maxcolours + +SUBCOLOURS = {} +for task in SUBTYPES: + SUBCOLOURS[task] = colours[ncolours] + ncolours = (ncolours + 1) % maxcolours + +for task in FULLTYPES: + SUBCOLOURS[task] = colours[ncolours] + ncolours = (ncolours + 1) % maxcolours # Show docs if help is requested. if len( sys.argv ) == 2 and ( sys.argv[1][0:2] == "-h" or sys.argv[1][0:3] == "--h" ): @@ -185,39 +189,26 @@ for rank in range(nranks): for line in range(num_lines): thread = int(data[line,1]) tasks[thread].append({}) - tasks[thread][-1]["type"] = TASKTYPES[int(data[line,2])] - tasks[thread][-1]["subtype"] = SUBTYPES[int(data[line,3])] + tasktype = TASKTYPES[int(data[line,2])] + subtype = SUBTYPES[int(data[line,3])] + tasks[thread][-1]["type"] = tasktype + tasks[thread][-1]["subtype"] = subtype tic = int(data[line,5]) / CPU_CLOCK * 1000 toc = int(data[line,6]) / CPU_CLOCK * 1000 tasks[thread][-1]["tic"] = tic tasks[thread][-1]["toc"] = toc tasks[thread][-1]["t"] = (toc + tic)/ 2 - - combtasks = {} - combtasks[-1] = [] - for i in range(nthread): - combtasks[i] = [] + if "self" in tasktype or "pair" in tasktype or "recv" in tasktype or "send" in tasktype: + fulltype = tasktype + "/" + subtype + if fulltype in SUBCOLOURS: + tasks[thread][-1]["colour"] = SUBCOLOURS[fulltype] + else: + tasks[thread][-1]["colour"] = SUBCOLOURS[subtype] + else: + tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype] for thread in range(nthread): tasks[thread] = sorted(tasks[thread], key=lambda l: l["t"]) - lasttype = "" - types = [] - for task in tasks[thread]: - if task["type"] not in types: - types.append(task["type"]) - if lasttype == "" or not lasttype == task["type"]: - combtasks[thread].append({}) - combtasks[thread][-1]["type"] = task["type"] - combtasks[thread][-1]["subtype"] = task["subtype"] - combtasks[thread][-1]["tic"] = task["tic"] - combtasks[thread][-1]["toc"] = task["toc"] - if task["type"] == "self" or task["type"] == "pair" or task["type"] == "sub": - combtasks[thread][-1]["colour"] = SUBCOLOURS[task["subtype"]] - else: - combtasks[thread][-1]["colour"] = TASKCOLOURS[task["type"]] - lasttype = task["type"] - else: - combtasks[thread][-1]["toc"] = task["toc"] fig = pl.figure() ax = fig.add_subplot(1,1,1) @@ -227,11 +218,11 @@ for rank in range(nranks): for i in range(nthread): # Collect ranges and colours into arrays. - tictocs = np.zeros(len(combtasks[i])*2) - colours = np.empty(len(combtasks[i])*2, dtype='object') + tictocs = np.zeros(len(tasks[i])*2) + colours = np.empty(len(tasks[i])*2, dtype='object') coloursseen = [] j = 0 - for task in combtasks[i]: + for task in tasks[i]: tictocs[j] = task["tic"] tictocs[j+1] = task["toc"] colours[j] = task["colour"] diff --git a/src/Makefile.am b/src/Makefile.am index 5982be729020fff7ef732a78b5155ad0f5a430eb..5434241c0eb203f069901dd9be99640b91dd8e90 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -59,7 +59,7 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ # Include files for distribution, not installation. nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h runner_doiact_fft.h \ - units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \ + runner_doiact_nosort.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \ dimension.h equation_of_state.h \ gravity.h gravity_io.h \ gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \ diff --git a/src/active.h b/src/active.h index edb276c5e9d7f420e8bcaa9cb9d15dae0ad4a880..8dfd063fda9189ff715107c599ef4851cc5a0387 100644 --- a/src/active.h +++ b/src/active.h @@ -41,9 +41,10 @@ __attribute__((always_inline)) INLINE static int cell_is_drifted( #ifdef SWIFT_DEBUG_CHECKS if (c->ti_old > e->ti_current) error( - "Cell has been drifted too far forward in time! c->ti_old=%lld " - "e->ti_current=%lld", - c->ti_old, e->ti_current); + "Cell has been drifted too far forward in time! c->ti_old=%lld (t=%e) " + "and e->ti_current=%lld (t=%e)", + c->ti_old, c->ti_old * e->timeBase, e->ti_current, + e->ti_current * e->timeBase); #endif return (c->ti_old == e->ti_current); @@ -62,9 +63,10 @@ __attribute__((always_inline)) INLINE static int cell_is_active( #ifdef SWIFT_DEBUG_CHECKS if (c->ti_end_min < e->ti_current) error( - "cell in an impossible time-zone! c->ti_end_min=%lld " - "e->ti_current=%lld", - c->ti_end_min, e->ti_current); + "cell in an impossible time-zone! c->ti_end_min=%lld (t=%e) and " + "e->ti_current=%lld (t=%e)", + c->ti_end_min, c->ti_end_min * e->timeBase, e->ti_current, + e->ti_current * e->timeBase); #endif return (c->ti_end_min == e->ti_current); diff --git a/src/cell.c b/src/cell.c index 1684111d5b03ce712df11f4dabffeb2fdbf33dc3..ead1d7a49e40c028cdbf737cad7c295e4d005383 100644 --- a/src/cell.c +++ b/src/cell.c @@ -130,7 +130,6 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { temp->dx_max_sort = 0.f; temp->nodeID = c->nodeID; temp->parent = c; - temp->ti_old = c->ti_old; c->progeny[k] = temp; c->split = 1; count += cell_unpack(&pc[pc->progeny[k]], temp, s); @@ -245,7 +244,7 @@ int cell_pack(struct cell *c, struct pcell *pc) { * * @return The number of packed cells. */ -int cell_pack_ti_ends(struct cell *c, int *ti_ends) { +int cell_pack_ti_ends(struct cell *c, integertime_t *ti_ends) { #ifdef WITH_MPI @@ -276,7 +275,7 @@ int cell_pack_ti_ends(struct cell *c, int *ti_ends) { * * @return The number of cells created. */ -int cell_unpack_ti_ends(struct cell *c, int *ti_ends) { +int cell_unpack_ti_ends(struct cell *c, integertime_t *ti_ends) { #ifdef WITH_MPI @@ -908,6 +907,10 @@ int cell_is_drift_needed(struct cell *c, const struct engine *e) { */ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { +#ifdef WITH_MPI + struct engine *e = s->space->e; +#endif + /* Un-skip the density tasks involved with this cell. */ for (struct link *l = c->density; l != NULL; l = l->next) { struct task *t = l->t; @@ -958,8 +961,10 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { /* Activate the tasks to recv foreign cell ci's data. */ scheduler_activate(s, ci->recv_xv); - scheduler_activate(s, ci->recv_rho); - scheduler_activate(s, ci->recv_ti); + if (cell_is_active(ci, e)) { + scheduler_activate(s, ci->recv_rho); + scheduler_activate(s, ci->recv_ti); + } /* Look for the local cell cj's send tasks. */ struct link *l = NULL; @@ -974,24 +979,28 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { else error("Drift task missing !"); - for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; - l = l->next) - ; - if (l == NULL) error("Missing link to send_rho task."); - scheduler_activate(s, l->t); - - for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID; - l = l->next) - ; - if (l == NULL) error("Missing link to send_ti task."); - scheduler_activate(s, l->t); + if (cell_is_active(cj, e)) { + for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_rho task."); + scheduler_activate(s, l->t); + + for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_ti task."); + scheduler_activate(s, l->t); + } } else if (cj->nodeID != engine_rank) { /* Activate the tasks to recv foreign cell cj's data. */ scheduler_activate(s, cj->recv_xv); - scheduler_activate(s, cj->recv_rho); - scheduler_activate(s, cj->recv_ti); + if (cell_is_active(cj, e)) { + scheduler_activate(s, cj->recv_rho); + scheduler_activate(s, cj->recv_ti); + } /* Look for the local cell ci's send tasks. */ struct link *l = NULL; @@ -1006,17 +1015,19 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { else error("Drift task missing !"); - for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; - l = l->next) - ; - if (l == NULL) error("Missing link to send_rho task."); - scheduler_activate(s, l->t); - - for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID; - l = l->next) - ; - if (l == NULL) error("Missing link to send_ti task."); - scheduler_activate(s, l->t); + if (cell_is_active(ci, e)) { + for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_rho task."); + scheduler_activate(s, l->t); + + for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_ti task."); + scheduler_activate(s, l->t); + } } else { scheduler_activate(s, ci->super->drift); scheduler_activate(s, cj->super->drift); @@ -1168,3 +1179,25 @@ void cell_drift(struct cell *c, const struct engine *e) { /* Update the time of the last drift */ c->ti_old = ti_current; } + +/** + * @brief Recursively checks that all particles in a cell have a time-step + */ +void cell_check_timesteps(struct cell *c) { +#ifdef SWIFT_DEBUG_CHECKS + + if (c->ti_end_min == 0 && c->nr_tasks > 0) + error("Cell without assigned time-step"); + + if (c->split) { + for (int k = 0; k < 8; ++k) + if (c->progeny[k] != NULL) cell_check_timesteps(c->progeny[k]); + } else { + + if (c->nodeID == engine_rank) + for (int i = 0; i < c->count; ++i) + if (c->parts[i].time_bin == 0) + error("Particle without assigned time-bin"); + } +#endif +} diff --git a/src/cell.h b/src/cell.h index a89353a2b9105387fdc5f22b130ac1e108498303..eed72bfa64f9cab00d25f6e9b0ae3a008a090186 100644 --- a/src/cell.h +++ b/src/cell.h @@ -35,6 +35,7 @@ #include "multipole.h" #include "part.h" #include "task.h" +#include "timeline.h" /* Avoid cyclic inclusions */ struct engine; @@ -299,8 +300,8 @@ int cell_glocktree(struct cell *c); void cell_gunlocktree(struct cell *c); int cell_pack(struct cell *c, struct pcell *pc); int cell_unpack(struct pcell *pc, struct cell *c, struct space *s); -int cell_pack_ti_ends(struct cell *c, int *ti_ends); -int cell_unpack_ti_ends(struct cell *c, int *ti_ends); +int cell_pack_ti_ends(struct cell *c, integertime_t *ti_ends); +int cell_unpack_ti_ends(struct cell *c, integertime_t *ti_ends); int cell_getsize(struct cell *c); int cell_link_parts(struct cell *c, struct part *parts); int cell_link_gparts(struct cell *c, struct gpart *gparts); @@ -315,5 +316,6 @@ int cell_is_drift_needed(struct cell *c, const struct engine *e); int cell_unskip_tasks(struct cell *c, struct scheduler *s); void cell_set_super(struct cell *c, struct cell *super); void cell_drift(struct cell *c, const struct engine *e); +void cell_check_timesteps(struct cell *c); #endif /* SWIFT_CELL_H */ diff --git a/src/cooling/const_lambda/cooling.h b/src/cooling/const_lambda/cooling.h index 757c5e6600d85e19910d2fd75e955e5342fb63ee..9fadd51e3c2a3c5462c8476e0aac893e3a2d530d 100644 --- a/src/cooling/const_lambda/cooling.h +++ b/src/cooling/const_lambda/cooling.h @@ -89,10 +89,9 @@ __attribute__((always_inline)) INLINE static void cooling_cool_part( float cooling_du_dt = cooling_rate(phys_const, us, cooling, p); /* Integrate cooling equation to enforce energy floor */ - if (u_old + hydro_du_dt * dt < u_floor) { - cooling_du_dt = 0.f; - } else if (u_old + (hydro_du_dt + cooling_du_dt) * dt < u_floor) { - cooling_du_dt = (u_old + dt * hydro_du_dt - u_floor) / dt; + /* Factor of 1.5 included since timestep could potentially double */ + if (u_old + (hydro_du_dt + cooling_du_dt) * 1.5f * dt < u_floor) { + cooling_du_dt = -(u_old + 1.5f * dt * hydro_du_dt - u_floor) / (1.5f * dt); } /* Update the internal energy time derivative */ diff --git a/src/engine.c b/src/engine.c index 285e68dc94bdb2214ba8228d4ffa2df0b10c0813..2106c044fe02b360232f67cd037203e94ad9af64 100644 --- a/src/engine.c +++ b/src/engine.c @@ -48,6 +48,7 @@ #include "engine.h" /* Local headers. */ +#include "active.h" #include "atomic.h" #include "cell.h" #include "clocks.h" @@ -698,14 +699,14 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, ci->super->drift = scheduler_addtask( s, task_type_drift, task_subtype_none, 0, 0, ci->super, NULL, 0); - t_xv = scheduler_addtask(s, task_type_send, task_subtype_none, - 4 * ci->tag, 0, ci, cj, 0); - t_rho = scheduler_addtask(s, task_type_send, task_subtype_none, + t_xv = scheduler_addtask(s, task_type_send, task_subtype_xv, 4 * ci->tag, + 0, ci, cj, 0); + t_rho = scheduler_addtask(s, task_type_send, task_subtype_rho, 4 * ci->tag + 1, 0, ci, cj, 0); t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend, 4 * ci->tag + 2, 0, ci, cj, 0); #ifdef EXTRA_HYDRO_LOOP - t_gradient = scheduler_addtask(s, task_type_send, task_subtype_none, + t_gradient = scheduler_addtask(s, task_type_send, task_subtype_gradient, 4 * ci->tag + 3, 0, ci, cj, 0); #endif @@ -739,8 +740,8 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, /* Drift before you send */ scheduler_addunlock(s, ci->super->drift, t_xv); - /* The super-cell's kick task should unlock the send_ti task. */ - if (t_ti != NULL) scheduler_addunlock(s, ci->super->kick2, t_ti); + /* The super-cell's timestep task should unlock the send_ti task. */ + scheduler_addunlock(s, ci->super->timestep, t_ti); } /* Add them to the local cell. */ @@ -749,7 +750,7 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, #ifdef EXTRA_HYDRO_LOOP engine_addlink(e, &ci->send_gradient, t_gradient); #endif - if (t_ti != NULL) engine_addlink(e, &ci->send_ti, t_ti); + engine_addlink(e, &ci->send_ti, t_ti); } /* Recurse? */ @@ -787,14 +788,14 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv, if (t_xv == NULL && c->density != NULL) { /* Create the tasks. */ - t_xv = scheduler_addtask(s, task_type_recv, task_subtype_none, 4 * c->tag, - 0, c, NULL, 0); - t_rho = scheduler_addtask(s, task_type_recv, task_subtype_none, + t_xv = scheduler_addtask(s, task_type_recv, task_subtype_xv, 4 * c->tag, 0, + c, NULL, 0); + t_rho = scheduler_addtask(s, task_type_recv, task_subtype_rho, 4 * c->tag + 1, 0, c, NULL, 0); t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend, 4 * c->tag + 2, 0, c, NULL, 0); #ifdef EXTRA_HYDRO_LOOP - t_gradient = scheduler_addtask(s, task_type_recv, task_subtype_none, + t_gradient = scheduler_addtask(s, task_type_recv, task_subtype_gradient, 4 * c->tag + 3, 0, c, NULL, 0); #endif } @@ -815,7 +816,7 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv, } for (struct link *l = c->force; l != NULL; l = l->next) { scheduler_addunlock(s, t_gradient, l->t); - if (t_ti != NULL) scheduler_addunlock(s, l->t, t_ti); + scheduler_addunlock(s, l->t, t_ti); } if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts); #else @@ -825,7 +826,7 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv, } for (struct link *l = c->force; l != NULL; l = l->next) { scheduler_addunlock(s, t_rho, l->t); - if (t_ti != NULL) scheduler_addunlock(s, l->t, t_ti); + scheduler_addunlock(s, l->t, t_ti); } if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts); #endif @@ -2049,9 +2050,9 @@ void engine_marktasks_mapper(void *map_data, int num_elements, void *extra_data) { /* Unpack the arguments. */ struct task *tasks = (struct task *)map_data; - const integertime_t ti_end = ((size_t *)extra_data)[0]; size_t *rebuild_space = &((size_t *)extra_data)[1]; struct scheduler *s = (struct scheduler *)(((size_t *)extra_data)[2]); + struct engine *e = (struct engine *)((size_t *)extra_data)[0]; for (int ind = 0; ind < num_elements; ind++) { struct task *t = &tasks[ind]; @@ -2062,7 +2063,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, t->type == task_type_sourceterms || t->type == task_type_sub_self) { /* Set this task's skip. */ - if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t); + if (cell_is_active(t->ci, e)) scheduler_activate(s, t); } /* Pair? */ @@ -2080,7 +2081,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, *rebuild_space = 1; /* Set this task's skip, otherwise nothing to do. */ - if (ci->ti_end_min <= ti_end || cj->ti_end_min <= ti_end) + if (cell_is_active(t->ci, e) || cell_is_active(t->cj, e)) scheduler_activate(s, t); else continue; @@ -2109,8 +2110,10 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Activate the tasks to recv foreign cell ci's data. */ scheduler_activate(s, ci->recv_xv); - scheduler_activate(s, ci->recv_rho); - scheduler_activate(s, ci->recv_ti); + if (cell_is_active(ci, e)) { + scheduler_activate(s, ci->recv_rho); + scheduler_activate(s, ci->recv_ti); + } /* Look for the local cell cj's send tasks. */ struct link *l = NULL; @@ -2125,24 +2128,28 @@ void engine_marktasks_mapper(void *map_data, int num_elements, else error("Drift task missing !"); - for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; - l = l->next) - ; - if (l == NULL) error("Missing link to send_rho task."); - scheduler_activate(s, l->t); - - for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID; - l = l->next) - ; - if (l == NULL) error("Missing link to send_ti task."); - scheduler_activate(s, l->t); + if (cell_is_active(cj, e)) { + for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_rho task."); + scheduler_activate(s, l->t); + + for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_ti task."); + scheduler_activate(s, l->t); + } } else if (cj->nodeID != engine_rank) { /* Activate the tasks to recv foreign cell cj's data. */ scheduler_activate(s, cj->recv_xv); - scheduler_activate(s, cj->recv_rho); - scheduler_activate(s, cj->recv_ti); + if (cell_is_active(cj, e)) { + scheduler_activate(s, cj->recv_rho); + scheduler_activate(s, cj->recv_ti); + } /* Look for the local cell ci's send tasks. */ struct link *l = NULL; @@ -2157,49 +2164,35 @@ void engine_marktasks_mapper(void *map_data, int num_elements, else error("Drift task missing !"); - for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; - l = l->next) - ; - if (l == NULL) error("Missing link to send_rho task."); - scheduler_activate(s, l->t); - - for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID; - l = l->next) - ; - if (l == NULL) error("Missing link to send_ti task."); - scheduler_activate(s, l->t); - } else { - scheduler_activate(s, ci->super->drift); - scheduler_activate(s, cj->super->drift); + if (cell_is_active(ci, e)) { + for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_rho task."); + scheduler_activate(s, l->t); + + for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_ti task."); + scheduler_activate(s, l->t); + } } -#else - scheduler_activate(s, ci->super->drift); - scheduler_activate(s, cj->super->drift); + #endif } - /* Kick? */ - else if (t->type == task_type_kick1) { - if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t); - } else if (t->type == task_type_kick2) { - if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t); + /* Kick/Drift/Init? */ + else if (t->type == task_type_kick1 || t->type == task_type_kick2 || + t->type == task_type_drift || t->type == task_type_init) { + if (cell_is_active(t->ci, e)) scheduler_activate(s, t); } /* Time-step? */ else if (t->type == task_type_timestep) { t->ci->updated = 0; t->ci->g_updated = 0; - if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t); - } - - /* Drift? */ - else if (t->type == task_type_drift) { - if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t); - } - - /* Init? */ - else if (t->type == task_type_init) { - if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t); + if (cell_is_active(t->ci, e)) scheduler_activate(s, t); } /* Tasks with no cells should not be skipped? */ @@ -2222,7 +2215,7 @@ int engine_marktasks(struct engine *e) { int rebuild_space = 0; /* Run through the tasks and mark as skip or not. */ - size_t extra_data[3] = {e->ti_current, rebuild_space, (size_t)&e->sched}; + size_t extra_data[3] = {(size_t)e, rebuild_space, (size_t)&e->sched}; threadpool_map(&e->threadpool, engine_marktasks_mapper, s->tasks, s->nr_tasks, sizeof(struct task), 10000, extra_data); rebuild_space = extra_data[1]; @@ -2321,15 +2314,16 @@ void engine_rebuild(struct engine *e) { * @param drift_all Whether to drift particles before rebuilding or not. Will * not be necessary if all particles have already been * drifted (before repartitioning for instance). - * @param deferskip Whether to defer the skip until after the rebuild. - * Needed after a repartition. + * @param postrepart If we have just repartitioned, if so we need to defer the + * skip until after the rebuild and not check the if all + * cells have been drifted. */ -void engine_prepare(struct engine *e, int drift_all, int deferskip) { +void engine_prepare(struct engine *e, int drift_all, int postrepart) { TIMER_TIC; /* Unskip active tasks and check for rebuild */ - if (!deferskip) engine_unskip(e); + if (!postrepart) engine_unskip(e); /* Run through the tasks and mark as skip or not. */ int rebuild = e->forcerebuild; @@ -2350,13 +2344,15 @@ void engine_prepare(struct engine *e, int drift_all, int deferskip) { if (drift_all) engine_drift_all(e); #ifdef SWIFT_DEBUG_CHECKS - /* Check that all cells have been drifted to the current time */ - space_check_drift_point(e->s, e->ti_current); + /* Check that all cells have been drifted to the current time, unless + * we have just repartitioned, that can include cells that have not + * previously been active on this rank. */ + if (!postrepart) space_check_drift_point(e->s, e->ti_current); #endif engine_rebuild(e); } - if (deferskip) engine_unskip(e); + if (postrepart) engine_unskip(e); /* Re-rank the tasks every now and then. */ if (e->tasks_age % engine_tasksreweight == 1) { @@ -2417,36 +2413,33 @@ void engine_barrier(struct engine *e, int tid) { */ void engine_collect_kick(struct cell *c) { - /* Skip super-cells (Their values are already set) */ +/* Skip super-cells (Their values are already set) */ +#ifdef WITH_MPI + if (c->timestep != NULL || c->recv_ti != NULL) return; +#else if (c->timestep != NULL) return; +#endif /* WITH_MPI */ /* Counters for the different quantities. */ int updated = 0, g_updated = 0; integertime_t ti_end_min = max_nr_timesteps; - /* Only do something is the cell is non-empty */ - if (c->count != 0 || c->gcount != 0) { + /* Collect the values from the progeny. */ + for (int k = 0; k < 8; k++) { + struct cell *cp = c->progeny[k]; + if (cp != NULL && (cp->count > 0 || cp->gcount > 0)) { - /* If this cell is not split, I'm in trouble. */ - if (!c->split) error("Cell is not split."); + /* Recurse */ + engine_collect_kick(cp); - /* Collect the values from the progeny. */ - for (int k = 0; k < 8; k++) { - struct cell *cp = c->progeny[k]; - if (cp != NULL) { + /* And update */ + ti_end_min = min(ti_end_min, cp->ti_end_min); + updated += cp->updated; + g_updated += cp->g_updated; - /* Recurse */ - engine_collect_kick(cp); - - /* And update */ - ti_end_min = min(ti_end_min, cp->ti_end_min); - updated += cp->updated; - g_updated += cp->g_updated; - - /* Collected, so clear for next time. */ - cp->updated = 0; - cp->g_updated = 0; - } + /* Collected, so clear for next time. */ + cp->updated = 0; + cp->g_updated = 0; } } @@ -2470,9 +2463,9 @@ void engine_collect_timestep(struct engine *e) { const struct space *s = e->s; /* Collect the cell data. */ - for (int k = 0; k < s->nr_cells; k++) - if (s->cells_top[k].nodeID == e->nodeID) { - struct cell *c = &s->cells_top[k]; + for (int k = 0; k < s->nr_cells; k++) { + struct cell *c = &s->cells_top[k]; + if (c->count > 0 || c->gcount > 0) { /* Make the top-cells recurse */ engine_collect_kick(c); @@ -2486,15 +2479,16 @@ void engine_collect_timestep(struct engine *e) { c->updated = 0; c->g_updated = 0; } + } /* Aggregate the data from the different nodes. */ #ifdef WITH_MPI { - int in_i[1], out_i[1]; + integertime_t in_i[1], out_i[1]; in_i[0] = 0; out_i[0] = ti_end_min; - if (MPI_Allreduce(out_i, in_i, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD) != - MPI_SUCCESS) + if (MPI_Allreduce(out_i, in_i, 1, MPI_LONG_LONG_INT, MPI_MIN, + MPI_COMM_WORLD) != MPI_SUCCESS) error("Failed to aggregate t_end_min."); ti_end_min = in_i[0]; } @@ -2672,7 +2666,9 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) { if (e->nodeID == 0) message("Converting internal energy variable."); /* Apply the conversion */ - space_map_cells_pre(s, 0, cell_convert_hydro, NULL); + // space_map_cells_pre(s, 0, cell_convert_hydro, NULL); + for (size_t i = 0; i < s->nr_parts; ++i) + hydro_convert_quantities(&s->parts[i], &s->xparts[i]); /* Correct what we did (e.g. in PE-SPH, need to recompute rho_bar) */ if (hydro_need_extra_init_loop) { @@ -2693,8 +2689,12 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) { clocks_gettime(&time2); +#ifdef SWIFT_DEBUG_CHECKS + space_check_timesteps(e->s); +#endif + /* Ready to go */ - e->step = -1; + e->step = 0; e->forcerebuild = 1; e->wallclock_time = (float)clocks_diff(&time1, &time2); @@ -2785,12 +2785,6 @@ void engine_step(struct engine *e) { engine_launch(e, e->nr_threads); TIMER_TOC(timer_runners); -#ifdef SWIFT_DEBUG_CHECKS - for (size_t i = 0; i < e->s->nr_parts; ++i) { - if (e->s->parts[i].time_bin == 0) error("Particle in bin 0"); - } -#endif - TIMER_TOC2(timer_step); clocks_gettime(&time2); @@ -2833,6 +2827,11 @@ void engine_drift_all(struct engine *e) { threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 1, e); +#ifdef SWIFT_DEBUG_CHECKS + /* Check that all cells have been drifted to the current time. */ + space_check_drift_point(e->s, e->ti_current); +#endif + if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -3317,6 +3316,7 @@ void engine_init(struct engine *e, struct space *s, #endif if (with_aff) { +#ifdef HAVE_SETAFFINITY #ifdef WITH_MPI printf("[%04i] %s engine_init: cpu map is [ ", nodeID, clocks_get_timesincestart()); @@ -3325,6 +3325,7 @@ void engine_init(struct engine *e, struct space *s, #endif for (int i = 0; i < nr_affinity_cores; i++) printf("%i ", cpuid[i]); printf("].\n"); +#endif } /* Are we doing stuff in parallel? */ diff --git a/src/engine.h b/src/engine.h index 72d54f6d263eef9cc2f7d63a3b2aeddcf8b3c2e5..db5a7317d233a8e386fb03817cd42659ecd10816 100644 --- a/src/engine.h +++ b/src/engine.h @@ -225,7 +225,7 @@ void engine_init(struct engine *e, struct space *s, const struct cooling_function_data *cooling, struct sourceterms *sourceterms); void engine_launch(struct engine *e, int nr_runners); -void engine_prepare(struct engine *e, int drift_all, int deferskip); +void engine_prepare(struct engine *e, int drift_all, int postrepart); void engine_print(struct engine *e); void engine_init_particles(struct engine *e, int flag_entropy_ICs); void engine_step(struct engine *e); diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h index bf8362a573519ecfacb54ae6001aedb74e429db3..a0bfee05f8b7f93cce65e8b9a3e7e322e166569d 100644 --- a/src/gravity/Default/gravity.h +++ b/src/gravity/Default/gravity.h @@ -42,21 +42,6 @@ gravity_compute_timestep_self(const struct gpart* const gp) { return dt; } -/** - * @brief Initialises the g-particles for the first time - * - * This function is called only once just after the ICs have been - * read in to do some conversions. - * - * @param gp The particle to act upon - */ -__attribute__((always_inline)) INLINE static void gravity_first_init_gpart( - struct gpart* gp) { - - gp->time_bin = 0; - gp->epsilon = 0.; // MATTHIEU -} - /** * @brief Prepares a g-particle for the gravity calculation * @@ -100,4 +85,21 @@ __attribute__((always_inline)) INLINE static void gravity_end_force( __attribute__((always_inline)) INLINE static void gravity_kick_extra( struct gpart* gp, float dt) {} +/** + * @brief Initialises the g-particles for the first time + * + * This function is called only once just after the ICs have been + * read in to do some conversions. + * + * @param gp The particle to act upon + */ +__attribute__((always_inline)) INLINE static void gravity_first_init_gpart( + struct gpart* gp) { + + gp->time_bin = 0; + gp->epsilon = 0.; // MATTHIEU + + gravity_init_gpart(gp); +} + #endif /* SWIFT_DEFAULT_GRAVITY_H */ diff --git a/src/gravity/Default/gravity_debug.h b/src/gravity/Default/gravity_debug.h index 4c3c41c94329666c274e6c129e728b59d2f5b892..f0d145647ab3f973f3c0ffc2f995ee01d534bc72 100644 --- a/src/gravity/Default/gravity_debug.h +++ b/src/gravity/Default/gravity_debug.h @@ -22,11 +22,10 @@ __attribute__((always_inline)) INLINE static void gravity_debug_particle( const struct gpart* p) { printf( - "x=[%.3e,%.3e,%.3e], " - "v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " - "mass=%.3e time_bin=%d\n", - p->x[0], p->x[1], p->x[2], p->v_full[0], p->v_full[1], p->v_full[2], - p->a_grav[0], p->a_grav[1], p->a_grav[2], p->mass, p->time_bin); + "mass=%.3e epsilon=%.5e time_bin=%d\n" + "x=[%.5e,%.5e,%.5e], v_full=[%.5e,%.5e,%.5e], a=[%.5e,%.5e,%.5e]\n", + p->mass, p->epsilon, p->time_bin, p->x[0], p->x[1], p->x[2], p->v_full[0], + p->v_full[1], p->v_full[2], p->a_grav[0], p->a_grav[1], p->a_grav[2]); } #endif /* SWIFT_DEFAULT_GRAVITY_DEBUG_H */ diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h index 18d524d39f86f363df2a8ad7ce6638f438bdb477..aa309a5beb201c1aeec0cabe9d64a158d2071fa9 100644 --- a/src/gravity/Default/gravity_part.h +++ b/src/gravity/Default/gravity_part.h @@ -47,6 +47,16 @@ struct gpart { /* Time-step length */ timebin_t time_bin; +#ifdef SWIFT_DEBUG_CHECKS + + /* Time of the last drift */ + integertime_t ti_drift; + + /* Time of the last kick */ + integertime_t ti_kick; + +#endif + } SWIFT_STRUCT_ALIGN; #endif /* SWIFT_DEFAULT_GRAVITY_PART_H */ diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h index 0bfbf7ffef18f22cac9440ccc8adde0ba1899616..160a2d8b5d25a97cefb2afd5e22d8e6bcea0006e 100644 --- a/src/hydro/Gadget2/hydro.h +++ b/src/hydro/Gadget2/hydro.h @@ -377,9 +377,13 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( struct part *restrict p, struct xpart *restrict xp, float dt) { /* Do not decrease the entropy by more than a factor of 2 */ - const float entropy_change = p->entropy_dt * dt; - xp->entropy_full = - max(xp->entropy_full + entropy_change, 0.5f * xp->entropy_full); + if (dt > 0. && p->entropy_dt * dt < -0.5f * xp->entropy_full) { + /* message("Warning! Limiting entropy_dt. Possible cooling error.\n + * entropy_full = %g \n entropy_dt * dt =%g \n", */ + /* xp->entropy_full,p->entropy_dt * dt); */ + p->entropy_dt = -0.5f * xp->entropy_full / dt; + } + xp->entropy_full += p->entropy_dt * dt; /* Compute the pressure */ const float pressure = gas_pressure_from_entropy(p->rho, xp->entropy_full); diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h index f88a2896ffcf09eef422c9449d30ba3dc62eb14b..20856b7e038855e22aa3776a74ba9f495ff6c93f 100644 --- a/src/hydro/Minimal/hydro.h +++ b/src/hydro/Minimal/hydro.h @@ -360,8 +360,10 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( struct part *restrict p, struct xpart *restrict xp, float dt) { /* Do not decrease the energy by more than a factor of 2*/ - const float u_change = p->u_dt * dt; - xp->u_full = max(xp->u_full + u_change, 0.5f * xp->u_full); + if (dt > 0. && p->u_dt * dt < -0.5f * xp->u_full) { + p->u_dt = -0.5f * xp->u_full / dt; + } + xp->u_full += p->u_dt * dt; /* Compute the pressure */ const float pressure = gas_pressure_from_internal_energy(p->rho, xp->u_full); diff --git a/src/hydro/PressureEntropy/hydro.h b/src/hydro/PressureEntropy/hydro.h index 5ac0c73b70b8ebae7a0f5107b0a35b094e179053..f22bb8a13a8ba4d896a77bd4c4f5e86bed5a5960 100644 --- a/src/hydro/PressureEntropy/hydro.h +++ b/src/hydro/PressureEntropy/hydro.h @@ -394,11 +394,10 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( struct part *restrict p, struct xpart *restrict xp, float dt) { /* Do not decrease the entropy (temperature) by more than a factor of 2*/ - const float entropy_change = p->entropy_dt * dt; - if (entropy_change > -0.5f * xp->entropy_full) - xp->entropy_full += entropy_change; - else - xp->entropy_full *= 0.5f; + if (dt > 0. && p->entropy_dt * dt < -0.5f * xp->entropy_full) { + p->entropy_dt = -0.5f * xp->entropy_full / dt; + } + xp->entropy_full += p->entropy_dt * dt; /* Compute the pressure */ const float pressure = diff --git a/src/potential/isothermal/potential.h b/src/potential/isothermal/potential.h index b82e97d23e3d682fa8ca0cd26bd6e70de64733d5..9c07f3eb67528a003788ca94bd1e2e52dd985a2c 100644 --- a/src/potential/isothermal/potential.h +++ b/src/potential/isothermal/potential.h @@ -132,7 +132,7 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration( * @brief Computes the gravitational potential energy of a particle in an * isothermal potential. * - * phi = 0.5 * vrot^2 * ln(r^2 + epsilon^2) + * phi = -0.5 * vrot^2 * ln(r^2 + epsilon^2) * * @param time The current time (unused here). * @param potential The #external_potential used in the run. @@ -148,8 +148,8 @@ external_gravity_get_potential_energy( const float dy = g->x[1] - potential->y; const float dz = g->x[2] - potential->z; - return 0.5f * potential->vrot * potential->vrot * - logf(dx * dx + dy * dy * dz * dz + potential->epsilon2); + return -0.5f * potential->vrot * potential->vrot * + logf(dx * dx + dy * dy + dz * dz + potential->epsilon2); } /** diff --git a/src/runner.c b/src/runner.c index 3bb5151fde8d0da34c557c5981a60d707d9af794..69ae0b059d7e92cf204d43bf914e3b8916e2afd3 100644 --- a/src/runner.c +++ b/src/runner.c @@ -336,9 +336,10 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { struct part *parts = c->parts; struct xpart *xparts = c->xparts; struct entry *sort; - const int count = c->count; - int off[8], inds[8], temp_i; + int j, k, count = c->count; + int i, ind, off[8], inds[8], temp_i, missing; float buff[8]; + double px[3]; TIMER_TIC @@ -776,6 +777,9 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { */ static void runner_do_unskip(struct cell *c, struct engine *e) { + /* Ignore empty cells. */ + if (c->count == 0 && c->gcount == 0) return; + /* Recurse */ if (c->split) { for (int k = 0; k < 8; k++) { @@ -893,7 +897,11 @@ void runner_do_kick1(struct runner *r, struct cell *c, int timer) { const integertime_t ti_end = get_integer_time_end(ti_current, p->time_bin); - if (ti_end - ti_begin != ti_step) error("Particle in wrong time-bin"); + if (ti_end - ti_begin != ti_step) + error( + "Particle in wrong time-bin, ti_end=%lld, ti_begin=%lld, " + "ti_step=%lld time_bin=%d ti_current=%lld", + ti_end, ti_begin, ti_step, p->time_bin, ti_current); #endif /* do the kick */ @@ -976,7 +984,10 @@ void runner_do_kick2(struct runner *r, struct cell *c, int timer) { #ifdef SWIFT_DEBUG_CHECKS if (ti_begin + ti_step != ti_current) - error("Particle in wrong time-bin"); + error( + "Particle in wrong time-bin, ti_begin=%lld, ti_step=%lld " + "time_bin=%d ti_current=%lld", + ti_begin, ti_step, p->time_bin, ti_current); #endif /* Finish the time-step with a second half-kick */ @@ -1072,7 +1083,9 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { /* What is the next sync-point ? */ ti_end_min = min(ti_current + ti_new_step, ti_end_min); ti_end_max = max(ti_current + ti_new_step, ti_end_max); - } else { /* part is inactive */ + } + + else { /* part is inactive */ const integertime_t ti_end = get_integer_time_end(ti_current, p->time_bin); @@ -1212,20 +1225,18 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { } /** - * @brief Construct the cell properties from the received particles + * @brief Construct the cell properties from the received #part. * * @param r The runner thread. * @param c The cell. * @param timer Are we timing this ? */ -void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) { +void runner_do_recv_part(struct runner *r, struct cell *c, int timer) { #ifdef WITH_MPI const struct part *restrict parts = c->parts; - const struct gpart *restrict gparts = c->gparts; const size_t nr_parts = c->count; - const size_t nr_gparts = c->gcount; const integertime_t ti_current = r->e->ti_current; TIMER_TIC; @@ -1241,40 +1252,107 @@ void runner_do_recv_cell(struct runner *r, struct cell *c, int timer) { for (size_t k = 0; k < nr_parts; k++) { const integertime_t ti_end = get_integer_time_end(ti_current, parts[k].time_bin); - // if(ti_end < ti_current) error("Received invalid particle !"); ti_end_min = min(ti_end_min, ti_end); ti_end_max = max(ti_end_max, ti_end); h_max = max(h_max, parts[k].h); + +#ifdef SWIFT_DEBUG_CHECKS + if (parts[k].ti_drift != ti_current) + error("Received un-drifted particle !"); +#endif } + } + + /* Otherwise, recurse and collect. */ + else { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + runner_do_recv_part(r, c->progeny[k], 0); + ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); + ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); + h_max = max(h_max, c->progeny[k]->h_max); + } + } + } + +#ifdef SWIFT_DEBUG_CHECKS + if (ti_end_min < ti_current) + error( + "Received a cell at an incorrect time c->ti_end_min=%lld, " + "e->ti_current=%lld.", + ti_end_min, ti_current); +#endif + + /* ... and store. */ + c->ti_end_min = ti_end_min; + c->ti_end_max = ti_end_max; + c->ti_old = ti_current; + c->h_max = h_max; + + if (timer) TIMER_TOC(timer_dorecv_part); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Construct the cell properties from the received #gpart. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { + +#ifdef WITH_MPI + + const struct gpart *restrict gparts = c->gparts; + const size_t nr_gparts = c->gcount; + const integertime_t ti_current = r->e->ti_current; + + TIMER_TIC; + + integertime_t ti_end_min = max_nr_timesteps; + integertime_t ti_end_max = 0; + + /* If this cell is a leaf, collect the particle data. */ + if (!c->split) { + + /* Collect everything... */ for (size_t k = 0; k < nr_gparts; k++) { const integertime_t ti_end = get_integer_time_end(ti_current, gparts[k].time_bin); - // if(ti_end < ti_current) error("Received invalid particle !"); ti_end_min = min(ti_end_min, ti_end); ti_end_max = max(ti_end_max, ti_end); } - } /* Otherwise, recurse and collect. */ else { for (int k = 0; k < 8; k++) { if (c->progeny[k] != NULL) { - runner_do_recv_cell(r, c->progeny[k], 0); + runner_do_recv_gpart(r, c->progeny[k], 0); ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); - h_max = max(h_max, c->progeny[k]->h_max); } } } +#ifdef SWIFT_DEBUG_CHECKS + if (ti_end_min < ti_current) + error( + "Received a cell at an incorrect time c->ti_end_min=%lld, " + "e->ti_current=%lld.", + ti_end_min, ti_current); +#endif + /* ... and store. */ c->ti_end_min = ti_end_min; c->ti_end_max = ti_end_max; c->ti_old = ti_current; - c->h_max = h_max; - if (timer) TIMER_TOC(timer_dorecv_cell); + if (timer) TIMER_TOC(timer_dorecv_gpart); #else error("SWIFT was not compiled with MPI support."); @@ -1337,13 +1415,13 @@ void *runner_main(void *data) { } else if (cj == NULL) { /* self */ - /* if (!cell_is_active(ci, e) && t->type != task_type_sort && + if (!cell_is_active(ci, e) && t->type != task_type_sort && t->type != task_type_send && t->type != task_type_recv) error( "Task (type='%s/%s') should have been skipped ti_current=%lld " "c->ti_end_min=%lld", taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current, - ci->ti_end_min); */ + ci->ti_end_min); /* Special case for sorts */ if (!cell_is_active(ci, e) && t->type == task_type_sort && @@ -1476,8 +1554,11 @@ void *runner_main(void *data) { if (t->subtype == task_subtype_tend) { cell_unpack_ti_ends(ci, t->buff); free(t->buff); - } else { - runner_do_recv_cell(r, ci, 1); + } else if (t->subtype == task_subtype_xv || + t->subtype == task_subtype_rho) { + runner_do_recv_part(r, ci, 1); + } else if (t->subtype == task_subtype_gpart) { + runner_do_recv_gpart(r, ci, 1); } break; #endif diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 7e6165c5be37cfa8da1343e11a98ca5d33c28347..9188d877d84a56a7da604fae6b9a46991a22862b 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -32,9 +32,18 @@ #define _DOPAIR2(f) PASTE(runner_dopair2, f) #define DOPAIR2 _DOPAIR2(FUNCTION) +#define _DOPAIR1_NOSORT(f) PASTE(runner_dopair1_nosort, f) +#define DOPAIR1_NOSORT _DOPAIR1_NOSORT(FUNCTION) + +#define _DOPAIR2_NOSORT(f) PASTE(runner_dopair2_nosort, f) +#define DOPAIR2_NOSORT _DOPAIR2_NOSORT(FUNCTION) + #define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f) #define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION) +#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f) +#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION) + #define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f) #define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION) @@ -101,6 +110,8 @@ #define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f) #define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION) +#include "runner_doiact_nosort.h" + /** * @brief Compute the interactions between a cell pair (non-symmetric). * @@ -249,7 +260,7 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, // error("Don't use in actual runs ! Slow code !"); #endif -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -304,7 +315,7 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT(r2, dx, hi, pj->h, pi, pj); @@ -334,7 +345,7 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -352,7 +363,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { // error("Don't use in actual runs ! Slow code !"); #endif -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -395,7 +406,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT(r2, dx, hi, pj->h, pi, pj); @@ -425,7 +436,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -454,7 +465,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, error("Don't use in actual runs ! Slow code !"); -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -504,7 +515,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -534,7 +545,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -561,7 +572,14 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, struct engine *e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_MPI + if (ci->nodeID != cj->nodeID) { + DOPAIR_SUBSET_NOSORT(r, ci, parts_i, ind, count, cj); + return; + } +#endif + +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -636,7 +654,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -701,7 +719,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -732,7 +750,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, } /* loop over the parts in ci. */ } -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -755,7 +773,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, struct part *restrict parts, int *restrict ind, int count) { -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -795,7 +813,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, /* Hit or miss? */ if (r2 > 0.0f && r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -825,7 +843,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -846,7 +864,14 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { const struct engine *restrict e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_MPI + if (ci->nodeID != cj->nodeID) { + DOPAIR1_NOSORT(r, ci, cj); + return; + } +#endif + +#ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16))); @@ -951,7 +976,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); @@ -1021,7 +1046,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hjg2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hj, pi->h, pj, pi); @@ -1051,7 +1076,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount > 0) for (int k = 0; k < icount; k++) @@ -1072,7 +1097,14 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { struct engine *restrict e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_MPI + if (ci->nodeID != cj->nodeID) { + DOPAIR2_NOSORT(r, ci, cj); + return; + } +#endif + +#ifdef WITH_OLD_VECTORIZATION int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); float hiq1[VEC_SIZE] __attribute__((aligned(16))); @@ -1195,7 +1227,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hj, hi, pj, pi); @@ -1254,7 +1286,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hig2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION /* Does pj need to be updated too? */ if (part_is_active(pj, e)) @@ -1355,7 +1387,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hi, hj, pi, pj); @@ -1413,7 +1445,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Hit or miss? */ if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION /* Does pi need to be updated too? */ if (part_is_active(pi, e)) @@ -1471,7 +1503,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { } /* loop over the parts in ci. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount1 > 0) for (int k = 0; k < icount1; k++) @@ -1498,7 +1530,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { const struct engine *e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); float hiq1[VEC_SIZE] __attribute__((aligned(16))); @@ -1575,7 +1607,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hj * hj * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hj, hi, pj, pi); @@ -1639,7 +1671,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hig2 || doj) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION /* Which parts need to be updated? */ if (r2 < hig2 && doj) @@ -1722,7 +1754,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { } /* loop over all particles. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount1 > 0) for (int k = 0; k < icount1; k++) @@ -1747,7 +1779,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { const struct engine *e = r->e; -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); float hiq1[VEC_SIZE] __attribute__((aligned(16))); @@ -1824,7 +1856,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION IACT_NONSYM(r2, dx, hj, hi, pj, pi); @@ -1886,7 +1918,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) { -#ifndef WITH_VECTORIZATION +#ifndef WITH_OLD_VECTORIZATION /* Does pj need to be updated too? */ if (part_is_active(pj, e)) @@ -1944,7 +1976,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { } /* loop over all particles. */ -#ifdef WITH_VECTORIZATION +#ifdef WITH_OLD_VECTORIZATION /* Pick up any leftovers. */ if (icount1 > 0) for (int k = 0; k < icount1; k++) @@ -2240,8 +2272,14 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { } /* Otherwise, compute self-interaction. */ - else + else { +#if (DOSELF1 == runner_doself1_density) && defined(WITH_VECTORIZATION) && \ + defined(GADGET2_SPH) + runner_doself1_density_vec(r, ci); +#else DOSELF1(r, ci); +#endif + } if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF); } diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 59a5ae496680390c23458bde65b4bba321ffe7a1..9d2606ceb06fd6d32592010376e867a6ae582bf0 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -25,8 +25,6 @@ #include "gravity.h" #include "part.h" -#define ICHECK -1000 - /** * @brief Compute the recursive upward sweep, i.e. construct the * multipoles in a cell hierarchy. diff --git a/src/runner_doiact_nosort.h b/src/runner_doiact_nosort.h new file mode 100644 index 0000000000000000000000000000000000000000..057dd756b4a8d636253eccd8808417ac452e193d --- /dev/null +++ b/src/runner_doiact_nosort.h @@ -0,0 +1,305 @@ + +/** + * @brief Compute the interactions between a cell pair. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + */ +void DOPAIR1_NOSORT(struct runner *r, struct cell *ci, struct cell *cj) { + + const struct engine *e = r->e; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + + if (!cell_is_drifted(ci, e)) cell_drift(ci, e); + if (!cell_is_drifted(cj, e)) cell_drift(cj, e); + + const int count_i = ci->count; + const int count_j = cj->count; + struct part *restrict parts_i = ci->parts; + struct part *restrict parts_j = cj->parts; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + space_getsid(e->s, &ci, &cj, shift); + + /* Loop over the parts in ci. */ + for (int pid = 0; pid < count_i; pid++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pi = &parts_i[pid]; + if (!part_is_active(pi, e)) continue; + const float hi = pi->h; + + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hig2 = hi * hi * kernel_gamma2; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k] * dx[k]; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hig2) { + IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); + } + + } /* loop over the parts in cj. */ + + } /* loop over the parts in ci. */ + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pj = &parts_j[pjd]; + if (!part_is_active(pj, e)) continue; + const float hj = pj->h; + + double pjx[3]; + for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k]; + const float hjg2 = hj * hj * kernel_gamma2; + + /* Loop over the parts in ci. */ + for (int pid = 0; pid < count_i; pid++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pi = &parts_i[pid]; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pjx[k] - pi->x[k]; + r2 += dx[k] * dx[k]; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hjg2) { + IACT_NONSYM(r2, dx, hj, pi->h, pj, pi); + } + + } /* loop over the parts in ci. */ + + } /* loop over the parts in cj. */ + + TIMER_TOC(TIMER_DOPAIR); +} + +/** + * @brief Compute the interactions between a cell pair. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + */ +void DOPAIR2_NOSORT(struct runner *r, struct cell *ci, struct cell *cj) { + + const struct engine *e = r->e; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + + if (!cell_is_drifted(ci, e)) cell_drift(ci, e); + if (!cell_is_drifted(cj, e)) cell_drift(cj, e); + + const int count_i = ci->count; + const int count_j = cj->count; + struct part *restrict parts_i = ci->parts; + struct part *restrict parts_j = cj->parts; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + space_getsid(e->s, &ci, &cj, shift); + + /* Loop over the parts in ci. */ + for (int pid = 0; pid < count_i; pid++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pi = &parts_i[pid]; + if (!part_is_active(pi, e)) continue; + const float hi = pi->h; + + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hig2 = hi * hi * kernel_gamma2; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + const float hjg2 = pj->h * pj->h * kernel_gamma2; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k] * dx[k]; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hig2 || r2 < hjg2) { + IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); + } + + } /* loop over the parts in cj. */ + + } /* loop over the parts in ci. */ + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pj = &parts_j[pjd]; + if (!part_is_active(pj, e)) continue; + const float hj = pj->h; + + double pjx[3]; + for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k]; + const float hjg2 = hj * hj * kernel_gamma2; + + /* Loop over the parts in ci. */ + for (int pid = 0; pid < count_i; pid++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pi = &parts_i[pid]; + const float hig2 = pi->h * pi->h * kernel_gamma2; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pjx[k] - pi->x[k]; + r2 += dx[k] * dx[k]; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hjg2 || r2 < hig2) { + IACT_NONSYM(r2, dx, hj, pi->h, pj, pi); + } + + } /* loop over the parts in ci. */ + + } /* loop over the parts in cj. */ + + TIMER_TOC(TIMER_DOPAIR); +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * @param r The #runner. + * @param ci The first #cell. + * @param parts_i The #part to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param count The number of particles in @c ind. + * @param cj The second #cell. + */ +void DOPAIR_SUBSET_NOSORT(struct runner *r, struct cell *restrict ci, + struct part *restrict parts_i, int *restrict ind, + int count, struct cell *restrict cj) { + + struct engine *e = r->e; + + TIMER_TIC; + + const int count_j = cj->count; + struct part *restrict parts_j = cj->parts; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + /* Loop over the parts_i. */ + for (int pid = 0; pid < count; pid++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pi = &parts_i[ind[pid]]; + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hi = pi->h; + const float hig2 = hi * hi * kernel_gamma2; + + if (!part_is_active(pi, e)) + error("Trying to correct smoothing length of inactive particle !"); + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k] * dx[k]; + } + + /* Hit or miss? */ + if (r2 < hig2) { + + IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); + } + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ + + TIMER_TOC(timer_dopair_subset); +} diff --git a/src/scheduler.c b/src/scheduler.c index de3f9f51257a0a32811819e2bc6982f3176208a9..dd9b92b60daa5fd7e5df78e26c9266b5bd4c39e4 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -135,8 +135,11 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { ((t->type == task_type_kick1) && t->ci->nodeID != s->nodeID) || ((t->type == task_type_kick2) && t->ci->nodeID != s->nodeID) || ((t->type == task_type_drift) && t->ci->nodeID != s->nodeID) || + ((t->type == task_type_timestep) && t->ci->nodeID != s->nodeID) || ((t->type == task_type_init) && t->ci->nodeID != s->nodeID)) { t->type = task_type_none; + t->subtype = task_subtype_none; + t->cj = NULL; t->skip = 1; break; } @@ -692,6 +695,12 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type, enum task_subtypes subtype, int flags, int wait, struct cell *ci, struct cell *cj, int tight) { +#ifdef SWIFT_DEBUG_CHECKS + if (ci == NULL && cj != NULL) + error("Added a task with ci==NULL and cj!=NULL type=%s/%s", + taskID_names[type], subtaskID_names[subtype]); +#endif + /* Get the next free task. */ const int ind = atomic_inc(&s->tasks_next); @@ -964,12 +973,18 @@ void scheduler_reweight(struct scheduler *s, int verbose) { case task_type_ghost: if (t->ci == t->ci->super) cost = wscale * t->ci->count; break; + case task_type_drift: + cost = wscale * t->ci->count; + break; case task_type_kick1: cost = wscale * t->ci->count; break; case task_type_kick2: cost = wscale * t->ci->count; break; + case task_type_timestep: + cost = wscale * t->ci->count; + break; case task_type_init: cost = wscale * t->ci->count; break; @@ -1070,6 +1085,11 @@ void scheduler_start(struct scheduler *s) { struct cell *ci = t->ci; struct cell *cj = t->cj; + if (t->type == task_type_none) continue; + + /* Don't check MPI stuff */ + if (t->type == task_type_send || t->type == task_type_recv) continue; + if (ci == NULL && cj == NULL) { if (t->type != task_type_grav_gather_m && t->type != task_type_grav_fft) @@ -1098,14 +1118,18 @@ void scheduler_start(struct scheduler *s) { } else { /* pair */ - if ((ci->ti_end_min == ti_current || cj->ti_end_min == ti_current) && - t->skip) - error( - "Task (type='%s/%s') should not have been skipped " - "ti_current=%lld " - "ci->ti_end_min=%lld cj->ti_end_min=%lld", - taskID_names[t->type], subtaskID_names[t->subtype], ti_current, - ci->ti_end_min, cj->ti_end_min); + if (t->skip) { + + /* Check that the pair is active if the local cell is active */ + if ((ci->ti_end_min == ti_current && ci->nodeID == engine_rank) || + (cj->ti_end_min == ti_current && cj->nodeID == engine_rank)) + error( + "Task (type='%s/%s') should not have been skipped " + "ti_current=%lld " + "ci->ti_end_min=%lld cj->ti_end_min=%lld", + taskID_names[t->type], subtaskID_names[t->subtype], ti_current, + ci->ti_end_min, cj->ti_end_min); + } } } } @@ -1153,7 +1177,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { /* Otherwise, look for a suitable queue. */ else { #ifdef WITH_MPI - int err; + int err = MPI_SUCCESS; #endif /* Find the previous owner for each task type, and do @@ -1166,6 +1190,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { case task_type_kick1: case task_type_kick2: case task_type_drift: + case task_type_timestep: case task_type_init: qid = t->ci->super->owner; break; @@ -1179,19 +1204,26 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { case task_type_recv: #ifdef WITH_MPI if (t->subtype == task_subtype_tend) { - t->buff = malloc(sizeof(int) * t->ci->pcell_size); - err = MPI_Irecv(t->buff, t->ci->pcell_size, MPI_INT, t->ci->nodeID, - t->flags, MPI_COMM_WORLD, &t->req); - } else { + t->buff = malloc(sizeof(integertime_t) * t->ci->pcell_size); + err = MPI_Irecv(t->buff, t->ci->pcell_size * sizeof(integertime_t), + MPI_BYTE, t->ci->nodeID, t->flags, MPI_COMM_WORLD, + &t->req); + } else if (t->subtype == task_subtype_xv || + t->subtype == task_subtype_rho) { err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type, t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + // message( "receiving %i parts with tag=%i from %i to %i." , + // t->ci->count , t->flags , t->ci->nodeID , s->nodeID ); + // fflush(stdout); + } else if (t->subtype == task_subtype_gpart) { + err = MPI_Irecv(t->ci->gparts, t->ci->gcount, gpart_mpi_type, + t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + } else { + error("Unknown communication sub-type"); } if (err != MPI_SUCCESS) { mpi_error(err, "Failed to emit irecv for particle data."); } - // message( "receiving %i parts with tag=%i from %i to %i." , - // t->ci->count , t->flags , t->ci->nodeID , s->nodeID ); - // fflush(stdout); qid = 1 % s->nr_queues; #else error("SWIFT was not compiled with MPI support."); @@ -1200,20 +1232,33 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { case task_type_send: #ifdef WITH_MPI if (t->subtype == task_subtype_tend) { - t->buff = malloc(sizeof(int) * t->ci->pcell_size); + t->buff = malloc(sizeof(integertime_t) * t->ci->pcell_size); cell_pack_ti_ends(t->ci, t->buff); - err = MPI_Isend(t->buff, t->ci->pcell_size, MPI_INT, t->cj->nodeID, - t->flags, MPI_COMM_WORLD, &t->req); - } else { + err = MPI_Isend(t->buff, t->ci->pcell_size * sizeof(integertime_t), + MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD, + &t->req); + } else if (t->subtype == task_subtype_xv || + t->subtype == task_subtype_rho) { +#ifdef SWIFT_DEBUG_CHECKS + for (int k = 0; k < t->ci->count; k++) + if (t->ci->parts[k].ti_drift != s->space->e->ti_current) + error("Sending un-drifted particle !"); +#endif err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type, t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + + // message( "sending %i parts with tag=%i from %i to %i." , + // t->ci->count , t->flags , s->nodeID , t->cj->nodeID ); + // fflush(stdout); + } else if (t->subtype == task_subtype_gpart) { + err = MPI_Isend(t->ci->gparts, t->ci->gcount, gpart_mpi_type, + t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + } else { + error("Unknown communication sub-type"); } if (err != MPI_SUCCESS) { mpi_error(err, "Failed to emit isend for particle data."); } - // message( "sending %i parts with tag=%i from %i to %i." , - // t->ci->count , t->flags , s->nodeID , t->cj->nodeID ); - // fflush(stdout); qid = 0; #else error("SWIFT was not compiled with MPI support."); diff --git a/src/space.c b/src/space.c index 5aff6bc17265a2c058366804fe8a1adfa0d898df..b7b3523e6a4dc345f3acdad3552482ec8738b8f3 100644 --- a/src/space.c +++ b/src/space.c @@ -473,7 +473,7 @@ void space_rebuild(struct space *s, int verbose) { /* Be verbose about this. */ #ifdef SWIFT_DEBUG_CHECKS - message("re)building space"); + if (s->e->nodeID == 0 || verbose) message("re)building space"); fflush(stdout); #endif @@ -845,6 +845,13 @@ void space_parts_get_cell_index_mapper(void *map_data, int nr_parts, cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z); ind[k] = index; +#ifdef SWIFT_DEBUG_CHECKS + if (pos_x > dim_x || pos_y > dim_y || pos_z > pos_z || pos_x < 0. || + pos_y < 0. || pos_z < 0.) + error("Particle outside of simulation box. p->x=[%e %e %e]", pos_x, pos_y, + pos_z); +#endif + /* Update the position */ p->x[0] = pos_x; p->x[1] = pos_y; @@ -1008,7 +1015,7 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, if (ind[i - 1] > ind[i]) error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1, ind[i - 1], i, ind[i], min, max); - message("Sorting succeeded."); + if (s->e->nodeID == 0 || verbose) message("Sorting succeeded."); #endif /* Clean up. */ @@ -1186,7 +1193,7 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max, if (ind[i - 1] > ind[i]) error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1, ind[i - 1], i, ind[i], min, max); - message("Sorting succeeded."); + if (s->e->nodeID == 0 || verbose) message("Sorting succeeded."); #endif /* Clean up. */ @@ -2037,6 +2044,19 @@ void space_check_drift_point(struct space *s, integertime_t ti_current) { space_map_cells_pre(s, 1, cell_check_drift_point, &ti_current); } +/** + * @brief Checks that all particles and local cells have a non-zero time-step. + */ +void space_check_timesteps(struct space *s) { +#ifdef SWIFT_DEBUG_CHECKS + + for (int i = 0; i < s->nr_cells; ++i) { + cell_check_timesteps(&s->cells_top[i]); + } + +#endif +} + /** * @brief Frees up the memory allocated for this #space */ diff --git a/src/space.h b/src/space.h index 67ff51cad34b0a9d1f4fd5492ab0e9d80ef064f2..3513c39e624570e8f1ad31ee711548bbec1a036d 100644 --- a/src/space.h +++ b/src/space.h @@ -186,6 +186,7 @@ void space_init_parts(struct space *s); void space_init_gparts(struct space *s); void space_link_cleanup(struct space *s); void space_check_drift_point(struct space *s, integertime_t ti_current); +void space_check_timesteps(struct space *s); void space_clean(struct space *s); #endif /* SWIFT_SPACE_H */ diff --git a/src/task.c b/src/task.c index ee4d92cf714aed2c23ae1795e053103aaf876e4e..0a078b40c3914419cef294b63a527b65d9bea077 100644 --- a/src/task.c +++ b/src/task.c @@ -55,7 +55,8 @@ const char *taskID_names[task_type_count] = { "sourceterms"}; const char *subtaskID_names[task_subtype_count] = { - "none", "density", "gradient", "force", "grav", "external_grav", "tend"}; + "none", "density", "gradient", "force", "grav", + "external_grav", "tend", "xv", "rho", "gpart"}; /** * @brief Computes the overlap between the parts array of two given cells. diff --git a/src/task.h b/src/task.h index 2294129ea6f8921f45e5488d2ff77b24c4c5c882..57fe4dcf506ba9dee47d8bc60931b49ab05e8a38 100644 --- a/src/task.h +++ b/src/task.h @@ -71,6 +71,9 @@ enum task_subtypes { task_subtype_grav, task_subtype_external_grav, task_subtype_tend, + task_subtype_xv, + task_subtype_rho, + task_subtype_gpart, task_subtype_count } __attribute__((packed)); diff --git a/src/timers.h b/src/timers.h index 6106b93fdb7e66c7c295c35aef6c20007554aa8a..1e7965f3fdda5a7d7aa9213a3c36abdc783efccb 100644 --- a/src/timers.h +++ b/src/timers.h @@ -60,7 +60,8 @@ enum { timer_dopair_subset, timer_do_ghost, timer_do_extra_ghost, - timer_dorecv_cell, + timer_dorecv_part, + timer_dorecv_gpart, timer_gettask, timer_qget, timer_qsteal, diff --git a/src/vector.h b/src/vector.h index b2b3049f6abdf2a8464cb516147b050a6f4fc3e1..5e7c978ce6c3df9b1fbc47be2a43ee76c85a352a 100644 --- a/src/vector.h +++ b/src/vector.h @@ -216,9 +216,15 @@ #define VEC_INT __m128i #define vec_load(a) _mm_load_ps(a) #define vec_store(a, addr) _mm_store_ps(addr, a) +#define vec_setzero() _mm_setzero_ps() +#define vec_setintzero() _mm_setzero_si256() #define vec_set1(a) _mm_set1_ps(a) +#define vec_setint1(a) _mm_set1_epi32(a) #define vec_set(a, b, c, d) _mm_set_ps(d, c, b, a) #define vec_dbl_set(a, b) _mm_set_pd(b, a) +#define vec_add(a, b) _mm_add_ps(a, b) +#define vec_sub(a, b) _mm_sub_ps(a, b) +#define vec_mul(a, b) _mm_mul_ps(a, b) #define vec_sqrt(a) _mm_sqrt_ps(a) #define vec_rcp(a) _mm_rcp_ps(a) #define vec_rsqrt(a) _mm_rsqrt_ps(a) @@ -227,9 +233,11 @@ #define vec_fmax(a, b) _mm_max_ps(a, b) #define vec_fabs(a) _mm_andnot_ps(_mm_set1_ps(-0.f), a) #define vec_floor(a) _mm_floor_ps(a) +#define vec_cmp_gt(a, b) _mm_cmpgt_ps(a, b) #define vec_cmp_lt(a, b) _mm_cmplt_ps(a, b) #define vec_cmp_lte(a, b) _mm_cmp_ps(a, b, _CMP_LE_OQ) #define vec_cmp_result(a) _mm_movemask_ps(a) +#define vec_and(a, b) _mm_and_ps(a, b) #define vec_todbl_lo(a) _mm_cvtps_pd(a) #define vec_todbl_hi(a) _mm_cvtps_pd(_mm_movehl_ps(a, a)) #define vec_dbl_tofloat(a, b) _mm_movelh_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b)) @@ -243,6 +251,12 @@ #define vec_dbl_fmax(a, b) _mm_max_pd(a, b) #define FILL_VEC(a) \ { .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a } +#define VEC_HADD(a, b) \ + a.v = _mm_hadd_ps(a.v, a.v); \ + b += a.f[0] + a.f[1]; +#ifndef vec_fma +#define vec_fma(a, b, c) vec_add(vec_mul(a, b), c) +#endif #else #define VEC_SIZE 4 #endif diff --git a/tests/Makefile.am b/tests/Makefile.am index 966df5af7dd11057fb79cf7cd5b155f713752b1c..0db5c2544433012dcd7f451f535391aa81b1f802 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -31,7 +31,7 @@ TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetr check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ testSPHStep testPair test27cells test125cells testParser \ testKernel testKernelGrav testFFT testInteractions testMaths \ - testSymmetry testThreadpool \ + testSymmetry testThreadpool benchmarkInteractions \ testAdiabaticIndex testRiemannExact testRiemannTRRS \ testRiemannHLLC testMatrixInversion testDump testLogger @@ -66,6 +66,8 @@ testFFT_SOURCES = testFFT.c testInteractions_SOURCES = testInteractions.c +benchmarkInteractions_SOURCES = benchmarkInteractions.c + testAdiabaticIndex_SOURCES = testAdiabaticIndex.c testRiemannExact_SOURCES = testRiemannExact.c diff --git a/tests/benchmarkInteractions.c b/tests/benchmarkInteractions.c new file mode 100644 index 0000000000000000000000000000000000000000..be23fe0d8d00aa35a37747b049750d6f3b31fa92 --- /dev/null +++ b/tests/benchmarkInteractions.c @@ -0,0 +1,496 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#include <fenv.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "swift.h" + +#define array_align sizeof(float) * VEC_SIZE +#define ACC_THRESHOLD 1e-5 + +#ifdef NONSYM_DENSITY +#define IACT runner_iact_nonsym_density +#define IACT_VEC runner_iact_nonsym_2_vec_density +#define IACT_NAME "test_nonsym_density" +#endif + +#ifdef SYM_DENSITY +#define IACT runner_iact_density +#define IACT_VEC runner_iact_vec_density +#define IACT_NAME "test_sym_density" +#endif + +#ifdef NONSYM_FORCE +#define IACT runner_iact_nonsym_force +#define IACT_VEC runner_iact_nonsym_vec_force +#define IACT_NAME "test_nonsym_force" +#endif + +#ifdef SYM_FORCE +#define IACT runner_iact_force +#define IACT_VEC runner_iact_vec_force +#define IACT_NAME "test_sym_force" +#endif + +#ifndef IACT +#define IACT runner_iact_nonsym_density +#define IACT_VEC runner_iact_nonsym_2_vec_density +#define IACT_NAME "test_nonsym_density" +#endif + +/** + * @brief Constructs an array of particles in a valid state prior to + * a IACT_NONSYM and IACT_NONSYM_VEC call. + * + * @param count No. of particles to create + * @param offset The position of the particle offset from (0,0,0). + * @param spacing Particle spacing. + * @param h The smoothing length of the particles in units of the inter-particle + *separation. + * @param partId The running counter of IDs. + */ +struct part *make_particles(size_t count, double *offset, double spacing, + double h, long long *partId) { + + struct part *particles; + if (posix_memalign((void **)&particles, part_align, + count * sizeof(struct part)) != 0) { + error("couldn't allocate particles, no. of particles: %d", (int)count); + } + bzero(particles, count * sizeof(struct part)); + + /* Construct the particles */ + struct part *p; + + /* Set test particle at centre of unit sphere. */ + p = &particles[0]; + + /* Place the test particle at the centre of a unit sphere. */ + p->x[0] = 0.0f; + p->x[1] = 0.0f; + p->x[2] = 0.0f; + + p->h = h; + p->id = ++(*partId); + p->mass = 1.0f; + + /* Place rest of particles around the test particle + * with random position within a unit sphere. */ + for (size_t i = 1; i < count; ++i) { + p = &particles[i]; + + /* Randomise positions within a unit sphere. */ + p->x[0] = random_uniform(-1.0, 1.0); + p->x[1] = random_uniform(-1.0, 1.0); + p->x[2] = random_uniform(-1.0, 1.0); + + /* Randomise velocities. */ + p->v[0] = random_uniform(-0.05, 0.05); + p->v[1] = random_uniform(-0.05, 0.05); + p->v[2] = random_uniform(-0.05, 0.05); + + p->h = h; + p->id = ++(*partId); + p->mass = 1.0f; + } + return particles; +} + +/** + * @brief Populates particle properties needed for the force calculation. + */ +void prepare_force(struct part *parts, size_t count) { + + struct part *p; + for (size_t i = 0; i < count; ++i) { + p = &parts[i]; + p->rho = i + 1; + p->force.balsara = random_uniform(0.0, 1.0); + p->force.P_over_rho2 = i + 1; + p->force.soundspeed = random_uniform(2.0, 3.0); + p->force.v_sig = 0.0f; + p->force.h_dt = 0.0f; + } +} + +/** + * @brief Dumps all particle information to a file + */ +void dump_indv_particle_fields(char *fileName, struct part *p) { + + FILE *file = fopen(fileName, "a"); + + fprintf(file, + "%6llu %10f %10f %10f %10f %10f %10f %10e %10e %10e %13e %13e %13e " + "%13e %13e %13e %13e " + "%13e %13e %13e\n", + p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], + p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->rho, + p->density.rho_dh, p->density.wcount, p->density.wcount_dh, + p->force.h_dt, p->force.v_sig, +#if defined(MINIMAL_SPH) + 0., 0., 0., 0. +#else + p->density.div_v, p->density.rot_v[0], p->density.rot_v[1], + p->density.rot_v[2] +#endif + ); + fclose(file); +} + +/** + * @brief Creates a header for the output file + */ +void write_header(char *fileName) { + + FILE *file = fopen(fileName, "w"); + /* Write header */ + fprintf(file, + "# %4s %10s %10s %10s %10s %10s %10s %10s %10s %10s %13s %13s %13s " + "%13s %13s %13s %13s" + "%13s %13s %13s %13s\n", + "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "a_x", "a_y", + "a_z", "rho", "rho_dh", "wcount", "wcount_dh", "dh/dt", "v_sig", + "div_v", "curl_vx", "curl_vy", "curl_vz", "dS/dt"); + fprintf(file, "\n# PARTICLES BEFORE INTERACTION:\n"); + fclose(file); +} + +/** + * @brief Compares the vectorised result against + * the serial result of the interaction. + * + * @param serial_test_part Particle that has been updated serially + * @param serial_parts Particle array that has been interacted serially + * @param vec_test_part Particle that has been updated using vectors + * @param vec_parts Particle array to be interacted using vectors + * @param count No. of particles that have been interacted + * + * @return Non-zero value if difference found, 0 otherwise + */ +int check_results(struct part serial_test_part, struct part *serial_parts, + struct part vec_test_part, struct part *vec_parts, + int count) { + int result = 0; + result += compare_particles(serial_test_part, vec_test_part, ACC_THRESHOLD); + + for (int i = 0; i < count; i++) + result += compare_particles(serial_parts[i], vec_parts[i], ACC_THRESHOLD); + + return result; +} + +/* + * @brief Calls the serial and vectorised version of the non-symmetrical density + * interaction. + * + * @param test_part Particle that will be updated + * @param parts Particle array to be interacted + * @param count No. of particles to be interacted + * @param serial_inter_func Serial interaction function to be called + * @param vec_inter_func Vectorised interaction function to be called + * @param runs No. of times to call interactions + * + */ +void test_interactions(struct part test_part, struct part *parts, size_t count, + char *filePrefix, int runs) { + + ticks serial_time = 0; +#ifdef WITH_VECTORIZATION + ticks vec_time = 0; +#endif + + FILE *file; + char serial_filename[200] = ""; + char vec_filename[200] = ""; + + strcpy(serial_filename, filePrefix); + strcpy(vec_filename, filePrefix); + sprintf(serial_filename + strlen(serial_filename), "_serial.dat"); + sprintf(vec_filename + strlen(vec_filename), "_vec.dat"); + + write_header(serial_filename); + write_header(vec_filename); + + struct part pi_serial, pi_vec; + struct part pj_serial[count], pj_vec[count]; + + float r2[count] __attribute__((aligned(array_align))); + float dx[3 * count] __attribute__((aligned(array_align))); + + struct part *piq[count], *pjq[count]; + for (size_t k = 0; k < count; k++) { + piq[k] = NULL; + pjq[k] = NULL; + } + +#ifdef WITH_VECTORIZATION + float r2q[count] __attribute__((aligned(array_align))); + float hiq[count] __attribute__((aligned(array_align))); + float dxq[count] __attribute__((aligned(array_align))); + + float dyq[count] __attribute__((aligned(array_align))); + float dzq[count] __attribute__((aligned(array_align))); + float mjq[count] __attribute__((aligned(array_align))); + float vixq[count] __attribute__((aligned(array_align))); + float viyq[count] __attribute__((aligned(array_align))); + float vizq[count] __attribute__((aligned(array_align))); + float vjxq[count] __attribute__((aligned(array_align))); + float vjyq[count] __attribute__((aligned(array_align))); + float vjzq[count] __attribute__((aligned(array_align))); +#endif + + /* Call serial interaction a set number of times. */ + for (int k = 0; k < runs; k++) { + /* Reset particle to initial setup */ + pi_serial = test_part; + for (size_t i = 0; i < count; i++) pj_serial[i] = parts[i]; + + /* Only dump data on first run. */ + if (k == 0) { + /* Dump state of particles before serial interaction. */ + dump_indv_particle_fields(serial_filename, &pi_serial); + for (size_t i = 0; i < count; i++) + dump_indv_particle_fields(serial_filename, &pj_serial[i]); + } + + /* Perform serial interaction */ + for (size_t i = 0; i < count; i++) { + /* Compute the pairwise distance. */ + r2[i] = 0.0f; + for (int k = 0; k < 3; k++) { + int ind = (3 * i) + k; + dx[ind] = pi_serial.x[k] - pj_serial[i].x[k]; + r2[i] += dx[ind] * dx[ind]; + } + } + + const ticks tic = getticks(); +/* Perform serial interaction */ +#ifdef __ICC +#pragma novector +#endif + for (size_t i = 0; i < count; i++) { + IACT(r2[i], &(dx[3 * i]), pi_serial.h, pj_serial[i].h, &pi_serial, + &pj_serial[i]); + } + serial_time += getticks() - tic; + } + + file = fopen(serial_filename, "a"); + fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n"); + fclose(file); + + /* Dump result of serial interaction. */ + dump_indv_particle_fields(serial_filename, &pi_serial); + for (size_t i = 0; i < count; i++) + dump_indv_particle_fields(serial_filename, &pj_serial[i]); + + /* Call vector interaction a set number of times. */ + for (int k = 0; k < runs; k++) { + /* Reset particle to initial setup */ + pi_vec = test_part; + for (size_t i = 0; i < count; i++) pj_vec[i] = parts[i]; + + /* Setup arrays for vector interaction. */ + for (size_t i = 0; i < count; i++) { + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pi_vec.x[k] - pj_vec[i].x[k]; + r2 += dx[k] * dx[k]; + } + +#ifdef WITH_VECTORIZATION + r2q[i] = r2; + dxq[i] = dx[0]; + hiq[i] = pi_vec.h; + piq[i] = &pi_vec; + pjq[i] = &pj_vec[i]; + + dyq[i] = dx[1]; + dzq[i] = dx[2]; + mjq[i] = pj_vec[i].mass; + vixq[i] = pi_vec.v[0]; + viyq[i] = pi_vec.v[1]; + vizq[i] = pi_vec.v[2]; + vjxq[i] = pj_vec[i].v[0]; + vjyq[i] = pj_vec[i].v[1]; + vjzq[i] = pj_vec[i].v[2]; +#endif + } + + /* Only dump data on first run. */ + if (k == 0) { + /* Dump state of particles before vector interaction. */ + dump_indv_particle_fields(vec_filename, piq[0]); + for (size_t i = 0; i < count; i++) + dump_indv_particle_fields(vec_filename, pjq[i]); + } + +/* Perform vector interaction. */ +#ifdef WITH_VECTORIZATION + vector hi_vec, hi_inv_vec, vix_vec, viy_vec, viz_vec, mask, mask2; + vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum, + curlvySum, curlvzSum; + + rhoSum.v = vec_set1(0.f); + rho_dhSum.v = vec_set1(0.f); + wcountSum.v = vec_set1(0.f); + wcount_dhSum.v = vec_set1(0.f); + div_vSum.v = vec_set1(0.f); + curlvxSum.v = vec_set1(0.f); + curlvySum.v = vec_set1(0.f); + curlvzSum.v = vec_set1(0.f); + + hi_vec.v = vec_load(&hiq[0]); + vix_vec.v = vec_load(&vixq[0]); + viy_vec.v = vec_load(&viyq[0]); + viz_vec.v = vec_load(&vizq[0]); + + hi_inv_vec = vec_reciprocal(hi_vec); + mask.m = vec_setint1(0xFFFFFFFF); + mask2.m = vec_setint1(0xFFFFFFFF); + +#ifdef HAVE_AVX512_F + KNL_MASK_16 knl_mask, knl_mask2; + knl_mask = 0xFFFF; + knl_mask2 = 0xFFFF; +#endif + + const ticks vec_tic = getticks(); + + for (size_t i = 0; i < count; i += 2 * VEC_SIZE) { + + IACT_VEC(&(r2q[i]), &(dxq[i]), &(dyq[i]), &(dzq[i]), (hi_inv_vec), + (vix_vec), (viy_vec), (viz_vec), &(vjxq[i]), &(vjyq[i]), + &(vjzq[i]), &(mjq[i]), &rhoSum, &rho_dhSum, &wcountSum, + &wcount_dhSum, &div_vSum, &curlvxSum, &curlvySum, &curlvzSum, + mask, mask2, +#ifdef HAVE_AVX512_F + knl_mask, knl_mask2); +#else + 0, 0); +#endif + } + + VEC_HADD(rhoSum, piq[0]->rho); + VEC_HADD(rho_dhSum, piq[0]->density.rho_dh); + VEC_HADD(wcountSum, piq[0]->density.wcount); + VEC_HADD(wcount_dhSum, piq[0]->density.wcount_dh); + VEC_HADD(div_vSum, piq[0]->density.div_v); + VEC_HADD(curlvxSum, piq[0]->density.rot_v[0]); + VEC_HADD(curlvySum, piq[0]->density.rot_v[1]); + VEC_HADD(curlvzSum, piq[0]->density.rot_v[2]); + + vec_time += getticks() - vec_tic; +#endif + } + + file = fopen(vec_filename, "a"); + fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n"); + fclose(file); + + /* Dump result of serial interaction. */ + dump_indv_particle_fields(vec_filename, piq[0]); + for (size_t i = 0; i < count; i++) + dump_indv_particle_fields(vec_filename, pjq[i]); + +#ifdef WITH_VECTORIZATION + /* Check serial results against the vectorised results. */ + if (check_results(pi_serial, pj_serial, pi_vec, pj_vec, count)) + message("Differences found..."); +#endif + + message("The serial interactions took : %15lli ticks.", + serial_time / runs); +#ifdef WITH_VECTORIZATION + message("The vectorised interactions took : %15lli ticks.", vec_time / runs); + message("Speed up: %15fx.", (double)(serial_time) / vec_time); +#endif +} + +/* And go... */ +int main(int argc, char *argv[]) { + size_t runs = 10000; + double h = 1.0, spacing = 0.5; + double offset[3] = {0.0, 0.0, 0.0}; + size_t count = 256; + + /* Get some randomness going */ + srand(0); + + char c; + while ((c = getopt(argc, argv, "h:s:n:r:")) != -1) { + switch (c) { + case 'h': + sscanf(optarg, "%lf", &h); + break; + case 's': + sscanf(optarg, "%lf", &spacing); + case 'n': + sscanf(optarg, "%zu", &count); + break; + case 'r': + sscanf(optarg, "%zu", &runs); + break; + case '?': + error("Unknown option."); + break; + } + } + + if (h < 0 || spacing < 0) { + printf( + "\nUsage: %s [OPTIONS...]\n" + "\nGenerates a particle array with equal particle separation." + "\nThese are then interacted using runner_iact_density and " + "runner_iact_vec_density." + "\n\nOptions:" + "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>" + "\n-s SPACING=0.5 - Spacing between particles" + "\n-n NUMBER=9 - No. of particles", + argv[0]); + exit(1); + } + + /* Correct count so that VEC_SIZE of particles interact with the test + * particle. */ + count = count - (count % VEC_SIZE) + 1; + + /* Build the infrastructure */ + static long long partId = 0; + struct part test_particle; + struct part *particles = make_particles(count, offset, spacing, h, &partId); + +#if defined(NONSYM_FORCE) || defined(SYM_FORCE) + prepare_force(particles, count); +#endif + + test_particle = particles[0]; + /* Call the non-sym density test. */ + message("Testing %s interaction...", IACT_NAME); + test_interactions(test_particle, &particles[1], count - 1, IACT_NAME, runs); + + return 0; +} diff --git a/tests/tolerance_27_normal.dat b/tests/tolerance_27_normal.dat index dd93f7191d6c0ad38e3de43de3abebf544d555aa..9c7ca10414507746b41e453d75426a072f989d2e 100644 --- a/tests/tolerance_27_normal.dat +++ b/tests/tolerance_27_normal.dat @@ -1,3 +1,3 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 4e-5 2e-4 2e-3 7e-6 5e-6 5e-6 5e-6 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 4e-5 2e-4 2e-3 8e-6 6e-6 6e-6 6e-6 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-4 1e-4 1e-4 2e-4 1e-4 1e-4 1e-4 diff --git a/tests/tolerance_27_perturbed.dat b/tests/tolerance_27_perturbed.dat index 478bfd94cbbbbabea791eaab9ca2966b4c52b921..53de4ec7632039a56a3757488881e890296e3ac8 100644 --- a/tests/tolerance_27_perturbed.dat +++ b/tests/tolerance_27_perturbed.dat @@ -1,3 +1,3 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-6 1e-4 4e-5 2e-3 3.1e-6 3e-6 3e-6 3e-6 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 5e-3 1e-5 1e-4 2e-5 2e-3 2e-3 2e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-6 1e-4 5e-5 2e-3 3.1e-6 3e-6 3e-6 3e-6 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-2 1e-5 1e-4 2e-5 2e-3 2e-3 2e-3