diff --git a/.gitignore b/.gitignore index 7d6d9021f12ebfcb837d19c443362f1ecbc4077f..28a830818af36faad3f4278c6adcba5562b59ee7 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,8 @@ tests/brute_force_27_perturbed.dat tests/swift_dopair_27_perturbed.dat tests/brute_force_125_standard.dat tests/swift_dopair_125_standard.dat +tests/brute_force_125_perturbed.dat +tests/swift_dopair_125_perturbed.dat tests/testGreetings tests/testReading tests/input.hdf5 @@ -65,6 +67,7 @@ tests/parser_output.yml tests/test27cells.sh tests/test27cellsPerturbed.sh tests/test125cells.sh +tests/test125cellsPerturbed.sh tests/testPair.sh tests/testPairPerturbed.sh tests/testParser.sh diff --git a/configure.ac b/configure.ac index 8a2d0f30ae297993b34153bc9a4c04085f4748f5..788bb57eed801c1a1dff2204b57b34c4fadf3b58 100644 --- a/configure.ac +++ b/configure.ac @@ -853,6 +853,7 @@ AC_CONFIG_FILES([tests/testPairPerturbed.sh], [chmod +x tests/testPairPerturbed. AC_CONFIG_FILES([tests/test27cells.sh], [chmod +x tests/test27cells.sh]) AC_CONFIG_FILES([tests/test27cellsPerturbed.sh], [chmod +x tests/test27cellsPerturbed.sh]) AC_CONFIG_FILES([tests/test125cells.sh], [chmod +x tests/test125cells.sh]) +AC_CONFIG_FILES([tests/test125cellsPerturbed.sh], [chmod +x tests/test125cellsPerturbed.sh]) AC_CONFIG_FILES([tests/testParser.sh], [chmod +x tests/testParser.sh]) # Save the compilation options diff --git a/examples/DiscPatch/HydroStatic/plot.py b/examples/DiscPatch/HydroStatic/plot.py new file mode 100644 index 0000000000000000000000000000000000000000..2de749f9e3b3c287390218e09ea347d660f9ce8a --- /dev/null +++ b/examples/DiscPatch/HydroStatic/plot.py @@ -0,0 +1,103 @@ +################################################################################ +# This file is part of SWIFT. 
+# Copyright (c) 2017 Bert Vandenbroucke (bert.vandenbroucke@gmail.com) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +################################################################################ + +## +# This script plots the Disc-Patch_*.hdf5 snapshots. +# It takes two (optional) parameters: the counter value of the first and last +# snapshot to plot (default: 0 81). +## + +import numpy as np +import h5py +import matplotlib +matplotlib.use("Agg") +import pylab as pl +import glob +import sys + +# Parameters +surface_density = 10. +scale_height = 100. +z_disc = 200. +utherm = 20.2615290634 +gamma = 5. / 3. + +start = 0 +stop = 81 +if len(sys.argv) > 1: + start = int(sys.argv[1]) +if len(sys.argv) > 2: + stop = int(sys.argv[2]) + +# Get the analytic solution for the density +def get_analytic_density(x): + return 0.5 * surface_density / scale_height / \ + np.cosh( (x - z_disc) / scale_height )**2 + +# Get the analytic solution for the (isothermal) pressure +def get_analytic_pressure(x): + return (gamma - 1.) 
* utherm * get_analytic_density(x) + +# Get the data fields to plot from the snapshot file with the given name: +# snapshot time, z-coord, density, pressure, velocity norm +def get_data(name): + file = h5py.File(name, "r") + coords = np.array(file["/PartType0/Coordinates"]) + rho = np.array(file["/PartType0/Density"]) + u = np.array(file["/PartType0/InternalEnergy"]) + v = np.array(file["/PartType0/Velocities"]) + + P = (gamma - 1.) * rho * u + + vtot = np.sqrt( v[:,0]**2 + v[:,1]**2 + v[:,2]**2 ) + + return float(file["/Header"].attrs["Time"]), coords[:,2], rho, P, vtot + +# scan the folder for snapshot files and plot all of them (within the requested +# range) +for f in sorted(glob.glob("Disc-Patch_*.hdf5")): + num = int(f[-8:-5]) + if num < start or num > stop: + continue + + print "processing", f, "..." + + zrange = np.linspace(0., 400., 1000) + time, z, rho, P, v = get_data(f) + + fig, ax = pl.subplots(3, 1, sharex = True) + + ax[0].plot(z, rho, "r.") + ax[0].plot(zrange, get_analytic_density(zrange), "k-") + ax[0].set_ylabel("density") + + ax[1].plot(z, v, "r.") + ax[1].plot(zrange, np.zeros(len(zrange)), "k-") + ax[1].set_ylabel("velocity norm") + + ax[2].plot(z, P, "r.") + ax[2].plot(zrange, get_analytic_pressure(zrange), "k-") + ax[2].set_xlim(0., 400.) + ax[2].set_xlabel("z") + ax[2].set_ylabel("pressure") + + pl.suptitle("t = {0:.2f}".format(time)) + + pl.savefig("{name}.png".format(name = f[:-5])) + pl.close() diff --git a/examples/EAGLE_12/eagle_12.yml b/examples/EAGLE_12/eagle_12.yml index 69010d06c6b2c02dd982c8f22c58778691d8bdca..7d07b2cef22f2a23b7d66af79b3ef1306df2de01 100644 --- a/examples/EAGLE_12/eagle_12.yml +++ b/examples/EAGLE_12/eagle_12.yml @@ -31,8 +31,6 @@ Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. theta: 0.7 # Opening angle (Multipole acceptance criterion) epsilon: 0.0001 # Softening length (in internal units). - a_smooth: 1000. - r_cut: 4. 
# Parameters for the hydrodynamics scheme SPH: diff --git a/examples/HydrostaticHalo/density_profile.py b/examples/HydrostaticHalo/density_profile.py index d0afd399f951cf3b727e869ca8571a3a802c2e8d..5248587ec343d3c0ffe2cef0cbd8716b9a1e055c 100644 --- a/examples/HydrostaticHalo/density_profile.py +++ b/examples/HydrostaticHalo/density_profile.py @@ -1,6 +1,27 @@ +############################################################################### + # This file is part of SWIFT. + # Copyright (c) 2016 Stefan Arridge (stefan.arridge@durham.ac.uk) + # + # This program is free software: you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published + # by the Free Software Foundation, either version 3 of the License, or + # (at your option) any later version. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + # GNU General Public License for more details. + # + # You should have received a copy of the GNU Lesser General Public License + # along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ # + ############################################################################## + import numpy as np import h5py as h5 -import matplotlib.pyplot as plt +import matplotlib +matplotlib.use("Agg") +from pylab import * import sys #for the plotting @@ -46,7 +67,8 @@ for i in range(n_snaps): f = h5.File(filename,'r') coords_dset = f["PartType0/Coordinates"] coords = np.array(coords_dset) -#translate coords by centre of box + + #translate coords by centre of box header = f["Header"] snap_time = header.attrs["Time"] snap_time_cgs = snap_time * unit_time_cgs @@ -63,58 +85,46 @@ for i in range(n_snaps): bin_width = bin_edges[1] - bin_edges[0] hist = np.histogram(r,bins = bin_edges)[0] # number of particles in each bin -#find the mass in each radial bin + #find the mass in each radial bin mass_dset = f["PartType0/Masses"] -#mass of each particles should be equal + + #mass of each particles should be equal part_mass = np.array(mass_dset)[0] part_mass_cgs = part_mass * unit_mass_cgs part_mass_over_virial_mass = part_mass_cgs / M_vir_cgs mass_hist = hist * part_mass_over_virial_mass radial_bin_mids = np.linspace(bin_width/2.,max_r - bin_width/2.,n_radial_bins) -#volume in each radial bin + + #volume in each radial bin volume = 4.*np.pi * radial_bin_mids**2 * bin_width -#now divide hist by the volume so we have a density in each bin + #now divide hist by the volume so we have a density in each bin density = mass_hist / volume - ##read the densities - - # density_dset = f["PartType0/Density"] - # density = np.array(density_dset) - # density_cgs = density * unit_mass_cgs / unit_length_cgs**3 - # rho = density_cgs * r_vir_cgs**3 / M_vir_cgs - t = np.linspace(10./n_radial_bins,10.0,1000) rho_analytic = t**(-2)/(4.*np.pi) - #calculate cooling radius - - #r_cool_over_r_vir = np.sqrt((2.*(gamma - 1.)*lambda_cgs*M_vir_cgs*X_H**2)/(4.*np.pi*CONST_m_H_CGS**2*v_c_cgs**2*r_vir_cgs**3))*np.sqrt(snap_time_cgs) - #initial analytic density profile - + #initial analytic density profile if (i 
== 0): r_0 = radial_bin_mids[0] rho_0 = density[0] - rho_analytic_init = rho_0 * (radial_bin_mids/r_0)**(-2) - plt.plot(radial_bin_mids,density/rho_analytic_init,'ko',label = "Average density of shell") - #plt.plot(t,rho_analytic,label = "Initial analytic density profile" - plt.xlabel(r"$r / r_{vir}$") - plt.ylabel(r"$\rho / \rho_{init})$") - plt.title(r"$\mathrm{Time}= %.3g \, s \, , \, %d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(snap_time_cgs,N,v_c)) - #plt.ylim((1.e-2,1.e1)) - #plt.plot((r_cool_over_r_vir,r_cool_over_r_vir),(0,20),'r',label = "Cooling radius") - plt.xlim((radial_bin_mids[0],max_r)) - plt.ylim((0,20)) - plt.plot((0,max_r),(1,1)) - #plt.xscale('log') - #plt.yscale('log') - plt.legend(loc = "upper right") + + figure() + plot(radial_bin_mids,density/rho_analytic_init,'ko',label = "Average density of shell") + #plot(t,rho_analytic,label = "Initial analytic density profile") + xlabel(r"$r / r_{vir}$") + ylabel(r"$\rho / \rho_{init}$") + title(r"$\mathrm{Time}= %.3g \, s \, , \, %d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(snap_time_cgs,N,v_c)) + xlim((radial_bin_mids[0],max_r)) + ylim((0,2)) + plot((0,max_r),(1,1)) + legend(loc = "upper right") plot_filename = "./plots/density_profile/density_profile_%03d.png" %i - plt.savefig(plot_filename,format = "png") - plt.close() + savefig(plot_filename,format = "png") + close() diff --git a/examples/HydrostaticHalo/internal_energy_profile.py b/examples/HydrostaticHalo/internal_energy_profile.py index ea52cf8fc5fd098a46f05eaa58494529a868000c..f1be049adb8e972f89fd9ffe86106b1b9f3b19dc 100644 --- a/examples/HydrostaticHalo/internal_energy_profile.py +++ b/examples/HydrostaticHalo/internal_energy_profile.py @@ -1,6 +1,27 @@ +############################################################################### + # This file is part of SWIFT. 
+ # Copyright (c) 2016 Stefan Arridge (stefan.arridge@durham.ac.uk) + # + # This program is free software: you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published + # by the Free Software Foundation, either version 3 of the License, or + # (at your option) any later version. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + # GNU General Public License for more details. + # + # You should have received a copy of the GNU Lesser General Public License + # along with this program. If not, see <http://www.gnu.org/licenses/>. + # + ############################################################################## + import numpy as np import h5py as h5 -import matplotlib.pyplot as plt +import matplotlib +matplotlib.use("Agg") +from pylab import * import sys def do_binning(x,y,x_bin_edges): @@ -48,8 +69,6 @@ unit_velocity_cgs = float(params.attrs["InternalUnitSystem:UnitVelocity_in_cgs"] unit_time_cgs = unit_length_cgs / unit_velocity_cgs v_c = float(params.attrs["IsothermalPotential:vrot"]) v_c_cgs = v_c * unit_velocity_cgs -#lambda_cgs = float(params.attrs["LambdaCooling:lambda_cgs"]) -#X_H = float(params.attrs["LambdaCooling:hydrogen_mass_abundance"]) header = f["Header"] N = header.attrs["NumPart_Total"][0] box_centre = np.array(header.attrs["BoxSize"]) @@ -64,7 +83,8 @@ for i in range(n_snaps): f = h5.File(filename,'r') coords_dset = f["PartType0/Coordinates"] coords = np.array(coords_dset) -#translate coords by centre of box + + #translate coords by centre of box header = f["Header"] snap_time = header.attrs["Time"] snap_time_cgs = snap_time * unit_time_cgs @@ -75,11 +95,11 @@ for i in range(n_snaps): radius_cgs = radius*unit_length_cgs radius_over_virial_radius = radius_cgs / r_vir_cgs -#get the internal energies + #get the internal energies u_dset = 
f["PartType0/InternalEnergy"] u = np.array(u_dset) -#make dimensionless + #make dimensionless u /= v_c**2/(2. * (gamma - 1.)) r = radius_over_virial_radius @@ -90,21 +110,16 @@ for i in range(n_snaps): radial_bin_mids = np.linspace(bin_widths / 2. , max_r - bin_widths / 2. , n_radial_bins) binned_u = u_totals / hist - #calculate cooling radius - - #r_cool_over_r_vir = np.sqrt((2.*(gamma - 1.)*lambda_cgs*M_vir_cgs*X_H**2)/(4.*np.pi*CONST_m_H_CGS**2*v_c_cgs**2*r_vir_cgs**3))*np.sqrt(snap_time_cgs) - - plt.plot(radial_bin_mids,binned_u,'ko',label = "Numerical solution") - #plt.plot((0,1),(1,1),label = "Analytic Solution") - #plt.plot((r_cool_over_r_vir,r_cool_over_r_vir),(0,2),'r',label = "Cooling radius") - plt.legend(loc = "lower right") - plt.xlabel(r"$r / r_{vir}$") - plt.ylabel(r"$u / (v_c^2 / (2(\gamma - 1)) $") - plt.title(r"$\mathrm{Time}= %.3g \, s \, , \, %d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(snap_time_cgs,N,v_c)) - plt.ylim((0,2)) + figure() + plot(radial_bin_mids,binned_u,'ko',label = "Numerical solution") + legend(loc = "lower right") + xlabel(r"$r / r_{vir}$") + ylabel(r"$u / (v_c^2 / (2(\gamma - 1)) $") + title(r"$\mathrm{Time}= %.3g \, s \, , \, %d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(snap_time_cgs,N,v_c)) + ylim((0,2)) plot_filename = "./plots/internal_energy/internal_energy_profile_%03d.png" %i - plt.savefig(plot_filename,format = "png") - plt.close() + savefig(plot_filename,format = "png") + close() diff --git a/examples/HydrostaticHalo/run.sh b/examples/HydrostaticHalo/run.sh index d23ead6a67f43c9d19d76a797e72d050a3978d61..82584282559c1fceb0492aada671ff83fb74c924 100755 --- a/examples/HydrostaticHalo/run.sh +++ b/examples/HydrostaticHalo/run.sh @@ -1,11 +1,14 @@ #!/bin/bash # Generate the initial conditions if they are not present. -echo "Generating initial conditions for the isothermal potential box example..." -python makeIC.py 100000 +if [ ! 
-e Hydrostatic.hdf5 ] +then + echo "Generating initial conditions for the isothermal potential box example..." + python makeIC.py 100000 +fi # Run for 10 dynamical times -../swift -g -s -t 2 hydrostatic.yml 2>&1 | tee output.log +../swift -g -s -t 1 hydrostatic.yml 2>&1 | tee output.log echo "Plotting density profiles" mkdir plots diff --git a/examples/HydrostaticHalo/test_energy_conservation.py b/examples/HydrostaticHalo/test_energy_conservation.py index ca091050c4127d11a37a2cc7504e42d244031e25..8368d475813d248ca93c12e46737b062752ab779 100644 --- a/examples/HydrostaticHalo/test_energy_conservation.py +++ b/examples/HydrostaticHalo/test_energy_conservation.py @@ -1,6 +1,27 @@ +############################################################################### + # This file is part of SWIFT. + # Copyright (c) 2016 Stefan Arridge (stefan.arridge@durham.ac.uk) + # + # This program is free software: you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published + # by the Free Software Foundation, either version 3 of the License, or + # (at your option) any later version. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + # GNU General Public License for more details. + # + # You should have received a copy of the GNU Lesser General Public License + # along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ # + ############################################################################## + import numpy as np import h5py as h5 -import matplotlib.pyplot as plt +import matplotlib +matplotlib.use("Agg") +from pylab import * import sys n_snaps = int(sys.argv[1]) @@ -24,7 +45,7 @@ unit_mass_cgs = float(params.attrs["InternalUnitSystem:UnitMass_in_cgs"]) unit_length_cgs = float(params.attrs["InternalUnitSystem:UnitLength_in_cgs"]) unit_velocity_cgs = float(params.attrs["InternalUnitSystem:UnitVelocity_in_cgs"]) unit_time_cgs = unit_length_cgs / unit_velocity_cgs -v_c = float(params.attrs["SoftenedIsothermalPotential:vrot"]) +v_c = float(params.attrs["IsothermalPotential:vrot"]) v_c_cgs = v_c * unit_velocity_cgs header = f["Header"] N = header.attrs["NumPart_Total"][0] @@ -45,7 +66,8 @@ for i in range(n_snaps): f = h5.File(filename,'r') coords_dset = f["PartType0/Coordinates"] coords = np.array(coords_dset) -#translate coords by centre of box + + #translate coords by centre of box header = f["Header"] snap_time = header.attrs["Time"] snap_time_cgs = snap_time * unit_time_cgs @@ -73,7 +95,6 @@ for i in range(n_snaps): internal_energy_array = np.append(internal_energy_array,total_internal_energy) #put energies in units of v_c^2 and rescale by number of particles - pe = potential_energy_array / (N*v_c**2) ke = kinetic_energy_array / (N*v_c**2) ie = internal_energy_array / (N*v_c**2) @@ -82,14 +103,15 @@ te = pe + ke + ie dyn_time_cgs = r_vir_cgs / v_c_cgs time_array = time_array_cgs / dyn_time_cgs -plt.plot(time_array,ke,label = "Kinetic Energy") -plt.plot(time_array,pe,label = "Potential Energy") -plt.plot(time_array,ie,label = "Internal Energy") -plt.plot(time_array,te,label = "Total Energy") -plt.legend(loc = "lower right") -plt.xlabel(r"$t / t_{dyn}$") -plt.ylabel(r"$E / v_c^2$") -plt.title(r"$%d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(N,v_c)) -plt.ylim((-2,2)) -plt.savefig("energy_conservation.png",format = 'png') +figure() 
+plot(time_array,ke,label = "Kinetic Energy") +plot(time_array,pe,label = "Potential Energy") +plot(time_array,ie,label = "Internal Energy") +plot(time_array,te,label = "Total Energy") +legend(loc = "lower right") +xlabel(r"$t / t_{dyn}$") +ylabel(r"$E / v_c^2$") +title(r"$%d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(N,v_c)) +ylim((-2,2)) +savefig("energy_conservation.png",format = 'png') diff --git a/examples/HydrostaticHalo/velocity_profile.py b/examples/HydrostaticHalo/velocity_profile.py index 9133195d942233514148aa419003ee0ab7923494..f8f607362846a323937a9203dab8bc228f52a149 100644 --- a/examples/HydrostaticHalo/velocity_profile.py +++ b/examples/HydrostaticHalo/velocity_profile.py @@ -1,6 +1,27 @@ +############################################################################### + # This file is part of SWIFT. + # Copyright (c) 2016 Stefan Arridge (stefan.arridge@durham.ac.uk) + # + # This program is free software: you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published + # by the Free Software Foundation, either version 3 of the License, or + # (at your option) any later version. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + # GNU General Public License for more details. + # + # You should have received a copy of the GNU Lesser General Public License + # along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ # + ############################################################################## + import numpy as np import h5py as h5 -import matplotlib.pyplot as plt +import matplotlib +matplotlib.use("Agg") +from pylab import * import sys def do_binning(x,y,x_bin_edges): @@ -62,7 +83,8 @@ for i in range(n_snaps): f = h5.File(filename,'r') coords_dset = f["PartType0/Coordinates"] coords = np.array(coords_dset) -#translate coords by centre of box + + #translate coords by centre of box header = f["Header"] snap_time = header.attrs["Time"] snap_time_cgs = snap_time * unit_time_cgs @@ -73,16 +95,15 @@ for i in range(n_snaps): radius_cgs = radius*unit_length_cgs radius_over_virial_radius = radius_cgs / r_vir_cgs -#get the internal energies + #get the velocities vel_dset = f["PartType0/Velocities"] vel = np.array(vel_dset) -#make dimensionless + #make dimensionless vel /= v_c r = radius_over_virial_radius #find radial component of velocity - v_r = np.zeros(r.size) for j in range(r.size): v_r[j] = -np.dot(coords[j,:],vel[j,:])/radius[j] @@ -94,18 +115,13 @@ for i in range(n_snaps): radial_bin_mids = np.linspace(bin_widths / 2. , max_r - bin_widths / 2. 
, n_radial_bins) binned_v_r = v_r_totals / hist - #calculate cooling radius - - #r_cool_over_r_vir = np.sqrt((2.*(gamma - 1.)*lambda_cgs*M_vir_cgs*X_H**2)/(4.*np.pi*CONST_m_H_CGS**2*v_c_cgs**2*r_vir_cgs**3))*np.sqrt(snap_time_cgs) - - plt.plot(radial_bin_mids,binned_v_r,'ko',label = "Average radial velocity in shell") - #plt.plot((0,1),(1,1),label = "Analytic Solution") - #plt.plot((r_cool_over_r_vir,r_cool_over_r_vir),(0,2),'r',label = "Cooling radius") - plt.legend(loc = "upper right") - plt.xlabel(r"$r / r_{vir}$") - plt.ylabel(r"$v_r / v_c$") - plt.title(r"$\mathrm{Time}= %.3g \, s \, , \, %d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(snap_time_cgs,N,v_c)) - plt.ylim((0,2)) + figure() + plot(radial_bin_mids,binned_v_r,'ko',label = "Average radial velocity in shell") + legend(loc = "upper right") + xlabel(r"$r / r_{vir}$") + ylabel(r"$v_r / v_c$") + title(r"$\mathrm{Time}= %.3g \, s \, , \, %d \, \, \mathrm{particles} \,,\, v_c = %.1f \, \mathrm{km / s}$" %(snap_time_cgs,N,v_c)) + ylim((-1,1)) plot_filename = "./plots/radial_velocity_profile/velocity_profile_%03d.png" %i - plt.savefig(plot_filename,format = "png") - plt.close() + savefig(plot_filename,format = "png") + close() diff --git a/examples/UniformDMBox/makeIC.py b/examples/UniformDMBox/makeIC.py index 8e032500016eb6cc8e0decc54968bb5b841d7f93..8f3cd943b3cf19c4ae231d125c5ef97d076e0e8e 100644 --- a/examples/UniformDMBox/makeIC.py +++ b/examples/UniformDMBox/makeIC.py @@ -26,7 +26,7 @@ from numpy import * # with a density of 1 # Parameters -periodic= 0 # 1 For periodic box +periodic= 1 # 1 For periodic box boxSize = 1. rho = 1. 
L = int(sys.argv[1]) # Number of particles along one axis diff --git a/examples/UniformDMBox/uniformBox.yml b/examples/UniformDMBox/uniformBox.yml index 8d9ec300164a7bf8f3df257c34ee44d4f77fe94e..cffd442a9a5b16d8e042e41caf9991fcf0e1202e 100644 --- a/examples/UniformDMBox/uniformBox.yml +++ b/examples/UniformDMBox/uniformBox.yml @@ -35,4 +35,4 @@ Statistics: # Parameters related to the initial conditions InitialConditions: - file_name: ./uniformDMBox_100.hdf5 # The file to read + file_name: ./uniformDMBox_50.hdf5 # The file to read diff --git a/examples/analyse_tasks.py b/examples/analyse_tasks.py new file mode 100755 index 0000000000000000000000000000000000000000..04cd59feedba7ee41621ac0891d544c4aa294543 --- /dev/null +++ b/examples/analyse_tasks.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python +""" +Usage: + analyse_tasks.py [options] input.dat + +where input.dat is a thread info file for a step. Use the '-y interval' flag +of the swift command to create these. + +The output is an analysis of the task timings, including deadtime per thread +and step, total amount of time spent for each task type, for the whole step +and per thread and the minimum and maximum times spent per task type. + +This file is part of SWIFT. +Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk) + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
+""" + +import matplotlib +matplotlib.use("Agg") +import matplotlib.collections as collections +import matplotlib.ticker as plticker +import pylab as pl +import sys +import argparse + +# Handle the command line. +parser = argparse.ArgumentParser(description="Analyse task dumps") + +parser.add_argument("input", help="Thread data file (-y output)") +parser.add_argument("-v", "--verbose", dest="verbose", + help="Verbose output (default: False)", + default=False, action="store_true") + +args = parser.parse_args() +infile = args.input + +# Tasks and subtypes. Indexed as in tasks.h. +TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", + "init_grav", "ghost", "extra_ghost", "drift_part", + "drift_gpart", "kick1", "kick2", "timestep", "send", "recv", + "grav_top_level", "grav_long_range", "grav_mm", "grav_down", + "cooling", "sourceterms", "count"] + +SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav", + "tend", "xv", "rho", "gpart", "multipole", "spart", "count"] + +# Read input. +data = pl.loadtxt( infile ) + +maxthread = int(max(data[:,0])) + 1 +print "# Maximum thread id:", maxthread + +# Recover the start and end time +full_step = data[0,:] +tic_step = int(full_step[4]) +toc_step = int(full_step[5]) +CPU_CLOCK = float(full_step[-1]) / 1000.0 +data = data[1:,:] +if args.verbose: + print "CPU frequency:", CPU_CLOCK * 1000.0 + +# Avoid start and end times of zero. +data = data[data[:,4] != 0] +data = data[data[:,5] != 0] + +# Calculate the time range. +total_t = (toc_step - tic_step)/ CPU_CLOCK +print "# Data range: ", total_t, "ms" + +# Correct times to relative values. +start_t = float(tic_step) +data[:,4] -= start_t +data[:,5] -= start_t + +tasks = {} +tasks[-1] = [] +for i in range(maxthread): + tasks[i] = [] + +# Gather into by thread data. 
+num_lines = pl.size(data) / 10 +for line in range(num_lines): + thread = int(data[line,0]) + tic = int(data[line,4]) / CPU_CLOCK + toc = int(data[line,5]) / CPU_CLOCK + tasktype = int(data[line,1]) + subtype = int(data[line,2]) + + tasks[thread].append([tic,toc,tasktype,subtype]) + +# Sort by tic and gather used thread ids. +threadids = [] +for i in range(maxthread): + if len(tasks[i]) > 0: + tasks[i] = sorted(tasks[i], key=lambda task: task[0]) + threadids.append(i) + +# Times per task. +print "# Task times:" +print "# {0:<16s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\ + .format("type/subtype", "count","minimum", "maximum", + "sum", "mean", "percent") +alltasktimes = {} +for i in threadids: + tasktimes = {} + for task in tasks[i]: + key = TASKTYPES[task[2]] + "/" + SUBTYPES[task[3]] + dt = task[1] - task[0] + if not key in tasktimes: + tasktimes[key] = [] + tasktimes[key].append(dt) + + if not key in alltasktimes: + alltasktimes[key] = [] + alltasktimes[key].append(dt) + + print "# Thread : ", i + for key in sorted(tasktimes.keys()): + taskmin = min(tasktimes[key]) + taskmax = max(tasktimes[key]) + tasksum = sum(tasktimes[key]) + print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ + .format(key, len(tasktimes[key]), taskmin, taskmax, tasksum, + tasksum / len(tasktimes[key]), tasksum / total_t * 100.0) + print + +print "# All threads : " +for key in sorted(alltasktimes.keys()): + taskmin = min(alltasktimes[key]) + taskmax = max(alltasktimes[key]) + tasksum = sum(alltasktimes[key]) + print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ + .format(key, len(alltasktimes[key]), taskmin, taskmax, tasksum, + tasksum / len(alltasktimes[key]), + tasksum / (len(threadids) * total_t) * 100.0) +print + +# Dead times. +print "# Deadtimes:" +print "# no. 
: {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ + .format("count", "minimum", "maximum", "sum", "mean", "percent") +alldeadtimes = [] +for i in threadids: + deadtimes = [] + last = 0 + for task in tasks[i]: + dt = task[0] - last + deadtimes.append(dt) + last = task[1] + dt = total_t - last + deadtimes.append(dt) + + deadmin = min(deadtimes) + deadmax = max(deadtimes) + deadsum = sum(deadtimes) + print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ + .format(i, len(deadtimes), deadmin, deadmax, deadsum, + deadsum / len(deadtimes), deadsum / total_t * 100.0) + alldeadtimes.extend(deadtimes) + +deadmin = min(alldeadtimes) +deadmax = max(alldeadtimes) +deadsum = sum(alldeadtimes) +print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ + .format(len(alldeadtimes), deadmin, deadmax, deadsum, + deadsum / len(alldeadtimes), + deadsum / (len(threadids) * total_t ) * 100.0) +print + + +sys.exit(0) diff --git a/examples/analyse_tasks_MPI.py b/examples/analyse_tasks_MPI.py new file mode 100755 index 0000000000000000000000000000000000000000..9feffaf67ec393257d75428e310a2e8b807df39a --- /dev/null +++ b/examples/analyse_tasks_MPI.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python +""" +Usage: + analyse_tasks_MPI.py [options] input.dat + +where input.dat is a thread info file for an MPI step. Use the '-y interval' +flag of the swift command to create these. + +The output is an analysis of the task timings, including deadtime per thread +and step, total amount of time spent for each task type, for the whole step +and per thread and the minimum and maximum times spent per task type. + +This file is part of SWIFT. +Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk) + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +""" + +import matplotlib +matplotlib.use("Agg") +import matplotlib.collections as collections +import matplotlib.ticker as plticker +import pylab as pl +import sys +import argparse + +# Handle the command line. +parser = argparse.ArgumentParser(description="Analyse task dumps") + +parser.add_argument("input", help="Thread data file (-y output)") +parser.add_argument("-v", "--verbose", dest="verbose", + help="Verbose output (default: False)", + default=False, action="store_true") + +args = parser.parse_args() +infile = args.input + +# Tasks and subtypes. Indexed as in tasks.h. +TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", + "init_grav", "ghost", "extra_ghost", "drift_part", + "drift_gpart", "kick1", "kick2", "timestep", "send", "recv", + "grav_top_level", "grav_long_range", "grav_mm", "grav_down", + "cooling", "sourceterms", "count"] + +SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav", + "tend", "xv", "rho", "gpart", "multipole", "spart", "count"] + +# Read input. +data = pl.loadtxt( infile ) + +# Get the CPU clock to convert ticks into milliseconds. +full_step = data[0,:] +CPU_CLOCK = float(full_step[-1]) / 1000.0 +if args.verbose: + print "# CPU frequency:", CPU_CLOCK * 1000.0 + +nranks = int(max(data[:,0])) + 1 +print "# Number of ranks:", nranks +maxthread = int(max(data[:,1])) + 1 +print "# Maximum thread id:", maxthread + +# Avoid start and end times of zero (chain both filters: tic column 5, then toc column 6). +sdata = data[data[:,5] != 0] +sdata = sdata[sdata[:,6] != 0] + +# Now we process all the ranks. 
+for rank in range(nranks): + print "# Rank", rank + data = sdata[sdata[:,0] == rank] + + # Recover the start and end time + full_step = data[0,:] + tic_step = int(full_step[5]) + toc_step = int(full_step[6]) + data = data[1:,:] + + # Avoid start and end times of zero. + data = data[data[:,5] != 0] + data = data[data[:,6] != 0] + + # Calculate the time range. + total_t = (toc_step - tic_step)/ CPU_CLOCK + print "# Data range: ", total_t, "ms" + + # Correct times to relative values. + start_t = float(tic_step) + data[:,5] -= start_t + data[:,6] -= start_t + end_t = (toc_step - start_t) / CPU_CLOCK + + tasks = {} + tasks[-1] = [] + for i in range(maxthread): + tasks[i] = [] + + # Gather into by thread data. + num_lines = pl.size(data) / 12 + for line in range(num_lines): + thread = int(data[line,1]) + tic = int(data[line,5]) / CPU_CLOCK + toc = int(data[line,6]) / CPU_CLOCK + tasktype = int(data[line,2]) + subtype = int(data[line,3]) + + tasks[thread].append([tic,toc,tasktype,subtype]) + + # Sort by tic and gather used threads. + threadids = [] + for i in range(maxthread): + tasks[i] = sorted(tasks[i], key=lambda task: task[0]) + threadids.append(i) + + # Times per task. 
+ print "# Task times:" + print "# {0:<16s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\ + .format("type/subtype", "count","minimum", "maximum", + "sum", "mean", "percent") + alltasktimes = {} + for i in threadids: + tasktimes = {} + for task in tasks[i]: + key = TASKTYPES[task[2]] + "/" + SUBTYPES[task[3]] + dt = task[1] - task[0] + if not key in tasktimes: + tasktimes[key] = [] + tasktimes[key].append(dt) + + if not key in alltasktimes: + alltasktimes[key] = [] + alltasktimes[key].append(dt) + + print "# Thread : ", i + for key in sorted(tasktimes.keys()): + taskmin = min(tasktimes[key]) + taskmax = max(tasktimes[key]) + tasksum = sum(tasktimes[key]) + print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ + .format(key, len(tasktimes[key]), taskmin, taskmax, tasksum, + tasksum / len(tasktimes[key]), tasksum / total_t * 100.0) + print + + print "# All threads : " + for key in sorted(alltasktimes.keys()): + taskmin = min(alltasktimes[key]) + taskmax = max(alltasktimes[key]) + tasksum = sum(alltasktimes[key]) + print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ + .format(key, len(alltasktimes[key]), taskmin, taskmax, tasksum, + tasksum / len(alltasktimes[key]), + tasksum / (len(threadids) * total_t) * 100.0) + print + + # Dead times. + print "# Deadtimes:" + print "# no. 
: {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ + .format("count", "minimum", "maximum", "sum", "mean", "percent") + alldeadtimes = [] + for i in threadids: + deadtimes = [] + last = 0 + for task in tasks[i]: + dt = task[0] - last + deadtimes.append(dt) + last = task[1] + dt = total_t - last + deadtimes.append(dt) + + deadmin = min(deadtimes) + deadmax = max(deadtimes) + deadsum = sum(deadtimes) + print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ + .format(i, len(deadtimes), deadmin, deadmax, deadsum, + deadsum / len(deadtimes), deadsum / total_t * 100.0) + alldeadtimes.extend(deadtimes) + + deadmin = min(alldeadtimes) + deadmax = max(alldeadtimes) + deadsum = sum(alldeadtimes) + print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ + .format(len(alldeadtimes), deadmin, deadmax, deadsum, + deadsum / len(alldeadtimes), + deadsum / (len(threadids) * total_t ) * 100.0) + print + + +sys.exit(0) diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index 14bc60bc1e1c05ecdc66fb7ac828102b1d5748bf..8006c1a325845d6e9fec655b809310a63daa9ddb 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -107,6 +107,12 @@ DiscPatchPotential: timestep_mult: 0.03 # Dimensionless pre-factor for the time-step condition growth_time: 5. # (Optional) Time for the disc to grow to its final size (multiple of the dynamical time) +# Sine Wave potential +SineWavePotential: + amplitude: 10. # Amplitude of the sine wave (internal units) + timestep_limit: 1. # Time-step dimensionless pre-factor. + growth_time: 0. # (Optional) Time for the potential to grow to its final size. 
+ # Parameters related to cooling function ---------------------------------------------- # Constant du/dt cooling function diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py index 1be59d1c8449970321b8ef9053ddf24b4559dabd..88f176687db8116cfd4370970769164985e4d366 100755 --- a/examples/plot_tasks.py +++ b/examples/plot_tasks.py @@ -1,18 +1,20 @@ #!/usr/bin/env python """ Usage: - plot_tasks.py input.dat output.png [time-range-ms] + plot_tasks.py [options] input.dat output.png -where input.dat is a thread info file for a step. Use the '-y interval' -flag of the swift MPI commands to create these. The output plot will be -called 'output.png'. Use the time-range-ms in millisecs to produce -plots with the same time span. +where input.dat is a thread info file for a step. Use the '-y interval' flag +of the swift command to create these. The output plot will be called +'output.png'. The --limit option can be used to produce plots with the same +time span and the --expand option to expand each thread line into '*expand' +lines, so that adjacent tasks of the same type can be distinguished. Other +options can be seen using the --help flag. This file is part of SWIFT. Copyright (c) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk), Bert Vandenbroucke (bert.vandenbroucke@ugent.be) Matthieu Schaller (matthieu.schaller@durham.ac.uk) - (c) 2016 Peter W. Draper (p.w.draper@durham.ac.uk) + (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk) This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published @@ -29,11 +31,42 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. """ import matplotlib +matplotlib.use("Agg") import matplotlib.collections as collections -matplotlib.use('Agg') +import matplotlib.ticker as plticker import pylab as pl -import numpy as np import sys +import argparse + +# Handle the command line. 
+parser = argparse.ArgumentParser(description="Plot task graphs") + +parser.add_argument("input", help="Thread data file (-y output)") +parser.add_argument("outpng", help="Name for output graphic file (PNG)") +parser.add_argument("-l", "--limit", dest="limit", + help="Upper time limit in millisecs (def: depends on data)", + default=0, type=int) +parser.add_argument("-e", "--expand", dest="expand", + help="Thread expansion factor (def: 1)", + default=1, type=int) +parser.add_argument("--height", dest="height", + help="Height of plot in inches (def: 4)", + default=4., type=float) +parser.add_argument("--width", dest="width", + help="Width of plot in inches (def: 16)", + default=16., type=float) +parser.add_argument("--nolegend", dest="nolegend", + help="Whether to show the legend (def: False)", + default=False, action="store_true") +parser.add_argument("-v", "--verbose", dest="verbose", + help="Show colour assignments and other details (def: False)", + default=False, action="store_true") + +args = parser.parse_args() +infile = args.input +outpng = args.outpng +delta_t = args.limit +expand = args.expand # Basic plot configuration. PLOT_PARAMS = {"axes.labelsize": 10, @@ -42,7 +75,7 @@ PLOT_PARAMS = {"axes.labelsize": 10, "legend.fontsize": 12, "xtick.labelsize": 10, "ytick.labelsize": 10, - "figure.figsize" : (16., 4.), + "figure.figsize" : (args.width, args.height), "figure.subplot.left" : 0.03, "figure.subplot.right" : 0.995, "figure.subplot.bottom" : 0.1, @@ -56,9 +89,11 @@ pl.rcParams.update(PLOT_PARAMS) # Tasks and subtypes. Indexed as in tasks.h. 
TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", - "init", "ghost", "extra_ghost", "drift", "kick1", "kick2", - "timestep", "send", "recv", "grav_top_level", "grav_long_range", - "grav_mm", "grav_down", "cooling", "sourceterms", "count"] + "init_grav", "ghost", "extra_ghost", "drift_part", + "drift_gpart", "kick1", "kick2", "timestep", "send", "recv", + "grav_top_level", "grav_long_range", "grav_mm", "grav_down", + "cooling", "sourceterms", "count"] + SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav", "tend", "xv", "rho", "gpart", "multipole", "spart", "count"] @@ -69,14 +104,16 @@ FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force", "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho", "recv/tend", "send/tend"] -# Get a number of colours for the various types. -colours = ["black", "gray", "rosybrown", "firebrick", "red", "darksalmon", - "sienna", "sandybrown", "bisque", "tan", "moccasin", "gold", "darkkhaki", - "lightgoldenrodyellow", "olivedrab", "chartreuse", "darksage", "lightgreen", - "green", "mediumseagreen", "mediumaquamarine", "mediumturquoise", "darkslategrey", - "cyan", "cadetblue", "skyblue", "dodgerblue", "slategray", "darkblue", - "slateblue", "blueviolet", "mediumorchid", "purple", "magenta", "hotpink", - "pink"] +# A number of colours for the various types. Recycled when there are +# more task types than colours... +colours = ["cyan", "lightgray", "darkblue", "yellow", "tan", "dodgerblue", + "sienna", "aquamarine", "bisque", "blue", "green", "lightgreen", + "brown", "purple", "moccasin", "olivedrab", "chartreuse", + "darksage", "darkgreen", "green", "mediumseagreen", + "mediumaquamarine", "darkslategrey", "mediumturquoise", + "black", "cadetblue", "skyblue", "red", "slategray", "gold", + "slateblue", "blueviolet", "mediumorchid", "firebrick", + "magenta", "hotpink", "pink", "orange", "lightgreen"] maxcolours = len(colours) # Set colours of task/subtype. 
@@ -87,30 +124,21 @@ for task in TASKTYPES: ncolours = (ncolours + 1) % maxcolours SUBCOLOURS = {} -for task in SUBTYPES: +for task in FULLTYPES: SUBCOLOURS[task] = colours[ncolours] ncolours = (ncolours + 1) % maxcolours -for task in FULLTYPES: +for task in SUBTYPES: SUBCOLOURS[task] = colours[ncolours] ncolours = (ncolours + 1) % maxcolours -# Show docs if help is requested. -if len( sys.argv ) == 2 and ( sys.argv[1][0:2] == "-h" or sys.argv[1][0:3] == "--h" ): - from pydoc import help - help( "__main__" ) - sys.exit( 0 ) - -# Handle command-line. -if len( sys.argv ) != 3 and len( sys.argv ) != 4: - print "Usage: ", sys.argv[0], "input.dat output.png [time-range-ms]" - sys.exit(1) - -infile = sys.argv[1] -outpng = sys.argv[2] -delta_t = 0 -if len( sys.argv ) == 4: - delta_t = int(sys.argv[3]) +# For fiddling with colours... +if args.verbose: + print "#Selected colours:" + for task in sorted(TASKCOLOURS.keys()): + print "# " + task + ": " + TASKCOLOURS[task] + for task in sorted(SUBCOLOURS.keys()): + print "# " + task + ": " + SUBCOLOURS[task] # Read input. data = pl.loadtxt( infile ) @@ -118,51 +146,61 @@ data = pl.loadtxt( infile ) nthread = int(max(data[:,0])) + 1 print "Number of threads:", nthread -# Recover the start and end time +# Recover the start and end time full_step = data[0,:] tic_step = int(full_step[4]) toc_step = int(full_step[5]) -CPU_CLOCK = float(full_step[-1]) +CPU_CLOCK = float(full_step[-1]) / 1000.0 data = data[1:,:] +if args.verbose: + print "CPU frequency:", CPU_CLOCK * 1000.0 -print "CPU frequency:", CPU_CLOCK - -# Avoid start and end times of zero. +# Avoid start and end times of zero. data = data[data[:,4] != 0] data = data[data[:,5] != 0] -# Calculate the time range, if not given. -delta_t = delta_t * CPU_CLOCK / 1000 +# Calculate the time range, if not given. 
+delta_t = delta_t * CPU_CLOCK if delta_t == 0: - dt = max(data[:,5]) - min(data[:,4]) + dt = toc_step - tic_step if dt > delta_t: delta_t = dt - print "Data range: ", delta_t / CPU_CLOCK * 1000, "ms" + print "Data range: ", delta_t / CPU_CLOCK, "ms" -# Once more doing the real gather and plots this time. -start_t = tic_step +# Once more doing the real gather and plots this time. +start_t = float(tic_step) data[:,4] -= start_t data[:,5] -= start_t -end_t = (toc_step - start_t) / CPU_CLOCK * 1000 +end_t = (toc_step - start_t) / CPU_CLOCK tasks = {} tasks[-1] = [] -for i in range(nthread): +for i in range(nthread*expand): tasks[i] = [] +# Counters for each thread when expanding. +ecounter = [] +for i in range(nthread): + ecounter.append(0) + num_lines = pl.size(data) / 10 for line in range(num_lines): thread = int(data[line,0]) + + # Expand to cover extra lines if expanding. + ethread = thread * expand + (ecounter[thread] % expand) + ecounter[thread] = ecounter[thread] + 1 + thread = ethread + tasks[thread].append({}) tasktype = TASKTYPES[int(data[line,1])] subtype = SUBTYPES[int(data[line,2])] tasks[thread][-1]["type"] = tasktype tasks[thread][-1]["subtype"] = subtype - tic = int(data[line,4]) / CPU_CLOCK * 1000 - toc = int(data[line,5]) / CPU_CLOCK * 1000 + tic = int(data[line,4]) / CPU_CLOCK + toc = int(data[line,5]) / CPU_CLOCK tasks[thread][-1]["tic"] = tic tasks[thread][-1]["toc"] = toc - tasks[thread][-1]["t"] = (toc + tic)/ 2 if "self" in tasktype or "pair" in tasktype: fulltype = tasktype + "/" + subtype if fulltype in SUBCOLOURS: @@ -171,31 +209,24 @@ for line in range(num_lines): tasks[thread][-1]["colour"] = SUBCOLOURS[subtype] else: tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype] - -for thread in range(nthread): - tasks[thread] = sorted(tasks[thread], key=lambda l: l["t"]) - + +# Use expanded threads from now on. 
+nthread = nthread * expand + typesseen = [] fig = pl.figure() ax = fig.add_subplot(1,1,1) -ax.set_xlim(-delta_t * 0.03 * 1000 / CPU_CLOCK, delta_t * 1.03 * 1000 / CPU_CLOCK) +ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK) ax.set_ylim(0, nthread) -tictoc = np.zeros(2) for i in range(nthread): # Collect ranges and colours into arrays. - tictocs = np.zeros(len(tasks[i])*2) - colours = np.empty(len(tasks[i])*2, dtype='object') - coloursseen = [] + tictocs = [] + colours = [] j = 0 for task in tasks[i]: - tictocs[j] = task["tic"] - tictocs[j+1] = task["toc"] - colours[j] = task["colour"] - colours[j+1] = task["colour"] - j = j + 2 - if task["colour"] not in coloursseen: - coloursseen.append(task["colour"]) + tictocs.append((task["tic"], task["toc"] - task["tic"])) + colours.append(task["colour"]) # Legend support, collections don't add to this. if task["subtype"] != "none": @@ -206,31 +237,33 @@ for i in range(nthread): pl.plot([], [], color=task["colour"], label=qtask) typesseen.append(qtask) - # Now plot each colour, faster to use a mask to select colour ranges. - for colour in coloursseen: - collection = collections.BrokenBarHCollection.span_where(tictocs, ymin=i+0.05, ymax=i+0.95, - where=colours == colour, - facecolor=colour, - linewidths=0) - ax.add_collection(collection) - + # Now plot. + ax.broken_barh(tictocs, [i+0.05,0.90], facecolors = colours, linewidth=0) # Legend and room for it. 
nrow = len(typesseen) / 5 -if len(typesseen) * 5 < nrow: - nrow = nrow + 1 -ax.fill_between([0, 0], nthread+0.5, nthread + nrow + 0.5, facecolor="white") -ax.set_ylim(0, nthread + nrow + 1) -ax.legend(loc=1, shadow=True, mode="expand", ncol=5) +if not args.nolegend: + if nrow * 5 < len(typesseen): + nrow = nrow + 1 + ax.fill_between([0, 0], nthread+0.5, nthread + nrow + 0.5, facecolor="white") + ax.set_ylim(0, nthread + nrow + 1) + ax.legend(loc=1, shadow=True, mode="expand", ncol=5) # Start and end of time-step ax.plot([0, 0], [0, nthread + nrow + 1], 'k--', linewidth=1) ax.plot([end_t, end_t], [0, nthread + nrow + 1], 'k--', linewidth=1) ax.set_xlabel("Wall clock time [ms]") -ax.set_ylabel("Thread ID" ) +if expand == 1: + ax.set_ylabel("Thread ID" ) +else: + ax.set_ylabel("Thread ID * " + str(expand) ) ax.set_yticks(pl.array(range(nthread)), True) +loc = plticker.MultipleLocator(base=expand) +ax.yaxis.set_major_locator(loc) +ax.grid(True, which='major', axis="y", linestyle="-") + pl.show() pl.savefig(outpng) print "Graphics done, output written to", outpng diff --git a/examples/plot_tasks_MPI.py b/examples/plot_tasks_MPI.py index c95bfa1fd2d087cc907b57201c1a1397cbeb1460..83465aee87e8b641775d760fa4db2f06b125dd8b 100755 --- a/examples/plot_tasks_MPI.py +++ b/examples/plot_tasks_MPI.py @@ -1,13 +1,15 @@ #!/usr/bin/env python """ Usage: - plot_tasks_MPI.py input.dat png-output-prefix [time-range-ms] + plot_tasks_MPI.py [options] input.dat png-output-prefix -where input.dat is a thread info file for a step of an MPI run. Use the '-y -interval' flag of the swift MPI commands to create these. The output plots -will be called 'png-output-prefix<mpi-rank>.png', i.e. one each for all the -threads in each MPI rank. Use the time-range-ms in millisecs to produce -plots with the same time span. +where input.dat is a thread info file for a step. Use the '-y interval' flag +of the swift MPI command to create these. 
The output plot will be called +'png-output-prefix<mpi-rank>.png', i.e. one each for all the threads in each +MPI rank. The --limit option can be used to produce plots with the same time +span and the --expand option to expand each thread line into '*expand' lines, +so that adjacent tasks of the same type can be distinguished. Other options +can be seen using the --help flag. See the command 'process_plot_tasks_MPI' to efficiently wrap this command to process a number of thread info files and create an HTML file to view them. @@ -17,7 +19,7 @@ This file is part of SWIFT. Copyright (C) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk), Bert Vandenbroucke (bert.vandenbroucke@ugent.be) Matthieu Schaller (matthieu.schaller@durham.ac.uk) - Peter W. Draper (p.w.draper@durham.ac.uk) + (C) 2017 Peter W. Draper (p.w.draper@durham.ac.uk) All Rights Reserved. This program is free software: you can redistribute it and/or modify @@ -35,13 +37,42 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. """ import matplotlib -import matplotlib.collections as collections matplotlib.use("Agg") +import matplotlib.collections as collections +import matplotlib.ticker as plticker import pylab as pl -import numpy as np import sys -#import warnings -#warnings.simplefilter("error") +import argparse + +# Handle the command line. 
+parser = argparse.ArgumentParser(description="Plot task graphs") + +parser.add_argument("input", help="Thread data file (-y output)") +parser.add_argument("outbase", help="Base name for output graphic files (PNG)") +parser.add_argument("-l", "--limit", dest="limit", + help="Upper time limit in millisecs (def: depends on data)", + default=0, type=int) +parser.add_argument("-e", "--expand", dest="expand", + help="Thread expansion factor (def: 1)", + default=1, type=int) +parser.add_argument("--height", dest="height", + help="Height of plot in inches (def: 4)", + default=4., type=float) +parser.add_argument("--width", dest="width", + help="Width of plot in inches (def: 16)", + default=16., type=float) +parser.add_argument("--nolegend", dest="nolegend", + help="Whether to show the legend (def: False)", + default=False, action="store_true") +parser.add_argument("-v", "--verbose", dest="verbose", + help="Show colour assignments and other details (def: False)", + default=False, action="store_true") + +args = parser.parse_args() +infile = args.input +outbase = args.outbase +delta_t = args.limit +expand = args.expand # Basic plot configuration. PLOT_PARAMS = {"axes.labelsize": 10, @@ -50,7 +81,7 @@ PLOT_PARAMS = {"axes.labelsize": 10, "legend.fontsize": 12, "xtick.labelsize": 10, "ytick.labelsize": 10, - "figure.figsize" : (16., 4.), + "figure.figsize" : (args.width, args.height), "figure.subplot.left" : 0.03, "figure.subplot.right" : 0.995, "figure.subplot.bottom" : 0.1, @@ -64,26 +95,31 @@ pl.rcParams.update(PLOT_PARAMS) # Tasks and subtypes. Indexed as in tasks.h. 
TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", - "init", "ghost", "extra_ghost", "drift", "kick1", "kick2", - "timestep", "send", "recv", "grav_gather_m", "grav_fft", - "grav_mm", "grav_up", "cooling", "sourceterms", "count"] + "init_grav", "ghost", "extra_ghost", "drift_part", "drift_gpart", + "kick1", "kick2", "timestep", "send", "recv", "grav_top_level", + "grav_long_range", "grav_mm", "grav_down", "cooling", + "sourceterms", "count"] + SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav", - "tend", "xv", "rho", "gpart", "count"] + "tend", "xv", "rho", "gpart", "multipole", "spart", "count"] # Task/subtypes of interest. -FULLTYPES = ["self/force", "self/density", "sub_self/force", - "sub_self/density", "pair/force", "pair/density", "sub_pair/force", +FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force", + "sub_self/density", "pair/force", "pair/density", "pair/grav", + "sub_pair/force", "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho", "recv/tend", "send/tend"] -# Get a number of colours for the various types. -colours = ["black", "gray", "rosybrown", "firebrick", "red", "darksalmon", - "sienna", "sandybrown", "bisque", "tan", "moccasin", "gold", "darkkhaki", - "lightgoldenrodyellow", "olivedrab", "chartreuse", "darksage", "lightgreen", - "green", "mediumseagreen", "mediumaquamarine", "mediumturquoise", "darkslategrey", - "cyan", "cadetblue", "skyblue", "dodgerblue", "slategray", "darkblue", - "slateblue", "blueviolet", "mediumorchid", "purple", "magenta", "hotpink", - "pink"] +# A number of colours for the various types. Recycled when there are +# more task types than colours... 
+colours = ["cyan", "lightgray", "darkblue", "yellow", "tan", "dodgerblue", + "sienna", "aquamarine", "bisque", "blue", "green", "lightgreen", + "brown", "purple", "moccasin", "olivedrab", "chartreuse", + "darksage", "darkgreen", "green", "mediumseagreen", + "mediumaquamarine", "darkslategrey", "mediumturquoise", + "black", "cadetblue", "skyblue", "red", "slategray", "gold", + "slateblue", "blueviolet", "mediumorchid", "firebrick", + "magenta", "hotpink", "pink", "orange", "lightgreen"] maxcolours = len(colours) # Set colours of task/subtype. @@ -94,43 +130,30 @@ for task in TASKTYPES: ncolours = (ncolours + 1) % maxcolours SUBCOLOURS = {} -for task in SUBTYPES: +for task in FULLTYPES: SUBCOLOURS[task] = colours[ncolours] ncolours = (ncolours + 1) % maxcolours -for task in FULLTYPES: +for task in SUBTYPES: SUBCOLOURS[task] = colours[ncolours] ncolours = (ncolours + 1) % maxcolours -# Show docs if help is requested. -if len( sys.argv ) == 2 and ( sys.argv[1][0:2] == "-h" or sys.argv[1][0:3] == "--h" ): - from pydoc import help - help( "__main__" ) - sys.exit( 0 ) - -# Handle command-line. -if len( sys.argv ) != 3 and len( sys.argv ) != 4: - print "Usage: ", sys.argv[0], "input.dat png-output-prefix [time-range-ms]" - sys.exit(1) - - -infile = sys.argv[1] -outbase = sys.argv[2] -delta_t = 0 -if len( sys.argv ) == 4: - delta_t = int(sys.argv[3]) +# For fiddling with colours... +if args.verbose: + print "#Selected colours:" + for task in sorted(TASKCOLOURS.keys()): + print "# " + task + ": " + TASKCOLOURS[task] + for task in sorted(SUBCOLOURS.keys()): + print "# " + task + ": " + SUBCOLOURS[task] # Read input. data = pl.loadtxt( infile ) -# Recover the start and end time +# Get CPU_CLOCK to convert ticks into milliseconds. 
full_step = data[0,:] -tic_step = int(full_step[5]) -toc_step = int(full_step[6]) -CPU_CLOCK = float(full_step[-1]) - -print "CPU frequency:", CPU_CLOCK - +CPU_CLOCK = float(full_step[-1]) / 1000.0 +if args.verbose: + print "CPU frequency:", CPU_CLOCK * 1000.0 nranks = int(max(data[:,0])) + 1 print "Number of ranks:", nranks @@ -144,60 +167,74 @@ sdata = sdata[sdata[:,6] != 0] # Each rank can have different clock (compute node), but we want to use the # same delta times range for comparisons, so we suck it up and take the hit of # precalculating this, unless the user knows better. -delta_t = delta_t * CPU_CLOCK / 1000 +delta_t = delta_t * CPU_CLOCK if delta_t == 0: for rank in range(nranks): data = sdata[sdata[:,0] == rank] - dt = max(data[:,6]) - min(data[:,5]) + full_step = data[0,:] + tic_step = int(full_step[5]) + toc_step = int(full_step[6]) + dt = toc_step - tic_step if dt > delta_t: delta_t = dt - print "Data range: ", delta_t / CPU_CLOCK * 1000, "ms" - + print "Data range: ", delta_t / CPU_CLOCK, "ms" # Once more doing the real gather and plots this time. for rank in range(nranks): data = sdata[sdata[:,0] == rank] + # Start and end times for this rank. full_step = data[0,:] tic_step = int(full_step[5]) toc_step = int(full_step[6]) data = data[1:,:] typesseen = [] + nethread = 0 # Dummy image for ranks that have no tasks. 
if data.size == 0: print "rank ", rank, " has no tasks" fig = pl.figure() ax = fig.add_subplot(1,1,1) - ax.set_xlim(-delta_t * 0.03 * 1000 / CPU_CLOCK, delta_t * 1.03 * 1000 / CPU_CLOCK) - ax.set_ylim(0, nthread) + ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK) + ax.set_ylim(0, nthread*expand) start_t = tic_step - end_t = (toc_step - start_t) / CPU_CLOCK * 1000 + end_t = (toc_step - start_t) / CPU_CLOCK else: - start_t = tic_step + start_t = float(tic_step) data[:,5] -= start_t data[:,6] -= start_t - end_t = (toc_step - start_t) / CPU_CLOCK * 1000 + end_t = (toc_step - start_t) / CPU_CLOCK tasks = {} tasks[-1] = [] - for i in range(nthread): + for i in range(nthread*expand): tasks[i] = [] + # Counters for each thread when expanding. + ecounter = [] + for i in range(nthread): + ecounter.append(0) + num_lines = pl.shape(data)[0] for line in range(num_lines): thread = int(data[line,1]) + + # Expand to cover extra lines if expanding. + ethread = thread * expand + (ecounter[thread] % expand) + ecounter[thread] = ecounter[thread] + 1 + thread = ethread + tasks[thread].append({}) tasktype = TASKTYPES[int(data[line,2])] subtype = SUBTYPES[int(data[line,3])] tasks[thread][-1]["type"] = tasktype tasks[thread][-1]["subtype"] = subtype - tic = int(data[line,5]) / CPU_CLOCK * 1000 - toc = int(data[line,6]) / CPU_CLOCK * 1000 + tic = int(data[line,5]) / CPU_CLOCK + toc = int(data[line,6]) / CPU_CLOCK tasks[thread][-1]["tic"] = tic tasks[thread][-1]["toc"] = toc - tasks[thread][-1]["t"] = (toc + tic)/ 2 if "self" in tasktype or "pair" in tasktype or "recv" in tasktype or "send" in tasktype: fulltype = tasktype + "/" + subtype if fulltype in SUBCOLOURS: @@ -207,29 +244,23 @@ for rank in range(nranks): else: tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype] - for thread in range(nthread): - tasks[thread] = sorted(tasks[thread], key=lambda l: l["t"]) + # Use expanded threads from now on. 
+ nethread = nthread * expand + typesseen = [] fig = pl.figure() ax = fig.add_subplot(1,1,1) - ax.set_xlim(-delta_t * 0.03 * 1000 / CPU_CLOCK, delta_t * 1.03 * 1000 / CPU_CLOCK) - ax.set_ylim(0, nthread) - tictoc = np.zeros(2) - for i in range(nthread): + ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK) + ax.set_ylim(0, nethread) + for i in range(nethread): # Collect ranges and colours into arrays. - tictocs = np.zeros(len(tasks[i])*2) - colours = np.empty(len(tasks[i])*2, dtype='object') - coloursseen = [] + tictocs = [] + colours = [] j = 0 for task in tasks[i]: - tictocs[j] = task["tic"] - tictocs[j+1] = task["toc"] - colours[j] = task["colour"] - colours[j+1] = task["colour"] - j = j + 2 - if task["colour"] not in coloursseen: - coloursseen.append(task["colour"]) + tictocs.append((task["tic"], task["toc"] - task["tic"])) + colours.append(task["colour"]) # Legend support, collections don't add to this. if task["subtype"] != "none": @@ -241,33 +272,34 @@ for rank in range(nranks): pl.plot([], [], color=task["colour"], label=qtask) typesseen.append(qtask) - # Now plot each colour, faster to use a mask to select colour ranges. - for colour in coloursseen: - collection = collections.BrokenBarHCollection.span_where(tictocs, - ymin=i+0.05, - ymax=i+0.95, - where=colours == colour, - facecolor=colour, - linewidths=0) - ax.add_collection(collection) + # Now plot. + ax.broken_barh(tictocs, [i+0.05,0.90], facecolors = colours, linewidth=0) # Legend and room for it. 
nrow = len(typesseen) / 5 if len(typesseen) * 5 < nrow: nrow = nrow + 1 - ax.fill_between([0, 0], nthread+0.5, nthread + nrow + 0.5, facecolor="white") - ax.set_ylim(0, nthread + nrow + 1) + ax.fill_between([0, 0], nethread+0.5, nethread + nrow + 0.5, facecolor="white") + ax.set_ylim(0, nethread + nrow + 1) if data.size > 0: ax.legend(loc=1, shadow=True, mode="expand", ncol=5) # Start and end of time-step - ax.plot([0, 0], [0, nthread + nrow + 1], 'k--', linewidth=1) - ax.plot([end_t, end_t], [0, nthread + nrow + 1], 'k--', linewidth=1) + ax.plot([0, 0], [0, nethread + nrow + 1], 'k--', linewidth=1) + ax.plot([end_t, end_t], [0, nethread + nrow + 1], 'k--', linewidth=1) ax.set_xlabel("Wall clock time [ms]") - ax.set_ylabel("Thread ID for MPI Rank " + str(rank) ) - ax.set_yticks(pl.array(range(nthread)), True) + + if expand == 1: + ax.set_ylabel("Thread ID" ) + else: + ax.set_ylabel("Thread ID * " + str(expand) ) + ax.set_yticks(pl.array(range(nethread)), True) + + loc = plticker.MultipleLocator(base=expand) + ax.yaxis.set_major_locator(loc) + ax.grid(True, which='major', axis="y", linestyle="-") pl.show() outpng = outbase + str(rank) + ".png" diff --git a/examples/process_plot_tasks b/examples/process_plot_tasks index cf19401b582c29f7e35073be93569ea8039f958d..b46fce03d8c5f21046a0e4a95a304e006c7b2293 100755 --- a/examples/process_plot_tasks +++ b/examples/process_plot_tasks @@ -56,7 +56,8 @@ done # And process them, echo "Processing thread info files..." -echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./plot_tasks.py \$0 \$2 $TIMERANGE" +echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./plot_tasks.py --expand 1 --limit $TIMERANGE --width 16 --height 4 \$0 \$2 " +echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./analyse_tasks.py \$0 > \$2.stats" echo "Writing output index.html file" # Construct document - serial. 
@@ -75,8 +76,22 @@ echo $list | xargs -n 3 | while read f s g; do <h2>Step $s</h2> EOF cat <<EOF >> index.html -<a href="step${s}r${i}.png"><img src="step${s}r${i}.png" width=400px/></a> +<a href="step${s}r${i}.html"><img src="step${s}r${i}.png" width=400px/></a> EOF + cat <<EOF > step${s}r${i}.html + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html> +<body> +<img src="step${s}r${i}.png"> +<pre> +EOF +cat step${s}r${i}.stats >> step${s}r${i}.html +cat <<EOF >> step${s}r${i}.html +</pre> +</body> +</html> +EOF + done cat <<EOF >> index.html diff --git a/examples/process_plot_tasks_MPI b/examples/process_plot_tasks_MPI index d3eb5d4a5fc5918b287cd5d98efcc5881b6f910c..b2672b3711823eb87d0bede5b1ffd8945a735f98 100755 --- a/examples/process_plot_tasks_MPI +++ b/examples/process_plot_tasks_MPI @@ -61,7 +61,8 @@ nrank=$(($nrank-1)) # And process them, echo "Processing thread info files..." -echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./plot_tasks_MPI.py \$0 \$2 $TIMERANGE" +echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./plot_tasks_MPI.py --expand 1 --limit $TIMERANGE \$0 \$2 " +echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./analyse_tasks_MPI.py \$0 > \$2.stats" echo "Writing output index.html file" # Construct document - serial. 
@@ -78,12 +79,31 @@ EOF echo $list | xargs -n 3 | while read f s g; do cat <<EOF >> index.html <h2>Step $s</h2> +<ul style="list-style-type:none"> +<li> EOF for i in $(seq 0 $nrank); do - cat <<EOF >> index.html -<a href="step${s}r${i}.png"><img src="step${s}r${i}.png" width=400px/></a> -EOF + cat <<EOF2 >> index.html +<a href="step${s}r${i}.html"><img src="step${s}r${i}.png" width=400px/></a> +EOF2 + cat <<EOF2 > step${s}r${i}.html + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html> +<body> +<img src="step${s}r${i}.png"> +<pre> +EOF2 +cat step${s}r.stats >> step${s}r${i}.html +cat <<EOF2 >> step${s}r${i}.html +</pre> +</body> +</html> +EOF2 done +cat <<EOF >> index.html +</li> +</ul> +EOF done cat <<EOF >> index.html diff --git a/m4/ax_gcc_archflag.m4 b/m4/ax_gcc_archflag.m4 index 0d0bf431138689487a5fb63a419dfc58ae70d5d0..bba53a4c8a8cb363a017c55c4e4ebbb4c6528dae 100644 --- a/m4/ax_gcc_archflag.m4 +++ b/m4/ax_gcc_archflag.m4 @@ -107,7 +107,7 @@ case $host_cpu in *2?6[[ad]]?:*:*:*) ax_gcc_arch="sandybridge corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; *3?6[[ae]]?:*:*:*) ax_gcc_arch="ivybridge core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; *3?6[[cf]]?:*:*:*|*4?6[[56]]?:*:*:*) ax_gcc_arch="haswell core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; - *3?6d?:*:*:*) ax_gcc_arch="broadwell core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; + *3?6d?:*:*:*|*4?6f?:*:*:*) ax_gcc_arch="broadwell core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; *1?6c?:*:*:*|*2?6[[67]]?:*:*:*|*3?6[[56]]?:*:*:*) ax_gcc_arch="bonnell atom core2 pentium-m pentium3 pentiumpro" ;; *3?67?:*:*:*|*[[45]]?6[[ad]]?:*:*:*) ax_gcc_arch="silvermont atom core2 pentium-m pentium3 pentiumpro" ;; *000?f[[012]]?:*:*:*|?f[[012]]?:*:*:*|f[[012]]?:*:*:*) ax_gcc_arch="pentium4 pentiumpro" ;; diff --git a/src/Makefile.am b/src/Makefile.am index 
7bec5327f4759fcf7d3e1af9d041677ffbc7ab55..2ddcdb0908201c65053d7cc5380a4217277b5c13 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -63,7 +63,7 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h runner_doiact_fft.h \ runner_doiact_nosort.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \ - dimension.h equation_of_state.h part_type.h \ + dimension.h equation_of_state.h part_type.h periodic.h \ gravity.h gravity_io.h \ gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \ gravity/Default/gravity_debug.h gravity/Default/gravity_part.h \ @@ -86,6 +86,8 @@ nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h hydro/Gizmo/hydro_slope_limiters_cell.h \ hydro/Gizmo/hydro_slope_limiters_face.h \ hydro/Gizmo/hydro_slope_limiters.h \ + hydro/Gizmo/hydro_unphysical.h \ + hydro/Gizmo/hydro_velocities.h \ hydro/Shadowswift/hydro_debug.h \ hydro/Shadowswift/hydro_gradients.h hydro/Shadowswift/hydro.h \ hydro/Shadowswift/hydro_iact.h \ diff --git a/src/active.h b/src/active.h index 02e504f762735994e6c57f7e155071fede016713..58e88835b6f51ae15f9fd7270c0e1f89bbd6d61a 100644 --- a/src/active.h +++ b/src/active.h @@ -29,25 +29,48 @@ #include "timeline.h" /** - * @brief Check that a cell been drifted to the current time. + * @brief Check that the #part in a #cell have been drifted to the current time. * * @param c The #cell. * @param e The #engine containing information about the current time. * @return 1 if the #cell has been drifted to the current time, 0 otherwise. 
*/ -__attribute__((always_inline)) INLINE static int cell_is_drifted( +__attribute__((always_inline)) INLINE static int cell_are_part_drifted( const struct cell *c, const struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS - if (c->ti_old > e->ti_current) + if (c->ti_old_part > e->ti_current) error( "Cell has been drifted too far forward in time! c->ti_old=%lld (t=%e) " "and e->ti_current=%lld (t=%e)", - c->ti_old, c->ti_old * e->timeBase, e->ti_current, + c->ti_old_part, c->ti_old_part * e->timeBase, e->ti_current, e->ti_current * e->timeBase); #endif - return (c->ti_old == e->ti_current); + return (c->ti_old_part == e->ti_current); +} + +/** + * @brief Check that the #gpart in a #cell have been drifted to the current + * time. + * + * @param c The #cell. + * @param e The #engine containing information about the current time. + * @return 1 if the #cell has been drifted to the current time, 0 otherwise. + */ +__attribute__((always_inline)) INLINE static int cell_are_gpart_drifted( + const struct cell *c, const struct engine *e) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->ti_old_gpart > e->ti_current) + error( + "Cell has been drifted too far forward in time! c->ti_old=%lld (t=%e) " + "and e->ti_current=%lld (t=%e)", + c->ti_old_gpart, c->ti_old_gpart * e->timeBase, e->ti_current, + e->ti_current * e->timeBase); +#endif + + return (c->ti_old_gpart == e->ti_current); } /* Are cells / particles active for regular tasks ? 
*/ diff --git a/src/cell.c b/src/cell.c index ccc101243ccdffbb25d8a71353e65c9d393b7148..78defcd660eca9a580f4fdb86eaf0fe7ff5ac1ec 100644 --- a/src/cell.c +++ b/src/cell.c @@ -99,7 +99,8 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { c->h_max = pc->h_max; c->ti_end_min = pc->ti_end_min; c->ti_end_max = pc->ti_end_max; - c->ti_old = pc->ti_old; + c->ti_old_part = pc->ti_old_part; + c->ti_old_gpart = pc->ti_old_gpart; c->count = pc->count; c->gcount = pc->gcount; c->scount = pc->scount; @@ -128,7 +129,8 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { if (k & 1) temp->loc[2] += temp->width[2]; temp->depth = c->depth + 1; temp->split = 0; - temp->dx_max = 0.f; + temp->dx_max_part = 0.f; + temp->dx_max_gpart = 0.f; temp->dx_max_sort = 0.f; temp->nodeID = c->nodeID; temp->parent = c; @@ -239,7 +241,8 @@ int cell_pack(struct cell *c, struct pcell *pc) { pc->h_max = c->h_max; pc->ti_end_min = c->ti_end_min; pc->ti_end_max = c->ti_end_max; - pc->ti_old = c->ti_old; + pc->ti_old_part = c->ti_old_part; + pc->ti_old_gpart = c->ti_old_gpart; pc->count = c->count; pc->gcount = c->gcount; pc->scount = c->scount; @@ -1018,7 +1021,7 @@ void cell_clean_links(struct cell *c, void *data) { } /** - * @brief Checks that the particles in a cell are at the + * @brief Checks that the #part in a cell are at the * current point in time * * Calls error() if the cell is not at the current time. @@ -1026,7 +1029,7 @@ void cell_clean_links(struct cell *c, void *data) { * @param c Cell to act upon * @param data The current time on the integer time-line */ -void cell_check_particle_drift_point(struct cell *c, void *data) { +void cell_check_part_drift_point(struct cell *c, void *data) { #ifdef SWIFT_DEBUG_CHECKS @@ -1035,14 +1038,40 @@ void cell_check_particle_drift_point(struct cell *c, void *data) { /* Only check local cells */ if (c->nodeID != engine_rank) return; - if (c->ti_old != ti_drift) - error("Cell in an incorrect time-zone! 
c->ti_old=%lld ti_drift=%lld", - c->ti_old, ti_drift); + if (c->ti_old_part != ti_drift) + error("Cell in an incorrect time-zone! c->ti_old_part=%lld ti_drift=%lld", + c->ti_old_part, ti_drift); for (int i = 0; i < c->count; ++i) if (c->parts[i].ti_drift != ti_drift) error("part in an incorrect time-zone! p->ti_drift=%lld ti_drift=%lld", c->parts[i].ti_drift, ti_drift); +#else + error("Calling debugging code without debugging flag activated."); +#endif +} + +/** + * @brief Checks that the #gpart and #spart in a cell are at the + * current point in time + * + * Calls error() if the cell is not at the current time. + * + * @param c Cell to act upon + * @param data The current time on the integer time-line + */ +void cell_check_gpart_drift_point(struct cell *c, void *data) { + +#ifdef SWIFT_DEBUG_CHECKS + + const integertime_t ti_drift = *(integertime_t *)data; + + /* Only check local cells */ + if (c->nodeID != engine_rank) return; + + if (c->ti_old_gpart != ti_drift) + error("Cell in an incorrect time-zone! c->ti_old_gpart=%lld ti_drift=%lld", + c->ti_old_gpart, ti_drift); for (int i = 0; i < c->gcount; ++i) if (c->gparts[i].ti_drift != ti_drift) @@ -1622,7 +1651,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { error("bad flags in sort task."); #endif scheduler_activate(s, ci->sorts); - if (ci->nodeID == engine_rank) scheduler_activate(s, ci->drift); + if (ci->nodeID == engine_rank) scheduler_activate(s, ci->drift_part); } if (cj->dx_max_sort > space_maxreldx * cj->dmin) { for (struct cell *finger = cj; finger != NULL; @@ -1638,7 +1667,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { error("bad flags in sort task."); #endif scheduler_activate(s, cj->sorts); - if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift); + if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift_part); } } /* Store current values of dx_max and h_max. 
*/ @@ -1651,7 +1680,8 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { /* Check whether there was too much particle motion, i.e. the cell neighbour conditions were violated. */ - if (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin) + if (max(ci->h_max, cj->h_max) + ci->dx_max_part + cj->dx_max_part > + cj->dmin) rebuild = 1; #ifdef WITH_MPI @@ -1662,6 +1692,9 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { scheduler_activate(s, ci->recv_xv); if (cell_is_active(ci, e)) { scheduler_activate(s, ci->recv_rho); +#ifdef EXTRA_HYDRO_LOOP + scheduler_activate(s, ci->recv_gradient); +#endif scheduler_activate(s, ci->recv_ti); } @@ -1674,19 +1707,28 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { scheduler_activate(s, l->t); /* Drift both cells, the foreign one at the level which it is sent. */ - if (l->t->ci->drift) - scheduler_activate(s, l->t->ci->drift); + if (l->t->ci->drift_part) + scheduler_activate(s, l->t->ci->drift_part); else error("Drift task missing !"); - if (t->type == task_type_pair) scheduler_activate(s, cj->drift); + if (t->type == task_type_pair) scheduler_activate(s, cj->drift_part); if (cell_is_active(cj, e)) { + for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; l = l->next) ; if (l == NULL) error("Missing link to send_rho task."); scheduler_activate(s, l->t); +#ifdef EXTRA_HYDRO_LOOP + for (l = cj->send_gradient; + l != NULL && l->t->cj->nodeID != ci->nodeID; l = l->next) + ; + if (l == NULL) error("Missing link to send_gradient task."); + scheduler_activate(s, l->t); +#endif + for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID; l = l->next) ; @@ -1700,6 +1742,9 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { scheduler_activate(s, cj->recv_xv); if (cell_is_active(cj, e)) { scheduler_activate(s, cj->recv_rho); +#ifdef EXTRA_HYDRO_LOOP + scheduler_activate(s, cj->recv_gradient); +#endif scheduler_activate(s, cj->recv_ti); } @@ -1712,19 +1757,28 
@@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { scheduler_activate(s, l->t); /* Drift both cells, the foreign one at the level which it is sent. */ - if (l->t->ci->drift) - scheduler_activate(s, l->t->ci->drift); + if (l->t->ci->drift_part) + scheduler_activate(s, l->t->ci->drift_part); else error("Drift task missing !"); - if (t->type == task_type_pair) scheduler_activate(s, ci->drift); + if (t->type == task_type_pair) scheduler_activate(s, ci->drift_part); if (cell_is_active(ci, e)) { + for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; l = l->next) ; if (l == NULL) error("Missing link to send_rho task."); scheduler_activate(s, l->t); +#ifdef EXTRA_HYDRO_LOOP + for (l = ci->send_gradient; + l != NULL && l->t->cj->nodeID != cj->nodeID; l = l->next) + ; + if (l == NULL) error("Missing link to send_gradient task."); + scheduler_activate(s, l->t); +#endif + for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID; l = l->next) ; @@ -1732,13 +1786,13 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { scheduler_activate(s, l->t); } } else if (t->type == task_type_pair) { - scheduler_activate(s, ci->drift); - scheduler_activate(s, cj->drift); + scheduler_activate(s, ci->drift_part); + scheduler_activate(s, cj->drift_part); } #else if (t->type == task_type_pair) { - scheduler_activate(s, ci->drift); - scheduler_activate(s, cj->drift); + scheduler_activate(s, ci->drift_part); + scheduler_activate(s, cj->drift_part); } #endif } @@ -1756,13 +1810,15 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { if (c->ghost_out != NULL) scheduler_activate(s, c->ghost_out); if (c->ghost != NULL) scheduler_activate(s, c->ghost); if (c->init_grav != NULL) scheduler_activate(s, c->init_grav); - if (c->drift != NULL) scheduler_activate(s, c->drift); + if (c->drift_part != NULL) scheduler_activate(s, c->drift_part); + if (c->drift_gpart != NULL) scheduler_activate(s, c->drift_gpart); if (c->kick1 != NULL) 
scheduler_activate(s, c->kick1); if (c->kick2 != NULL) scheduler_activate(s, c->kick2); if (c->timestep != NULL) scheduler_activate(s, c->timestep); + if (c->grav_ghost[0] != NULL) scheduler_activate(s, c->grav_ghost[0]); + if (c->grav_ghost[1] != NULL) scheduler_activate(s, c->grav_ghost[1]); if (c->grav_down != NULL) scheduler_activate(s, c->grav_down); if (c->grav_long_range != NULL) scheduler_activate(s, c->grav_long_range); - if (c->grav_top_level != NULL) scheduler_activate(s, c->grav_top_level); if (c->cooling != NULL) scheduler_activate(s, c->cooling); if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms); @@ -1790,30 +1846,28 @@ void cell_set_super(struct cell *c, struct cell *super) { } /** - * @brief Recursively drifts particles of all kinds in a cell hierarchy. + * @brief Recursively drifts the #part in a cell hierarchy. * * @param c The #cell. * @param e The #engine (to get ti_current). */ -void cell_drift_particles(struct cell *c, const struct engine *e) { +void cell_drift_part(struct cell *c, const struct engine *e) { const float hydro_h_max = e->hydro_properties->h_max; const double timeBase = e->timeBase; - const integertime_t ti_old = c->ti_old; + const integertime_t ti_old_part = c->ti_old_part; const integertime_t ti_current = e->ti_current; struct part *const parts = c->parts; struct xpart *const xparts = c->xparts; - struct gpart *const gparts = c->gparts; - struct spart *const sparts = c->sparts; /* Drift from the last time the cell was drifted to the current time */ - const double dt = (ti_current - ti_old) * timeBase; + const double dt = (ti_current - ti_old_part) * timeBase; float dx_max = 0.f, dx2_max = 0.f; float dx_max_sort = 0.0f, dx2_max_sort = 0.f; float cell_h_max = 0.f; /* Check that we are actually going to move forward. */ - if (ti_current < ti_old) error("Attempt to drift to the past"); + if (ti_current < ti_old_part) error("Attempt to drift to the past"); /* Are we not in a leaf ? 
*/ if (c->split) { @@ -1824,37 +1878,15 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { struct cell *cp = c->progeny[k]; /* Collect */ - cell_drift_particles(cp, e); + cell_drift_part(cp, e); /* Update */ - dx_max = max(dx_max, cp->dx_max); + dx_max = max(dx_max, cp->dx_max_part); dx_max_sort = max(dx_max_sort, cp->dx_max_sort); cell_h_max = max(cell_h_max, cp->h_max); } - } else if (ti_current > ti_old) { - - /* Loop over all the g-particles in the cell */ - const size_t nr_gparts = c->gcount; - for (size_t k = 0; k < nr_gparts; k++) { - - /* Get a handle on the gpart. */ - struct gpart *const gp = &gparts[k]; - - /* Drift... */ - drift_gpart(gp, dt, timeBase, ti_old, ti_current); - - /* Compute (square of) motion since last cell construction */ - const float dx2 = gp->x_diff[0] * gp->x_diff[0] + - gp->x_diff[1] * gp->x_diff[1] + - gp->x_diff[2] * gp->x_diff[2]; - dx2_max = max(dx2_max, dx2); - - /* Init gravity force fields. */ - if (gpart_is_active(gp, e)) { - gravity_init_gpart(gp); - } - } + } else if (ti_current > ti_old_part) { /* Loop over all the gas particles in the cell */ const size_t nr_parts = c->count; @@ -1865,7 +1897,7 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { struct xpart *const xp = &xparts[k]; /* Drift... 
*/ - drift_part(p, xp, dt, timeBase, ti_old, ti_current); + drift_part(p, xp, dt, timeBase, ti_old_part, ti_current); /* Limit h to within the allowed range */ p->h = min(p->h, hydro_h_max); @@ -1889,6 +1921,86 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { } } + /* Now, get the maximal particle motion from its square */ + dx_max = sqrtf(dx2_max); + dx_max_sort = sqrtf(dx2_max_sort); + + } else { + + cell_h_max = c->h_max; + dx_max = c->dx_max_part; + dx_max_sort = c->dx_max_sort; + } + + /* Store the values */ + c->h_max = cell_h_max; + c->dx_max_part = dx_max; + c->dx_max_sort = dx_max_sort; + + /* Update the time of the last drift */ + c->ti_old_part = ti_current; +} + +/** + * @brief Recursively drifts the #gpart in a cell hierarchy. + * + * @param c The #cell. + * @param e The #engine (to get ti_current). + */ +void cell_drift_gpart(struct cell *c, const struct engine *e) { + + const double timeBase = e->timeBase; + const integertime_t ti_old_gpart = c->ti_old_gpart; + const integertime_t ti_current = e->ti_current; + struct gpart *const gparts = c->gparts; + struct spart *const sparts = c->sparts; + + /* Drift from the last time the cell was drifted to the current time */ + const double dt = (ti_current - ti_old_gpart) * timeBase; + float dx_max = 0.f, dx2_max = 0.f; + + /* Check that we are actually going to move forward. */ + if (ti_current < ti_old_gpart) error("Attempt to drift to the past"); + + /* Are we not in a leaf ? */ + if (c->split) { + + /* Loop over the progeny and collect their data. */ + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) { + struct cell *cp = c->progeny[k]; + + /* Recurse */ + cell_drift_gpart(cp, e); + + /* Update */ + dx_max = max(dx_max, cp->dx_max_gpart); + } + + } else if (ti_current > ti_old_gpart) { + + /* Loop over all the g-particles in the cell */ + const size_t nr_gparts = c->gcount; + for (size_t k = 0; k < nr_gparts; k++) { + + /* Get a handle on the gpart. 
*/ + struct gpart *const gp = &gparts[k]; + + /* Drift... */ + drift_gpart(gp, dt, timeBase, ti_old_gpart, ti_current); + + /* Compute (square of) motion since last cell construction */ + const float dx2 = gp->x_diff[0] * gp->x_diff[0] + + gp->x_diff[1] * gp->x_diff[1] + + gp->x_diff[2] * gp->x_diff[2]; + dx2_max = max(dx2_max, dx2); + + /* Init gravity force fields. */ + if (gpart_is_active(gp, e)) { + gravity_init_gpart(gp); + } + } + /* Loop over all the star particles in the cell */ const size_t nr_sparts = c->scount; for (size_t k = 0; k < nr_sparts; k++) { @@ -1897,29 +2009,24 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { struct spart *const sp = &sparts[k]; /* Drift... */ - drift_spart(sp, dt, timeBase, ti_old, ti_current); + drift_spart(sp, dt, timeBase, ti_old_gpart, ti_current); /* Note: no need to compute dx_max as all spart have a gpart */ } /* Now, get the maximal particle motion from its square */ dx_max = sqrtf(dx2_max); - dx_max_sort = sqrtf(dx2_max_sort); } else { - cell_h_max = c->h_max; - dx_max = c->dx_max; - dx_max_sort = c->dx_max_sort; + dx_max = c->dx_max_gpart; } /* Store the values */ - c->h_max = cell_h_max; - c->dx_max = dx_max; - c->dx_max_sort = dx_max_sort; + c->dx_max_gpart = dx_max; /* Update the time of the last drift */ - c->ti_old = ti_current; + c->ti_old_gpart = ti_current; } /** diff --git a/src/cell.h b/src/cell.h index 05fed82d79b0c3c4f8e4343813a4d6938d402bf8..dfee0fe3fe2563449e000e473b7bc04575b04951 100644 --- a/src/cell.h +++ b/src/cell.h @@ -74,7 +74,7 @@ struct pcell { /* Stats on this cell's particles. */ double h_max; - integertime_t ti_end_min, ti_end_max, ti_beg_max, ti_old; + integertime_t ti_end_min, ti_end_max, ti_beg_max, ti_old_part, ti_old_gpart; /* Number of particles in this cell. */ int count, gcount, scount; @@ -159,8 +159,11 @@ struct cell { /*! The extra ghost task for complex hydro schemes */ struct task *extra_ghost; - /*! The drift task */ - struct task *drift; + /*! 
The drift task for parts */ + struct task *drift_part; + + /*! The drift task for gparts */ + struct task *drift_gpart; /*! The first kick task */ struct task *kick1; @@ -171,10 +174,10 @@ struct cell { /*! The task to compute time-steps */ struct task *timestep; - /*! Task constructing the multipole from the particles */ - struct task *grav_top_level; + /*! Task linking the FFT mesh to the rest of gravity tasks */ + struct task *grav_ghost[2]; - /*! Task constructing the multipole from the particles */ + /*! Task computing long range non-periodic gravity interactions */ struct task *grav_long_range; /*! Task propagating the multipole to the particles */ @@ -235,24 +238,30 @@ struct cell { /*! Maximum beginning of (integer) time step in this cell. */ integertime_t ti_beg_max; - /*! Last (integer) time the cell's particle was drifted forward in time. */ - integertime_t ti_old; - /*! Last (integer) time the cell's sort arrays were updated. */ integertime_t ti_sort; + /*! Last (integer) time the cell's part were drifted forward in time. */ + integertime_t ti_old_part; + + /*! Last (integer) time the cell's gpart were drifted forward in time. */ + integertime_t ti_old_gpart; + /*! Last (integer) time the cell's multipole was drifted forward in time. */ integertime_t ti_old_multipole; /*! Minimum dimension, i.e. smallest edge of this cell (min(width)). */ float dmin; - /*! Maximum particle movement in this cell since last construction. */ - float dx_max; - /*! Maximum particle movement in this cell since the last sort. */ float dx_max_sort; + /*! Maximum part movement in this cell since last construction. */ + float dx_max_part; + + /*! Maximum gpart movement in this cell since last construction. */ + float dx_max_gpart; + /*! Nr of #part in this cell. 
*/ int count; @@ -364,13 +373,15 @@ void cell_clean_links(struct cell *c, void *data); void cell_make_multipoles(struct cell *c, integertime_t ti_current); void cell_check_multipole(struct cell *c, void *data); void cell_clean(struct cell *c); -void cell_check_particle_drift_point(struct cell *c, void *data); +void cell_check_part_drift_point(struct cell *c, void *data); +void cell_check_gpart_drift_point(struct cell *c, void *data); void cell_check_multipole_drift_point(struct cell *c, void *data); void cell_reset_task_counters(struct cell *c); int cell_is_drift_needed(struct cell *c, const struct engine *e); int cell_unskip_tasks(struct cell *c, struct scheduler *s); void cell_set_super(struct cell *c, struct cell *super); -void cell_drift_particles(struct cell *c, const struct engine *e); +void cell_drift_part(struct cell *c, const struct engine *e); +void cell_drift_gpart(struct cell *c, const struct engine *e); void cell_drift_multipole(struct cell *c, const struct engine *e); void cell_drift_all_multipoles(struct cell *c, const struct engine *e); void cell_check_timesteps(struct cell *c); diff --git a/src/common_io.c b/src/common_io.c index df0bbdc29ec357da3ba14410c0f9c56e0d69160a..168fcf2c695014cf532e622c928414b875fc54d5 100644 --- a/src/common_io.c +++ b/src/common_io.c @@ -74,7 +74,7 @@ hid_t io_hdf5_type(enum IO_DATA_TYPE type) { case DOUBLE: return H5T_NATIVE_DOUBLE; case CHAR: - return H5T_C_S1; + return H5T_NATIVE_CHAR; default: error("Unknown type"); return 0; diff --git a/src/const.h b/src/const.h index 6962ee8bca32e92664e3f20cdb23e7cb6fbc4abd..141eb48acc633542aa98655caa8debdd2dbce530 100644 --- a/src/const.h +++ b/src/const.h @@ -52,8 +52,43 @@ /* Options to control the movement of particles for GIZMO_SPH. */ /* This option disables particle movement */ //#define GIZMO_FIX_PARTICLES +/* Try to keep cells regular by adding a correction velocity. 
*/ +#define GIZMO_STEER_MOTION //#define GIZMO_TOTAL_ENERGY +/* Options to control handling of unphysical values (GIZMO_SPH only). */ +/* In GIZMO, mass and energy (and hence density and pressure) can in principle + become negative, which will cause unwanted behaviour that can make the code + crash. + If no options are selected below, we assume (and pray) that this will not + happen, and add no restrictions to how these variables are treated. */ +/* Check for unphysical values and crash if they occur. */ +//#define GIZMO_UNPHYSICAL_ERROR +/* Check for unphysical values and reset them to safe values. */ +#define GIZMO_UNPHYSICAL_RESCUE +/* Show a warning message if an unphysical value was reset (only works if + GIZMO_UNPHYSICAL_RESCUE is also selected). */ +//#define GIZMO_UNPHYSICAL_WARNING + +/* Parameters that control how GIZMO handles pathological particle + configurations. */ +/* Show a warning message if a pathological configuration has been detected. */ +//#define GIZMO_PATHOLOGICAL_WARNING +/* Crash if a pathological configuration has been detected. */ +//#define GIZMO_PATHOLOGICAL_ERROR +/* Maximum allowed gradient matrix condition number. If the condition number of + the gradient matrix (defined in equation C1 in Hopkins, 2015) is larger than + this value, we artificially increase the number of neighbours to get a more + homogeneous sampling. */ +#define const_gizmo_max_condition_number 100.0f +/* Correction factor applied to the particle wcount to force more neighbours if + the condition number is too large. */ +#define const_gizmo_w_correction_factor 0.9f +/* Lower limit on the wcount correction factor. If the condition number is still + too high after this wcount correction has been applied, we give up on the + gradient matrix and use SPH gradients instead. 
*/ +#define const_gizmo_min_wcorr 0.5f + /* Types of gradients to use for SHADOWFAX_SPH */ /* If no option is chosen, no gradients are used (first order scheme) */ #define SHADOWFAX_GRADIENTS diff --git a/src/debug.c b/src/debug.c index 3732ee5e769277deb393926ea2dc6f04fba93782..601f63d6e11bbbf95f62eaef1ec6ec7ec06d3ad9 100644 --- a/src/debug.c +++ b/src/debug.c @@ -259,8 +259,8 @@ int checkCellhdxmax(const struct cell *c, int *depth) { message("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]); result = 0; } - if (c->dx_max != dx_max) { - message("%d Inconsistent dx_max: %f != %f", *depth, c->dx_max, dx_max); + if (c->dx_max_part != dx_max) { + message("%d Inconsistent dx_max: %f != %f", *depth, c->dx_max_part, dx_max); message("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]); result = 0; } diff --git a/src/drift.h b/src/drift.h index d9b79f7f0549d85b6f05e8ce4a394aaa5b2a4d8d..e86d290cb796153d3c3fc43c21b25d2c7e435657 100644 --- a/src/drift.h +++ b/src/drift.h @@ -39,7 +39,7 @@ * @param ti_current Integer end of time-step */ __attribute__((always_inline)) INLINE static void drift_gpart( - struct gpart *restrict gp, float dt, double timeBase, integertime_t ti_old, + struct gpart *restrict gp, double dt, double timeBase, integertime_t ti_old, integertime_t ti_current) { #ifdef SWIFT_DEBUG_CHECKS @@ -75,7 +75,7 @@ __attribute__((always_inline)) INLINE static void drift_gpart( * @param ti_current Integer end of time-step */ __attribute__((always_inline)) INLINE static void drift_part( - struct part *restrict p, struct xpart *restrict xp, float dt, + struct part *restrict p, struct xpart *restrict xp, double dt, double timeBase, integertime_t ti_old, integertime_t ti_current) { #ifdef SWIFT_DEBUG_CHECKS @@ -119,7 +119,7 @@ __attribute__((always_inline)) INLINE static void drift_part( * @param ti_current Integer end of time-step */ __attribute__((always_inline)) INLINE static void drift_spart( - struct spart *restrict sp, float dt, double timeBase, integertime_t 
ti_old, + struct spart *restrict sp, double dt, double timeBase, integertime_t ti_old, integertime_t ti_current) { #ifdef SWIFT_DEBUG_CHECKS diff --git a/src/engine.c b/src/engine.c index 414b40f959ac4d3ecb449759823a3631b9a657a3..4618d6b8be1ced8742c6e97465a91df9b9bb5db2 100644 --- a/src/engine.c +++ b/src/engine.c @@ -151,6 +151,7 @@ void engine_add_ghosts(struct engine *e, struct cell *c, struct task *ghost_in, void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { struct scheduler *s = &e->sched; + const int periodic = e->s->periodic; const int is_hydro = (e->policy & engine_policy_hydro); const int is_self_gravity = (e->policy & engine_policy_self_gravity); const int is_with_cooling = (e->policy & engine_policy_cooling); @@ -186,18 +187,13 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { c->grav_long_range = scheduler_addtask( s, task_type_grav_long_range, task_subtype_none, 0, 0, c, NULL); - /* Gravity top-level periodic calculation */ - c->grav_top_level = scheduler_addtask(s, task_type_grav_top_level, - task_subtype_none, 0, 0, c, NULL); - /* Gravity recursive down-pass */ c->grav_down = scheduler_addtask(s, task_type_grav_down, task_subtype_none, 0, 0, c, NULL); + if (periodic) scheduler_addunlock(s, c->init_grav, c->grav_ghost[0]); scheduler_addunlock(s, c->init_grav, c->grav_long_range); - scheduler_addunlock(s, c->init_grav, c->grav_top_level); scheduler_addunlock(s, c->grav_long_range, c->grav_down); - scheduler_addunlock(s, c->grav_top_level, c->grav_down); scheduler_addunlock(s, c->grav_down, c->kick2); } @@ -1073,10 +1069,10 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, #endif /* Drift before you send */ - if (ci->drift == NULL) - ci->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0, - 0, ci, NULL); - scheduler_addunlock(s, ci->drift, t_xv); + if (ci->drift_part == NULL) + ci->drift_part = scheduler_addtask(s, task_type_drift_part, + task_subtype_none, 0, 
0, ci, NULL); + scheduler_addunlock(s, ci->drift_part, t_xv); /* The super-cell's timestep task should unlock the send_ti task. */ scheduler_addunlock(s, ci->super->timestep, t_ti); @@ -1675,41 +1671,98 @@ void engine_make_self_gravity_tasks(struct engine *e) { struct space *s = e->s; struct scheduler *sched = &e->sched; const int nodeID = e->nodeID; + const int periodic = s->periodic; + const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; + const int cdim_ghost[3] = {s->cdim[0] / 4 + 1, s->cdim[1] / 4 + 1, + s->cdim[2] / 4 + 1}; const double theta_crit_inv = e->gravity_properties->theta_crit_inv; struct cell *cells = s->cells_top; - const int nr_cells = s->nr_cells; + struct task **ghosts = NULL; + const int n_ghosts = cdim_ghost[0] * cdim_ghost[1] * cdim_ghost[2] * 2; + + /* Create the top-level task if periodic */ + if (periodic) { + + /* Create the FFT task for this MPI rank */ + s->grav_top_level = scheduler_addtask(sched, task_type_grav_top_level, + task_subtype_none, 0, 0, NULL, NULL); + + /* Create a grid of ghosts to deal with the dependencies */ + if ((ghosts = malloc(n_ghosts * sizeof(struct task *))) == 0) + error("Error allocating memory for gravity fft ghosts"); + + /* Make the ghosts implicit and add the dependencies */ + for (int n = 0; n < n_ghosts / 2; ++n) { + ghosts[2 * n + 0] = scheduler_addtask( + sched, task_type_grav_ghost, task_subtype_none, 0, 0, NULL, NULL); + ghosts[2 * n + 1] = scheduler_addtask( + sched, task_type_grav_ghost, task_subtype_none, 0, 0, NULL, NULL); + ghosts[2 * n + 0]->implicit = 1; + ghosts[2 * n + 1]->implicit = 1; + scheduler_addunlock(sched, ghosts[2 * n + 0], s->grav_top_level); + scheduler_addunlock(sched, s->grav_top_level, ghosts[2 * n + 1]); + } + } - for (int cid = 0; cid < nr_cells; ++cid) { + /* Run through the higher level cells */ + for (int i = 0; i < cdim[0]; i++) { + for (int j = 0; j < cdim[1]; j++) { + for (int k = 0; k < cdim[2]; k++) { - struct cell *ci = &cells[cid]; + /* Get the cell */ + 
const int cid = cell_getid(cdim, i, j, k); + struct cell *ci = &cells[cid]; - /* Skip cells without gravity particles */ - if (ci->gcount == 0) continue; + /* Skip cells without gravity particles */ + if (ci->gcount == 0) continue; - /* Is that cell local ? */ - if (ci->nodeID != nodeID) continue; + /* Is that cell local ? */ + if (ci->nodeID != nodeID) continue; - /* If the cells is local build a self-interaction */ - scheduler_addtask(sched, task_type_self, task_subtype_grav, 0, 0, ci, NULL); + /* If the cells is local build a self-interaction */ + scheduler_addtask(sched, task_type_self, task_subtype_grav, 0, 0, ci, + NULL); + + /* Deal with periodicity dependencies */ + const int ghost_id = cell_getid(cdim_ghost, i / 4, j / 4, k / 4); + if (ghost_id > n_ghosts) error("Invalid ghost_id"); + if (periodic) { + ci->grav_ghost[0] = ghosts[2 * ghost_id + 0]; + ci->grav_ghost[1] = ghosts[2 * ghost_id + 1]; + } - /* Loop over every other cell */ - for (int cjd = cid + 1; cjd < nr_cells; ++cjd) { + /* Loop over every other cell */ + for (int ii = 0; ii < cdim[0]; ii++) { + for (int jj = 0; jj < cdim[1]; jj++) { + for (int kk = 0; kk < cdim[2]; kk++) { + + /* Get the cell */ + const int cjd = cell_getid(cdim, ii, jj, kk); + struct cell *cj = &cells[cjd]; - struct cell *cj = &cells[cjd]; + /* Avoid duplicates */ + if (cid <= cjd) continue; - /* Skip cells without gravity particles */ - if (cj->gcount == 0) continue; + /* Skip cells without gravity particles */ + if (cj->gcount == 0) continue; - /* Is that neighbour local ? */ - if (cj->nodeID != nodeID) continue; // MATTHIEU + /* Is that neighbour local ? */ + if (cj->nodeID != nodeID) continue; // MATTHIEU - /* Are the cells to close for a MM interaction ? */ - if (!gravity_multipole_accept(ci->multipole, cj->multipole, - theta_crit_inv, 1)) - scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0, ci, - cj); + /* Are the cells to close for a MM interaction ? 
*/ + if (!gravity_multipole_accept(ci->multipole, cj->multipole, + theta_crit_inv, 1)) { + + scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, + 0, ci, cj); + } + } + } + } + } } } + if (periodic) free(ghosts); } void engine_make_external_gravity_tasks(struct engine *e) { @@ -1836,10 +1889,15 @@ void engine_count_and_link_tasks(struct engine *e) { } /* Link drift tasks to all the higher drift task. */ - else if (t->type == task_type_drift) { + else if (t->type == task_type_drift_part) { for (struct cell *finger = t->ci->parent; finger != NULL; finger = finger->parent) - if (finger->drift != NULL) scheduler_addunlock(sched, t, finger->drift); + if (finger->drift_part != NULL) scheduler_addunlock(sched, t, finger->drift_part); + } + else if (t->type == task_type_drift_gpart) { + for (struct cell *finger = t->ci->parent; finger != NULL; + finger = finger->parent) + if (finger->drift_gpart != NULL) scheduler_addunlock(sched, t, finger->drift_gpart); } /* Link self tasks to cells. */ @@ -1930,7 +1988,7 @@ static inline void engine_make_external_gravity_dependencies( struct scheduler *sched, struct task *gravity, struct cell *c) { /* init --> external gravity --> kick */ - scheduler_addunlock(sched, c->drift, gravity); + scheduler_addunlock(sched, c->drift_gpart, gravity); scheduler_addunlock(sched, gravity, c->super->kick2); } @@ -1944,6 +2002,7 @@ void engine_link_gravity_tasks(struct engine *e) { struct scheduler *sched = &e->sched; const int nodeID = e->nodeID; const int nr_tasks = sched->nr_tasks; + const int periodic = e->s->periodic; for (int k = 0; k < nr_tasks; k++) { @@ -1954,6 +2013,7 @@ void engine_link_gravity_tasks(struct engine *e) { if (t->type == task_type_self && t->subtype == task_subtype_grav) { engine_make_self_gravity_dependencies(sched, t, t->ci); + if (periodic) scheduler_addunlock(sched, t->ci->super->grav_ghost[1], t); } /* Self-interaction for external gravity ? 
*/ @@ -1969,11 +2029,15 @@ void engine_link_gravity_tasks(struct engine *e) { if (t->ci->nodeID == nodeID) { engine_make_self_gravity_dependencies(sched, t, t->ci); + if (periodic && t->ci->super < t->cj->super) + scheduler_addunlock(sched, t->ci->super->grav_ghost[1], t); } if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { engine_make_self_gravity_dependencies(sched, t, t->cj); + if (periodic && t->ci->super < t->cj->super) + scheduler_addunlock(sched, t->cj->super->grav_ghost[1], t); } } @@ -2096,14 +2160,14 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Sort tasks depend on the drift of the cell. */ if (t->type == task_type_sort && t->ci->nodeID == engine_rank) { - scheduler_addunlock(sched, t->ci->drift, t); + scheduler_addunlock(sched, t->ci->drift_part, t); } /* Self-interaction? */ else if (t->type == task_type_self && t->subtype == task_subtype_density) { /* Make all density tasks depend on the drift. */ - scheduler_addunlock(sched, t->ci->drift, t); + scheduler_addunlock(sched, t->ci->drift_part, t); #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ @@ -2139,9 +2203,9 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Make all density tasks depend on the drift. */ if (t->ci->nodeID == engine_rank) - scheduler_addunlock(sched, t->ci->drift, t); + scheduler_addunlock(sched, t->ci->drift_part, t); if (t->cj->nodeID == engine_rank) - scheduler_addunlock(sched, t->cj->drift, t); + scheduler_addunlock(sched, t->cj->drift_part, t); #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ @@ -2546,7 +2610,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, error("bad flags in sort task."); #endif scheduler_activate(s, cj->sorts); - if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift); + if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift_part); } } /* Store current values of dx_max and h_max. 
*/ @@ -2562,6 +2626,9 @@ void engine_marktasks_mapper(void *map_data, int num_elements, scheduler_activate(s, ci->recv_xv); if (cell_is_active(ci, e)) { scheduler_activate(s, ci->recv_rho); +#ifdef EXTRA_HYDRO_LOOP + scheduler_activate(s, ci->recv_gradient); +#endif scheduler_activate(s, ci->recv_ti); } @@ -2574,11 +2641,11 @@ void engine_marktasks_mapper(void *map_data, int num_elements, scheduler_activate(s, l->t); /* Drift both cells, the foreign one at the level which it is sent. */ - if (l->t->ci->drift) - scheduler_activate(s, l->t->ci->drift); + if (l->t->ci->drift_part) + scheduler_activate(s, l->t->ci->drift_part); else error("Drift task missing !"); - if (t->type == task_type_pair) scheduler_activate(s, cj->drift); + if (t->type == task_type_pair) scheduler_activate(s, cj->drift_part); if (cell_is_active(cj, e)) { for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; @@ -2587,6 +2654,14 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (l == NULL) error("Missing link to send_rho task."); scheduler_activate(s, l->t); +#ifdef EXTRA_HYDRO_LOOP + for (l = cj->send_gradient; + l != NULL && l->t->cj->nodeID != ci->nodeID; l = l->next) + ; + if (l == NULL) error("Missing link to send_gradient task."); + scheduler_activate(s, l->t); +#endif + for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID; l = l->next) ; @@ -2600,6 +2675,9 @@ void engine_marktasks_mapper(void *map_data, int num_elements, scheduler_activate(s, cj->recv_xv); if (cell_is_active(cj, e)) { scheduler_activate(s, cj->recv_rho); +#ifdef EXTRA_HYDRO_LOOP + scheduler_activate(s, cj->recv_gradient); +#endif scheduler_activate(s, cj->recv_ti); } @@ -2612,11 +2690,11 @@ void engine_marktasks_mapper(void *map_data, int num_elements, scheduler_activate(s, l->t); /* Drift both cells, the foreign one at the level which it is sent. 
*/ - if (l->t->ci->drift) - scheduler_activate(s, l->t->ci->drift); + if (l->t->ci->drift_part) + scheduler_activate(s, l->t->ci->drift_part); else error("Drift task missing !"); - if (t->type == task_type_pair) scheduler_activate(s, ci->drift); + if (t->type == task_type_pair) scheduler_activate(s, ci->drift_part); if (cell_is_active(ci, e)) { for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; @@ -2625,6 +2703,14 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (l == NULL) error("Missing link to send_rho task."); scheduler_activate(s, l->t); +#ifdef EXTRA_HYDRO_LOOP + for (l = ci->send_gradient; + l != NULL && l->t->cj->nodeID != cj->nodeID; l = l->next) + ; + if (l == NULL) error("Missing link to send_gradient task."); + scheduler_activate(s, l->t); +#endif + for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID; l = l->next) ; @@ -2633,30 +2719,37 @@ void engine_marktasks_mapper(void *map_data, int num_elements, } } else if (t->type == task_type_pair) { - scheduler_activate(s, ci->drift); - scheduler_activate(s, cj->drift); + scheduler_activate(s, ci->drift_part); + scheduler_activate(s, cj->drift_part); } #else if (t->type == task_type_pair) { - scheduler_activate(s, ci->drift); - scheduler_activate(s, cj->drift); + scheduler_activate(s, ci->drift_part); + scheduler_activate(s, cj->drift_part); } #endif } - /* Kick/Drift? */ + /* Kick/Drift/init ? */ else if (t->type == task_type_kick1 || t->type == task_type_kick2 || - t->type == task_type_drift || t->type == task_type_init_grav) { + t->type == task_type_drift_part || + t->type == task_type_drift_gpart || + t->type == task_type_init_grav) { if (cell_is_active(t->ci, e)) scheduler_activate(s, t); } /* Gravity ? */ else if (t->type == task_type_grav_down || - t->type == task_type_grav_long_range || - t->type == task_type_grav_top_level) { + t->type == task_type_grav_long_range) { if (cell_is_active(t->ci, e)) scheduler_activate(s, t); } + /* Periodic gravity ? 
*/ + else if (t->type == task_type_grav_top_level || + t->type == task_type_grav_ghost) { + scheduler_activate(s, t); + } + /* Time-step? */ else if (t->type == task_type_timestep) { t->ci->updated = 0; @@ -3034,7 +3127,7 @@ void engine_print_stats(struct engine *e) { e->policy & engine_policy_self_gravity); /* Be verbose about this */ - message("Saving statistics at t=%e.", e->time); + if (e->nodeID == 0) message("Saving statistics at t=%e.", e->time); #else if (e->verbose) message("Saving statistics at t=%e.", e->time); #endif @@ -3083,10 +3176,12 @@ void engine_skip_force_and_kick(struct engine *e) { struct task *t = &tasks[i]; /* Skip everything that updates the particles */ - if (t->type == task_type_drift || t->type == task_type_kick1 || - t->type == task_type_kick2 || t->type == task_type_timestep || - t->subtype == task_subtype_force || t->subtype == task_subtype_grav || + if (t->type == task_type_drift_part || t->type == task_type_drift_gpart || + t->type == task_type_kick1 || t->type == task_type_kick2 || + t->type == task_type_timestep || t->subtype == task_subtype_force || + t->subtype == task_subtype_grav || t->type == task_type_grav_long_range || + t->type == task_type_grav_ghost || t->type == task_type_grav_top_level || t->type == task_type_grav_down || t->type == task_type_cooling || t->type == task_type_sourceterms) t->skip = 1; @@ -3337,8 +3432,8 @@ void engine_step(struct engine *e) { if (e->policy & engine_policy_reconstruct_mpoles) engine_reconstruct_multipoles(e); - else - engine_drift_top_multipoles(e); + // else + // engine_drift_top_multipoles(e); } /* Print the number of active tasks ? 
*/ @@ -3450,9 +3545,15 @@ int engine_is_done(struct engine *e) { void engine_unskip(struct engine *e) { const ticks tic = getticks(); + + /* Activate all the regular tasks */ threadpool_map(&e->threadpool, runner_do_unskip_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 1, e); + /* And the top level gravity FFT one */ + if (e->s->periodic && (e->policy & engine_policy_self_gravity)) + scheduler_activate(&e->sched, e->s->grav_top_level); + if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -3476,7 +3577,10 @@ void engine_do_drift_all_mapper(void *map_data, int num_elements, struct cell *c = &cells[ind]; if (c != NULL && c->nodeID == e->nodeID) { /* Drift all the particles */ - cell_drift_particles(c, e); + cell_drift_part(c, e); + + /* Drift all the g-particles */ + cell_drift_gpart(c, e); /* Drift the multipoles */ if (e->policy & engine_policy_self_gravity) @@ -3502,10 +3606,15 @@ void engine_drift_all(struct engine *e) { threadpool_map(&e->threadpool, engine_do_drift_all_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 1, e); + /* Synchronize particle positions */ + space_synchronize_particle_positions(e->s); + #ifdef SWIFT_DEBUG_CHECKS /* Check that all cells have been drifted to the current time. 
*/ space_check_drift_point(e->s, e->ti_current, e->policy & engine_policy_self_gravity); + part_verify_links(e->s->parts, e->s->gparts, e->s->sparts, e->s->nr_parts, + e->s->nr_gparts, e->s->nr_sparts, e->verbose); #endif if (e->verbose) @@ -3804,7 +3913,7 @@ void engine_dump_snapshot(struct engine *e) { e->policy & engine_policy_self_gravity); /* Be verbose about this */ - message("writing snapshot at t=%e.", e->time); + if (e->nodeID == 0) message("writing snapshot at t=%e.", e->time); #else if (e->verbose) message("writing snapshot at t=%e.", e->time); #endif diff --git a/src/equation_of_state.h b/src/equation_of_state.h index 28c97c7b96b778c7bbb7bcbfb6ffe682ce54ba22..e51ed99519dc9c418e34789fcce95b5f28d69a99 100644 --- a/src/equation_of_state.h +++ b/src/equation_of_state.h @@ -275,7 +275,7 @@ gas_pressure_from_internal_energy(float density, float u) { */ __attribute__((always_inline)) INLINE static float gas_internal_energy_from_pressure(float density, float pressure) { - return const_isothermal_energy; + return const_isothermal_internal_energy; } /** diff --git a/src/gravity_properties.c b/src/gravity_properties.c index 7b9b8cd7c35f8fa9b21ff34ce2589b5d45ce8393..b1098888b96cdef2205ed513e60a3799c63e8b9f 100644 --- a/src/gravity_properties.c +++ b/src/gravity_properties.c @@ -69,11 +69,9 @@ void gravity_props_print(const struct gravity_props *p) { message("Self-gravity softening: epsilon=%.4f (Plummer equivalent: %.4f)", p->epsilon, p->epsilon / 3.); - if (p->a_smooth != gravity_props_default_a_smooth) - message("Self-gravity MM smoothing-scale: a_smooth=%f", p->a_smooth); + message("Self-gravity MM smoothing-scale: a_smooth=%f", p->a_smooth); - if (p->r_cut != gravity_props_default_r_cut) - message("Self-gravity MM cut-off: r_cut=%f", p->r_cut); + message("Self-gravity MM cut-off: r_cut=%f", p->r_cut); } #if defined(HAVE_HDF5) diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h index 
747c81a8e64c18a06b04160cfab326a3521c5901..91626749a89ede387547b6351dce59fa3569307a 100644 --- a/src/hydro/Gadget2/hydro.h +++ b/src/hydro/Gadget2/hydro.h @@ -293,7 +293,7 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration( p->force.h_dt = 0.0f; /* Reset maximal signal velocity */ - p->force.v_sig = 0.0f; + p->force.v_sig = p->force.soundspeed; } /** diff --git a/src/hydro/Gizmo/hydro.h b/src/hydro/Gizmo/hydro.h index 2e340a03b99ae51bc49a2e57456f4d6838d62f21..6d39c54d2ddc3571ac34c54fc9eede6f7dee6ac5 100644 --- a/src/hydro/Gizmo/hydro.h +++ b/src/hydro/Gizmo/hydro.h @@ -2,6 +2,7 @@ /******************************************************************************* * This file is part of SWIFT. * Coypright (c) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2016, 2017 Bert Vandenbroucke (bert.vandenbroucke@gmail.com) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -24,9 +25,13 @@ #include "equation_of_state.h" #include "hydro_gradients.h" #include "hydro_space.h" +#include "hydro_unphysical.h" +#include "hydro_velocities.h" #include "minmax.h" #include "riemann.h" +//#define GIZMO_LLOYD_ITERATION + /** * @brief Computes the hydro time-step of a given particle * @@ -40,6 +45,10 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep( const float CFL_condition = hydro_properties->CFL_condition; +#ifdef GIZMO_LLOYD_ITERATION + return CFL_condition; +#endif + if (p->timestepvars.vmax == 0.) 
{ /* vmax can be zero in vacuum cells that only have vacuum neighbours */ /* in this case, the time step should be limited by the maximally @@ -47,7 +56,9 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep( the time step to a very large value */ return FLT_MAX; } else { - return CFL_condition * p->h / fabsf(p->timestepvars.vmax); + const float psize = powf(p->geometry.volume / hydro_dimension_unit_sphere, + hydro_dimension_inv); + return 2. * CFL_condition * psize / fabsf(p->timestepvars.vmax); } } @@ -128,16 +139,27 @@ __attribute__((always_inline)) INLINE static void hydro_first_init_part( p->conserved.momentum[2] * p->primitives.v[2]); #endif -#if defined(GIZMO_FIX_PARTICLES) - /* make sure the particles are initially at rest */ +#ifdef GIZMO_LLOYD_ITERATION + /* overwrite all variables to make sure they have safe values */ + p->primitives.rho = 1.; + p->primitives.v[0] = 0.; + p->primitives.v[1] = 0.; + p->primitives.v[2] = 0.; + p->primitives.P = 1.; + + p->conserved.mass = 1.; + p->conserved.momentum[0] = 0.; + p->conserved.momentum[1] = 0.; + p->conserved.momentum[2] = 0.; + p->conserved.energy = 1.; + p->v[0] = 0.; p->v[1] = 0.; p->v[2] = 0.; #endif - xp->v_full[0] = p->v[0]; - xp->v_full[1] = p->v[1]; - xp->v_full[2] = p->v[2]; + /* initialize the particle velocity based on the primitive fluid velocity */ + hydro_velocities_init(p, xp); /* we cannot initialize wcorr in init_part, as init_part gets called every time the density loop is repeated, and the whole point of storing wcorr @@ -169,6 +191,9 @@ __attribute__((always_inline)) INLINE static void hydro_init_part( p->geometry.matrix_E[2][0] = 0.0f; p->geometry.matrix_E[2][1] = 0.0f; p->geometry.matrix_E[2][2] = 0.0f; + p->geometry.centroid[0] = 0.0f; + p->geometry.centroid[1] = 0.0f; + p->geometry.centroid[2] = 0.0f; p->geometry.Atot = 0.0f; /* Set the active flag to active. 
*/ @@ -226,6 +251,14 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( p->geometry.matrix_E[2][1] = ihdim * p->geometry.matrix_E[2][1]; p->geometry.matrix_E[2][2] = ihdim * p->geometry.matrix_E[2][2]; + p->geometry.centroid[0] *= kernel_norm; + p->geometry.centroid[1] *= kernel_norm; + p->geometry.centroid[2] *= kernel_norm; + + p->geometry.centroid[0] /= p->density.wcount; + p->geometry.centroid[1] /= p->density.wcount; + p->geometry.centroid[2] /= p->density.wcount; + /* Check the condition number to see if we have a stable geometry. */ float condition_number_E = 0.0f; int i, j; @@ -249,12 +282,18 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( float condition_number = hydro_dimension_inv * sqrtf(condition_number_E * condition_number_Einv); - if (condition_number > 100.0f) { - // error("Condition number larger than 100!"); - // message("Condition number too large: %g (p->id: %llu)!", - // condition_number, p->id); + if (condition_number > const_gizmo_max_condition_number && + p->density.wcorr > const_gizmo_min_wcorr) { +#ifdef GIZMO_PATHOLOGICAL_ERROR + error("Condition number larger than %g (%g)!", + const_gizmo_max_condition_number, condition_number); +#endif +#ifdef GIZMO_PATHOLOGICAL_WARNING + message("Condition number too large: %g (> %g, p->id: %llu)!", + condition_number, const_gizmo_max_condition_number, p->id); +#endif /* add a correction to the number of neighbours for this particle */ - p->density.wcorr *= 0.75; + p->density.wcorr *= const_gizmo_w_correction_factor; } hydro_gradients_init(p); @@ -264,8 +303,8 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( const float m = p->conserved.mass; #ifdef SWIFT_DEBUG_CHECKS - if (m == 0.) { - error("Mass is 0!"); + if (m < 0.) { + error("Mass is negative!"); } if (volume == 0.) 
{ @@ -278,15 +317,20 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( momentum[1] = p->conserved.momentum[1]; momentum[2] = p->conserved.momentum[2]; p->primitives.rho = m / volume; - p->primitives.v[0] = momentum[0] / m; - p->primitives.v[1] = momentum[1] / m; - p->primitives.v[2] = momentum[2] / m; + if (m == 0.) { + p->primitives.v[0] = 0.; + p->primitives.v[1] = 0.; + p->primitives.v[2] = 0.; + } else { + p->primitives.v[0] = momentum[0] / m; + p->primitives.v[1] = momentum[1] / m; + p->primitives.v[2] = momentum[2] / m; + } #ifdef EOS_ISOTHERMAL_GAS /* although the pressure is not formally used anywhere if an isothermal eos has been selected, we still make sure it is set to the correct value */ - p->primitives.P = const_isothermal_soundspeed * const_isothermal_soundspeed * - p->primitives.rho; + p->primitives.P = gas_pressure_from_internal_energy(p->primitives.rho, 0.); #else float energy = p->conserved.energy; @@ -304,12 +348,17 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( #endif /* sanity checks */ - /* it would probably be safer to throw a warning if netive densities or - pressures occur */ - if (p->primitives.rho < 0.0f || p->primitives.P < 0.0f) { - p->primitives.rho = 0.0f; - p->primitives.P = 0.0f; - } + gizmo_check_physical_quantity("density", p->primitives.rho); + gizmo_check_physical_quantity("pressure", p->primitives.P); + +#ifdef GIZMO_LLOYD_ITERATION + /* overwrite primitive variables to make sure they still have safe values */ + p->primitives.rho = 1.; + p->primitives.v[0] = 0.; + p->primitives.v[1] = 0.; + p->primitives.v[2] = 0.; + p->primitives.P = 1.; +#endif /* Add a correction factor to wcount (to force a neighbour number increase if the geometry matrix is close to singular) */ @@ -330,8 +379,6 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( * * @param p The particle to act upon. * @param xp The extended particle data to act upon. 
- * @param ti_current Current integer time. - * @param timeBase Conversion factor between integer time and physical time. */ __attribute__((always_inline)) INLINE static void hydro_prepare_force( struct part* restrict p, struct xpart* restrict xp) { @@ -340,10 +387,7 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force( p->timestepvars.vmax = 0.0f; /* Set the actual velocity of the particle */ - /* if GIZMO_FIX_PARTICLES has been selected, v_full will always be zero */ - p->force.v_full[0] = xp->v_full[0]; - p->force.v_full[1] = xp->v_full[1]; - p->force.v_full[2] = xp->v_full[2]; + hydro_velocities_prepare_force(p, xp); } /** @@ -364,6 +408,11 @@ __attribute__((always_inline)) INLINE static void hydro_end_gradient( p->gravity.mflux[0] = 0.0f; p->gravity.mflux[1] = 0.0f; p->gravity.mflux[2] = 0.0f; + +#ifdef GIZMO_LLOYD_ITERATION + /* reset the gradients to zero, as we don't want them */ + hydro_gradients_init(p); +#endif } /** @@ -422,6 +471,10 @@ __attribute__((always_inline)) INLINE static void hydro_convert_quantities( __attribute__((always_inline)) INLINE static void hydro_predict_extra( struct part* p, struct xpart* xp, float dt) { +#ifdef GIZMO_LLOYD_ITERATION + return; +#endif + const float h_inv = 1.0f / p->h; /* Predict smoothing length */ @@ -432,8 +485,9 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra( else h_corr = expf(w1); - /* Limit the smoothing length correction. */ - if (h_corr < 2.0f) { + /* Limit the smoothing length correction (and make sure it is always + positive). */ + if (h_corr < 2.0f && h_corr > 0.) { p->h *= h_corr; } @@ -483,22 +537,13 @@ __attribute__((always_inline)) INLINE static void hydro_end_force( /* set the variables that are used to drift the primitive variables */ - /* Add normalization to h_dt. */ - p->force.h_dt *= p->h * hydro_dimension_inv; - - if (p->force.dt) { + if (p->force.dt > 0.) 
{ p->du_dt = p->conserved.flux.energy / p->force.dt; } else { p->du_dt = 0.0f; } -#if defined(GIZMO_FIX_PARTICLES) - p->du_dt = 0.0f; - - /* disable the smoothing length update, since the smoothing lengths should - stay the same for all steps (particles don't move) */ - p->force.h_dt = 0.0f; -#endif + hydro_velocities_end_force(p); } /** @@ -527,7 +572,12 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( p->conserved.energy += p->conserved.flux.energy; #endif + gizmo_check_physical_quantity("mass", p->conserved.mass); + gizmo_check_physical_quantity("energy", p->conserved.energy); + #ifdef SWIFT_DEBUG_CHECKS + /* Note that this check will only have effect if no GIZMO_UNPHYSICAL option + was selected. */ if (p->conserved.mass < 0.) { error( "Negative mass after conserved variables update (mass: %g, dmass: %g)!", @@ -535,7 +585,10 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( } if (p->conserved.energy < 0.) { - error("Negative energy after conserved variables update!"); + error( + "Negative energy after conserved variables update (energy: %g, " + "denergy: %g)!", + p->conserved.energy, p->conserved.flux.energy); } #endif @@ -549,7 +602,7 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( a_grav[2] = p->gpart->a_grav[2]; /* Store the gravitational acceleration for later use. */ - /* This is currently only used for output purposes. */ + /* This is used for the prediction step. */ p->gravity.old_a[0] = a_grav[0]; p->gravity.old_a[1] = a_grav[1]; p->gravity.old_a[2] = a_grav[2]; @@ -564,7 +617,7 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( p->conserved.momentum[1] += dt * p->conserved.mass * a_grav[1]; p->conserved.momentum[2] += dt * p->conserved.mass * a_grav[2]; -#if !defined(EOS_ISOTHERMAL_GAS) && defined(GIZMO_TOTAL_ENERGY) +#if !defined(EOS_ISOTHERMAL_GAS) /* This part still needs to be tested! 
*/ p->conserved.energy += dt * (p->conserved.momentum[0] * a_grav[0] + p->conserved.momentum[1] * a_grav[1] + @@ -585,45 +638,25 @@ __attribute__((always_inline)) INLINE static void hydro_kick_extra( p->conserved.flux.momentum[2] = 0.0f; p->conserved.flux.energy = 0.0f; -#if defined(GIZMO_FIX_PARTICLES) - xp->v_full[0] = 0.; - xp->v_full[1] = 0.; - xp->v_full[2] = 0.; - - p->v[0] = 0.; - p->v[1] = 0.; - p->v[2] = 0.; - - if (p->gpart) { - p->gpart->v_full[0] = 0.; - p->gpart->v_full[1] = 0.; - p->gpart->v_full[2] = 0.; - } -#else - /* Set particle movement */ - if (p->conserved.mass > 0.) { - xp->v_full[0] = p->conserved.momentum[0] / p->conserved.mass; - xp->v_full[1] = p->conserved.momentum[1] / p->conserved.mass; - xp->v_full[2] = p->conserved.momentum[2] / p->conserved.mass; - } else { - /* vacuum particles don't move */ - xp->v_full[0] = 0.; - xp->v_full[1] = 0.; - xp->v_full[2] = 0.; - } + hydro_velocities_set(p, xp); + +#ifdef GIZMO_LLOYD_ITERATION + /* reset conserved variables to safe values */ + p->conserved.mass = 1.; + p->conserved.momentum[0] = 0.; + p->conserved.momentum[1] = 0.; + p->conserved.momentum[2] = 0.; + p->conserved.energy = 1.; + + /* set the particle velocities to the Lloyd velocities */ + /* note that centroid is the relative position of the centroid w.r.t. the + particle position (position - centroid) */ + xp->v_full[0] = -p->geometry.centroid[0] / p->force.dt; + xp->v_full[1] = -p->geometry.centroid[1] / p->force.dt; + xp->v_full[2] = -p->geometry.centroid[2] / p->force.dt; p->v[0] = xp->v_full[0]; p->v[1] = xp->v_full[1]; p->v[2] = xp->v_full[2]; - - /* Update gpart! */ - /* This is essential, as the gpart drift is done independently from the part - drift, and we don't want the gpart and the part to have different - positions! 
*/ - if (p->gpart) { - p->gpart->v_full[0] = xp->v_full[0]; - p->gpart->v_full[1] = xp->v_full[1]; - p->gpart->v_full[2] = xp->v_full[2]; - } #endif /* reset wcorr */ diff --git a/src/hydro/Gizmo/hydro_gradients.h b/src/hydro/Gizmo/hydro_gradients.h index a5c1e9038d0d3de6896afe773e3193a2304a6b6b..5ad6d87619a7629a703a8b9c03d089e69ffbdf7d 100644 --- a/src/hydro/Gizmo/hydro_gradients.h +++ b/src/hydro/Gizmo/hydro_gradients.h @@ -22,6 +22,7 @@ #define SWIFT_HYDRO_GRADIENTS_H #include "hydro_slope_limiters.h" +#include "hydro_unphysical.h" #include "riemann.h" #if defined(GRADIENTS_SPH) @@ -98,6 +99,7 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_predict( float xij_j[3]; int k; float xfac; + float a_grav_i[3], a_grav_j[3]; /* perform gradient reconstruction in space and time */ /* space */ @@ -139,37 +141,38 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_predict( pj->primitives.gradients.P[1] * xij_j[1] + pj->primitives.gradients.P[2] * xij_j[2]; + a_grav_i[0] = pi->gravity.old_a[0]; + a_grav_i[1] = pi->gravity.old_a[1]; + a_grav_i[2] = pi->gravity.old_a[2]; + + a_grav_i[0] += pi->gravity.grad_a[0][0] * xij_i[0] + + pi->gravity.grad_a[0][1] * xij_i[1] + + pi->gravity.grad_a[0][2] * xij_i[2]; + a_grav_i[1] += pi->gravity.grad_a[1][0] * xij_i[0] + + pi->gravity.grad_a[1][1] * xij_i[1] + + pi->gravity.grad_a[1][2] * xij_i[2]; + a_grav_i[2] += pi->gravity.grad_a[2][0] * xij_i[0] + + pi->gravity.grad_a[2][1] * xij_i[1] + + pi->gravity.grad_a[2][2] * xij_i[2]; + + a_grav_j[0] = pj->gravity.old_a[0]; + a_grav_j[1] = pj->gravity.old_a[1]; + a_grav_j[2] = pj->gravity.old_a[2]; + + a_grav_j[0] += pj->gravity.grad_a[0][0] * xij_j[0] + + pj->gravity.grad_a[0][1] * xij_j[1] + + pj->gravity.grad_a[0][2] * xij_j[2]; + a_grav_j[1] += pj->gravity.grad_a[1][0] * xij_j[0] + + pj->gravity.grad_a[1][1] * xij_j[1] + + pj->gravity.grad_a[1][2] * xij_j[2]; + a_grav_j[2] += pj->gravity.grad_a[2][0] * xij_j[0] + + pj->gravity.grad_a[2][1] * xij_j[1] + 
+ pj->gravity.grad_a[2][2] * xij_j[2]; + hydro_slope_limit_face(Wi, Wj, dWi, dWj, xij_i, xij_j, r); /* time */ if (Wi[0] > 0.0f) { -#ifdef EOS_ISOTHERMAL_GAS - dWi[0] -= 0.5 * mindt * (Wi[1] * pi->primitives.gradients.rho[0] + - Wi[2] * pi->primitives.gradients.rho[1] + - Wi[3] * pi->primitives.gradients.rho[2] + - Wi[0] * (pi->primitives.gradients.v[0][0] + - pi->primitives.gradients.v[1][1] + - pi->primitives.gradients.v[2][2])); - dWi[1] -= 0.5 * mindt * - (Wi[1] * pi->primitives.gradients.v[0][0] + - Wi[2] * pi->primitives.gradients.v[0][1] + - Wi[3] * pi->primitives.gradients.v[0][2] + - const_isothermal_soundspeed * const_isothermal_soundspeed * - pi->primitives.gradients.rho[0] / Wi[0]); - dWi[2] -= 0.5 * mindt * - (Wi[1] * pi->primitives.gradients.v[1][0] + - Wi[2] * pi->primitives.gradients.v[1][1] + - Wi[3] * pi->primitives.gradients.v[1][2] + - const_isothermal_soundspeed * const_isothermal_soundspeed * - pi->primitives.gradients.rho[1] / Wi[0]); - dWi[3] -= 0.5 * mindt * - (Wi[1] * pi->primitives.gradients.v[2][0] + - Wi[2] * pi->primitives.gradients.v[2][1] + - Wi[3] * pi->primitives.gradients.v[2][2] + - const_isothermal_soundspeed * const_isothermal_soundspeed * - pi->primitives.gradients.rho[2] / Wi[0]); -/* we don't care about P in this case */ -#else dWi[0] -= 0.5 * mindt * (Wi[1] * pi->primitives.gradients.rho[0] + Wi[2] * pi->primitives.gradients.rho[1] + Wi[3] * pi->primitives.gradients.rho[2] + @@ -195,36 +198,13 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_predict( hydro_gamma * Wi[4] * (pi->primitives.gradients.v[0][0] + pi->primitives.gradients.v[1][1] + pi->primitives.gradients.v[2][2])); -#endif + + dWi[1] += 0.5 * mindt * a_grav_i[0]; + dWi[2] += 0.5 * mindt * a_grav_i[1]; + dWi[3] += 0.5 * mindt * a_grav_i[2]; } if (Wj[0] > 0.0f) { -#ifdef EOS_ISOTHERMAL_GAS - dWj[0] -= 0.5 * mindt * (Wj[1] * pj->primitives.gradients.rho[0] + - Wj[2] * pj->primitives.gradients.rho[1] + - Wj[3] * pj->primitives.gradients.rho[2] + 
- Wj[0] * (pj->primitives.gradients.v[0][0] + - pj->primitives.gradients.v[1][1] + - pj->primitives.gradients.v[2][2])); - dWj[1] -= 0.5 * mindt * - (Wj[1] * pj->primitives.gradients.v[0][0] + - Wj[2] * pj->primitives.gradients.v[0][1] + - Wj[3] * pj->primitives.gradients.v[0][2] + - const_isothermal_soundspeed * const_isothermal_soundspeed * - pj->primitives.gradients.rho[0] / Wj[0]); - dWj[2] -= 0.5 * mindt * - (Wj[1] * pj->primitives.gradients.v[1][0] + - Wj[2] * pj->primitives.gradients.v[1][1] + - Wj[3] * pj->primitives.gradients.v[1][2] + - const_isothermal_soundspeed * const_isothermal_soundspeed * - pj->primitives.gradients.rho[1] / Wj[0]); - dWj[3] -= 0.5 * mindt * - (Wj[1] * pj->primitives.gradients.v[2][0] + - Wj[2] * pj->primitives.gradients.v[2][1] + - Wj[3] * pj->primitives.gradients.v[2][2] + - const_isothermal_soundspeed * const_isothermal_soundspeed * - pj->primitives.gradients.rho[2] / Wj[0]); -#else dWj[0] -= 0.5 * mindt * (Wj[1] * pj->primitives.gradients.rho[0] + Wj[2] * pj->primitives.gradients.rho[1] + Wj[3] * pj->primitives.gradients.rho[2] + @@ -250,36 +230,28 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_predict( hydro_gamma * Wj[4] * (pj->primitives.gradients.v[0][0] + pj->primitives.gradients.v[1][1] + pj->primitives.gradients.v[2][2])); -#endif - } - if (-dWi[0] > Wi[0]) { - Wi[0] = 0.0f; - } else { - Wi[0] += dWi[0]; + dWj[1] += 0.5 * mindt * a_grav_j[0]; + dWj[2] += 0.5 * mindt * a_grav_j[1]; + dWj[3] += 0.5 * mindt * a_grav_j[2]; } + + Wi[0] += dWi[0]; Wi[1] += dWi[1]; Wi[2] += dWi[2]; Wi[3] += dWi[3]; - if (-dWi[4] > Wi[4]) { - Wi[4] = 0.0f; - } else { - Wi[4] += dWi[4]; - } + Wi[4] += dWi[4]; - if (-dWj[0] > Wj[0]) { - Wj[0] = 0.0f; - } else { - Wj[0] += dWj[0]; - } + Wj[0] += dWj[0]; Wj[1] += dWj[1]; Wj[2] += dWj[2]; Wj[3] += dWj[3]; - if (-dWj[4] > Wj[4]) { - Wj[4] = 0.0f; - } else { - Wj[4] += dWj[4]; - } + Wj[4] += dWj[4]; + + gizmo_check_physical_quantity("density", Wi[0]); + 
gizmo_check_physical_quantity("pressure", Wi[4]); + gizmo_check_physical_quantity("density", Wj[0]); + gizmo_check_physical_quantity("pressure", Wj[4]); } #endif // SWIFT_HYDRO_GRADIENTS_H diff --git a/src/hydro/Gizmo/hydro_gradients_gizmo.h b/src/hydro/Gizmo/hydro_gradients_gizmo.h index aa6e4406b94e7a5cafcd0ca556162476003477de..ee3ad6919f81f042ceacc5db8b4e818d63c90266 100644 --- a/src/hydro/Gizmo/hydro_gradients_gizmo.h +++ b/src/hydro/Gizmo/hydro_gradients_gizmo.h @@ -45,6 +45,18 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_init( p->primitives.gradients.P[1] = 0.0f; p->primitives.gradients.P[2] = 0.0f; + p->gravity.grad_a[0][0] = 0.0f; + p->gravity.grad_a[0][1] = 0.0f; + p->gravity.grad_a[0][2] = 0.0f; + + p->gravity.grad_a[1][0] = 0.0f; + p->gravity.grad_a[1][1] = 0.0f; + p->gravity.grad_a[1][2] = 0.0f; + + p->gravity.grad_a[2][0] = 0.0f; + p->gravity.grad_a[2][1] = 0.0f; + p->gravity.grad_a[2][2] = 0.0f; + hydro_slope_limit_cell_init(p); } @@ -93,56 +105,146 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect( xi = r * hi_inv; kernel_deval(xi, &wi, &wi_dx); - /* Compute gradients for pi */ - /* there is a sign difference w.r.t. eqn. 
(6) because of the inverse - * definition of dx */ - pi->primitives.gradients.rho[0] += - (Wi[0] - Wj[0]) * wi * - (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); - pi->primitives.gradients.rho[1] += - (Wi[0] - Wj[0]) * wi * - (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); - pi->primitives.gradients.rho[2] += - (Wi[0] - Wj[0]) * wi * - (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); - - pi->primitives.gradients.v[0][0] += - (Wi[1] - Wj[1]) * wi * - (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); - pi->primitives.gradients.v[0][1] += - (Wi[1] - Wj[1]) * wi * - (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); - pi->primitives.gradients.v[0][2] += - (Wi[1] - Wj[1]) * wi * - (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); - pi->primitives.gradients.v[1][0] += - (Wi[2] - Wj[2]) * wi * - (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); - pi->primitives.gradients.v[1][1] += - (Wi[2] - Wj[2]) * wi * - (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); - pi->primitives.gradients.v[1][2] += - (Wi[2] - Wj[2]) * wi * - (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); - pi->primitives.gradients.v[2][0] += - (Wi[3] - Wj[3]) * wi * - (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); - pi->primitives.gradients.v[2][1] += - (Wi[3] - Wj[3]) * wi * - (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); - pi->primitives.gradients.v[2][2] += - (Wi[3] - Wj[3]) * wi * - (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); - - pi->primitives.gradients.P[0] += - (Wi[4] - Wj[4]) * wi * - (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); - pi->primitives.gradients.P[1] += - (Wi[4] - Wj[4]) * wi * - (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); - pi->primitives.gradients.P[2] += - (Wi[4] - Wj[4]) * wi * - (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + if (pi->density.wcorr > const_gizmo_min_wcorr) { + /* Compute gradients for pi */ + /* there is a sign 
difference w.r.t. eqn. (6) because of the inverse + * definition of dx */ + pi->primitives.gradients.rho[0] += + (Wi[0] - Wj[0]) * wi * + (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); + pi->primitives.gradients.rho[1] += + (Wi[0] - Wj[0]) * wi * + (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); + pi->primitives.gradients.rho[2] += + (Wi[0] - Wj[0]) * wi * + (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + + pi->primitives.gradients.v[0][0] += + (Wi[1] - Wj[1]) * wi * + (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); + pi->primitives.gradients.v[0][1] += + (Wi[1] - Wj[1]) * wi * + (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); + pi->primitives.gradients.v[0][2] += + (Wi[1] - Wj[1]) * wi * + (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + pi->primitives.gradients.v[1][0] += + (Wi[2] - Wj[2]) * wi * + (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); + pi->primitives.gradients.v[1][1] += + (Wi[2] - Wj[2]) * wi * + (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); + pi->primitives.gradients.v[1][2] += + (Wi[2] - Wj[2]) * wi * + (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + pi->primitives.gradients.v[2][0] += + (Wi[3] - Wj[3]) * wi * + (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); + pi->primitives.gradients.v[2][1] += + (Wi[3] - Wj[3]) * wi * + (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); + pi->primitives.gradients.v[2][2] += + (Wi[3] - Wj[3]) * wi * + (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + + pi->primitives.gradients.P[0] += + (Wi[4] - Wj[4]) * wi * + (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); + pi->primitives.gradients.P[1] += + (Wi[4] - Wj[4]) * wi * + (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); + pi->primitives.gradients.P[2] += + (Wi[4] - Wj[4]) * wi * + (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + + pi->gravity.grad_a[0][0] += + (pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wi * + 
(Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); + pi->gravity.grad_a[0][1] += + (pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wi * + (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); + pi->gravity.grad_a[0][2] += + (pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wi * + (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + + pi->gravity.grad_a[1][0] += + (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wi * + (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); + pi->gravity.grad_a[1][1] += + (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wi * + (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); + pi->gravity.grad_a[1][2] += + (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wi * + (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + + pi->gravity.grad_a[2][0] += + (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wi * + (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); + pi->gravity.grad_a[2][1] += + (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wi * + (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); + pi->gravity.grad_a[2][2] += + (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wi * + (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + } else { + /* The gradient matrix was not well-behaved, switch to SPH gradients */ + + pi->primitives.gradients.rho[0] -= + wi_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) / r; + pi->primitives.gradients.rho[1] -= + wi_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) / r; + pi->primitives.gradients.rho[2] -= + wi_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) / r; + + pi->primitives.gradients.v[0][0] -= + wi_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; + pi->primitives.gradients.v[0][1] -= + wi_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; + pi->primitives.gradients.v[0][2] -= + wi_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; + + pi->primitives.gradients.v[1][0] -= + wi_dx * dx[0] * 
(pi->primitives.v[1] - pj->primitives.v[1]) / r; + pi->primitives.gradients.v[1][1] -= + wi_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) / r; + pi->primitives.gradients.v[1][2] -= + wi_dx * dx[2] * (pi->primitives.v[1] - pj->primitives.v[1]) / r; + + pi->primitives.gradients.v[2][0] -= + wi_dx * dx[0] * (pi->primitives.v[2] - pj->primitives.v[2]) / r; + pi->primitives.gradients.v[2][1] -= + wi_dx * dx[1] * (pi->primitives.v[2] - pj->primitives.v[2]) / r; + pi->primitives.gradients.v[2][2] -= + wi_dx * dx[2] * (pi->primitives.v[2] - pj->primitives.v[2]) / r; + + pi->primitives.gradients.P[0] -= + wi_dx * dx[0] * (pi->primitives.P - pj->primitives.P) / r; + pi->primitives.gradients.P[1] -= + wi_dx * dx[1] * (pi->primitives.P - pj->primitives.P) / r; + pi->primitives.gradients.P[2] -= + wi_dx * dx[2] * (pi->primitives.P - pj->primitives.P) / r; + + pi->gravity.grad_a[0][0] -= + wi_dx * dx[0] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r; + pi->gravity.grad_a[0][1] -= + wi_dx * dx[1] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r; + pi->gravity.grad_a[0][2] -= + wi_dx * dx[2] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r; + + pi->gravity.grad_a[1][0] -= + wi_dx * dx[0] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r; + pi->gravity.grad_a[1][1] -= + wi_dx * dx[1] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r; + pi->gravity.grad_a[1][2] -= + wi_dx * dx[2] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r; + + pi->gravity.grad_a[2][0] -= + wi_dx * dx[0] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r; + pi->gravity.grad_a[2][1] -= + wi_dx * dx[1] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r; + pi->gravity.grad_a[2][2] -= + wi_dx * dx[2] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r; + } hydro_slope_limit_cell_collect(pi, pj, r); @@ -151,57 +253,146 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_collect( xj = r * hj_inv; kernel_deval(xj, &wj, &wj_dx); - /* Compute gradients for pj */ - /* there 
is no sign difference w.r.t. eqn. (6) because dx is now what we - * want - * it to be */ - pj->primitives.gradients.rho[0] += - (Wi[0] - Wj[0]) * wj * - (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]); - pj->primitives.gradients.rho[1] += - (Wi[0] - Wj[0]) * wj * - (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]); - pj->primitives.gradients.rho[2] += - (Wi[0] - Wj[0]) * wj * - (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]); - - pj->primitives.gradients.v[0][0] += - (Wi[1] - Wj[1]) * wj * - (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]); - pj->primitives.gradients.v[0][1] += - (Wi[1] - Wj[1]) * wj * - (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]); - pj->primitives.gradients.v[0][2] += - (Wi[1] - Wj[1]) * wj * - (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]); - pj->primitives.gradients.v[1][0] += - (Wi[2] - Wj[2]) * wj * - (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]); - pj->primitives.gradients.v[1][1] += - (Wi[2] - Wj[2]) * wj * - (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]); - pj->primitives.gradients.v[1][2] += - (Wi[2] - Wj[2]) * wj * - (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]); - pj->primitives.gradients.v[2][0] += - (Wi[3] - Wj[3]) * wj * - (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]); - pj->primitives.gradients.v[2][1] += - (Wi[3] - Wj[3]) * wj * - (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]); - pj->primitives.gradients.v[2][2] += - (Wi[3] - Wj[3]) * wj * - (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]); - - pj->primitives.gradients.P[0] += - (Wi[4] - Wj[4]) * wj * - (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]); - pj->primitives.gradients.P[1] += - (Wi[4] - Wj[4]) * wj * - (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]); - pj->primitives.gradients.P[2] += - (Wi[4] - Wj[4]) * wj * - (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]); + if (pj->density.wcorr > const_gizmo_min_wcorr) { + /* Compute gradients 
for pj */ + /* there is no sign difference w.r.t. eqn. (6) because dx is now what we + * want + * it to be */ + pj->primitives.gradients.rho[0] += + (Wi[0] - Wj[0]) * wj * + (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]); + pj->primitives.gradients.rho[1] += + (Wi[0] - Wj[0]) * wj * + (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]); + pj->primitives.gradients.rho[2] += + (Wi[0] - Wj[0]) * wj * + (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]); + + pj->primitives.gradients.v[0][0] += + (Wi[1] - Wj[1]) * wj * + (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]); + pj->primitives.gradients.v[0][1] += + (Wi[1] - Wj[1]) * wj * + (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]); + pj->primitives.gradients.v[0][2] += + (Wi[1] - Wj[1]) * wj * + (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]); + pj->primitives.gradients.v[1][0] += + (Wi[2] - Wj[2]) * wj * + (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]); + pj->primitives.gradients.v[1][1] += + (Wi[2] - Wj[2]) * wj * + (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]); + pj->primitives.gradients.v[1][2] += + (Wi[2] - Wj[2]) * wj * + (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]); + pj->primitives.gradients.v[2][0] += + (Wi[3] - Wj[3]) * wj * + (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]); + pj->primitives.gradients.v[2][1] += + (Wi[3] - Wj[3]) * wj * + (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]); + pj->primitives.gradients.v[2][2] += + (Wi[3] - Wj[3]) * wj * + (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]); + + pj->primitives.gradients.P[0] += + (Wi[4] - Wj[4]) * wj * + (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]); + pj->primitives.gradients.P[1] += + (Wi[4] - Wj[4]) * wj * + (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]); + pj->primitives.gradients.P[2] += + (Wi[4] - Wj[4]) * wj * + (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]); + + pj->gravity.grad_a[0][0] += + 
(pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wj * + (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]); + pj->gravity.grad_a[0][1] += + (pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wj * + (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]); + pj->gravity.grad_a[0][2] += + (pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wj * + (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]); + + pj->gravity.grad_a[1][0] += + (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wj * + (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]); + pj->gravity.grad_a[1][1] += + (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wj * + (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]); + pj->gravity.grad_a[1][2] += + (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wj * + (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]); + + pj->gravity.grad_a[2][0] += + (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wj * + (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]); + pj->gravity.grad_a[2][1] += + (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wj * + (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]); + pj->gravity.grad_a[2][2] += + (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wj * + (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]); + } else { + /* SPH gradients */ + + pj->primitives.gradients.rho[0] -= + wj_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) / r; + pj->primitives.gradients.rho[1] -= + wj_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) / r; + pj->primitives.gradients.rho[2] -= + wj_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) / r; + + pj->primitives.gradients.v[0][0] -= + wj_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; + pj->primitives.gradients.v[0][1] -= + wj_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; + pj->primitives.gradients.v[0][2] -= + wj_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; + + pj->primitives.gradients.v[1][0] -= + wj_dx * dx[0] * 
(pi->primitives.v[1] - pj->primitives.v[1]) / r; + pj->primitives.gradients.v[1][1] -= + wj_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) / r; + pj->primitives.gradients.v[1][2] -= + wj_dx * dx[2] * (pi->primitives.v[1] - pj->primitives.v[1]) / r; + pj->primitives.gradients.v[2][0] -= + wj_dx * dx[0] * (pi->primitives.v[2] - pj->primitives.v[2]) / r; + pj->primitives.gradients.v[2][1] -= + wj_dx * dx[1] * (pi->primitives.v[2] - pj->primitives.v[2]) / r; + pj->primitives.gradients.v[2][2] -= + wj_dx * dx[2] * (pi->primitives.v[2] - pj->primitives.v[2]) / r; + + pj->primitives.gradients.P[0] -= + wj_dx * dx[0] * (pi->primitives.P - pj->primitives.P) / r; + pj->primitives.gradients.P[1] -= + wj_dx * dx[1] * (pi->primitives.P - pj->primitives.P) / r; + pj->primitives.gradients.P[2] -= + wj_dx * dx[2] * (pi->primitives.P - pj->primitives.P) / r; + + pj->gravity.grad_a[0][0] -= + wj_dx * dx[0] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r; + pj->gravity.grad_a[0][1] -= + wj_dx * dx[1] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r; + pj->gravity.grad_a[0][2] -= + wj_dx * dx[2] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r; + + pj->gravity.grad_a[1][0] -= + wj_dx * dx[0] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r; + pj->gravity.grad_a[1][1] -= + wj_dx * dx[1] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r; + pj->gravity.grad_a[1][2] -= + wj_dx * dx[2] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r; + + pj->gravity.grad_a[2][0] -= + wj_dx * dx[0] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r; + pj->gravity.grad_a[2][1] -= + wj_dx * dx[1] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r; + pj->gravity.grad_a[2][2] -= + wj_dx * dx[2] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r; + } hydro_slope_limit_cell_collect(pj, pi, r); } @@ -250,56 +441,145 @@ hydro_gradients_nonsym_collect(float r2, float *dx, float hi, float hj, xi = r * hi_inv; kernel_deval(xi, &wi, &wi_dx); - /* Compute gradients for pi */ - /* there is 
a sign difference w.r.t. eqn. (6) because of the inverse - * definition of dx */ - pi->primitives.gradients.rho[0] += - (Wi[0] - Wj[0]) * wi * - (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); - pi->primitives.gradients.rho[1] += - (Wi[0] - Wj[0]) * wi * - (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); - pi->primitives.gradients.rho[2] += - (Wi[0] - Wj[0]) * wi * - (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); - - pi->primitives.gradients.v[0][0] += - (Wi[1] - Wj[1]) * wi * - (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); - pi->primitives.gradients.v[0][1] += - (Wi[1] - Wj[1]) * wi * - (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); - pi->primitives.gradients.v[0][2] += - (Wi[1] - Wj[1]) * wi * - (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); - pi->primitives.gradients.v[1][0] += - (Wi[2] - Wj[2]) * wi * - (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); - pi->primitives.gradients.v[1][1] += - (Wi[2] - Wj[2]) * wi * - (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); - pi->primitives.gradients.v[1][2] += - (Wi[2] - Wj[2]) * wi * - (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); - pi->primitives.gradients.v[2][0] += - (Wi[3] - Wj[3]) * wi * - (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); - pi->primitives.gradients.v[2][1] += - (Wi[3] - Wj[3]) * wi * - (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); - pi->primitives.gradients.v[2][2] += - (Wi[3] - Wj[3]) * wi * - (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); - - pi->primitives.gradients.P[0] += - (Wi[4] - Wj[4]) * wi * - (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); - pi->primitives.gradients.P[1] += - (Wi[4] - Wj[4]) * wi * - (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); - pi->primitives.gradients.P[2] += - (Wi[4] - Wj[4]) * wi * - (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + if (pi->density.wcorr > const_gizmo_min_wcorr) { + /* Compute gradients for pi 
*/ + /* there is a sign difference w.r.t. eqn. (6) because of the inverse + * definition of dx */ + pi->primitives.gradients.rho[0] += + (Wi[0] - Wj[0]) * wi * + (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); + pi->primitives.gradients.rho[1] += + (Wi[0] - Wj[0]) * wi * + (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); + pi->primitives.gradients.rho[2] += + (Wi[0] - Wj[0]) * wi * + (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + + pi->primitives.gradients.v[0][0] += + (Wi[1] - Wj[1]) * wi * + (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); + pi->primitives.gradients.v[0][1] += + (Wi[1] - Wj[1]) * wi * + (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); + pi->primitives.gradients.v[0][2] += + (Wi[1] - Wj[1]) * wi * + (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + pi->primitives.gradients.v[1][0] += + (Wi[2] - Wj[2]) * wi * + (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); + pi->primitives.gradients.v[1][1] += + (Wi[2] - Wj[2]) * wi * + (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); + pi->primitives.gradients.v[1][2] += + (Wi[2] - Wj[2]) * wi * + (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + pi->primitives.gradients.v[2][0] += + (Wi[3] - Wj[3]) * wi * + (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); + pi->primitives.gradients.v[2][1] += + (Wi[3] - Wj[3]) * wi * + (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); + pi->primitives.gradients.v[2][2] += + (Wi[3] - Wj[3]) * wi * + (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + + pi->primitives.gradients.P[0] += + (Wi[4] - Wj[4]) * wi * + (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); + pi->primitives.gradients.P[1] += + (Wi[4] - Wj[4]) * wi * + (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); + pi->primitives.gradients.P[2] += + (Wi[4] - Wj[4]) * wi * + (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + + pi->gravity.grad_a[0][0] += + (pi->gravity.old_a[0] - 
pj->gravity.old_a[0]) * wi * + (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); + pi->gravity.grad_a[0][1] += + (pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wi * + (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); + pi->gravity.grad_a[0][2] += + (pi->gravity.old_a[0] - pj->gravity.old_a[0]) * wi * + (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + + pi->gravity.grad_a[1][0] += + (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wi * + (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); + pi->gravity.grad_a[1][1] += + (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wi * + (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); + pi->gravity.grad_a[1][2] += + (pi->gravity.old_a[1] - pj->gravity.old_a[1]) * wi * + (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + + pi->gravity.grad_a[2][0] += + (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wi * + (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]); + pi->gravity.grad_a[2][1] += + (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wi * + (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]); + pi->gravity.grad_a[2][2] += + (pi->gravity.old_a[2] - pj->gravity.old_a[2]) * wi * + (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]); + } else { + /* Gradient matrix is not well-behaved, switch to SPH gradients */ + + pi->primitives.gradients.rho[0] -= + wi_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) / r; + pi->primitives.gradients.rho[1] -= + wi_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) / r; + pi->primitives.gradients.rho[2] -= + wi_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) / r; + + pi->primitives.gradients.v[0][0] -= + wi_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; + pi->primitives.gradients.v[0][1] -= + wi_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; + pi->primitives.gradients.v[0][2] -= + wi_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) / r; + pi->primitives.gradients.v[1][0] -= + 
wi_dx * dx[0] * (pi->primitives.v[1] - pj->primitives.v[1]) / r; + pi->primitives.gradients.v[1][1] -= + wi_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) / r; + pi->primitives.gradients.v[1][2] -= + wi_dx * dx[2] * (pi->primitives.v[1] - pj->primitives.v[1]) / r; + + pi->primitives.gradients.v[2][0] -= + wi_dx * dx[0] * (pi->primitives.v[2] - pj->primitives.v[2]) / r; + pi->primitives.gradients.v[2][1] -= + wi_dx * dx[1] * (pi->primitives.v[2] - pj->primitives.v[2]) / r; + pi->primitives.gradients.v[2][2] -= + wi_dx * dx[2] * (pi->primitives.v[2] - pj->primitives.v[2]) / r; + + pi->primitives.gradients.P[0] -= + wi_dx * dx[0] * (pi->primitives.P - pj->primitives.P) / r; + pi->primitives.gradients.P[1] -= + wi_dx * dx[1] * (pi->primitives.P - pj->primitives.P) / r; + pi->primitives.gradients.P[2] -= + wi_dx * dx[2] * (pi->primitives.P - pj->primitives.P) / r; + + pi->gravity.grad_a[0][0] -= + wi_dx * dx[0] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r; + pi->gravity.grad_a[0][1] -= + wi_dx * dx[1] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r; + pi->gravity.grad_a[0][2] -= + wi_dx * dx[2] * (pi->gravity.old_a[0] - pj->gravity.old_a[0]) / r; + + pi->gravity.grad_a[1][0] -= + wi_dx * dx[0] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r; + pi->gravity.grad_a[1][1] -= + wi_dx * dx[1] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r; + pi->gravity.grad_a[1][2] -= + wi_dx * dx[2] * (pi->gravity.old_a[1] - pj->gravity.old_a[1]) / r; + + pi->gravity.grad_a[2][0] -= + wi_dx * dx[0] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r; + pi->gravity.grad_a[2][1] -= + wi_dx * dx[1] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r; + pi->gravity.grad_a[2][2] -= + wi_dx * dx[2] * (pi->gravity.old_a[2] - pj->gravity.old_a[2]) / r; + } hydro_slope_limit_cell_collect(pi, pj, r); } @@ -319,23 +599,73 @@ __attribute__((always_inline)) INLINE static void hydro_gradients_finalize( ih = 1.0f / h; const float ihdim = pow_dimension(ih); - 
p->primitives.gradients.rho[0] *= ihdim; - p->primitives.gradients.rho[1] *= ihdim; - p->primitives.gradients.rho[2] *= ihdim; - - p->primitives.gradients.v[0][0] *= ihdim; - p->primitives.gradients.v[0][1] *= ihdim; - p->primitives.gradients.v[0][2] *= ihdim; - p->primitives.gradients.v[1][0] *= ihdim; - p->primitives.gradients.v[1][1] *= ihdim; - p->primitives.gradients.v[1][2] *= ihdim; - p->primitives.gradients.v[2][0] *= ihdim; - p->primitives.gradients.v[2][1] *= ihdim; - p->primitives.gradients.v[2][2] *= ihdim; - - p->primitives.gradients.P[0] *= ihdim; - p->primitives.gradients.P[1] *= ihdim; - p->primitives.gradients.P[2] *= ihdim; + if (p->density.wcorr > const_gizmo_min_wcorr) { + p->primitives.gradients.rho[0] *= ihdim; + p->primitives.gradients.rho[1] *= ihdim; + p->primitives.gradients.rho[2] *= ihdim; + + p->primitives.gradients.v[0][0] *= ihdim; + p->primitives.gradients.v[0][1] *= ihdim; + p->primitives.gradients.v[0][2] *= ihdim; + p->primitives.gradients.v[1][0] *= ihdim; + p->primitives.gradients.v[1][1] *= ihdim; + p->primitives.gradients.v[1][2] *= ihdim; + p->primitives.gradients.v[2][0] *= ihdim; + p->primitives.gradients.v[2][1] *= ihdim; + p->primitives.gradients.v[2][2] *= ihdim; + + p->primitives.gradients.P[0] *= ihdim; + p->primitives.gradients.P[1] *= ihdim; + p->primitives.gradients.P[2] *= ihdim; + + p->gravity.grad_a[0][0] *= ihdim; + p->gravity.grad_a[0][1] *= ihdim; + p->gravity.grad_a[0][2] *= ihdim; + + p->gravity.grad_a[1][0] *= ihdim; + p->gravity.grad_a[1][1] *= ihdim; + p->gravity.grad_a[1][2] *= ihdim; + + p->gravity.grad_a[2][0] *= ihdim; + p->gravity.grad_a[2][1] *= ihdim; + p->gravity.grad_a[2][2] *= ihdim; + } else { + const float ihdimp1 = pow_dimension_plus_one(ih); + + float volume = p->geometry.volume; + + /* finalize gradients by multiplying with volume */ + p->primitives.gradients.rho[0] *= ihdimp1 * volume; + p->primitives.gradients.rho[1] *= ihdimp1 * volume; + p->primitives.gradients.rho[2] *= ihdimp1 * 
volume; + + p->primitives.gradients.v[0][0] *= ihdimp1 * volume; + p->primitives.gradients.v[0][1] *= ihdimp1 * volume; + p->primitives.gradients.v[0][2] *= ihdimp1 * volume; + + p->primitives.gradients.v[1][0] *= ihdimp1 * volume; + p->primitives.gradients.v[1][1] *= ihdimp1 * volume; + p->primitives.gradients.v[1][2] *= ihdimp1 * volume; + p->primitives.gradients.v[2][0] *= ihdimp1 * volume; + p->primitives.gradients.v[2][1] *= ihdimp1 * volume; + p->primitives.gradients.v[2][2] *= ihdimp1 * volume; + + p->primitives.gradients.P[0] *= ihdimp1 * volume; + p->primitives.gradients.P[1] *= ihdimp1 * volume; + p->primitives.gradients.P[2] *= ihdimp1 * volume; + + p->gravity.grad_a[0][0] *= ihdimp1 * volume; + p->gravity.grad_a[0][1] *= ihdimp1 * volume; + p->gravity.grad_a[0][2] *= ihdimp1 * volume; + + p->gravity.grad_a[1][0] *= ihdimp1 * volume; + p->gravity.grad_a[1][1] *= ihdimp1 * volume; + p->gravity.grad_a[1][2] *= ihdimp1 * volume; + + p->gravity.grad_a[2][0] *= ihdimp1 * volume; + p->gravity.grad_a[2][1] *= ihdimp1 * volume; + p->gravity.grad_a[2][2] *= ihdimp1 * volume; + } hydro_slope_limit_cell(p); } diff --git a/src/hydro/Gizmo/hydro_iact.h b/src/hydro/Gizmo/hydro_iact.h index d707e0ee1b5707086393ea206ea9f0f60f9c1853..8798dc859a790a83ab7a3b6f1709b1302f574581 100644 --- a/src/hydro/Gizmo/hydro_iact.h +++ b/src/hydro/Gizmo/hydro_iact.h @@ -23,6 +23,8 @@ #include "hydro_gradients.h" #include "riemann.h" +#define GIZMO_VOLUME_CORRECTION + /** * @brief Calculate the volume interaction between particle i and particle j * @@ -62,6 +64,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( for (k = 0; k < 3; k++) for (l = 0; l < 3; l++) pi->geometry.matrix_E[k][l] += dx[k] * dx[l] * wi; + pi->geometry.centroid[0] -= dx[0] * wi; + pi->geometry.centroid[1] -= dx[1] * wi; + pi->geometry.centroid[2] -= dx[2] * wi; + /* Compute density of pj. 
*/ h_inv = 1.0 / hj; xj = r * h_inv; @@ -74,6 +80,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( pj->geometry.volume += wj; for (k = 0; k < 3; k++) for (l = 0; l < 3; l++) pj->geometry.matrix_E[k][l] += dx[k] * dx[l] * wj; + + pj->geometry.centroid[0] += dx[0] * wj; + pj->geometry.centroid[1] += dx[1] * wj; + pj->geometry.centroid[2] += dx[2] * wj; } /** @@ -117,6 +127,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( pi->geometry.volume += wi; for (k = 0; k < 3; k++) for (l = 0; l < 3; l++) pi->geometry.matrix_E[k][l] += dx[k] * dx[l] * wi; + + pi->geometry.centroid[0] -= dx[0] * wi; + pi->geometry.centroid[1] -= dx[1] * wi; + pi->geometry.centroid[2] -= dx[2] * wi; } /** @@ -325,14 +339,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common( /* calculate the maximal signal velocity */ if (Wi[0] > 0.0f && Wj[0] > 0.0f) { -#ifdef EOS_ISOTHERMAL_GAS - /* we use a value that is slightly higher than necessary, since the correct - value does not always work */ - vmax = 2.5 * const_isothermal_soundspeed; -#else vmax = sqrtf(hydro_gamma * Wi[4] / Wi[0]) + sqrtf(hydro_gamma * Wj[4] / Wj[0]); -#endif } else { vmax = 0.0f; } @@ -381,23 +389,63 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common( /* Compute area */ /* eqn. (7) */ Anorm = 0.0f; - for (k = 0; k < 3; k++) { - /* we add a minus sign since dx is pi->x - pj->x */ - A[k] = -Vi * (Bi[k][0] * dx[0] + Bi[k][1] * dx[1] + Bi[k][2] * dx[2]) * wi * - hi_inv_dim - - Vj * (Bj[k][0] * dx[0] + Bj[k][1] * dx[1] + Bj[k][2] * dx[2]) * wj * - hj_inv_dim; - Anorm += A[k] * A[k]; + if (pi->density.wcorr > const_gizmo_min_wcorr && + pj->density.wcorr > const_gizmo_min_wcorr) { + /* in principle, we use Vi and Vj as weights for the left and right + contributions to the generalized surface vector. 
+ However, if Vi and Vj are very different (because they have very + different + smoothing lengths), then the expressions below are more stable. */ + float Xi = Vi; + float Xj = Vj; +#ifdef GIZMO_VOLUME_CORRECTION + if (fabsf(Vi - Vj) / fminf(Vi, Vj) > 1.5 * hydro_dimension) { + Xi = (Vi * hj + Vj * hi) / (hi + hj); + Xj = Xi; + } +#endif + for (k = 0; k < 3; k++) { + /* we add a minus sign since dx is pi->x - pj->x */ + A[k] = -Xi * (Bi[k][0] * dx[0] + Bi[k][1] * dx[1] + Bi[k][2] * dx[2]) * + wj * hj_inv_dim - + Xj * (Bj[k][0] * dx[0] + Bj[k][1] * dx[1] + Bj[k][2] * dx[2]) * + wi * hi_inv_dim; + Anorm += A[k] * A[k]; + } + } else { + /* ill-conditioned gradient matrix: revert to SPH face area */ + Anorm = -(hidp1 * Vi * Vi * wi_dx + hjdp1 * Vj * Vj * wj_dx) * ri; + A[0] = -Anorm * dx[0]; + A[1] = -Anorm * dx[1]; + A[2] = -Anorm * dx[2]; + Anorm *= Anorm * r2; } - if (!Anorm) { + if (Anorm == 0.) { /* if the interface has no area, nothing happens and we return */ /* continuing results in dividing by zero and NaN's... */ return; } - /* compute the normal vector of the interface */ Anorm = sqrtf(Anorm); + +#ifdef SWIFT_DEBUG_CHECKS + /* For stability reasons, we do require A and dx to have opposite + directions (basically meaning that the surface normal for the surface + always points from particle i to particle j, as it would in a real + moving-mesh code). If not, our scheme is no longer upwind and hence can + become unstable. */ + float dA_dot_dx = A[0] * dx[0] + A[1] * dx[1] + A[2] * dx[2]; + /* In GIZMO, Phil Hopkins reverts to an SPH integration scheme if this + happens. We currently just ignore this case and display a message. 
*/ + const float rdim = pow_dimension(r); + if (dA_dot_dx > 1.e-6 * rdim) { + message("Ill conditioned gradient matrix (%g %g %g %g %g)!", dA_dot_dx, + Anorm, Vi, Vj, r); + } +#endif + + /* compute the normal vector of the interface */ for (k = 0; k < 3; k++) n_unit[k] = A[k] / Anorm; /* Compute interface position (relative to pi, since we don't need the actual @@ -436,43 +484,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_fluxes_common( /* we don't need to rotate, we can use the unit vector in the Riemann problem * itself (see GIZMO) */ - if (Wi[0] < 0.0f || Wj[0] < 0.0f || Wi[4] < 0.0f || Wj[4] < 0.0f) { - printf("mindt: %g\n", mindt); - printf("WL: %g %g %g %g %g\n", pi->primitives.rho, pi->primitives.v[0], - pi->primitives.v[1], pi->primitives.v[2], pi->primitives.P); -#ifdef USE_GRADIENTS - printf("dWL: %g %g %g %g %g\n", dWi[0], dWi[1], dWi[2], dWi[3], dWi[4]); -#endif - printf("gradWL[0]: %g %g %g\n", pi->primitives.gradients.rho[0], - pi->primitives.gradients.rho[1], pi->primitives.gradients.rho[2]); - printf("gradWL[1]: %g %g %g\n", pi->primitives.gradients.v[0][0], - pi->primitives.gradients.v[0][1], pi->primitives.gradients.v[0][2]); - printf("gradWL[2]: %g %g %g\n", pi->primitives.gradients.v[1][0], - pi->primitives.gradients.v[1][1], pi->primitives.gradients.v[1][2]); - printf("gradWL[3]: %g %g %g\n", pi->primitives.gradients.v[2][0], - pi->primitives.gradients.v[2][1], pi->primitives.gradients.v[2][2]); - printf("gradWL[4]: %g %g %g\n", pi->primitives.gradients.P[0], - pi->primitives.gradients.P[1], pi->primitives.gradients.P[2]); - printf("WL': %g %g %g %g %g\n", Wi[0], Wi[1], Wi[2], Wi[3], Wi[4]); - printf("WR: %g %g %g %g %g\n", pj->primitives.rho, pj->primitives.v[0], - pj->primitives.v[1], pj->primitives.v[2], pj->primitives.P); -#ifdef USE_GRADIENTS - printf("dWR: %g %g %g %g %g\n", dWj[0], dWj[1], dWj[2], dWj[3], dWj[4]); -#endif - printf("gradWR[0]: %g %g %g\n", pj->primitives.gradients.rho[0], - 
pj->primitives.gradients.rho[1], pj->primitives.gradients.rho[2]); - printf("gradWR[1]: %g %g %g\n", pj->primitives.gradients.v[0][0], - pj->primitives.gradients.v[0][1], pj->primitives.gradients.v[0][2]); - printf("gradWR[2]: %g %g %g\n", pj->primitives.gradients.v[1][0], - pj->primitives.gradients.v[1][1], pj->primitives.gradients.v[1][2]); - printf("gradWR[3]: %g %g %g\n", pj->primitives.gradients.v[2][0], - pj->primitives.gradients.v[2][1], pj->primitives.gradients.v[2][2]); - printf("gradWR[4]: %g %g %g\n", pj->primitives.gradients.P[0], - pj->primitives.gradients.P[1], pj->primitives.gradients.P[2]); - printf("WR': %g %g %g %g %g\n", Wj[0], Wj[1], Wj[2], Wj[3], Wj[4]); - error("Negative density or pressure!\n"); - } - float totflux[5]; riemann_solve_for_flux(Wi, Wj, n_unit, vij, totflux); diff --git a/src/hydro/Gizmo/hydro_io.h b/src/hydro/Gizmo/hydro_io.h index 236106a1fb04cc2e5b84f997a2389d583ce17cff..3d58be2f47c4e1904aaac5f69d1862f1d453e488 100644 --- a/src/hydro/Gizmo/hydro_io.h +++ b/src/hydro/Gizmo/hydro_io.h @@ -127,7 +127,7 @@ float convert_Etot(struct engine* e, struct part* p) { void hydro_write_particles(struct part* parts, struct io_props* list, int* num_fields) { - *num_fields = 14; + *num_fields = 11; /* List what we want to write */ list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, @@ -143,22 +143,16 @@ void hydro_write_particles(struct part* parts, struct io_props* list, parts, primitives.P, convert_u); list[5] = io_make_output_field("ParticleIDs", ULONGLONG, 1, UNIT_CONV_NO_UNITS, parts, id); - list[6] = io_make_output_field("Acceleration", FLOAT, 3, - UNIT_CONV_ACCELERATION, parts, a_hydro); - list[7] = io_make_output_field("Density", FLOAT, 1, UNIT_CONV_DENSITY, parts, + list[6] = io_make_output_field("Density", FLOAT, 1, UNIT_CONV_DENSITY, parts, primitives.rho); - list[8] = io_make_output_field("Volume", FLOAT, 1, UNIT_CONV_VOLUME, parts, - geometry.volume); - list[9] = io_make_output_field("GradDensity", FLOAT, 
3, UNIT_CONV_DENSITY, - parts, primitives.gradients.rho); - list[10] = io_make_output_field_convert_part( + list[7] = io_make_output_field_convert_part( "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY, parts, primitives.P, convert_A); - list[11] = io_make_output_field("Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, - parts, primitives.P); - list[12] = + list[8] = io_make_output_field("Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, + parts, primitives.P); + list[9] = io_make_output_field_convert_part("TotEnergy", FLOAT, 1, UNIT_CONV_ENERGY, parts, conserved.energy, convert_Etot); - list[13] = io_make_output_field("GravAcceleration", FLOAT, 3, + list[10] = io_make_output_field("GravAcceleration", FLOAT, 3, UNIT_CONV_ACCELERATION, parts, gravity.old_a); } diff --git a/src/hydro/Gizmo/hydro_part.h b/src/hydro/Gizmo/hydro_part.h index d552a3f7e86031311098293845f1aa11270c417f..6c96004847ae23b46ec3f5182f742e0e84f1118d 100644 --- a/src/hydro/Gizmo/hydro_part.h +++ b/src/hydro/Gizmo/hydro_part.h @@ -148,6 +148,9 @@ struct part { /* Total surface area of the particle. */ float Atot; + /* Centroid of the "cell". */ + float centroid[3]; + } geometry; /* Variables used for timestep calculation (currently not used). */ @@ -201,6 +204,8 @@ struct part { /* Previous value of the gravitational acceleration. */ float old_a[3]; + float grad_a[3][3]; + /* Previous value of the mass flux vector. */ float old_mflux[3]; diff --git a/src/hydro/Gizmo/hydro_slope_limiters_face.h b/src/hydro/Gizmo/hydro_slope_limiters_face.h index 7ae5dd2eb073d9aae8ab6f2efffdf8df15b4bb4a..ba96063d661a93a4efc4069ff7e7269a4ac58c3b 100644 --- a/src/hydro/Gizmo/hydro_slope_limiters_face.h +++ b/src/hydro/Gizmo/hydro_slope_limiters_face.h @@ -53,14 +53,22 @@ hydro_slope_limit_face_quantity(float phi_i, float phi_j, float phi_mid0, if ((phimax + delta1) * phimax > 0.0f) { phiplus = phimax + delta1; } else { - phiplus = phimax / (1.0f + delta1 / fabs(phimax)); + if (phimax != 0.) 
{ + phiplus = phimax / (1.0f + delta1 / fabs(phimax)); + } else { + phiplus = 0.; + } } /* if sign(phimin-delta1) == sign(phimin) */ if ((phimin - delta1) * phimin > 0.0f) { phiminus = phimin - delta1; } else { - phiminus = phimin / (1.0f + delta1 / fabs(phimin)); + if (phimin != 0.) { + phiminus = phimin / (1.0f + delta1 / fabs(phimin)); + } else { + phiminus = 0.; + } } if (phi_i < phi_j) { diff --git a/src/hydro/Gizmo/hydro_unphysical.h b/src/hydro/Gizmo/hydro_unphysical.h new file mode 100644 index 0000000000000000000000000000000000000000..517e3e0918ad340580e270477c0a166590546850 --- /dev/null +++ b/src/hydro/Gizmo/hydro_unphysical.h @@ -0,0 +1,55 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2017 Bert Vandenbroucke (bert.vandenbroucke@gmail.com) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_HYDRO_UNPHYSICAL_H +#define SWIFT_HYDRO_UNPHYSICAL_H + +#if defined(GIZMO_UNPHYSICAL_ERROR) || defined(GIZMO_UNPHYSICAL_RESCUE) + +#if defined(GIZMO_UNPHYSICAL_ERROR) + +/*! @brief Crash whenever an unphysical value is detected. 
*/ +#define gizmo_unphysical_message(name, quantity) \ + error("Unphysical " name " detected (%g)!", quantity); + +#elif defined(GIZMO_UNPHYSICAL_WARNING) + +/*! @brief Show a warning whenever an unphysical value is detected. */ +#define gizmo_unphysical_message(name, quantity) \ + message("Unphysical " name " detected (%g), reset to 0!", quantity); + +#else + +/*! @brief Don't tell anyone an unphysical value was detected. */ +#define gizmo_unphysical_message(name, quantity) + +#endif + +#define gizmo_check_physical_quantity(name, quantity) \ + if (quantity < 0.) { \ + gizmo_unphysical_message(name, quantity); \ + quantity = 0.; \ + } + +#else // defined(GIZMO_UNPHYSICAL_ERROR) || defined(GIZMO_UNPHYSICAL_RESCUE) + +#define gizmo_check_physical_quantity(name, quantity) + +#endif // defined(GIZMO_UNPHYSICAL_ERROR) || defined(GIZMO_UNPHYSICAL_RESCUE) + +#endif // SWIFT_HYDRO_UNPHYSICAL_H diff --git a/src/hydro/Gizmo/hydro_velocities.h b/src/hydro/Gizmo/hydro_velocities.h new file mode 100644 index 0000000000000000000000000000000000000000..08ba1f972b2f7a7b8a01ac4750c50a36f69784d0 --- /dev/null +++ b/src/hydro/Gizmo/hydro_velocities.h @@ -0,0 +1,162 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2017 Bert Vandenbroucke (bert.vandenbroucke@gmail.com) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. 
If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_HYDRO_VELOCITIES_H +#define SWIFT_HYDRO_VELOCITIES_H + +/** + * @brief Initialize the GIZMO particle velocities before the start of the + * actual run based on the initial value of the primitive velocity. + * + * @param p The particle to act upon. + * @param xp The extended particle data to act upon. + */ +__attribute__((always_inline)) INLINE static void hydro_velocities_init( + struct part* restrict p, struct xpart* restrict xp) { + +#ifdef GIZMO_FIX_PARTICLES + p->v[0] = 0.; + p->v[1] = 0.; + p->v[2] = 0.; +#else + p->v[0] = p->primitives.v[0]; + p->v[1] = p->primitives.v[1]; + p->v[2] = p->primitives.v[2]; +#endif + + xp->v_full[0] = p->v[0]; + xp->v_full[1] = p->v[1]; + xp->v_full[2] = p->v[2]; +} + +/** + * @brief Set the particle velocity field that will be used to deboost fluid + * velocities during the force loop. + * + * @param p The particle to act upon. + * @param xp The extended particle data to act upon. + */ +__attribute__((always_inline)) INLINE static void +hydro_velocities_prepare_force(struct part* restrict p, + const struct xpart* restrict xp) { + +#ifndef GIZMO_FIX_PARTICLES + p->force.v_full[0] = xp->v_full[0]; + p->force.v_full[1] = xp->v_full[1]; + p->force.v_full[2] = xp->v_full[2]; +#endif +} + +/** + * @brief Set the variables that will be used to update the smoothing length + * during the drift (these will depend on the movement of the particles). + * + * @param p The particle to act upon. + */ +__attribute__((always_inline)) INLINE static void hydro_velocities_end_force( + struct part* restrict p) { + +#ifdef GIZMO_FIX_PARTICLES + /* disable the smoothing length update, since the smoothing lengths should + stay the same for all steps (particles don't move) */ + p->force.h_dt = 0.0f; +#else + /* Add normalization to h_dt. 
*/ + p->force.h_dt *= p->h * hydro_dimension_inv; +#endif +} + +/** + * @brief Set the velocity of a GIZMO particle, based on the values of its + * primitive variables and the geometry of its mesh-free "cell". + * + * @param p The particle to act upon. + * @param xp The extended particle data to act upon. + */ +__attribute__((always_inline)) INLINE static void hydro_velocities_set( + struct part* restrict p, struct xpart* restrict xp) { + +/* We first set the particle velocity. */ +#ifdef GIZMO_FIX_PARTICLES + + p->v[0] = 0.; + p->v[1] = 0.; + p->v[2] = 0.; + +#else // GIZMO_FIX_PARTICLES + + if (p->conserved.mass > 0. && p->primitives.rho > 0.) { + /* Normal case: set particle velocity to fluid velocity. */ + p->v[0] = p->conserved.momentum[0] / p->conserved.mass; + p->v[1] = p->conserved.momentum[1] / p->conserved.mass; + p->v[2] = p->conserved.momentum[2] / p->conserved.mass; + +#ifdef GIZMO_STEER_MOTION + + /* Add a correction to the velocity to keep particle positions close enough + to + the centroid of their mesh-free "cell". */ + /* The correction term below is the same one described in Springel (2010). + */ + float ds[3]; + ds[0] = p->geometry.centroid[0]; + ds[1] = p->geometry.centroid[1]; + ds[2] = p->geometry.centroid[2]; + const float d = sqrtf(ds[0] * ds[0] + ds[1] * ds[1] + ds[2] * ds[2]); + const float R = get_radius_dimension_sphere(p->geometry.volume); + const float eta = 0.25; + const float etaR = eta * R; + const float xi = 1.; + const float soundspeed = + sqrtf(hydro_gamma * p->primitives.P / p->primitives.rho); + /* We only apply the correction if the offset between centroid and position + is + too large. */ + if (d > 0.9 * etaR) { + float fac = xi * soundspeed / d; + if (d < 1.1 * etaR) { + fac *= 5. * (d - 0.9 * etaR) / etaR; + } + p->v[0] -= ds[0] * fac; + p->v[1] -= ds[1] * fac; + p->v[2] -= ds[2] * fac; + } + +#endif // GIZMO_STEER_MOTION + } else { + /* Vacuum particles have no fluid velocity. 
*/ + p->v[0] = 0.; + p->v[1] = 0.; + p->v[2] = 0.; + } + +#endif // GIZMO_FIX_PARTICLES + + /* Now make sure all velocity variables are up to date. */ + xp->v_full[0] = p->v[0]; + xp->v_full[1] = p->v[1]; + xp->v_full[2] = p->v[2]; + + if (p->gpart) { + p->gpart->v_full[0] = p->v[0]; + p->gpart->v_full[1] = p->v[1]; + p->gpart->v_full[2] = p->v[2]; + } +} + +#endif // SWIFT_HYDRO_VELOCITIES_H diff --git a/src/minmax.h b/src/minmax.h index a53093663c79cf4280d136747663552e49c7f1b2..9d92cd71d849dba615fdb05bc342014e0593d989 100644 --- a/src/minmax.h +++ b/src/minmax.h @@ -43,18 +43,4 @@ _a > _b ? _a : _b; \ }) -/** - * @brief Limits the value of x to be between a and b - * - * Only wraps once. If x > 2b, the returned value will be larger than b. - * Similarly for x < -b. - */ -#define box_wrap(x, a, b) \ - ({ \ - const __typeof__(x) _x = (x); \ - const __typeof__(a) _a = (a); \ - const __typeof__(b) _b = (b); \ - _x < _a ? (_x + _b) : ((_x > _b) ? (_x - _b) : _x); \ - }) - #endif /* SWIFT_MINMAX_H */ diff --git a/src/multipole.h b/src/multipole.h index b9d49dcf0fc3b605849f7b058aef14843b73517d..23f5194a30b7316aac15073cba36dc404efa21c1 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -28,7 +28,6 @@ #include <string.h> /* Includes. 
*/ -//#include "active.h" #include "align.h" #include "const.h" #include "error.h" @@ -37,8 +36,8 @@ #include "gravity_softened_derivatives.h" #include "inline.h" #include "kernel_gravity.h" -#include "minmax.h" #include "part.h" +#include "periodic.h" #include "vector_power.h" #define multipole_align 128 diff --git a/src/partition.c b/src/partition.c index 499efab263a9031b0116f073af8cebd5fef0c2eb..c57918745c11d2858b40eefc218e2551e635d6fb 100644 --- a/src/partition.c +++ b/src/partition.c @@ -524,7 +524,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, t->type != task_type_sub_self && t->type != task_type_sub_self && t->type != task_type_ghost && t->type != task_type_kick1 && t->type != task_type_kick2 && t->type != task_type_timestep && - t->type != task_type_drift) + t->type != task_type_drift_part && t->type != task_type_drift_gpart) continue; /* Get the task weight. */ @@ -557,7 +557,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, /* Different weights for different tasks. */ if (t->type == task_type_ghost || t->type == task_type_kick1 || t->type == task_type_kick2 || t->type == task_type_timestep || - t->type == task_type_drift) { + t->type == task_type_drift_part || t->type == task_type_drift_gpart) { /* Particle updates add only to vertex weight. */ if (taskvweights) weights_v[cid] += w; diff --git a/src/periodic.h b/src/periodic.h new file mode 100644 index 0000000000000000000000000000000000000000..5874b8742e89c5c93727111adb5b289cff4cb6a6 --- /dev/null +++ b/src/periodic.h @@ -0,0 +1,75 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk). 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_PERIODIC_H +#define SWIFT_PERIODIC_H + +/* Config parameters. */ +#include "../config.h" + +/* Includes. */ +#include "inline.h" + +/** + * @brief Limits the value of x to be between a and b + * + * Only wraps once. If x > 2b, the returned value will be larger than b. + * Similarly for x < -b. + */ +#define box_wrap(x, a, b) \ + ({ \ + const __typeof__(x) _x = (x); \ + const __typeof__(a) _a = (a); \ + const __typeof__(b) _b = (b); \ + _x < _a ? (_x + _b) : ((_x > _b) ? (_x - _b) : _x); \ + }) + +/** + * @brief Find the smallest distance dx along one axis within a box of size + * box_size + * + * This function evaluates its arguments exactly once. + * + * Only wraps once. If dx > 1.5 * box_size, the returned value will be larger + * than 0.5 * box_size. Similarly for dx < -1.5 * box_size. + * + */ +__attribute__((always_inline)) INLINE static double nearest(double dx, + double box_size) { + return dx > 0.5 * box_size ? (dx - box_size) + : ((dx < -0.5 * box_size) ? (dx + box_size) : dx); +} + +/** + * @brief Find the smallest distance dx along one axis within a box of size + * box_size + * + * This function evaluates its arguments exactly once. + * + * Only wraps once. If dx > 1.5 * box_size, the returned value will be larger + * than 0.5 * box_size. Similarly for dx < -1.5 * box_size. 
+ * + */ +__attribute__((always_inline)) INLINE static float nearestf(float dx, + float box_size) { + return dx > 0.5f * box_size + ? (dx - box_size) + : ((dx < -0.5f * box_size) ? (dx + box_size) : dx); +} + +#endif /* SWIFT_PERIODIC_H */ diff --git a/src/potential/sine_wave/potential.h b/src/potential/sine_wave/potential.h index e2e2b8ffcc170c28a5facc8373a81746811a9991..1a4ee8aae8238c5db4c99eacb9e96bd967bcc7c4 100644 --- a/src/potential/sine_wave/potential.h +++ b/src/potential/sine_wave/potential.h @@ -43,6 +43,9 @@ struct external_potential { /*! Amplitude of the sine wave. */ double amplitude; + /*! Growth time of the potential. */ + double growth_time; + /*! Time-step limiting factor. */ double timestep_limit; }; @@ -76,7 +79,13 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration( double time, const struct external_potential* restrict potential, const struct phys_const* restrict phys_const, struct gpart* restrict g) { - g->a_grav[0] = potential->amplitude * sin(2. * M_PI * g->x[0]) / + float Acorr = 1.; + + if (time < potential->growth_time) { + Acorr = time / potential->growth_time; + } + + g->a_grav[0] = potential->amplitude * Acorr * sin(2. 
* M_PI * g->x[0]) / phys_const->const_newton_G; } @@ -114,6 +123,8 @@ static INLINE void potential_init_backend( potential->amplitude = parser_get_param_double(parameter_file, "SineWavePotential:amplitude"); + potential->growth_time = parser_get_opt_param_double( + parameter_file, "SineWavePotential:growth_time", 0.); potential->timestep_limit = parser_get_param_double( parameter_file, "SineWavePotential:timestep_limit"); } diff --git a/src/riemann.h b/src/riemann.h index 685d40708e598249151f6cbe13be016edea79553..ab6d162514326778e8d6478e07c9bae2947a7c2a 100644 --- a/src/riemann.h +++ b/src/riemann.h @@ -25,10 +25,8 @@ #if defined(RIEMANN_SOLVER_EXACT) #define RIEMANN_SOLVER_IMPLEMENTATION "Exact Riemann solver (Toro 2009)" -#if defined(EOS_IDEAL_GAS) +#if defined(EOS_IDEAL_GAS) || defined(EOS_ISOTHERMAL_GAS) #include "riemann/riemann_exact.h" -#elif defined(EOS_ISOTHERMAL_GAS) -#include "riemann/riemann_exact_isothermal.h" #else #error "The Exact Riemann solver is incompatible with this equation of state!" #endif diff --git a/src/runner.c b/src/runner.c index 8131062cb297157ee21f015edd4fb566a16ff91a..208b59114263ff9300417962fa43c32e0ef0512c 100644 --- a/src/runner.c +++ b/src/runner.c @@ -53,6 +53,7 @@ #include "hydro_properties.h" #include "kick.h" #include "minmax.h" +#include "runner_doiact_fft.h" #include "runner_doiact_vec.h" #include "scheduler.h" #include "sort_part.h" @@ -333,7 +334,7 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, TIMER_TIC; /* Check that the particles have been moved to the current time */ - if (!cell_is_drifted(c, r->e)) error("Sorting un-drifted cell"); + if (!cell_are_part_drifted(c, r->e)) error("Sorting un-drifted cell"); #ifdef SWIFT_DEBUG_CHECKS /* Make sure the sort flags are consistent (downward). */ @@ -842,19 +843,35 @@ void runner_do_unskip_mapper(void *map_data, int num_elements, } } /** - * @brief Drift particles in real space. + * @brief Drift all part in a cell. 
* * @param r The runner thread. * @param c The cell. * @param timer Are we timing this ? */ -void runner_do_drift_particles(struct runner *r, struct cell *c, int timer) { +void runner_do_drift_part(struct runner *r, struct cell *c, int timer) { TIMER_TIC; - cell_drift_particles(c, r->e); + cell_drift_part(c, r->e); - if (timer) TIMER_TOC(timer_drift); + if (timer) TIMER_TOC(timer_drift_part); +} + +/** + * @brief Drift all gpart in a cell. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer) { + + TIMER_TIC; + + cell_drift_gpart(c, r->e); + + if (timer) TIMER_TOC(timer_drift_gpart); } /** @@ -1525,7 +1542,7 @@ void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts, /* ... and store. */ c->ti_end_min = ti_end_min; c->ti_end_max = ti_end_max; - c->ti_old = ti_current; + c->ti_old_part = ti_current; c->h_max = h_max; if (timer) TIMER_TOC(timer_dorecv_part); @@ -1599,7 +1616,7 @@ void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { /* ... and store. */ c->ti_end_min = ti_end_min; c->ti_end_max = ti_end_max; - c->ti_old = ti_current; + c->ti_old_gpart = ti_current; if (timer) TIMER_TOC(timer_dorecv_gpart); @@ -1672,7 +1689,7 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { /* ... and store. 
*/ c->ti_end_min = ti_end_min; c->ti_end_max = ti_end_max; - c->ti_old = ti_current; + c->ti_old_gpart = ti_current; if (timer) TIMER_TOC(timer_dorecv_spart); @@ -1730,15 +1747,18 @@ void *runner_main(void *data) { #ifdef SWIFT_DEBUG_CHECKS t->ti_run = e->ti_current; #ifndef WITH_MPI - if (ci == NULL && cj == NULL) { + if (t->type == task_type_grav_top_level) { + if (ci != NULL || cj != NULL) + error("Top-level gravity task associated with a cell"); + } else if (ci == NULL && cj == NULL) { error("Task not associated with cells!"); - } else if (cj == NULL) { /* self */ if (!cell_is_active(ci, e) && t->type != task_type_sort && t->type != task_type_send && t->type != task_type_recv && - t->type != task_type_kick1 && t->type != task_type_drift) + t->type != task_type_kick1 && t->type != task_type_drift_part && + t->type != task_type_drift_gpart) error( "Task (type='%s/%s') should have been skipped ti_current=%lld " "c->ti_end_min=%lld", @@ -1868,8 +1888,11 @@ void *runner_main(void *data) { runner_do_extra_ghost(r, ci, 1); break; #endif - case task_type_drift: - runner_do_drift_particles(r, ci, 1); + case task_type_drift_part: + runner_do_drift_part(r, ci, 1); + break; + case task_type_drift_gpart: + runner_do_drift_gpart(r, ci, 1); break; case task_type_kick1: runner_do_kick1(r, ci, 1); @@ -1896,6 +1919,8 @@ void *runner_main(void *data) { runner_do_recv_part(r, ci, 1, 1); } else if (t->subtype == task_subtype_rho) { runner_do_recv_part(r, ci, 1, 1); + } else if (t->subtype == task_subtype_gradient) { + runner_do_recv_part(r, ci, 1, 1); } else if (t->subtype == task_subtype_gpart) { runner_do_recv_gpart(r, ci, 1); } else if (t->subtype == task_subtype_spart) { @@ -1907,14 +1932,11 @@ void *runner_main(void *data) { } break; #endif - case task_type_grav_mm: - // runner_do_grav_mm(r, t->ci, 1); - break; case task_type_grav_down: runner_do_grav_down(r, t->ci, 1); break; case task_type_grav_top_level: - // runner_do_grav_top_level(r); + runner_do_grav_fft(r, 1); break; 
case task_type_grav_long_range: runner_do_grav_long_range(r, t->ci, 1); diff --git a/src/runner.h b/src/runner.h index 32b8a16fe0ec67772089fc7283833d9814f5525f..facadf1608fb7e06af952eedbf1151fa68530bef 100644 --- a/src/runner.h +++ b/src/runner.h @@ -63,13 +63,15 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer); void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer); void runner_do_sort(struct runner *r, struct cell *c, int flag, int cleanup, int clock); -void runner_do_drift_particles(struct runner *r, struct cell *c, int timer); +void runner_do_drift_part(struct runner *r, struct cell *c, int timer); +void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer); void runner_do_kick1(struct runner *r, struct cell *c, int timer); void runner_do_kick2(struct runner *r, struct cell *c, int timer); void runner_do_end_force(struct runner *r, struct cell *c, int timer); void runner_do_init(struct runner *r, struct cell *c, int timer); void runner_do_cooling(struct runner *r, struct cell *c, int timer); void runner_do_grav_external(struct runner *r, struct cell *c, int timer); +void runner_do_grav_fft(struct runner *r, int timer); void *runner_main(void *data); void runner_do_unskip_mapper(void *map_data, int num_elements, void *extra_data); diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 8e46b0aa33c9e3537ab7a94c3042f54b3e6ea24d..b96c25cf8d71875ad588e46db4c5fb7b53fa7753 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -899,7 +899,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { /* Anything to do here? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e)) + if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) error("Interacting undrifted cells."); /* Get the sort ID. */ @@ -1145,7 +1145,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Anything to do here? 
*/ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e)) + if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) error("Interacting undrifted cells."); /* Get the shift ID. */ @@ -1597,7 +1597,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { if (!cell_is_active(c, e)) return; - if (!cell_is_drifted(c, e)) error("Interacting undrifted cell."); + if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell."); struct part *restrict parts = c->parts; const int count = c->count; @@ -1846,7 +1846,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { if (!cell_is_active(c, e)) return; - if (!cell_is_drifted(c, e)) error("Cell is not drifted"); + if (!cell_are_part_drifted(c, e)) error("Cell is not drifted"); struct part *restrict parts = c->parts; const int count = c->count; @@ -2276,7 +2276,7 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { /* Make sure both cells are drifted to the current timestep. */ - if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e)) + if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) error("Interacting undrifted cells."); /* Do any of the cells need to be sorted first? */ @@ -2330,7 +2330,7 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { else { /* Drift the cell to the current timestep if needed. */ - if (!cell_is_drifted(ci, r->e)) error("Interacting undrifted cell."); + if (!cell_are_part_drifted(ci, r->e)) error("Interacting undrifted cell."); #if (DOSELF1 == runner_doself1_density) && defined(WITH_VECTORIZATION) && \ defined(GADGET2_SPH) @@ -2581,7 +2581,7 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { /* Make sure both cells are drifted to the current timestep. 
*/ - if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e)) + if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) error("Interacting undrifted cells."); /* Do any of the cells need to be sorted first? */ @@ -3198,7 +3198,7 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { /* Do any of the cells need to be drifted first? */ - if (!cell_is_drifted(cj, e)) error("Cell should be drifted!"); + if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); DOPAIR_SUBSET(r, ci, parts, ind, count, cj); } diff --git a/src/runner_doiact_fft.c b/src/runner_doiact_fft.c index 076ec2578361127266c637cc5ac224609b702c66..a3e3f38fba920c0c58d600bb25feda88d4a3cf84 100644 --- a/src/runner_doiact_fft.c +++ b/src/runner_doiact_fft.c @@ -31,6 +31,287 @@ #include "runner_doiact_fft.h" /* Local includes. */ +#include "engine.h" +#include "error.h" #include "runner.h" +#include "space.h" +#include "timers.h" -void runner_do_grav_fft(struct runner *r) {} +#ifdef HAVE_FFTW + +/** + * @brief Returns 1D index of a 3D NxNxN array using row-major style. + * + * @param i Index along x. + * @param j Index along y. + * @param k Index along z. + * @param N Size of the array along one axis. + */ +__attribute__((always_inline)) INLINE static int row_major_id(int i, int j, + int k, int N) { + return ((i % N) * N * N + (j % N) * N + (k % N)); +} + +/** + * @brief Assigns a given multipole to a density mesh using the CIC method. + * + * @param m The #multipole. + * @param rho The density mesh. + * @param N the size of the mesh along one axis. + * @param fac The width of a mesh cell. + */ +__attribute__((always_inline)) INLINE static void multipole_to_mesh_CIC( + const struct gravity_tensors* m, double* rho, int N, double fac) { + + int i = (int)(fac * m->CoM[0]); + if (i >= N) i = N - 1; + const double dx = fac * m->CoM[0] - i; + const double tx = 1. 
- dx; + + int j = (int)(fac * m->CoM[1]); + if (j >= N) j = N - 1; + const double dy = fac * m->CoM[1] - j; + const double ty = 1. - dy; + + int k = (int)(fac * m->CoM[2]); + if (k >= N) k = N - 1; + const double dz = fac * m->CoM[2] - k; + const double tz = 1. - dz; + +#ifdef SWIFT_DEBUG_CHECKS + if (i < 0 || i >= N) error("Invalid multipole position in x"); + if (j < 0 || j >= N) error("Invalid multipole position in y"); + if (k < 0 || k >= N) error("Invalid multipole position in z"); +#endif + + /* CIC ! */ + rho[row_major_id(i + 0, j + 0, k + 0, N)] += m->m_pole.M_000 * tx * ty * tz; + rho[row_major_id(i + 0, j + 0, k + 1, N)] += m->m_pole.M_000 * tx * ty * dz; + rho[row_major_id(i + 0, j + 1, k + 0, N)] += m->m_pole.M_000 * tx * dy * tz; + rho[row_major_id(i + 0, j + 1, k + 1, N)] += m->m_pole.M_000 * tx * dy * dz; + rho[row_major_id(i + 1, j + 0, k + 0, N)] += m->m_pole.M_000 * dx * ty * tz; + rho[row_major_id(i + 1, j + 0, k + 1, N)] += m->m_pole.M_000 * dx * ty * dz; + rho[row_major_id(i + 1, j + 1, k + 0, N)] += m->m_pole.M_000 * dx * dy * tz; + rho[row_major_id(i + 1, j + 1, k + 1, N)] += m->m_pole.M_000 * dx * dy * dz; +} + +/** + * @brief Computes the potential on a multipole from a given mesh using the CIC + * method. + * + * @param m The #multipole. + * @param pot The potential mesh. + * @param N the size of the mesh along one axis. + * @param fac width of a mesh cell. + */ +__attribute__((always_inline)) INLINE static void mesh_to_multipole_CIC( + struct gravity_tensors* m, double* pot, int N, double fac) { + + int i = (int)(fac * m->CoM[0]); + if (i >= N) i = N - 1; + const double dx = fac * m->CoM[0] - i; + const double tx = 1. - dx; + + int j = (int)(fac * m->CoM[1]); + if (j >= N) j = N - 1; + const double dy = fac * m->CoM[1] - j; + const double ty = 1. - dy; + + int k = (int)(fac * m->CoM[2]); + if (k >= N) k = N - 1; + const double dz = fac * m->CoM[2] - k; + const double tz = 1. 
- dz; + +#ifdef SWIFT_DEBUG_CHECKS + if (i < 0 || i >= N) error("Invalid multipole position in x"); + if (j < 0 || j >= N) error("Invalid multipole position in y"); + if (k < 0 || k >= N) error("Invalid multipole position in z"); +#endif + + /* CIC ! */ + m->pot.F_000 += pot[row_major_id(i + 0, j + 0, k + 0, N)] * tx * ty * tz; + m->pot.F_000 += pot[row_major_id(i + 0, j + 0, k + 1, N)] * tx * ty * dz; + m->pot.F_000 += pot[row_major_id(i + 0, j + 1, k + 0, N)] * tx * dy * tz; + m->pot.F_000 += pot[row_major_id(i + 0, j + 1, k + 1, N)] * tx * dy * dz; + m->pot.F_000 += pot[row_major_id(i + 1, j + 0, k + 0, N)] * dx * ty * tz; + m->pot.F_000 += pot[row_major_id(i + 1, j + 0, k + 1, N)] * dx * ty * dz; + m->pot.F_000 += pot[row_major_id(i + 1, j + 1, k + 0, N)] * dx * dy * tz; + m->pot.F_000 += pot[row_major_id(i + 1, j + 1, k + 1, N)] * dx * dy * dz; +} + +#endif + +/** + * @brief Computes the potential on the top multipoles using a Fourier transform + * + * @param r The #runner task + * @param timer Are we timing this ? 
+ */ +void runner_do_grav_fft(struct runner* r, int timer) { + +#ifdef HAVE_FFTW + + const struct engine* e = r->e; + const struct space* s = e->s; + const integertime_t ti_current = e->ti_current; + const double a_smooth = e->gravity_properties->a_smooth; + const double box_size = s->dim[0]; + const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; + + TIMER_TIC; + + if (cdim[0] != cdim[1] || cdim[0] != cdim[2]) error("Non-square mesh"); + + /* Some useful constants */ + const int N = cdim[0]; + const int N_half = N / 2; + const double cell_fac = N / box_size; + + /* Recover the list of top-level multipoles */ + const int nr_cells = s->nr_cells; + struct gravity_tensors* restrict multipoles = s->multipoles_top; + struct cell* cells = s->cells_top; + + /* Make sure everything has been drifted to the current point */ + for (int i = 0; i < nr_cells; ++i) + if (cells[i].ti_old_multipole != ti_current) + cell_drift_multipole(&cells[i], e); + // error("Top-level multipole %d not drifted", i); + + /* Allocates some memory for the density mesh */ + double* restrict rho = fftw_alloc_real(N * N * N); + if (rho == NULL) error("Error allocating memory for density mesh"); + + /* Allocates some memory for the mesh in Fourier space */ + fftw_complex* restrict frho = fftw_alloc_complex(N * N * (N_half + 1)); + if (frho == NULL) + error("Error allocating memory for transform of density mesh"); + + /* Prepare the FFT library */ + fftw_plan forward_plan = fftw_plan_dft_r2c_3d( + N, N, N, rho, frho, FFTW_ESTIMATE | FFTW_DESTROY_INPUT); + fftw_plan inverse_plan = fftw_plan_dft_c2r_3d( + N, N, N, frho, rho, FFTW_ESTIMATE | FFTW_DESTROY_INPUT); + + /* Do a CIC mesh assignment of the multipoles */ + bzero(rho, N * N * N * sizeof(double)); + for (int i = 0; i < nr_cells; ++i) + multipole_to_mesh_CIC(&multipoles[i], rho, N, cell_fac); + + /* Fourier transform to go to magic-land */ + fftw_execute(forward_plan); + + /* frho now contains the Fourier transform of the density field */ + /* 
frho contains NxNx(N/2+1) complex numbers */ + + /* Some common factors */ + const double green_fac = -1. / (M_PI * box_size); + const double a_smooth2 = 4. * M_PI * a_smooth * a_smooth / ((double)(N * N)); + const double k_fac = M_PI / (double)N; + + /* Now de-convolve the CIC kernel and apply the Green function */ + for (int i = 0; i < N; ++i) { + + /* kx component of vector in Fourier space and 1/sinc(kx) */ + const int kx = (i > N_half ? i - N : i); + const double kx_d = (double)kx; + const double fx = k_fac * kx_d; + const double sinc_kx_inv = (kx != 0) ? fx / sin(fx) : 1.; + + for (int j = 0; j < N; ++j) { + + /* ky component of vector in Fourier space and 1/sinc(ky) */ + const int ky = (j > N_half ? j - N : j); + const double ky_d = (double)ky; + const double fy = k_fac * ky_d; + const double sinc_ky_inv = (ky != 0) ? fy / sin(fy) : 1.; + + for (int k = 0; k < N_half + 1; ++k) { + + /* kz component of vector in Fourier space and 1/sinc(kz) */ + const int kz = (k > N_half ? k - N : k); + const double kz_d = (double)kz; + const double fz = k_fac * kz_d; + const double sinc_kz_inv = (kz != 0) ? fz / sin(fz) : 1.; + + /* Norm of vector in Fourier space */ + const double k2 = (kx_d * kx_d + ky_d * ky_d + kz_d * kz_d); + + /* Avoid FPEs... */ + if (k2 == 0.) 
continue; + + /* Green function */ + const double green_cor = green_fac * exp(-k2 * a_smooth2) / k2; + + /* Deconvolution of CIC */ + const double CIC_cor = sinc_kx_inv * sinc_ky_inv * sinc_kz_inv; + const double CIC_cor2 = CIC_cor * CIC_cor; + const double CIC_cor4 = CIC_cor2 * CIC_cor2; + + /* Combined correction */ + const double total_cor = green_cor * CIC_cor4; + + /* Apply to the mesh */ + const int index = N * (N_half + 1) * i + (N_half + 1) * j + k; + frho[index][0] *= total_cor; + frho[index][1] *= total_cor; + } + } + } + + /* Correct singularity at (0,0,0) */ + frho[0][0] = 0.; + frho[0][1] = 0.; + + /* Fourier transform to come back from magic-land */ + fftw_execute(inverse_plan); + + /* rho now contains the potential */ + /* This array is now again NxNxN real numbers */ + + /* Get the potential from the mesh using CIC */ + for (int i = 0; i < nr_cells; ++i) + mesh_to_multipole_CIC(&multipoles[i], rho, N, cell_fac); + + /* Clean-up the mess */ + fftw_destroy_plan(forward_plan); + fftw_destroy_plan(inverse_plan); + fftw_free(rho); + fftw_free(frho); + + /* Time the whole thing */ + if (timer) TIMER_TOC(timer_dograv_top_level); + +#else + error("No FFTW library found. 
Cannot compute periodic long-range forces."); +#endif +} + +#ifdef HAVE_FFTW +void print_array(double* array, int N) { + + for (int k = N - 1; k >= 0; --k) { + printf("--- z = %d ---------\n", k); + for (int j = N - 1; j >= 0; --j) { + for (int i = 0; i < N; ++i) { + printf("%f ", array[i * N * N + j * N + k]); + } + printf("\n"); + } + } +} + +void print_carray(fftw_complex* array, int N) { + + for (int k = N - 1; k >= 0; --k) { + printf("--- z = %d ---------\n", k); + for (int j = N - 1; j >= 0; --j) { + for (int i = 0; i < N; ++i) { + printf("(%f %f) ", array[i * N * N + j * N + k][0], + array[i * N * N + j * N + k][1]); + } + printf("\n"); + } + } +} +#endif /* HAVE_FFTW */ diff --git a/src/runner_doiact_fft.h b/src/runner_doiact_fft.h index 263662383fb465dcf945e55494a569289b009ff9..e9836311e71803952969b9c9316e8c81676d2dd8 100644 --- a/src/runner_doiact_fft.h +++ b/src/runner_doiact_fft.h @@ -21,6 +21,6 @@ struct runner; -void runner_do_grav_fft(struct runner *r); +void runner_do_grav_fft(struct runner *r, int timer); #endif /* SWIFT_RUNNER_DOIACT_FFT_H */ diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 13a55344d773e7fba000d680eae9866dffdd88e1..a66cc5e0c9ed241aba3bb1b4329016b8e505e280 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -182,8 +182,8 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) { if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; /* Let's start by drifting things */ - if (!cell_is_drifted(ci, e)) cell_drift_particles(ci, e); - if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); + if (!cell_are_gpart_drifted(ci, e)) cell_drift_gpart(ci, e); + if (!cell_are_gpart_drifted(cj, e)) cell_drift_gpart(cj, e); #if ICHECK > 0 for (int pid = 0; pid < gcount_i; pid++) { @@ -318,7 +318,7 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) { if (!cell_is_active(c, e)) return; /* Do we need to start by drifting things ? 
*/ - if (!cell_is_drifted(c, e)) cell_drift_particles(c, e); + if (!cell_are_gpart_drifted(c, e)) cell_drift_gpart(c, e); #if ICHECK > 0 for (int pid = 0; pid < gcount; pid++) { @@ -429,6 +429,11 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj, /* Sanity check */ if (ci == cj) error("Pair interaction between a cell and itself."); + + if (cell_is_active(ci, e) && ci->ti_old_multipole != e->ti_current) + error("ci->multipole not drifted."); + if (cell_is_active(cj, e) && cj->ti_old_multipole != e->ti_current) + error("cj->multipole not drifted."); #endif #if ICHECK > 0 diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index 6a0ee98a7f46feb8d6754e010034156c04bf4d66..23b66ddcc11dd5cbd52da354b7051af799250e3c 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -381,7 +381,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( if (!cell_is_active(c, e)) return; - if (!cell_is_drifted(c, e)) error("Interacting undrifted cell."); + if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell."); /* Get the particle cache from the runner and re-allocate * the cache if it is not big enough for the cell. */ @@ -604,370 +604,6 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( #endif /* WITH_VECTORIZATION */ } -/** - * @brief Compute the cell self-interaction (non-symmetric) using vector - * intrinsics with two particle pis at a time. - * - * CURRENTLY BROKEN DO NOT USE. - * - * @param r The #runner. - * @param c The #cell. 
- */ -__attribute__((always_inline)) INLINE void runner_doself1_density_vec_2( - struct runner *r, struct cell *restrict c) { - -#ifdef WITH_VECTORIZATION - const struct engine *e = r->e; - int doi_mask; - int doi2_mask; - struct part *restrict pi; - struct part *restrict pi2; - int count_align; - - vector v_hi, v_vix, v_viy, v_viz, v_hig2, v_r2; - vector v_hi2, v_vix2, v_viy2, v_viz2, v_hig2_2, v2_r2; - - TIMER_TIC - - if (!cell_is_active(c, e)) return; - - if (!cell_is_drifted(c, e)) cell_drift_particles(c, e); - - /* TODO: Need to find two active particles, not just one. */ - - struct part *restrict parts = c->parts; - const int count = c->count; - - /* Get the particle cache from the runner and re-allocate - * the cache if it is not big enough for the cell. */ - struct cache *restrict cell_cache = &r->ci_cache; - - if (cell_cache->count < count) { - cache_init(cell_cache, count); - } - - /* Read the particles from the cell and store them locally in the cache. */ - cache_read_particles(c, &r->ci_cache); - - /* Create two secondary caches. */ - int icount = 0, icount_align = 0; - struct c2_cache int_cache; - - int icount2 = 0, icount_align2 = 0; - struct c2_cache int_cache2; - - /* Loop over the particles in the cell. */ - for (int pid = 0; pid < count; pid += 2) { - - /* Get a pointer to the ith particle and next i particle. */ - pi = &parts[pid]; - pi2 = &parts[pid + 1]; - - /* Is the ith particle active? */ - if (!part_is_active(pi, e)) continue; - - vector pix, piy, piz; - vector pix2, piy2, piz2; - - const float hi = cell_cache->h[pid]; - const float hi2 = cell_cache->h[pid + 1]; - - /* Fill pi position vector. 
*/ - pix.v = vec_set1(cell_cache->x[pid]); - piy.v = vec_set1(cell_cache->y[pid]); - piz.v = vec_set1(cell_cache->z[pid]); - v_hi.v = vec_set1(hi); - v_vix.v = vec_set1(cell_cache->vx[pid]); - v_viy.v = vec_set1(cell_cache->vy[pid]); - v_viz.v = vec_set1(cell_cache->vz[pid]); - - pix2.v = vec_set1(cell_cache->x[pid + 1]); - piy2.v = vec_set1(cell_cache->y[pid + 1]); - piz2.v = vec_set1(cell_cache->z[pid + 1]); - v_hi2.v = vec_set1(hi2); - v_vix2.v = vec_set1(cell_cache->vx[pid + 1]); - v_viy2.v = vec_set1(cell_cache->vy[pid + 1]); - v_viz2.v = vec_set1(cell_cache->vz[pid + 1]); - - const float hig2 = hi * hi * kernel_gamma2; - const float hig2_2 = hi2 * hi2 * kernel_gamma2; - v_hig2.v = vec_set1(hig2); - v_hig2_2.v = vec_set1(hig2_2); - - vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum, - curlvySum, curlvzSum; - vector rhoSum2, rho_dhSum2, wcountSum2, wcount_dhSum2, div_vSum2, - curlvxSum2, curlvySum2, curlvzSum2; - - vector v_hi_inv, v_hi_inv2; - - v_hi_inv = vec_reciprocal(v_hi); - v_hi_inv2 = vec_reciprocal(v_hi2); - - rhoSum.v = vec_setzero(); - rho_dhSum.v = vec_setzero(); - wcountSum.v = vec_setzero(); - wcount_dhSum.v = vec_setzero(); - div_vSum.v = vec_setzero(); - curlvxSum.v = vec_setzero(); - curlvySum.v = vec_setzero(); - curlvzSum.v = vec_setzero(); - - rhoSum2.v = vec_setzero(); - rho_dhSum2.v = vec_setzero(); - wcountSum2.v = vec_setzero(); - wcount_dhSum2.v = vec_setzero(); - div_vSum2.v = vec_setzero(); - curlvxSum2.v = vec_setzero(); - curlvySum2.v = vec_setzero(); - curlvzSum2.v = vec_setzero(); - - /* Pad cache if there is a serial remainder. 
*/ - count_align = count; - int rem = count % (NUM_VEC_PROC * VEC_SIZE); - if (rem != 0) { - int pad = (NUM_VEC_PROC * VEC_SIZE) - rem; - - count_align += pad; - /* Set positions to the same as particle pi so when the r2 > 0 mask is - * applied these extra contributions are masked out.*/ - for (int i = count; i < count_align; i++) { - cell_cache->x[i] = pix.f[0]; - cell_cache->y[i] = piy.f[0]; - cell_cache->z[i] = piz.f[0]; - } - } - - vector pjx, pjy, pjz; - vector pjvx, pjvy, pjvz, mj; - vector pjx2, pjy2, pjz2; - vector pjvx2, pjvy2, pjvz2, mj2; - - /* Find all of particle pi's interacions and store needed values in - * secondary cache.*/ - for (int pjd = 0; pjd < count_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) { - - /* Load 2 sets of vectors from the particle cache. */ - pjx.v = vec_load(&cell_cache->x[pjd]); - pjy.v = vec_load(&cell_cache->y[pjd]); - pjz.v = vec_load(&cell_cache->z[pjd]); - pjvx.v = vec_load(&cell_cache->vx[pjd]); - pjvy.v = vec_load(&cell_cache->vy[pjd]); - pjvz.v = vec_load(&cell_cache->vz[pjd]); - mj.v = vec_load(&cell_cache->m[pjd]); - - pjx2.v = vec_load(&cell_cache->x[pjd + VEC_SIZE]); - pjy2.v = vec_load(&cell_cache->y[pjd + VEC_SIZE]); - pjz2.v = vec_load(&cell_cache->z[pjd + VEC_SIZE]); - pjvx2.v = vec_load(&cell_cache->vx[pjd + VEC_SIZE]); - pjvy2.v = vec_load(&cell_cache->vy[pjd + VEC_SIZE]); - pjvz2.v = vec_load(&cell_cache->vz[pjd + VEC_SIZE]); - mj2.v = vec_load(&cell_cache->m[pjd + VEC_SIZE]); - - /* Compute the pairwise distance. 
*/ - vector v_dx_tmp, v_dy_tmp, v_dz_tmp; - vector v_dx_tmp2, v_dy_tmp2, v_dz_tmp2, v_r2_2; - vector v_dx2_tmp, v_dy2_tmp, v_dz2_tmp; - vector v_dx2_tmp2, v_dy2_tmp2, v_dz2_tmp2, v2_r2_2; - - v_dx_tmp.v = vec_sub(pix.v, pjx.v); - v_dy_tmp.v = vec_sub(piy.v, pjy.v); - v_dz_tmp.v = vec_sub(piz.v, pjz.v); - v_dx_tmp2.v = vec_sub(pix.v, pjx2.v); - v_dy_tmp2.v = vec_sub(piy.v, pjy2.v); - v_dz_tmp2.v = vec_sub(piz.v, pjz2.v); - - v_dx2_tmp.v = vec_sub(pix2.v, pjx.v); - v_dy2_tmp.v = vec_sub(piy2.v, pjy.v); - v_dz2_tmp.v = vec_sub(piz2.v, pjz.v); - v_dx2_tmp2.v = vec_sub(pix2.v, pjx2.v); - v_dy2_tmp2.v = vec_sub(piy2.v, pjy2.v); - v_dz2_tmp2.v = vec_sub(piz2.v, pjz2.v); - - v_r2.v = vec_mul(v_dx_tmp.v, v_dx_tmp.v); - v_r2.v = vec_fma(v_dy_tmp.v, v_dy_tmp.v, v_r2.v); - v_r2.v = vec_fma(v_dz_tmp.v, v_dz_tmp.v, v_r2.v); - v_r2_2.v = vec_mul(v_dx_tmp2.v, v_dx_tmp2.v); - v_r2_2.v = vec_fma(v_dy_tmp2.v, v_dy_tmp2.v, v_r2_2.v); - v_r2_2.v = vec_fma(v_dz_tmp2.v, v_dz_tmp2.v, v_r2_2.v); - - v2_r2.v = vec_mul(v_dx2_tmp.v, v_dx2_tmp.v); - v2_r2.v = vec_fma(v_dy2_tmp.v, v_dy2_tmp.v, v2_r2.v); - v2_r2.v = vec_fma(v_dz2_tmp.v, v_dz2_tmp.v, v2_r2.v); - v2_r2_2.v = vec_mul(v_dx2_tmp2.v, v_dx2_tmp2.v); - v2_r2_2.v = vec_fma(v_dy2_tmp2.v, v_dy2_tmp2.v, v2_r2_2.v); - v2_r2_2.v = vec_fma(v_dz2_tmp2.v, v_dz2_tmp2.v, v2_r2_2.v); - -/* Form a mask from r2 < hig2 and r2 > 0.*/ -#ifdef HAVE_AVX512_F - // KNL_MASK_16 doi_mask, doi_mask_check, doi_mask2, doi_mask2_check; - KNL_MASK_16 doi_mask_check, doi_mask2, doi_mask2_check; - KNL_MASK_16 doi2_mask_check, doi2_mask2, doi2_mask2_check; - - doi_mask_check = vec_cmp_gt(v_r2.v, vec_setzero()); - doi_mask = vec_cmp_lt(v_r2.v, v_hig2.v); - - doi2_mask_check = vec_cmp_gt(v2_r2.v, vec_setzero()); - doi2_mask = vec_cmp_lt(v2_r2.v, v_hig2_2.v); - - doi_mask2_check = vec_cmp_gt(v_r2_2.v, vec_setzero()); - doi_mask2 = vec_cmp_lt(v_r2_2.v, v_hig2.v); - - doi2_mask2_check = vec_cmp_gt(v2_r2_2.v, vec_setzero()); - doi2_mask2 = vec_cmp_lt(v2_r2_2.v, 
v_hig2_2.v); - - doi_mask = doi_mask & doi_mask_check; - doi_mask2 = doi_mask2 & doi_mask2_check; - - doi2_mask = doi2_mask & doi2_mask_check; - doi2_mask2 = doi2_mask2 & doi2_mask2_check; -#else - vector v_doi_mask, v_doi_mask_check, v_doi_mask2, v_doi_mask2_check; - int doi_mask2; - - vector v_doi2_mask, v_doi2_mask_check, v_doi2_mask2, v_doi2_mask2_check; - int doi2_mask2; - - v_doi_mask_check.v = vec_cmp_gt(v_r2.v, vec_setzero()); - v_doi_mask.v = vec_cmp_lt(v_r2.v, v_hig2.v); - - v_doi2_mask_check.v = vec_cmp_gt(v2_r2.v, vec_setzero()); - v_doi2_mask.v = vec_cmp_lt(v2_r2.v, v_hig2_2.v); - - v_doi_mask2_check.v = vec_cmp_gt(v_r2_2.v, vec_setzero()); - v_doi_mask2.v = vec_cmp_lt(v_r2_2.v, v_hig2.v); - - v_doi2_mask2_check.v = vec_cmp_gt(v2_r2_2.v, vec_setzero()); - v_doi2_mask2.v = vec_cmp_lt(v2_r2_2.v, v_hig2_2.v); - - doi_mask = vec_cmp_result(vec_and(v_doi_mask.v, v_doi_mask_check.v)); - doi_mask2 = vec_cmp_result(vec_and(v_doi_mask2.v, v_doi_mask2_check.v)); - doi2_mask = vec_cmp_result(vec_and(v_doi2_mask.v, v_doi2_mask_check.v)); - doi2_mask2 = - vec_cmp_result(vec_and(v_doi2_mask2.v, v_doi2_mask2_check.v)); -#endif /* HAVE_AVX512_F */ - - /* Hit or miss? */ - // if (doi_mask) { - storeInteractions(doi_mask, pjd, &v_r2, &v_dx_tmp, &v_dy_tmp, &v_dz_tmp, - &mj, &pjvx, &pjvy, &pjvz, cell_cache, &int_cache, - &icount, &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum, - &div_vSum, &curlvxSum, &curlvySum, &curlvzSum, v_hi_inv, - v_vix, v_viy, v_viz); - //} - // if (doi2_mask) { - storeInteractions( - doi2_mask, pjd, &v2_r2, &v_dx2_tmp, &v_dy2_tmp, &v_dz2_tmp, &mj, - &pjvx, &pjvy, &pjvz, cell_cache, &int_cache2, &icount2, &rhoSum2, - &rho_dhSum2, &wcountSum2, &wcount_dhSum2, &div_vSum2, &curlvxSum2, - &curlvySum2, &curlvzSum2, v_hi_inv2, v_vix2, v_viy2, v_viz2); - //} - /* Hit or miss? 
*/ - // if (doi_mask2) { - storeInteractions(doi_mask2, pjd + VEC_SIZE, &v_r2_2, &v_dx_tmp2, - &v_dy_tmp2, &v_dz_tmp2, &mj2, &pjvx2, &pjvy2, &pjvz2, - cell_cache, &int_cache, &icount, &rhoSum, &rho_dhSum, - &wcountSum, &wcount_dhSum, &div_vSum, &curlvxSum, - &curlvySum, &curlvzSum, v_hi_inv, v_vix, v_viy, v_viz); - //} - // if (doi2_mask2) { - storeInteractions(doi2_mask2, pjd + VEC_SIZE, &v2_r2_2, &v_dx2_tmp2, - &v_dy2_tmp2, &v_dz2_tmp2, &mj2, &pjvx2, &pjvy2, &pjvz2, - cell_cache, &int_cache2, &icount2, &rhoSum2, - &rho_dhSum2, &wcountSum2, &wcount_dhSum2, &div_vSum2, - &curlvxSum2, &curlvySum2, &curlvzSum2, v_hi_inv2, - v_vix2, v_viy2, v_viz2); - //} - } - - /* Perform padded vector remainder interactions if any are present. */ - calcRemInteractions(&int_cache, icount, &rhoSum, &rho_dhSum, &wcountSum, - &wcount_dhSum, &div_vSum, &curlvxSum, &curlvySum, - &curlvzSum, v_hi_inv, v_vix, v_viy, v_viz, - &icount_align); - - calcRemInteractions(&int_cache2, icount2, &rhoSum2, &rho_dhSum2, - &wcountSum2, &wcount_dhSum2, &div_vSum2, &curlvxSum2, - &curlvySum2, &curlvzSum2, v_hi_inv2, v_vix2, v_viy2, - v_viz2, &icount_align2); - - /* Initialise masks to true incase remainder interactions have been - * performed. */ - vector int_mask, int_mask2; - vector int2_mask, int2_mask2; -#ifdef HAVE_AVX512_F - KNL_MASK_16 knl_mask = 0xFFFF; - KNL_MASK_16 knl_mask2 = 0xFFFF; - int_mask.m = vec_setint1(0xFFFFFFFF); - int_mask2.m = vec_setint1(0xFFFFFFFF); - int2_mask.m = vec_setint1(0xFFFFFFFF); - int2_mask2.m = vec_setint1(0xFFFFFFFF); -#else - int_mask.m = vec_setint1(0xFFFFFFFF); - int_mask2.m = vec_setint1(0xFFFFFFFF); - - int2_mask.m = vec_setint1(0xFFFFFFFF); - int2_mask2.m = vec_setint1(0xFFFFFFFF); -#endif - - /* Perform interaction with 2 vectors. 
*/ - for (int pjd = 0; pjd < icount_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) { - runner_iact_nonsym_2_vec_density( - &int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd], - &int_cache.dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz, - &int_cache.vxq[pjd], &int_cache.vyq[pjd], &int_cache.vzq[pjd], - &int_cache.mq[pjd], &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum, - &div_vSum, &curlvxSum, &curlvySum, &curlvzSum, int_mask, int_mask2, -#ifdef HAVE_AVX512_F - knl_mask, knl_mask2); -#else - 0, 0); -#endif - } - - for (int pjd = 0; pjd < icount_align2; pjd += (NUM_VEC_PROC * VEC_SIZE)) { - runner_iact_nonsym_2_vec_density( - &int_cache2.r2q[pjd], &int_cache2.dxq[pjd], &int_cache2.dyq[pjd], - &int_cache2.dzq[pjd], v_hi_inv2, v_vix2, v_viy2, v_viz2, - &int_cache2.vxq[pjd], &int_cache2.vyq[pjd], &int_cache2.vzq[pjd], - &int_cache2.mq[pjd], &rhoSum2, &rho_dhSum2, &wcountSum2, - &wcount_dhSum2, &div_vSum2, &curlvxSum2, &curlvySum2, &curlvzSum2, - int2_mask, int2_mask2, -#ifdef HAVE_AVX512_F - knl_mask, knl_mask2); -#else - 0, 0); -#endif - } - /* Perform horizontal adds on vector sums and store result in particle pi. - */ - VEC_HADD(rhoSum, pi->rho); - VEC_HADD(rho_dhSum, pi->density.rho_dh); - VEC_HADD(wcountSum, pi->density.wcount); - VEC_HADD(wcount_dhSum, pi->density.wcount_dh); - VEC_HADD(div_vSum, pi->density.div_v); - VEC_HADD(curlvxSum, pi->density.rot_v[0]); - VEC_HADD(curlvySum, pi->density.rot_v[1]); - VEC_HADD(curlvzSum, pi->density.rot_v[2]); - - VEC_HADD(rhoSum2, pi2->rho); - VEC_HADD(rho_dhSum2, pi2->density.rho_dh); - VEC_HADD(wcountSum2, pi2->density.wcount); - VEC_HADD(wcount_dhSum2, pi2->density.wcount_dh); - VEC_HADD(div_vSum2, pi2->density.div_v); - VEC_HADD(curlvxSum2, pi2->density.rot_v[0]); - VEC_HADD(curlvySum2, pi2->density.rot_v[1]); - VEC_HADD(curlvzSum2, pi2->density.rot_v[2]); - - /* Reset interaction count. */ - icount = 0; - icount2 = 0; - } /* loop over all particles. 
*/ - - TIMER_TOC(timer_doself_density); -#endif /* WITH_VECTORIZATION */ -} - /** * @brief Compute the density interactions between a cell pair (non-symmetric) * using vector intrinsics. @@ -989,7 +625,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, /* Anything to do here? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e)) + if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) error("Interacting undrifted cells."); /* Get the sort ID. */ diff --git a/src/scheduler.c b/src/scheduler.c index fe8776e4fb7a70506116c3c4e3c9a710c7bef937..60ae9c25f5178a40ca1e1d4aa2f5782cc28bbc11 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -152,22 +152,12 @@ void scheduler_add_subcell_tasks(struct scheduler *s, struct cell *c, } /** - * @brief Split a task if too large. + * @brief Split a hydrodynamic task if too large. * * @param t The #task * @param s The #scheduler we are working in. */ -static void scheduler_splittask(struct task *t, struct scheduler *s) { - - /* Static constants. */ - static const int pts[7][8] = { - {-1, 12, 10, 9, 4, 3, 1, 0}, {-1, -1, 11, 10, 5, 4, 2, 1}, - {-1, -1, -1, 12, 7, 6, 4, 3}, {-1, -1, -1, -1, 8, 7, 5, 4}, - {-1, -1, -1, -1, -1, 12, 10, 9}, {-1, -1, -1, -1, -1, -1, 11, 10}, - {-1, -1, -1, -1, -1, -1, -1, 12}}; - static const float sid_scale[13] = { - 0.1897f, 0.4025f, 0.1897f, 0.4025f, 0.5788f, 0.4025f, 0.1897f, - 0.4025f, 0.1897f, 0.4025f, 0.5788f, 0.4025f, 0.5788f}; +static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { /* Iterate on this task until we're done with it. */ int redo = 1; @@ -177,11 +167,7 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { redo = 0; /* Non-splittable task? 
*/ - if ((t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) || - ((t->type == task_type_kick1) && t->ci->nodeID != s->nodeID) || - ((t->type == task_type_kick2) && t->ci->nodeID != s->nodeID) || - ((t->type == task_type_drift) && t->ci->nodeID != s->nodeID) || - ((t->type == task_type_timestep) && t->ci->nodeID != s->nodeID)) { + if ((t->ci == NULL) || (t->type == task_type_pair && t->cj == NULL)) { t->type = task_type_none; t->subtype = task_subtype_none; t->cj = NULL; @@ -194,7 +180,7 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { /* Get a handle on the cell involved. */ struct cell *ci = t->ci; - const double hi = ci->dmin; + const double width = ci->dmin; /* Foreign task? */ if (ci->nodeID != s->nodeID) { @@ -202,27 +188,16 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { break; } - /* Is this cell even split? */ - if (ci->split && 2.f * kernel_gamma * ci->h_max * space_stretch < hi) { + /* Is this cell even split and the task does not violate h ? */ + if (ci->split && 2.f * kernel_gamma * ci->h_max * space_stretch < width) { /* Make a sub? */ if (scheduler_dosub && /* Note division here to avoid overflow */ - ((ci->count > 0 && ci->count < space_subsize / ci->count) || - (ci->gcount > 0 && ci->gcount < space_subsize / ci->gcount))) { + (ci->count > 0 && ci->count < space_subsize / ci->count)) { /* convert to a self-subtask. */ t->type = task_type_sub_self; - /* Make sure we have a drift task (MATTHIEU temp. fix for gravity) */ - if (t->subtype == task_subtype_grav || - t->subtype == task_subtype_external_grav) { - lock_lock(&ci->lock); - if (ci->drift == NULL) - ci->drift = scheduler_addtask(s, task_type_drift, - task_subtype_none, 0, 0, ci, NULL); - lock_unlock_blind(&ci->lock); - } - /* Depend on local sorts on this cell and its sub-cells. 
*/ scheduler_add_subcell_tasks(s, ci, t); @@ -238,44 +213,41 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[first_child]; for (int k = first_child + 1; k < 8; k++) if (ci->progeny[k] != NULL) - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_self, t->subtype, 0, 0, ci->progeny[k], NULL), s); - /* Make a task for each pair of progeny unless it's ext. gravity. */ - if (t->subtype != task_subtype_external_grav) { - - for (int j = 0; j < 8; j++) - if (ci->progeny[j] != NULL) - for (int k = j + 1; k < 8; k++) - if (ci->progeny[k] != NULL) - scheduler_splittask( - scheduler_addtask(s, task_type_pair, t->subtype, - pts[j][k], 0, ci->progeny[j], - ci->progeny[k]), - s); - } + /* Make a task for each pair of progeny */ + for (int j = 0; j < 8; j++) + if (ci->progeny[j] != NULL) + for (int k = j + 1; k < 8; k++) + if (ci->progeny[k] != NULL) + scheduler_splittask_hydro( + scheduler_addtask(s, task_type_pair, t->subtype, + sub_sid_flag[j][k], 0, ci->progeny[j], + ci->progeny[k]), + s); } - } + } /* Cell is split */ - /* Otherwise, make sure the self task has a drift task. */ + /* Otherwise, make sure the self task has a drift task */ else { lock_lock(&ci->lock); - if (ci->drift == NULL) - ci->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, - 0, 0, ci, NULL); + + if (ci->drift_part == NULL) + ci->drift_part = scheduler_addtask(s, task_type_drift_part, + task_subtype_none, 0, 0, ci, NULL); lock_unlock_blind(&ci->lock); } + } /* Self interaction */ - /* Pair interaction? */ - } else if (t->type == task_type_pair && t->subtype != task_subtype_grav) { + /* Pair interaction? */ + else if (t->type == task_type_pair) { /* Get a handle on the cells involved. */ struct cell *ci = t->ci; struct cell *cj = t->cj; - const double hi = ci->dmin; - const double hj = cj->dmin; /* Foreign task? 
*/ if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) { @@ -288,10 +260,13 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { double shift[3]; const int sid = space_getsid(s->space, &ci, &cj, shift); + const double width_i = ci->dmin; + const double width_j = cj->dmin; + /* Should this task be split-up? */ if (ci->split && cj->split && - 2.f * kernel_gamma * space_stretch * ci->h_max < hi && - 2.f * kernel_gamma * space_stretch * cj->h_max < hj) { + 2.f * kernel_gamma * space_stretch * ci->h_max < width_i && + 2.f * kernel_gamma * space_stretch * cj->h_max < width_j) { /* Replace by a single sub-task? */ if (scheduler_dosub && @@ -324,15 +299,15 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[6]; t->cj = cj->progeny[0]; t->flags = 1; - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, ci->progeny[7], cj->progeny[1]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, ci->progeny[6], cj->progeny[1]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, ci->progeny[7], cj->progeny[0]), s); @@ -348,15 +323,15 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[5]; t->cj = cj->progeny[0]; t->flags = 3; - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, ci->progeny[7], cj->progeny[2]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, ci->progeny[5], cj->progeny[2]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, ci->progeny[7], cj->progeny[0]), s); @@ -366,63 +341,63 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[4]; t->cj = cj->progeny[0]; t->flags = 4; - 
scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, ci->progeny[5], cj->progeny[0]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, ci->progeny[6], cj->progeny[0]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, ci->progeny[7], cj->progeny[0]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, ci->progeny[4], cj->progeny[1]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, ci->progeny[5], cj->progeny[1]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, ci->progeny[6], cj->progeny[1]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, ci->progeny[7], cj->progeny[1]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, ci->progeny[4], cj->progeny[2]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, ci->progeny[5], cj->progeny[2]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, ci->progeny[6], cj->progeny[2]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, ci->progeny[7], cj->progeny[2]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, ci->progeny[4], cj->progeny[3]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, ci->progeny[5], cj->progeny[3]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, ci->progeny[6], 
cj->progeny[3]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, ci->progeny[7], cj->progeny[3]), s); @@ -432,15 +407,15 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[4]; t->cj = cj->progeny[1]; t->flags = 5; - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, ci->progeny[6], cj->progeny[3]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, ci->progeny[4], cj->progeny[3]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, ci->progeny[6], cj->progeny[1]), s); @@ -456,15 +431,15 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; t->flags = 6; - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, ci->progeny[5], cj->progeny[2]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, ci->progeny[4], cj->progeny[2]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, ci->progeny[5], cj->progeny[3]), s); @@ -480,15 +455,15 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[3]; t->cj = cj->progeny[0]; t->flags = 9; - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, ci->progeny[7], cj->progeny[4]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, ci->progeny[3], cj->progeny[4]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, ci->progeny[7], cj->progeny[0]), s); @@ -498,63 +473,63 @@ static void scheduler_splittask(struct task *t, 
struct scheduler *s) { t->ci = ci->progeny[2]; t->cj = cj->progeny[0]; t->flags = 10; - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, ci->progeny[3], cj->progeny[0]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, ci->progeny[6], cj->progeny[0]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, ci->progeny[7], cj->progeny[0]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, ci->progeny[2], cj->progeny[1]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, ci->progeny[3], cj->progeny[1]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, ci->progeny[6], cj->progeny[1]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, ci->progeny[7], cj->progeny[1]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, ci->progeny[2], cj->progeny[4]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, ci->progeny[3], cj->progeny[4]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, ci->progeny[6], cj->progeny[4]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, ci->progeny[7], cj->progeny[4]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, ci->progeny[2], cj->progeny[5]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, ci->progeny[3], cj->progeny[5]), s); - scheduler_splittask( + 
scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, ci->progeny[6], cj->progeny[5]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, ci->progeny[7], cj->progeny[5]), s); @@ -564,15 +539,15 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[2]; t->cj = cj->progeny[1]; t->flags = 11; - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, ci->progeny[6], cj->progeny[5]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, ci->progeny[2], cj->progeny[5]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, ci->progeny[6], cj->progeny[1]), s); @@ -582,63 +557,63 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[1]; t->cj = cj->progeny[0]; t->flags = 12; - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, ci->progeny[3], cj->progeny[0]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, ci->progeny[5], cj->progeny[0]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, ci->progeny[7], cj->progeny[0]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, ci->progeny[1], cj->progeny[2]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 12, 0, ci->progeny[3], cj->progeny[2]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, ci->progeny[5], cj->progeny[2]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, ci->progeny[7], 
cj->progeny[2]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, ci->progeny[1], cj->progeny[4]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, ci->progeny[3], cj->progeny[4]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 12, 0, ci->progeny[5], cj->progeny[4]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, ci->progeny[7], cj->progeny[4]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, ci->progeny[1], cj->progeny[6]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, ci->progeny[3], cj->progeny[6]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, ci->progeny[5], cj->progeny[6]), s); - scheduler_splittask( + scheduler_splittask_hydro( scheduler_addtask(s, task_type_pair, t->subtype, 12, 0, ci->progeny[7], cj->progeny[6]), s); @@ -663,7 +638,7 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { struct task *tl = scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, ci->progeny[j], cj->progeny[k]); - scheduler_splittask(tl, s); + scheduler_splittask_hydro(tl, s); tl->flags = space_getsid(s->space, &t->ci, &t->cj, shift); } @@ -672,9 +647,9 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { /* Create the drift and sort for ci. 
*/ lock_lock(&ci->lock); - if (ci->drift == NULL && ci->nodeID == engine_rank) - ci->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, - 0, 0, ci, NULL); + if (ci->drift_part == NULL && ci->nodeID == engine_rank) + ci->drift_part = scheduler_addtask(s, task_type_drift_part, + task_subtype_none, 0, 0, ci, NULL); if (ci->sorts == NULL) ci->sorts = scheduler_addtask(s, task_type_sort, task_subtype_none, 1 << sid, 0, ci, NULL); @@ -683,11 +658,11 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { lock_unlock_blind(&ci->lock); scheduler_addunlock(s, ci->sorts, t); - /* Create the sort for cj. */ + /* Create the drift and sort for cj. */ lock_lock(&cj->lock); - if (cj->drift == NULL && cj->nodeID == engine_rank) - cj->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, - 0, 0, cj, NULL); + if (cj->drift_part == NULL && cj->nodeID == engine_rank) + cj->drift_part = scheduler_addtask(s, task_type_drift_part, + task_subtype_none, 0, 0, cj, NULL); if (cj->sorts == NULL) cj->sorts = scheduler_addtask(s, task_type_sort, task_subtype_none, 1 << sid, 0, cj, NULL); @@ -696,19 +671,142 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { lock_unlock_blind(&cj->lock); scheduler_addunlock(s, cj->sorts, t); } - } /* pair interaction? */ + } /* iterate over the current task. */ +} + +/** + * @brief Split a gravity task if too large. + * + * @param t The #task + * @param s The #scheduler we are working in. + */ +static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) { + + /* Iterate on this task until we're done with it. */ + int redo = 1; + while (redo) { + + /* Reset the redo flag. */ + redo = 0; + + /* Non-splittable task? */ + if ((t->ci == NULL) || (t->type == task_type_pair && t->cj == NULL)) { + t->type = task_type_none; + t->subtype = task_subtype_none; + t->cj = NULL; + t->skip = 1; + break; + } + + /* Self-interaction? 
*/ + if (t->type == task_type_self) { + + /* Get a handle on the cell involved. */ + struct cell *ci = t->ci; + + /* Foreign task? */ + if (ci->nodeID != s->nodeID) { + t->skip = 1; + break; + } + + /* Is this cell even split? */ + if (ci->split) { + + /* Make a sub? */ + if (scheduler_dosub && /* Note division here to avoid overflow */ + (ci->gcount > 0 && ci->gcount < space_subsize / ci->gcount)) { + + /* convert to a self-subtask. */ + t->type = task_type_sub_self; + + /* Make sure we have a drift task (MATTHIEU temp. fix) */ + lock_lock(&ci->lock); + if (ci->drift_gpart == NULL) + ci->drift_gpart = scheduler_addtask( + s, task_type_drift_gpart, task_subtype_none, 0, 0, ci, NULL); + lock_unlock_blind(&ci->lock); - /* Long-range gravity interaction ? */ - else if (t->type == task_type_grav_mm) { + /* Otherwise, make tasks explicitly. */ + } else { + + /* Take a step back (we're going to recycle the current task)... */ + redo = 1; + + /* Add the self tasks. */ + int first_child = 0; + while (ci->progeny[first_child] == NULL) first_child++; + t->ci = ci->progeny[first_child]; + for (int k = first_child + 1; k < 8; k++) + if (ci->progeny[k] != NULL) + scheduler_splittask_gravity( + scheduler_addtask(s, task_type_self, t->subtype, 0, 0, + ci->progeny[k], NULL), + s); + + /* Make a task for each pair of progeny */ + if (t->subtype != task_subtype_external_grav) { + for (int j = 0; j < 8; j++) + if (ci->progeny[j] != NULL) + for (int k = j + 1; k < 8; k++) + if (ci->progeny[k] != NULL) + scheduler_splittask_gravity( + scheduler_addtask(s, task_type_pair, t->subtype, + sub_sid_flag[j][k], 0, ci->progeny[j], + ci->progeny[k]), + s); + } + } + } /* Cell is split */ + + /* Otherwise, make sure the self task has a drift task */ + else { + + lock_lock(&ci->lock); + + if (ci->drift_gpart == NULL) + ci->drift_gpart = scheduler_addtask( + s, task_type_drift_gpart, task_subtype_none, 0, 0, ci, NULL); + lock_unlock_blind(&ci->lock); + } + } /* Self interaction */ + + /* Pair 
interaction? */ + else if (t->type == task_type_pair) { /* Get a handle on the cells involved. */ struct cell *ci = t->ci; + struct cell *cj = t->cj; + + /* Foreign task? */ + if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) { + t->skip = 1; + break; + } + + /* Should this task be split-up? */ + if (ci->split && cj->split) { + + // MATTHIEU: nothing here for now + + } else { - /* Safety thing */ - if (ci->gcount == 0) t->type = task_type_none; + /* Create the drift for ci. */ + lock_lock(&ci->lock); + if (ci->drift_gpart == NULL && ci->nodeID == engine_rank) + ci->drift_gpart = scheduler_addtask( + s, task_type_drift_gpart, task_subtype_none, 0, 0, ci, NULL); + lock_unlock_blind(&ci->lock); - } /* gravity interaction? */ + /* Create the drift for cj. */ + lock_lock(&cj->lock); + if (cj->drift_gpart == NULL && cj->nodeID == engine_rank) + cj->drift_gpart = scheduler_addtask( + s, task_type_drift_gpart, task_subtype_none, 0, 0, cj, NULL); + lock_unlock_blind(&cj->lock); + } + } /* pair interaction? */ } /* iterate over the current task. 
*/ } @@ -728,7 +826,20 @@ void scheduler_splittasks_mapper(void *map_data, int num_elements, for (int ind = 0; ind < num_elements; ind++) { struct task *t = &tasks[ind]; - scheduler_splittask(t, s); + + /* Invoke the correct splitting strategy */ + if (t->subtype == task_subtype_density) { + scheduler_splittask_hydro(t, s); + } else if (t->subtype == task_subtype_external_grav) { + scheduler_splittask_gravity(t, s); + } else if (t->subtype == task_subtype_grav) { + scheduler_splittask_gravity(t, s); + } else if (t->type == task_type_grav_top_level || + t->type == task_type_grav_ghost) { + // MATTHIEU: for the future + } else { + error("Unexpected task sub-type"); + } } } @@ -821,7 +932,8 @@ void scheduler_set_unlocks(struct scheduler *s) { /* Check that we are not overflowing */ if (counts[s->unlock_ind[k]] < 0) error("Task unlocking more than %d other tasks!", - (1 << (sizeof(short int) - 1)) - 1); + (1 << (8 * sizeof(short int) - 1)) - 1); + #endif } @@ -1004,9 +1116,6 @@ void scheduler_reweight(struct scheduler *s, int verbose) { int *tid = s->tasks_ind; struct task *tasks = s->tasks; const int nodeID = s->nodeID; - const float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, - 0.4025, 0.1897, 0.4025, 0.1897, 0.4025, - 0.5788, 0.4025, 0.5788}; const float wscale = 0.001; const ticks tic = getticks(); @@ -1053,9 +1162,12 @@ void scheduler_reweight(struct scheduler *s, int verbose) { case task_type_ghost: if (t->ci == t->ci->super) cost = wscale * t->ci->count; break; - case task_type_drift: + case task_type_drift_part: cost = wscale * t->ci->count; break; + case task_type_drift_gpart: + cost = wscale * t->ci->gcount; + break; case task_type_kick1: cost = wscale * t->ci->count; break; @@ -1148,6 +1260,15 @@ void scheduler_enqueue_mapper(void *map_data, int num_elements, */ void scheduler_start(struct scheduler *s) { +/* Reset all task debugging timers */ +#ifdef SWIFT_DEBUG_TASKS + for (int i = 0; i < s->nr_tasks; ++i) { + s->tasks[i].tic = 0; + 
s->tasks[i].toc = 0; + s->tasks[i].rid = -1; + } +#endif + /* Re-wait the tasks. */ if (s->active_count > 1000) { threadpool_map(s->threadpool, scheduler_rewait_mapper, s->tid_active, @@ -1174,6 +1295,11 @@ void scheduler_start(struct scheduler *s) { /* Don't check MPI stuff */ if (t->type == task_type_send || t->type == task_type_recv) continue; + /* Don't check the FFT task */ + if (t->type == task_type_grav_top_level || + t->type == task_type_grav_ghost) + continue; + if (ci == NULL && cj == NULL) { error("Task not associated with cells!"); @@ -1181,7 +1307,8 @@ void scheduler_start(struct scheduler *s) { } else if (cj == NULL) { /* self */ if (ci->ti_end_min == ti_current && t->skip && - t->type != task_type_sort && t->type != task_type_drift && t->type) + t->type != task_type_sort && t->type != task_type_drift_part && + t->type != task_type_drift_gpart) error( "Task (type='%s/%s') should not have been skipped " "ti_current=%lld " @@ -1276,7 +1403,8 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { case task_type_ghost: case task_type_kick1: case task_type_kick2: - case task_type_drift: + case task_type_drift_part: + case task_type_drift_gpart: case task_type_timestep: qid = t->ci->super->owner; break; @@ -1295,7 +1423,8 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { MPI_BYTE, t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); } else if (t->subtype == task_subtype_xv || - t->subtype == task_subtype_rho) { + t->subtype == task_subtype_rho || + t->subtype == task_subtype_gradient) { err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type, t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); // message( "receiving %i parts with tag=%i from %i to %i." 
, @@ -1330,7 +1459,8 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); } else if (t->subtype == task_subtype_xv || - t->subtype == task_subtype_rho) { + t->subtype == task_subtype_rho || + t->subtype == task_subtype_gradient) { #ifdef SWIFT_DEBUG_CHECKS for (int k = 0; k < t->ci->count; k++) if (t->ci->parts[k].ti_drift != s->space->e->ti_current) diff --git a/src/sort_part.h b/src/sort_part.h index a243fcdfae8ec0aba606000e26bc18d35601215c..74116d7a8cada31c0663d5c5b70cfa978b11af8b 100644 --- a/src/sort_part.h +++ b/src/sort_part.h @@ -83,6 +83,18 @@ static const int sortlistID[27] = { /* ( 1 , 1 , 0 ) */ 1, /* ( 1 , 1 , 1 ) */ 0}; +/* Ratio of particles interacting assuming a uniform distribution */ +static const float sid_scale[13] = {0.1897f, 0.4025f, 0.1897f, 0.4025f, 0.5788f, + 0.4025f, 0.1897f, 0.4025f, 0.1897f, 0.4025f, + 0.5788f, 0.4025f, 0.5788f}; + +/* Sid flags for every sub-pair of a self task. */ +static const int sub_sid_flag[7][8] = { + {-1, 12, 10, 9, 4, 3, 1, 0}, {-1, -1, 11, 10, 5, 4, 2, 1}, + {-1, -1, -1, 12, 7, 6, 4, 3}, {-1, -1, -1, -1, 8, 7, 5, 4}, + {-1, -1, -1, -1, -1, 12, 10, 9}, {-1, -1, -1, -1, -1, -1, 11, 10}, + {-1, -1, -1, -1, -1, -1, -1, 12}}; + /** * @brief Determines whether a pair of cells are corner to corner. 
* diff --git a/src/space.c b/src/space.c index d0f629d90dd56ffb621705c9b9718331cf5eff4b..fd6476743ce39d130c6e00737af03f5d34216381 100644 --- a/src/space.c +++ b/src/space.c @@ -205,7 +205,8 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->gradient = NULL; c->force = NULL; c->grav = NULL; - c->dx_max = 0.0f; + c->dx_max_part = 0.0f; + c->dx_max_gpart = 0.0f; c->dx_max_sort = 0.0f; c->sorted = 0; c->count = 0; @@ -219,10 +220,12 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->kick1 = NULL; c->kick2 = NULL; c->timestep = NULL; - c->drift = NULL; + c->drift_part = NULL; + c->drift_gpart = NULL; c->cooling = NULL; c->sourceterms = NULL; - c->grav_top_level = NULL; + c->grav_ghost[0] = NULL; + c->grav_ghost[1] = NULL; c->grav_long_range = NULL; c->grav_down = NULL; c->super = c; @@ -422,7 +425,8 @@ void space_regrid(struct space *s, int verbose) { c->gcount = 0; c->scount = 0; c->super = c; - c->ti_old = ti_old; + c->ti_old_part = ti_old; + c->ti_old_gpart = ti_old; c->ti_old_multipole = ti_old; if (s->gravity) c->multipole = &s->multipoles_top[cid]; } @@ -892,8 +896,9 @@ void space_rebuild(struct space *s, int verbose) { #ifdef SWIFT_DEBUG_CHECKS /* Verify that the links are correct */ - part_verify_links(s->parts, s->gparts, s->sparts, nr_parts, nr_gparts, - nr_sparts, verbose); + if ((nr_gparts > 0 && nr_parts > 0) || (nr_gparts > 0 && nr_sparts > 0)) + part_verify_links(s->parts, s->gparts, s->sparts, nr_parts, nr_gparts, + nr_sparts, verbose); #endif /* Hook the cells up to the parts. 
*/ @@ -904,7 +909,8 @@ void space_rebuild(struct space *s, int verbose) { struct spart *sfinger = s->sparts; for (int k = 0; k < s->nr_cells; k++) { struct cell *restrict c = &cells_top[k]; - c->ti_old = ti_old; + c->ti_old_part = ti_old; + c->ti_old_gpart = ti_old; c->ti_old_multipole = ti_old; c->parts = finger; c->xparts = xfinger; @@ -2013,7 +2019,8 @@ void space_split_recursive(struct space *s, struct cell *c, cp->count = 0; cp->gcount = 0; cp->scount = 0; - cp->ti_old = c->ti_old; + cp->ti_old_part = c->ti_old_part; + cp->ti_old_gpart = c->ti_old_gpart; cp->ti_old_multipole = c->ti_old_multipole; cp->loc[0] = c->loc[0]; cp->loc[1] = c->loc[1]; @@ -2027,8 +2034,9 @@ void space_split_recursive(struct space *s, struct cell *c, if (k & 1) cp->loc[2] += cp->width[2]; cp->depth = c->depth + 1; cp->split = 0; - cp->h_max = 0.0; - cp->dx_max = 0.f; + cp->h_max = 0.f; + cp->dx_max_part = 0.f; + cp->dx_max_gpart = 0.f; cp->dx_max_sort = 0.f; cp->nodeID = c->nodeID; cp->parent = c; @@ -2425,6 +2433,58 @@ void space_getcells(struct space *s, int nr_cells, struct cell **cells) { } } +void space_synchronize_particle_positions_mapper(void *map_data, int nr_gparts, + void *extra_data) { + /* Unpack the data */ + struct gpart *restrict gparts = (struct gpart *)map_data; + struct space *s = (struct space *)extra_data; + + for (int k = 0; k < nr_gparts; k++) { + + /* Get the particle */ + const struct gpart *restrict gp = &gparts[k]; + + if (gp->type == swift_type_dark_matter) + continue; + + else if (gp->type == swift_type_gas) { + + /* Get it's gassy friend */ + struct part *p = &s->parts[-gp->id_or_neg_offset]; + struct xpart *xp = &s->xparts[-gp->id_or_neg_offset]; + + /* Synchronize positions and velocities */ + p->x[0] = gp->x[0]; + p->x[1] = gp->x[1]; + p->x[2] = gp->x[2]; + + xp->v_full[0] = gp->v_full[0]; + xp->v_full[1] = gp->v_full[1]; + xp->v_full[2] = gp->v_full[2]; + } + + else if (gp->type == swift_type_star) { + + /* Get it's stellar friend */ + struct spart *sp 
= &s->sparts[-gp->id_or_neg_offset]; + + /* Synchronize positions */ + sp->x[0] = gp->x[0]; + sp->x[1] = gp->x[1]; + sp->x[2] = gp->x[2]; + } + } +} + +void space_synchronize_particle_positions(struct space *s) { + + if ((s->nr_gparts > 0 && s->nr_parts > 0) || + (s->nr_gparts > 0 && s->nr_sparts > 0)) + threadpool_map(&s->e->threadpool, + space_synchronize_particle_positions_mapper, s->gparts, + s->nr_gparts, sizeof(struct gpart), 1000, (void *)s); +} + /** * @brief Initialises all the particles by setting them into a valid state * @@ -2879,7 +2939,8 @@ void space_check_drift_point(struct space *s, integertime_t ti_drift, int multipole) { #ifdef SWIFT_DEBUG_CHECKS /* Recursively check all cells */ - space_map_cells_pre(s, 1, cell_check_particle_drift_point, &ti_drift); + space_map_cells_pre(s, 1, cell_check_part_drift_point, &ti_drift); + space_map_cells_pre(s, 1, cell_check_gpart_drift_point, &ti_drift); if (multipole) space_map_cells_pre(s, 1, cell_check_multipole_drift_point, &ti_drift); #else diff --git a/src/space.h b/src/space.h index c5f588563e5a9fb4b6cb73ac1446514f8149794f..e8e8600349c97ff8a60f0fcf2964d6ec514a7589 100644 --- a/src/space.h +++ b/src/space.h @@ -130,6 +130,9 @@ struct space { /*! The s-particle data (cells have pointers to this). */ struct spart *sparts; + /*! The top-level FFT task */ + struct task *grav_top_level; + /*! General-purpose lock for this space. 
*/ swift_lock_type lock; @@ -206,6 +209,7 @@ void space_gparts_get_cell_index(struct space *s, int *gind, struct cell *cells, int verbose); void space_sparts_get_cell_index(struct space *s, int *sind, struct cell *cells, int verbose); +void space_synchronize_particle_positions(struct space *s); void space_do_parts_sort(); void space_do_gparts_sort(); void space_do_sparts_sort(); diff --git a/src/swift.h b/src/swift.h index 7f1b19b6066c2d55df1cb9101172ae94c9085583..20397eb24df478cba65a0e35d686b402f1d8ee70 100644 --- a/src/swift.h +++ b/src/swift.h @@ -45,6 +45,7 @@ #include "parser.h" #include "part.h" #include "partition.h" +#include "periodic.h" #include "physical_constants.h" #include "potential.h" #include "profiler.h" diff --git a/src/task.c b/src/task.c index e8c35e49a57595a6415c60ce7071ae1c0e3f09b7..43da1d35680783d977ea743dd4f43c52f0f291bc 100644 --- a/src/task.c +++ b/src/task.c @@ -47,27 +47,15 @@ #include "lock.h" /* Task type names. */ -const char *taskID_names[task_type_count] = {"none", - "sort", - "self", - "pair", - "sub_self", - "sub_pair", - "init_grav", - "ghost", - "extra_ghost", - "drift", - "kick1", - "kick2", - "timestep", - "send", - "recv", - "grav_top_level", - "grav_long_range", - "grav_mm", - "grav_down", - "cooling", - "sourceterms"}; +const char *taskID_names[task_type_count] = { + "none", "sort", "self", + "pair", "sub_self", "sub_pair", + "init_grav", "ghost", "extra_ghost", + "drift_part", "drift_gpart", "kick1", + "kick2", "timestep", "send", + "recv", "grav_top_level", "grav_long_range", + "grav_ghost", "grav_mm", "grav_down", + "cooling", "sourceterms"}; /* Sub-task type names. 
*/ const char *subtaskID_names[task_subtype_count] = { @@ -132,6 +120,7 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( return task_action_none; break; + case task_type_drift_part: case task_type_sort: case task_type_ghost: case task_type_extra_ghost: @@ -169,7 +158,6 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( case task_type_timestep: case task_type_send: case task_type_recv: - case task_type_drift: if (t->ci->count > 0 && t->ci->gcount > 0) return task_action_all; else if (t->ci->count > 0) @@ -187,8 +175,10 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( return task_action_multipole; break; + case task_type_drift_gpart: case task_type_grav_down: return task_action_gpart; + break; default: error("Unknown task_action for task"); @@ -286,15 +276,19 @@ void task_unlock(struct task *t) { case task_type_kick1: case task_type_kick2: case task_type_timestep: - case task_type_drift: cell_unlocktree(ci); cell_gunlocktree(ci); break; + case task_type_drift_part: case task_type_sort: cell_unlocktree(ci); break; + case task_type_drift_gpart: + cell_gunlocktree(ci); + break; + case task_type_self: case task_type_sub_self: if (subtype == task_subtype_grav) { @@ -323,7 +317,6 @@ void task_unlock(struct task *t) { cell_munlocktree(ci); break; - case task_type_grav_top_level: case task_type_grav_long_range: case task_type_grav_mm: cell_munlocktree(ci); @@ -372,7 +365,6 @@ int task_lock(struct task *t) { case task_type_kick1: case task_type_kick2: case task_type_timestep: - case task_type_drift: if (ci->hold || ci->ghold) return 0; if (cell_locktree(ci) != 0) return 0; if (cell_glocktree(ci) != 0) { @@ -381,10 +373,17 @@ int task_lock(struct task *t) { } break; + case task_type_drift_part: case task_type_sort: + if (ci->hold) return 0; if (cell_locktree(ci) != 0) return 0; break; + case task_type_drift_gpart: + if (ci->ghold) return 0; + if (cell_glocktree(ci) != 0) return 0; + 
break; + case task_type_self: case task_type_sub_self: if (subtype == task_subtype_grav) { @@ -442,7 +441,6 @@ int task_lock(struct task *t) { } break; - case task_type_grav_top_level: case task_type_grav_long_range: case task_type_grav_mm: /* Lock the m-poles */ diff --git a/src/task.h b/src/task.h index 049f86bdd6b4baf0856745b2b53acda5cca8c9e1..052f3e8036381441e283d3f7847d09e98ec1dac2 100644 --- a/src/task.h +++ b/src/task.h @@ -47,7 +47,8 @@ enum task_types { task_type_init_grav, task_type_ghost, task_type_extra_ghost, - task_type_drift, + task_type_drift_part, + task_type_drift_gpart, task_type_kick1, task_type_kick2, task_type_timestep, @@ -55,6 +56,7 @@ enum task_types { task_type_recv, task_type_grav_top_level, task_type_grav_long_range, + task_type_grav_ghost, task_type_grav_mm, task_type_grav_down, task_type_cooling, diff --git a/src/timers.c b/src/timers.c index aa42eee14fc0df3edd5a18340c092b8eea2ffac1..62eac20596a082e411ced61a86f32bef9edcb636 100644 --- a/src/timers.c +++ b/src/timers.c @@ -40,7 +40,8 @@ const char* timers_names[timer_count] = { "prepare", "init", "init_grav", - "drift", + "drift_part", + "drift_gpart", "kick1", "kick2", "timestep", @@ -58,6 +59,7 @@ const char* timers_names[timer_count] = { "dopair_grav_pp", "dograv_external", "dograv_down", + "dograv_top_level", "dograv_long_range", "dosource", "dosub_self_density", diff --git a/src/timers.h b/src/timers.h index 08e983a947bc57d9dcc7a432df92c2a4b0a1f7d7..9248be4f3048e468deed476f822947eed3c4ce56 100644 --- a/src/timers.h +++ b/src/timers.h @@ -41,7 +41,8 @@ enum { timer_prepare, timer_init, timer_init_grav, - timer_drift, + timer_drift_part, + timer_drift_gpart, timer_kick1, timer_kick2, timer_timestep, @@ -59,6 +60,7 @@ enum { timer_dopair_grav_pp, timer_dograv_external, timer_dograv_down, + timer_dograv_top_level, timer_dograv_long_range, timer_dosource, timer_dosub_self_density, diff --git a/src/xmf.c b/src/xmf.c index 
7292606c9f013601db1e9e9b35ee843dea63f785..ca4ffe5157599dd5a45295dcfa59f9420753f5cf 100644 --- a/src/xmf.c +++ b/src/xmf.c @@ -181,6 +181,52 @@ void xmf_write_groupfooter(FILE* xmfFile, enum part_type ptype) { part_type_names[ptype]); } +/** + * @brief Returns the precision of a given dataset type + */ +int xmf_precision(enum IO_DATA_TYPE type) { + switch (type) { + case FLOAT: + return 4; + break; + case DOUBLE: + return 8; + break; + case ULONGLONG: + case LONGLONG: + return 8; + break; + case CHAR: + return 1; + break; + default: + error("Unsupported type"); + } + return 0; +} + +/** + * @brief Returns the Xdmf type name of a given dataset type + */ +const char* xmf_type(enum IO_DATA_TYPE type) { + switch (type) { + case FLOAT: + case DOUBLE: + return "Float"; + break; + case ULONGLONG: + case LONGLONG: + return "Int"; + break; + case CHAR: + return "Char"; + break; + default: + error("Unsupported type"); + } + return ""; +} + /** * @brief Writes the lines corresponding to an array of the HDF5 output * @@ -203,13 +249,15 @@ void xmf_write_line(FILE* xmfFile, const char* fileName, name, dim == 1 ? "Scalar" : "Vector"); if (dim == 1) fprintf(xmfFile, - "<DataItem Dimensions=\"%zu\" NumberType=\"Double\" " + "<DataItem Dimensions=\"%zu\" NumberType=\"%s\" " "Precision=\"%d\" Format=\"HDF\">%s:%s/%s</DataItem>\n", - N, type == FLOAT ? 4 : 8, fileName, partTypeGroupName, name); + N, xmf_type(type), xmf_precision(type), fileName, partTypeGroupName, + name); else fprintf(xmfFile, - "<DataItem Dimensions=\"%zu %d\" NumberType=\"Double\" " + "<DataItem Dimensions=\"%zu %d\" NumberType=\"%s\" " "Precision=\"%d\" Format=\"HDF\">%s:%s/%s</DataItem>\n", - N, dim, type == FLOAT ? 
4 : 8, fileName, partTypeGroupName, name); + N, dim, xmf_type(type), xmf_precision(type), fileName, + partTypeGroupName, name); fprintf(xmfFile, "</Attribute>\n"); } diff --git a/tests/Makefile.am b/tests/Makefile.am index a51b8eb82a17313818ff956ca3f15a232df8df65..7c45ead22f77da7e0aa53e03051c7351cc97f550 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -22,7 +22,7 @@ AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(FFTW_LIBS # List of programs and scripts to run in the test suite TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetry \ testPair.sh testPairPerturbed.sh test27cells.sh test27cellsPerturbed.sh \ - testParser.sh testSPHStep test125cells.sh testFFT \ + testParser.sh testSPHStep test125cells.sh test125cellsPerturbed.sh testFFT \ testAdiabaticIndex testRiemannExact testRiemannTRRS testRiemannHLLC \ testMatrixInversion testThreadpool testDump testLogger \ testVoronoi1D testVoronoi2D testVoronoi3D @@ -92,6 +92,8 @@ testLogger_SOURCES = testLogger.c # Files necessary for distribution EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \ test27cells.sh test27cellsPerturbed.sh testParser.sh \ - test125cells.sh testParserInput.yaml difffloat.py \ - tolerance_125.dat tolerance_27_normal.dat tolerance_27_perturbed.dat \ - tolerance_pair_normal.dat tolerance_pair_perturbed.dat + test125cells.sh test125cellsPerturbed.sh testParserInput.yaml difffloat.py \ + tolerance_125_normal.dat tolerance_125_perturbed.dat \ + tolerance_27_normal.dat tolerance_27_perturbed.dat \ + tolerance_pair_normal.dat tolerance_pair_perturbed.dat \ + fft_params.yml diff --git a/tests/difffloat.py b/tests/difffloat.py index e0f0864372264899c6de1bf2f83ab678b7dd9ead..0bdc706a1c44ee6c42c54ad37e93f634742e06bc 100644 --- a/tests/difffloat.py +++ b/tests/difffloat.py @@ -35,13 +35,18 @@ file1 = sys.argv[1] file2 = sys.argv[2] number_to_check = -1 -if len(sys.argv) == 5: - number_to_check = int(sys.argv[4]) - fileTol = 
"" if len(sys.argv) >= 4: fileTol = sys.argv[3] +if len(sys.argv) >= 5: + number_to_check = int(sys.argv[4]) + +if len(sys.argv) == 6: + ignoreSmallRhoDh = int(sys.argv[5]) +else: + ignoreSmallRhoDh = 0 + data1 = loadtxt(file1) data2 = loadtxt(file2) if fileTol != "": @@ -102,8 +107,11 @@ for i in range(n_lines_to_check): print "" error = True - if abs(data1[i,j]) < 1e-6 and + abs(data2[i,j]) < 1e-6 : continue - + if abs(data1[i,j]) < 4e-6 and abs(data2[i,j]) < 4e-6 : continue + + # Ignore pathological cases with rho_dh + if ignoreSmallRhoDh and j == 8 and abs(data1[i,j]) < 2e-4: continue + if( rel_diff > 1.1*relTol[j]): print "Relative difference larger than tolerance (%e) for particle %d, column %d:"%(relTol[j], i,j) print "%10s: a = %e"%("File 1", data1[i,j]) diff --git a/tests/fft_params.yml b/tests/fft_params.yml new file mode 100644 index 0000000000000000000000000000000000000000..05d6d8f0b0578d11645fc1d78c1a6322160ae87a --- /dev/null +++ b/tests/fft_params.yml @@ -0,0 +1,10 @@ +Scheduler: + max_top_level_cells: 64 + +# Parameters for the self-gravity scheme +Gravity: + eta: 0.025 # Constant dimensionless multiplier for time integration. + theta: 0.7 # Opening angle (Multipole acceptance criterion) + epsilon: 0.00001 # Softening length (in internal units). + a_smooth: 0. + r_cut: 0. diff --git a/tests/test125cells.c b/tests/test125cells.c index 168b4838eab5b27f359ab927a7bae2240919e82f..e4c73b5e75df56436d277d719b3b83a179924a6f 100644 --- a/tests/test125cells.c +++ b/tests/test125cells.c @@ -1,3 +1,4 @@ + /******************************************************************************* * This file is part of SWIFT. * Copyright (C) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk). @@ -236,11 +237,13 @@ void reset_particles(struct cell *c, struct hydro_space *hs, * separation. * @param density The density of the fluid. * @param partId The running counter of IDs. 
+ * @param pert The perturbation to apply to the particles in the cell in units + *of the inter-particle separation. * @param vel The type of velocity field. * @param press The type of pressure field. */ struct cell *make_cell(size_t n, const double offset[3], double size, double h, - double density, long long *partId, + double density, long long *partId, double pert, enum velocity_field vel, enum pressure_field press) { const size_t count = n * n * n; @@ -263,9 +266,15 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, for (size_t x = 0; x < n; ++x) { for (size_t y = 0; y < n; ++y) { for (size_t z = 0; z < n; ++z) { - part->x[0] = offset[0] + size * (x + 0.5) / (float)n; - part->x[1] = offset[1] + size * (y + 0.5) / (float)n; - part->x[2] = offset[2] + size * (z + 0.5) / (float)n; + part->x[0] = + offset[0] + + size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + part->x[1] = + offset[1] + + size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + part->x[2] = + offset[2] + + size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; part->h = size * h / (float)n; #if defined(GIZMO_SPH) || defined(SHADOWFAX_SPH) @@ -315,7 +324,7 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, cell->h_max = h; cell->count = count; cell->gcount = 0; - cell->dx_max = 0.; + cell->dx_max_part = 0.; cell->dx_max_sort = 0.; cell->width[0] = size; cell->width[1] = size; @@ -324,7 +333,7 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, cell->loc[1] = offset[1]; cell->loc[2] = offset[2]; - cell->ti_old = 8; + cell->ti_old_part = 8; cell->ti_end_min = 8; cell->ti_end_max = 8; cell->ti_sort = 0; @@ -354,8 +363,8 @@ void dump_particle_fields(char *fileName, struct cell *main_cell, /* Write header */ fprintf(file, - "# %4s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s " - "%8s %8s %8s %8s %8s\n", + "# %4s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s 
%8s %13s %13s " + "%13s %13s %13s %8s %8s\n", "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "h", "rho", "div_v", "S", "u", "P", "c", "a_x", "a_y", "a_z", "h_dt", "v_sig", "dS/dt", "du/dt"); @@ -367,7 +376,7 @@ void dump_particle_fields(char *fileName, struct cell *main_cell, fprintf(file, "%6llu %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f " "%8.5f " - "%8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f\n", + "%8.5f %8.5f %13e %13e %13e %13e %13e %8.5f %8.5f\n", main_cell->parts[pid].id, main_cell->parts[pid].x[0], main_cell->parts[pid].x[1], main_cell->parts[pid].x[2], main_cell->parts[pid].v[0], main_cell->parts[pid].v[1], @@ -406,7 +415,7 @@ void dump_particle_fields(char *fileName, struct cell *main_cell, fprintf(file, "%6llu %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f " "%8.5f %8.5f " - "%8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f\n", + "%8.5f %8.5f %13f %13f %13f %13f %13f %8.5f %8.5f\n", solution[pid].id, solution[pid].x[0], solution[pid].x[1], solution[pid].x[2], solution[pid].v[0], solution[pid].v[1], solution[pid].v[2], solution[pid].h, solution[pid].rho, @@ -432,6 +441,7 @@ int main(int argc, char *argv[]) { size_t runs = 0, particles = 0; double h = 1.23485, size = 1., rho = 2.5; + double perturbation = 0.; char outputFileNameExtension[200] = ""; char outputFileName[200] = ""; enum velocity_field vel = velocity_zero; @@ -462,6 +472,9 @@ int main(int argc, char *argv[]) { case 'r': sscanf(optarg, "%zu", &runs); break; + case 'd': + sscanf(optarg, "%lf", &perturbation); + break; case 'm': sscanf(optarg, "%lf", &rho); break; @@ -491,6 +504,7 @@ int main(int argc, char *argv[]) { "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>" "\n-m rho - Physical density in the cell" "\n-s size - Physical size of the cell" + "\n-d pert - Perturbation to apply to the particles [0,1[" "\n-v type (0,1,2,3) - Velocity field: (zero, constant, divergent, " "rotating)" "\n-p type (0,1,2) - Pressure field: (constant, gradient 
divergent)" @@ -525,9 +539,9 @@ int main(int argc, char *argv[]) { /* Build the infrastructure */ struct space space; space.periodic = 1; - space.dim[0] = 3.; - space.dim[1] = 3.; - space.dim[2] = 3.; + space.dim[0] = 5.; + space.dim[1] = 5.; + space.dim[2] = 5.; hydro_space_init(&space.hs, &space); struct phys_const prog_const; @@ -535,7 +549,8 @@ int main(int argc, char *argv[]) { struct hydro_props hp; hp.target_neighbours = pow_dimension(h) * kernel_norm; - hp.delta_neighbours = 2.; + hp.delta_neighbours = 4.; + hp.h_max = FLT_MAX; hp.max_smoothing_iterations = 1; hp.CFL_condition = 0.1; @@ -565,8 +580,8 @@ int main(int argc, char *argv[]) { const double offset[3] = {i * size, j * size, k * size}; /* Construct it */ - cells[i * 25 + j * 5 + k] = - make_cell(particles, offset, size, h, rho, &partId, vel, press); + cells[i * 25 + j * 5 + k] = make_cell( + particles, offset, size, h, rho, &partId, perturbation, vel, press); /* Store the inner cells */ if (i > 0 && i < 4 && j > 0 && j < 4 && k > 0 && k < 4) { @@ -592,8 +607,13 @@ int main(int argc, char *argv[]) { const ticks tic = getticks(); /* Initialise the particles */ - for (int j = 0; j < 125; ++j) - runner_do_drift_particles(&runner, cells[j], 0); + for (int j = 0; j < 125; ++j) runner_do_drift_part(&runner, cells[j], 0); + + /* Reset particles. 
*/ + for (int i = 0; i < 125; ++i) { + for (int n = 0; n < cells[i]->count; ++n) + hydro_init_part(&cells[i]->parts[n], &space.hs); + } /* First, sort stuff */ for (int j = 0; j < 125; ++j) runner_do_sort(&runner, cells[j], 0x1FFF, 0); @@ -670,6 +690,12 @@ int main(int argc, char *argv[]) { outputFileNameExtension); dump_particle_fields(outputFileName, main_cell, solution, 0); } + + /* Reset stuff */ + for (int i = 0; i < 125; ++i) { + for (int n = 0; n < cells[i]->count; ++n) + hydro_init_part(&cells[i]->parts[n], &space.hs); + } } /* Output timing */ diff --git a/tests/test125cells.sh.in b/tests/test125cells.sh.in index 1d3b0db75d70bf2d5047f71b183812702305df75..d6d3ddc5b6b61bbd493c94005fd500a93ae7a01d 100755 --- a/tests/test125cells.sh.in +++ b/tests/test125cells.sh.in @@ -7,15 +7,25 @@ do rm -f brute_force_125_standard.dat swift_dopair_125_standard.dat + echo "Running ./test125cells -n 6 -r 1 -v $v -p $p -f standard" ./test125cells -n 6 -r 1 -v $v -p $p -f standard if [ -e brute_force_125_standard.dat ] then - python @srcdir@/difffloat.py brute_force_125_standard.dat swift_dopair_125_standard.dat @srcdir@/tolerance_125.dat 6 + if python @srcdir@/difffloat.py brute_force_125_standard.dat swift_dopair_125_standard.dat @srcdir@/tolerance_125_normal.dat 6 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi else + echo "Error Missing test output file" exit 1 fi + echo "------------" + done done diff --git a/tests/test125cellsPerturbed.sh.in b/tests/test125cellsPerturbed.sh.in new file mode 100755 index 0000000000000000000000000000000000000000..9a5cfc07c978b0cfd5aa050aa117e887a1d40907 --- /dev/null +++ b/tests/test125cellsPerturbed.sh.in @@ -0,0 +1,32 @@ +#!/bin/bash +for v in {0..3} +do + for p in {0..2} + do + echo "" + + rm -f brute_force_125_perturbed.dat swift_dopair_125_perturbed.dat + + echo "Running ./test125cells -n 6 -r 1 -d 0.1 -v $v -p $p -f perturbed" + ./test125cells -n 6 -r 1 -d 0.1 -v $v -p $p -f perturbed + + if [ 
-e brute_force_125_perturbed.dat ] + then + if python @srcdir@/difffloat.py brute_force_125_perturbed.dat swift_dopair_125_perturbed.dat @srcdir@/tolerance_125_perturbed.dat 6 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi + else + echo "Error Missing test output file" + exit 1 + fi + + echo "------------" + + done +done + +exit $? diff --git a/tests/test27cells.c b/tests/test27cells.c index bd827b68e90ea5f4e9d5577612e6cecda2edf83a..a0f541d17100a13079580aabbef065fa5adbd5e1 100644 --- a/tests/test27cells.c +++ b/tests/test27cells.c @@ -158,7 +158,7 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->split = 0; cell->h_max = h; cell->count = count; - cell->dx_max = 0.; + cell->dx_max_part = 0.; cell->dx_max_sort = 0.; cell->width[0] = size; cell->width[1] = size; @@ -167,7 +167,7 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->loc[1] = offset[1]; cell->loc[2] = offset[2]; - cell->ti_old = 8; + cell->ti_old_part = 8; cell->ti_end_min = 8; cell->ti_end_max = 8; cell->ti_sort = 8; @@ -438,7 +438,7 @@ int main(int argc, char *argv[]) { cells[i * 9 + j * 3 + k] = make_cell(particles, offset, size, h, rho, &partId, perturbation, vel); - runner_do_drift_particles(&runner, cells[i * 9 + j * 3 + k], 0); + runner_do_drift_part(&runner, cells[i * 9 + j * 3 + k], 0); runner_do_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0); } diff --git a/tests/test27cells.sh.in b/tests/test27cells.sh.in index 07b6b92a82cee2bbe9c593f8f62e750d4406f84e..4312ce55e13097d4ae40c289b9c5caa885ff37cc 100755 --- a/tests/test27cells.sh.in +++ b/tests/test27cells.sh.in @@ -3,18 +3,28 @@ for v in {0..3} do echo "" - + rm -f brute_force_27_standard.dat swift_dopair_27_standard.dat + echo "Running ./test27cells -n 6 -r 1 -d 0 -f standard -v $v -a 1e-4" ./test27cells -n 6 -r 1 -d 0 -f standard -v $v -a 1e-4 if [ -e brute_force_27_standard.dat ] then - python @srcdir@/difffloat.py 
brute_force_27_standard.dat swift_dopair_27_standard.dat @srcdir@/tolerance_27_normal.dat 6 + if python @srcdir@/difffloat.py brute_force_27_standard.dat swift_dopair_27_standard.dat @srcdir@/tolerance_27_normal.dat 6 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi else + echo "Error Missing test output file" exit 1 fi + echo "------------" + done exit $? diff --git a/tests/test27cellsPerturbed.sh.in b/tests/test27cellsPerturbed.sh.in index 30498594b659101216b51dfea2346fa9230dbc97..2f2e1db76346ca8f0ea4c2365ee349e232a1ce53 100755 --- a/tests/test27cellsPerturbed.sh.in +++ b/tests/test27cellsPerturbed.sh.in @@ -6,15 +6,25 @@ do rm -f brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat + echo "Running ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v -a 5e-4" ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v -a 5e-4 if [ -e brute_force_27_perturbed.dat ] then - python @srcdir@/difffloat.py brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat @srcdir@/tolerance_27_perturbed.dat 6 + if python @srcdir@/difffloat.py brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat @srcdir@/tolerance_27_perturbed.dat 6 1 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi else + echo "Error Missing test output file" exit 1 fi + echo "------------" + done exit $? diff --git a/tests/testFFT.c b/tests/testFFT.c index c4aeb2885c788bd769bda49bdd15ab121dd8e9d4..4ddd030ece95bf26cbfe41f2408be7c3e0c50535 100644 --- a/tests/testFFT.c +++ b/tests/testFFT.c @@ -18,8 +18,8 @@ ******************************************************************************/ /* Some standard headers. */ -#include <stdlib.h> -#include <string.h> + +#include "../config.h" #ifndef HAVE_FFTW @@ -27,169 +27,93 @@ int main() { return 0; } #else -#include <fftw3.h> +/* Some standard headers. */ +#include <stdlib.h> +#include <string.h> /* Includes. 
*/ #include "swift.h" -const double G = 1.; - -const size_t N = 16; -const size_t PMGRID = 8; - -// const double asmth = 2. * M_PI * const_gravity_a_smooth / boxSize; -// const double asmth2 = asmth * asmth; -// const double fact = G / (M_PI * boxSize) * (1. / (2. * boxSize / PMGRID)); - int main() { /* Initialize CPU frequency, this also starts time. */ unsigned long long cpufreq = 0; clocks_set_cpufreq(cpufreq); - /* Simulation properties */ - const size_t count = N * N * N; - const double boxSize = 1.; - - /* Create some particles */ - struct gpart* gparts = malloc(count * sizeof(struct gpart)); - bzero(gparts, count * sizeof(struct gpart)); - for (size_t i = 0; i < N; ++i) { - for (size_t j = 0; j < N; ++j) { - for (size_t k = 0; k < N; ++k) { - - struct gpart* gp = &gparts[i * N * N + j * N + k]; - - gp->x[0] = i * boxSize / N + boxSize / (2 * N); - gp->x[1] = j * boxSize / N + boxSize / (2 * N); - gp->x[2] = k * boxSize / N + boxSize / (2 * N); - - gp->mass = 1. / count; - - gp->id_or_neg_offset = i * N * N + j * N + k; - } - } - } - - /* Properties of the mesh */ - const size_t meshmin[3] = {0, 0, 0}; - const size_t meshmax[3] = {PMGRID - 1, PMGRID - 1, PMGRID - 1}; - - const size_t dimx = meshmax[0] - meshmin[0] + 2; - const size_t dimy = meshmax[1] - meshmin[1] + 2; - const size_t dimz = meshmax[2] - meshmin[2] + 2; - - const double fac = PMGRID / boxSize; - const size_t PMGRID2 = 2 * (PMGRID / 2 + 1); - - /* message("dimx=%zd dimy=%zd dimz=%zd", dimx, dimy, dimz); */ - - /* Allocate and empty the workspace mesh */ - const size_t workspace_size = (dimx + 4) * (dimy + 4) * (dimz + 4); - double* workspace = fftw_malloc(workspace_size * sizeof(double)); - bzero(workspace, workspace_size * sizeof(double)); - - /* Do CIC with the particles */ - for (size_t pid = 0; pid < count; ++pid) { - - const struct gpart* const gp = &gparts[pid]; - - const size_t slab_x = - (fac * gp->x[0] >= PMGRID) ? 
PMGRID - 1 : fac * gp->x[0]; - const size_t slab_y = - (fac * gp->x[1] >= PMGRID) ? PMGRID - 1 : fac * gp->x[1]; - const size_t slab_z = - (fac * gp->x[2] >= PMGRID) ? PMGRID - 1 : fac * gp->x[2]; - - const double dx = fac * gp->x[0] - (double)slab_x; - const double dy = fac * gp->x[1] - (double)slab_y; - const double dz = fac * gp->x[2] - (double)slab_z; - - const size_t slab_xx = slab_x + 1; - const size_t slab_yy = slab_y + 1; - const size_t slab_zz = slab_z + 1; - - workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += - gp->mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz); - workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += - gp->mass * (1.0 - dx) * dy * (1.0 - dz); - workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += - gp->mass * (1.0 - dx) * (1.0 - dy) * dz; - workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += - gp->mass * (1.0 - dx) * dy * dz; - workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += - gp->mass * (dx) * (1.0 - dy) * (1.0 - dz); - workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += - gp->mass * (dx)*dy * (1.0 - dz); - workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += - gp->mass * (dx) * (1.0 - dy) * dz; - workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += - gp->mass * (dx)*dy * dz; - } - - /* for(size_t i = 0 ; i < dimx*dimy*dimz; ++i) */ - /* message("workspace[%zd] = %f", i, workspace[i]); */ - - /* Prepare the force grid */ - const size_t fft_size = workspace_size; - double* forcegrid = fftw_malloc(fft_size * sizeof(double)); - bzero(forcegrid, fft_size * sizeof(double)); - - const size_t sendmin = 0, recvmin = 0; - const size_t sendmax = PMGRID, recvmax = PMGRID; - - memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz, - (sendmax - sendmin + 1) * dimy * dimz * sizeof(double)); - - /* for (size_t i = 0; i < fft_size; ++i) */ - /* if (forcegrid[i] != workspace[i]) error("wrong"); */ - - /* Prepare the density grid */ - double* rhogrid = fftw_malloc(fft_size * sizeof(double)); - bzero(rhogrid, 
fft_size * sizeof(double)); - - /* Now get the density */ - for (size_t slab_x = recvmin; slab_x <= recvmax; slab_x++) { - - const size_t slab_xx = slab_x % PMGRID; - - for (size_t slab_y = recvmin; slab_y <= recvmax; slab_y++) { - - const size_t slab_yy = slab_y % PMGRID; - - for (size_t slab_z = recvmin; slab_z <= recvmax; slab_z++) { - - const size_t slab_zz = slab_z % PMGRID; - - rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz] += - forcegrid[((slab_x - recvmin) * dimy + (slab_y - recvmin)) * dimz + - (slab_z - recvmin)]; - } - } - } - - /* for (size_t i = 0; i < 640; i++) { */ - /* if (rhogrid[i] != workspace[i]) { */ - /* message("rhogrid[%zd]= %f workspace[%zd]= %f forcegrid[%zd]= %f", i, */ - /* rhogrid[i], i, workspace[i], i, forcegrid[i]); */ - /* } */ - /* } */ - - /* FFT of the density field */ - fftw_complex* fftgrid = fftw_malloc(fft_size * sizeof(fftw_complex)); - fftw_plan plan_forward = fftw_plan_dft_r2c_3d(PMGRID, PMGRID, PMGRID, rhogrid, - fftgrid, FFTW_ESTIMATE); - fftw_execute(plan_forward); - - for (size_t i = 0; i < 640; i++) { - message("workspace[%zd]= %f", i, fftgrid[i][0]); + /* Make one particle */ + int nr_gparts = 1; + struct gpart *gparts = NULL; + if (posix_memalign((void **)&gparts, 64, nr_gparts * sizeof(struct gpart)) != + 0) + error("Impossible to allocate memory for gparts."); + bzero(gparts, nr_gparts * sizeof(struct gpart)); + + gparts[0].x[0] = 0.3; + gparts[0].x[1] = 0.8; + gparts[0].x[2] = 0.2; + gparts[0].mass = 1.f; + + /* Read the parameter file */ + struct swift_params *params = malloc(sizeof(struct swift_params)); + parser_read_file("fft_params.yml", params); + + /* Initialise the gravity properties */ + struct gravity_props gravity_properties; + gravity_props_init(&gravity_properties, params); + + /* Build the infrastructure */ + struct space space; + double dim[3] = {1., 1., 1.}; + space_init(&space, params, dim, NULL, gparts, NULL, 0, nr_gparts, 0, 1, 1, 1, + 0, 0); + + struct engine engine; + 
engine.s = &space; + space.e = &engine; + engine.time = 0.1f; + engine.ti_current = 0; + engine.ti_old = 0; + engine.max_active_bin = num_time_bins; + engine.gravity_properties = &gravity_properties; + engine.nr_threads = 1; + + struct runner runner; + runner.e = &engine; + + /* Initialize the threadpool. */ + threadpool_init(&engine.threadpool, engine.nr_threads); + + space_rebuild(&space, 0); + + /* Run the FFT task */ + runner_do_grav_fft(&runner, 1); + + /* Now check that we got the right answer */ + int nr_cells = space.nr_cells; + double *r = malloc(nr_cells * sizeof(double)); + double *pot = malloc(nr_cells * sizeof(double)); + double *pot_exact = malloc(nr_cells * sizeof(double)); + + // FILE *file = fopen("potential.dat", "w"); + for (int i = 0; i < nr_cells; ++i) { + pot[i] = space.multipoles_top[i].pot.F_000; + double dx = + nearest(space.multipoles_top[i].CoM[0] - gparts[0].x[0], dim[0]); + double dy = + nearest(space.multipoles_top[i].CoM[1] - gparts[0].x[1], dim[1]); + double dz = + nearest(space.multipoles_top[i].CoM[2] - gparts[0].x[2], dim[2]); + r[i] = sqrt(dx * dx + dy * dy + dz * dz); + if (r[i] > 0) pot_exact[i] = -1. / r[i]; + // fprintf(file, "%e %e %e\n", r[i], pot[i], pot_exact[i]); } + // fclose(file); - /* Clean-up */ - fftw_destroy_plan(plan_forward); - fftw_free(forcegrid); - fftw_free(rhogrid); - fftw_free(workspace); + /* Clean up */ + free(r); + free(pot); + free(pot_exact); + free(params); free(gparts); return 0; } diff --git a/tests/testKernel.c b/tests/testKernel.c index 13f4e36534eb11a4c8f7ba9c19a48de6599e31f5..a2744119a527cc842cdd4711056eee7a7d7b4270 100644 --- a/tests/testKernel.c +++ b/tests/testKernel.c @@ -39,7 +39,7 @@ int main() { const float numPoints_inv = 1. 
/ numPoints; for (int i = 0; i < numPoints; ++i) { - u[i] = i * 2.5f * numPoints_inv / h; + u[i] = i * 2.25f * numPoints_inv / h; } for (int i = 0; i < numPoints; ++i) { @@ -55,19 +55,63 @@ int main() { #ifdef WITH_VECTORIZATION + printf("\nVector Output for kernel_deval_1_vec\n"); + printf("-------------\n"); + + /* Test vectorised kernel that uses one vector. */ + for (int i = 0; i < numPoints; i += VEC_SIZE) { + + vector vx, vx_h; + vector W_vec, dW_vec; + + for (int j = 0; j < VEC_SIZE; j++) { + vx.f[j] = (i + j) * 2.25f / numPoints; + } + + vx_h.v = vx.v / vec_set1(h); + + kernel_deval_1_vec(&vx_h, &W_vec, &dW_vec); + + for (int j = 0; j < VEC_SIZE; j++) { + printf("%2d: h= %f H= %f x=%f W(x,h)=%f dW(x,h)=%f\n", i + j, h, + h * kernel_gamma, vx.f[j], W_vec.f[j], dW_vec.f[j]); + + if (fabsf(W_vec.f[j] - W[i + j]) > 2e-7) { + printf("Invalid value ! scalar= %e, vector= %e\n", W[i + j], + W_vec.f[j]); + return 1; + } + if (fabsf(dW_vec.f[j] - dW[i + j]) > 2e-7) { + printf("Invalid value ! scalar= %e, vector= %e\n", dW[i + j], + dW_vec.f[j]); + return 1; + } + } + } + + printf("\nVector Output for kernel_deval_2_vec\n"); + printf("-------------\n"); + + /* Test vectorised kernel that uses two vectors. */ for (int i = 0; i < numPoints; i += VEC_SIZE) { vector vx, vx_h; vector W_vec, dW_vec; + vector vx_2, vx_h_2; + vector W_vec_2, dW_vec_2; + for (int j = 0; j < VEC_SIZE; j++) { - vx.f[j] = (i + j) * 2.5f / numPoints; + vx.f[j] = (i + j) * 2.25f / numPoints; + vx_2.f[j] = (i + j) * 2.25f / numPoints; } vx_h.v = vx.v / vec_set1(h); + vx_h_2.v = vx_2.v / vec_set1(h); - kernel_deval_vec(&vx_h, &W_vec, &dW_vec); + kernel_deval_2_vec(&vx_h, &W_vec, &dW_vec, &vx_h_2, &W_vec_2, &dW_vec_2); + /* Check first vector results. */ for (int j = 0; j < VEC_SIZE; j++) { printf("%2d: h= %f H= %f x=%f W(x,h)=%f dW(x,h)=%f\n", i + j, h, h * kernel_gamma, vx.f[j], W_vec.f[j], dW_vec.f[j]); @@ -83,6 +127,23 @@ int main() { return 1; } } + + /* Check second vector results. 
*/ + for (int j = 0; j < VEC_SIZE; j++) { + printf("%2d: h= %f H= %f x=%f W(x,h)=%f dW(x,h)=%f\n", i + j, h, + h * kernel_gamma, vx_2.f[j], W_vec_2.f[j], dW_vec_2.f[j]); + + if (fabsf(W_vec_2.f[j] - W[i + j]) > 2e-7) { + printf("Invalid value ! scalar= %e, vector= %e\n", W[i + j], + W_vec_2.f[j]); + return 1; + } + if (fabsf(dW_vec_2.f[j] - dW[i + j]) > 2e-7) { + printf("Invalid value ! scalar= %e, vector= %e\n", dW[i + j], + dW_vec_2.f[j]); + return 1; + } + } } printf("\nAll values are consistent\n"); diff --git a/tests/testPair.c b/tests/testPair.c index c2533b63b902e3bdc7e7cae6fcbcf50c87dee4af..92987d2fdb625fec6e186a280837f145787f599b 100644 --- a/tests/testPair.c +++ b/tests/testPair.c @@ -84,7 +84,8 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->split = 0; cell->h_max = h; cell->count = count; - cell->dx_max = 0.; + cell->dx_max_part = 0.; + cell->dx_max_sort = 0.; cell->width[0] = n; cell->width[1] = n; cell->width[2] = n; @@ -92,7 +93,7 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->loc[1] = offset[1]; cell->loc[2] = offset[2]; - cell->ti_old = 8; + cell->ti_old_part = 8; cell->ti_end_min = 8; cell->ti_end_max = 8; diff --git a/tests/testSPHStep.c b/tests/testSPHStep.c index 0c7ae1d0d8855371b8f8f9fbf51c7c63b3221aaa..014dacd1eb62040b03e6038b2c23183a24ec4850 100644 --- a/tests/testSPHStep.c +++ b/tests/testSPHStep.c @@ -71,7 +71,8 @@ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) { cell->h_max = h; cell->count = count; cell->gcount = 0; - cell->dx_max = 0.; + cell->dx_max_part = 0.; + cell->dx_max_sort = 0.; cell->width[0] = cellSize; cell->width[1] = cellSize; cell->width[2] = cellSize; diff --git a/tests/tolerance_125.dat b/tests/tolerance_125_normal.dat similarity index 100% rename from tests/tolerance_125.dat rename to tests/tolerance_125_normal.dat diff --git a/tests/tolerance_125_perturbed.dat b/tests/tolerance_125_perturbed.dat new file mode 100644 index 
0000000000000000000000000000000000000000..04e642b28cb3729cb81f8183c3e69595ac651876 --- /dev/null +++ b/tests/tolerance_125_perturbed.dat @@ -0,0 +1,3 @@ +# ID pos_x pos_y pos_z v_x v_y v_z h rho div_v S u P c a_x a_y a_z h_dt v_sig dS/dt du/dt + 0 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 + 0 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 5e-3 5e-3 5e-3 1e-4 1e-4 1e-4 1e-4 diff --git a/tests/tolerance_27_normal.dat b/tests/tolerance_27_normal.dat index 9c7ca10414507746b41e453d75426a072f989d2e..31ee002bb9c73ff8d74cce545aff715476b33507 100644 --- a/tests/tolerance_27_normal.dat +++ b/tests/tolerance_27_normal.dat @@ -1,3 +1,3 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 4e-5 2e-4 2e-3 8e-6 6e-6 6e-6 6e-6 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 4e-5 2e-4 2e-3 1e-5 6e-6 6e-6 6e-6 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-4 1e-4 1e-4 2e-4 1e-4 1e-4 1e-4 diff --git a/tests/tolerance_27_perturbed.dat b/tests/tolerance_27_perturbed.dat index 53de4ec7632039a56a3757488881e890296e3ac8..9c6ee8c77cc6d53e67db9dbb86be197d49149b10 100644 --- a/tests/tolerance_27_perturbed.dat +++ b/tests/tolerance_27_perturbed.dat @@ -1,3 +1,3 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-6 1e-4 5e-5 2e-3 3.1e-6 3e-6 3e-6 3e-6 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-2 1e-5 1e-4 2e-5 2e-3 2e-3 2e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-6 1e-4 5e-5 2e-3 4e-6 3e-6 3e-6 3e-6 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-3 1e-5 1e-4 4e-5 2e-3 2e-3 2e-3