diff --git a/configure.ac b/configure.ac index 9f50ba4563cad932314a9f5e7e0bd8cf13b88b7c..ab9bdfad58711eef1dabcc27c8371ccc38bdc7c0 100644 --- a/configure.ac +++ b/configure.ac @@ -407,6 +407,24 @@ if test "$enable_memuse_reports" = "yes"; then AC_DEFINE([SWIFT_MEMUSE_REPORTS],1,[Enable memory usage reports]) fi +# The system memory report depends on the presence of /proc/self/statm, i.e. +# a linux OS, check for that. +if test -f "/proc/self/statm"; then + AC_DEFINE([SWIFT_MEMUSE_STATM],1,[Have /proc/self/statm capability]) +fi + +# Check if we want to make the dumper thread active. +AC_ARG_ENABLE([dumper], + [AS_HELP_STRING([--enable-dumper], + [Dump active tasks and memory use (if configured)@<:@yes/no@:>@] + )], + [enable_dumper="$enableval"], + [enable_dumper="no"] +) +if test "$enable_dumper" = "yes"; then + AC_DEFINE([SWIFT_DUMPER_THREAD],1,[Enable dumper thread]) +fi + # See if we want mpi reporting. AC_ARG_ENABLE([mpiuse-reports], [AS_HELP_STRING([--enable-mpiuse-reports], @@ -1208,6 +1226,17 @@ fi AC_SUBST([VELOCIRAPTOR_LIBS]) AM_CONDITIONAL([HAVEVELOCIRAPTOR],[test -n "$VELOCIRAPTOR_LIBS"]) +# Now that we found VELOCIraptor, let's check how it was compiled. +if test "$have_velociraptor" == "yes"; then + AC_CHECK_LIB( + [velociraptor], + [VR_NOMASS], + [AC_DEFINE([HAVE_VELOCIRAPTOR_WITH_NOMASS],1,[The VELOCIraptor library has been compiled with the NOMASS option. Only useful if running a uniform box.])], + [AC_MSG_RESULT(VELOCIraptor not compiled to so as to *not* store masses per particle.)], + [$VELOCIRAPTOR_LIBS $HDF5_LDFLAGS $HDF5_LIBS $GSL_LIBS] + ) +fi + # Check for dummy VELOCIraptor. AC_ARG_ENABLE([dummy-velociraptor], [AS_HELP_STRING([--enable-dummy-velociraptor], @@ -1899,7 +1928,7 @@ esac # Feedback model AC_ARG_WITH([feedback], [AS_HELP_STRING([--with-feedback=<model>], - [Feedback model to use @<:@none, EAGLE, debug default: none@:>@] + [Feedback model to use @<:@none, EAGLE, GEAR default: none@:>@] )], [with_feedback="$withval"], [with_feedback="none"] @@ -1932,7 +1961,7 @@ esac # Black hole model. AC_ARG_WITH([black-holes], [AS_HELP_STRING([--with-black-holes=<model>], - [Black holes model to use @<:@none, default: none@:>@] + [Black holes model to use @<:@none, EAGLE default: none@:>@] )], [with_black_holes="$withval"], [with_black_holes="none"] @@ -1961,14 +1990,14 @@ esac # Task order AC_ARG_WITH([task-order], [AS_HELP_STRING([--with-task-order=<model>], -[Task order to use @<:@none, EAGLE, GEAR default: none@:>@] +[Task order to use @<:@default, EAGLE, GEAR default: default@:>@] )], [with_task_order="$withval"], -[with_task_order="none"] +[with_task_order="default"] ) if test "$with_subgrid" != "none"; then -if test "$with_task_order" != "none"; then +if test "$with_task_order" != "default"; then AC_MSG_ERROR([Cannot provide with-subgrid and with-task-order together]) else with_task_order="$with_subgrid_task_order" @@ -1979,8 +2008,8 @@ case "$with_task_order" in EAGLE) AC_DEFINE([TASK_ORDER_EAGLE], [1], [EAGLE task order]) ;; -none) -AC_DEFINE([TASK_ORDER_NONE], [1], [Default task order]) +default) +AC_DEFINE([TASK_ORDER_NONE], [1], [Default (i.e. EAGLE/OWLS) task order]) ;; GEAR) AC_DEFINE([TASK_ORDER_GEAR], [1], [GEAR task order]) diff --git a/doc/RTD/source/EquationOfState/index.rst b/doc/RTD/source/EquationOfState/index.rst index 11e6069130a84ba54dec01e9892464574d9c2c6b..c4468a837efaddebc6a44f8e4e72c8fe056dfae7 100644 --- a/doc/RTD/source/EquationOfState/index.rst +++ b/doc/RTD/source/EquationOfState/index.rst @@ -7,18 +7,24 @@ Equations of State ================== -Currently (if the documentation was well updated), we have two different gas -equations of state (EoS) implemented: ideal and isothermal; as well as a variety -of EoS for "planetary" materials. -The EoS describe the relations between our main thermodynamical variables: -the internal energy (\\(u\\)), the density (\\(\\rho\\)), the entropy (\\(A\\)) -and the pressure (\\(P\\)). +Currently, SWIFT offers two different gas equations of state (EoS) +implemented: ``ideal`` and ``isothermal``; as well as a variety of EoS for +"planetary" materials. The EoS describe the relations between our +main thermodynamical variables: the internal energy per unit mass +(\\(u\\)), the mass density (\\(\\rho\\)), the entropy (\\(A\\)) and +the pressure (\\(P\\)). Gas EoS ------- -In the following section, the variables not yet defined are: \\(\\gamma\\) for -the adiabatic index and \\( c_s \\) for the speed of sound. +We write the adiabatic index as \\(\\gamma \\) and \\( c_s \\) denotes +the speed of sound. The adiabatic index can be changed at configure +time by choosing one of the allowed values of the option +``--with-adiabatic-index``. The default value is \\(\\gamma = 5/3 \\). + +The tables below give the expression for the thermodynamic quantities +on each row entry as a function of the gas density and the +thermodynamical quantity given in the header of each column. .. csv-table:: Ideal Gas :header: "Variable", "A", "u", "P" @@ -38,7 +44,11 @@ the adiabatic index and \\( c_s \\) for the speed of sound. "P", "", "\\(\\left( \\gamma - 1\\right) u \\rho \\)", "" "\\( c_s\\)", "", "\\(\\sqrt{ u \\gamma \\left( \\gamma - 1 \\right) } \\)", "" - +Note that when running with an isothermal equation of state, the value +of the tracked thermodynamic variable (e.g. the entropy in a +density-entropy scheme or the internal enegy in a density-energy SPH +formulation) written to the snapshots is meaningless. The pressure, +however, is always correct in all scheme. Planetary EoS ------------- diff --git a/doc/RTD/source/ParameterFiles/parameter_description.rst b/doc/RTD/source/ParameterFiles/parameter_description.rst index 726631b7e0de1e2ed134fbba456f0a7f3288ea38..4abe0bf913e7721ba6d2436f47bd0f56941db55f 100644 --- a/doc/RTD/source/ParameterFiles/parameter_description.rst +++ b/doc/RTD/source/ParameterFiles/parameter_description.rst @@ -986,26 +986,34 @@ should contain. The type of partitioning attempted is controlled by the:: DomainDecomposition: initial_type: -parameter. Which can have the values *memory*, *region*, *grid* or +parameter. Which can have the values *memory*, *edgememory*, *region*, *grid* or *vectorized*: + * *edgememory* + + This is the default if METIS or ParMETIS is available. It performs a + partition based on the memory use of all the particles in each cell. + The total memory per cell is used to weight the cell vertex and all the + associated edges. This attempts to equalize the memory used by all the + ranks but with some consideration given to the need to not cut dense + regions (by also minimizing the edge cut). How successful this + attempt is depends on the granularity of cells and particles and the + number of ranks, clearly if most of the particles are in one cell, or a + small region of the volume, balance is impossible or difficult. Having + more top-level cells makes it easier to calculate a good distribution + (but this comes at the cost of greater overheads). * *memory* - This is the default if METIS or ParMETIS is available. It performs a - partition based on the memory use of all the particles in each cell, - attempting to equalize the memory used by all the ranks. - How successful this attempt is depends on the granularity of cells and particles - and the number of ranks, clearly if most of the particles are in one cell, - or a small region of the volume, balance is impossible or - difficult. Having more top-level cells makes it easier to calculate a - good distribution (but this comes at the cost of greater overheads). + This is like *edgememory*, but doesn't include any edge weights, it should + balance the particle memory use per rank more exactly (but note effects + like the numbers of cells per rank will also have an effect, as that + changes the need for foreign cells). * *region* The one other METIS/ParMETIS option is "region". This attempts to assign equal - numbers of cells to each rank, with the surface area of the regions minimised - (so we get blobs, rather than rectangular volumes of cells). + numbers of cells to each rank, with the surface area of the regions minimised. If ParMETIS and METIS are not available two other options are possible, but will give a poorer partition: diff --git a/examples/GravityTests/DiscPatch/HydroStatic/plotSolution.py b/examples/GravityTests/DiscPatch/HydroStatic/plotSolution.py index 681f7d8ab3f2320b5de75e688edcb92efef9d883..b00b6176554f2d5d2aa5e87cceba90c02f2a7e54 100644 --- a/examples/GravityTests/DiscPatch/HydroStatic/plotSolution.py +++ b/examples/GravityTests/DiscPatch/HydroStatic/plotSolution.py @@ -62,11 +62,9 @@ def get_analytic_pressure(x): def get_data(name): file = h5py.File(name, "r") coords = np.array(file["/PartType0/Coordinates"]) - rho = np.array(file["/PartType0/Density"]) - u = np.array(file["/PartType0/InternalEnergy"]) + rho = np.array(file["/PartType0/Densities"]) v = np.array(file["/PartType0/Velocities"]) - - P = (gamma - 1.) * rho * u + P = np.array(file["/PartType0/Pressures"]) vtot = np.sqrt( v[:,0]**2 + v[:,1]**2 + v[:,2]**2 ) @@ -79,7 +77,7 @@ for f in sorted(glob.glob("Disc-Patch_*.hdf5")): if num < start or num > stop: continue - print "processing", f, "..." + print("processing", f, "...") xrange = np.linspace(0., 2. * x_disc, 1000) time, x, rho, P, v = get_data(f) diff --git a/examples/HydroTests/GreshoVortex_2D/plotSolution.py b/examples/HydroTests/GreshoVortex_2D/plotSolution.py index 2d4697b6ffaac0639da67ee90d824c75791ea573..fd63e22ba5c995b4ec3ab9b50b9b6f69750a08b0 100644 --- a/examples/HydroTests/GreshoVortex_2D/plotSolution.py +++ b/examples/HydroTests/GreshoVortex_2D/plotSolution.py @@ -1,61 +1,41 @@ ############################################################################### - # This file is part of SWIFT. - # Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) - # - # This program is free software: you can redistribute it and/or modify - # it under the terms of the GNU Lesser General Public License as published - # by the Free Software Foundation, either version 3 of the License, or - # (at your option) any later version. - # - # This program is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - # GNU General Public License for more details. - # - # You should have received a copy of the GNU Lesser General Public License - # along with this program. If not, see <http://www.gnu.org/licenses/>. - # - ############################################################################## +# This file is part of SWIFT. +# Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +############################################################################## # Computes the analytical solution of the Gresho-Chan vortex and plots the SPH answer # Parameters -gas_gamma = 5./3. # Gas adiabatic index -rho0 = 1 # Gas density -P0 = 0. # Constant additional pressure (should have no impact on the dynamics) +gas_gamma = 5.0 / 3.0 # Gas adiabatic index +rho0 = 1 # Gas density +P0 = 0.0 # Constant additional pressure (should have no impact on the dynamics) # --------------------------------------------------------------- # Don't touch anything after this. # --------------------------------------------------------------- import matplotlib + matplotlib.use("Agg") from pylab import * from scipy import stats import h5py -# Plot parameters -params = {'axes.labelsize': 10, -'axes.titlesize': 10, -'font.size': 12, -'legend.fontsize': 12, -'xtick.labelsize': 10, -'ytick.labelsize': 10, -'text.usetex': True, - 'figure.figsize' : (9.90,6.45), -'figure.subplot.left' : 0.045, -'figure.subplot.right' : 0.99, -'figure.subplot.bottom' : 0.05, -'figure.subplot.top' : 0.99, -'figure.subplot.wspace' : 0.15, -'figure.subplot.hspace' : 0.12, -'lines.markersize' : 6, -'lines.linewidth' : 3., -'text.latex.unicode': True -} -rcParams.update(params) -rc('font',**{'family':'sans-serif','sans-serif':['Times']}) - +style.use("../../../tools/stylesheets/mnras.mplstyle") snap = int(sys.argv[1]) @@ -69,21 +49,27 @@ solution_v_r = zeros(N) for i in range(N): if solution_r[i] < 0.2: - solution_P[i] = P0 + 5. + 12.5*solution_r[i]**2 - solution_v_phi[i] = 5.*solution_r[i] + solution_P[i] = P0 + 5.0 + 12.5 * solution_r[i] ** 2 + solution_v_phi[i] = 5.0 * solution_r[i] elif solution_r[i] < 0.4: - solution_P[i] = P0 + 9. + 12.5*solution_r[i]**2 - 20.*solution_r[i] + 4.*log(solution_r[i]/0.2) - solution_v_phi[i] = 2. -5.*solution_r[i] + solution_P[i] = ( + P0 + + 9.0 + + 12.5 * solution_r[i] ** 2 + - 20.0 * solution_r[i] + + 4.0 * log(solution_r[i] / 0.2) + ) + solution_v_phi[i] = 2.0 - 5.0 * solution_r[i] else: - solution_P[i] = P0 + 3. + 4.*log(2.) - solution_v_phi[i] = 0. + solution_P[i] = P0 + 3.0 + 4.0 * log(2.0) + solution_v_phi[i] = 0.0 solution_rho = ones(N) * rho0 -solution_s = solution_P / solution_rho**gas_gamma -solution_u = solution_P /((gas_gamma - 1.)*solution_rho) +solution_s = solution_P / solution_rho ** gas_gamma +solution_u = solution_P / ((gas_gamma - 1.0) * solution_rho) # Read the simulation data -sim = h5py.File("gresho_%04d.hdf5"%snap, "r") +sim = h5py.File("gresho_%04d.hdf5" % snap, "r") boxSize = sim["/Header"].attrs["BoxSize"][0] time = sim["/Header"].attrs["Time"][0] scheme = sim["/HydroScheme"].attrs["Scheme"] @@ -92,133 +78,153 @@ neighbours = sim["/HydroScheme"].attrs["Kernel target N_ngb"] eta = sim["/HydroScheme"].attrs["Kernel eta"] git = sim["Code"].attrs["Git Revision"] -pos = sim["/PartType0/Coordinates"][:,:] -x = pos[:,0] - boxSize / 2 -y = pos[:,1] - boxSize / 2 -vel = sim["/PartType0/Velocities"][:,:] -r = sqrt(x**2 + y**2) -v_r = (x * vel[:,0] + y * vel[:,1]) / r -v_phi = (-y * vel[:,0] + x * vel[:,1]) / r -v_norm = sqrt(vel[:,0]**2 + vel[:,1]**2) +pos = sim["/PartType0/Coordinates"][:, :] +x = pos[:, 0] - boxSize / 2 +y = pos[:, 1] - boxSize / 2 +vel = sim["/PartType0/Velocities"][:, :] +r = sqrt(x ** 2 + y ** 2) +v_r = (x * vel[:, 0] + y * vel[:, 1]) / r +v_phi = (-y * vel[:, 0] + x * vel[:, 1]) / r +v_norm = sqrt(vel[:, 0] ** 2 + vel[:, 1] ** 2) rho = sim["/PartType0/Densities"][:] u = sim["/PartType0/InternalEnergies"][:] S = sim["/PartType0/Entropies"][:] P = sim["/PartType0/Pressures"][:] # Bin te data -r_bin_edge = np.arange(0., 1., 0.02) -r_bin = 0.5*(r_bin_edge[1:] + r_bin_edge[:-1]) -rho_bin,_,_ = stats.binned_statistic(r, rho, statistic='mean', bins=r_bin_edge) -v_bin,_,_ = stats.binned_statistic(r, v_phi, statistic='mean', bins=r_bin_edge) -P_bin,_,_ = stats.binned_statistic(r, P, statistic='mean', bins=r_bin_edge) -S_bin,_,_ = stats.binned_statistic(r, S, statistic='mean', bins=r_bin_edge) -u_bin,_,_ = stats.binned_statistic(r, u, statistic='mean', bins=r_bin_edge) -rho2_bin,_,_ = stats.binned_statistic(r, rho**2, statistic='mean', bins=r_bin_edge) -v2_bin,_,_ = stats.binned_statistic(r, v_phi**2, statistic='mean', bins=r_bin_edge) -P2_bin,_,_ = stats.binned_statistic(r, P**2, statistic='mean', bins=r_bin_edge) -S2_bin,_,_ = stats.binned_statistic(r, S**2, statistic='mean', bins=r_bin_edge) -u2_bin,_,_ = stats.binned_statistic(r, u**2, statistic='mean', bins=r_bin_edge) -rho_sigma_bin = np.sqrt(rho2_bin - rho_bin**2) -v_sigma_bin = np.sqrt(v2_bin - v_bin**2) -P_sigma_bin = np.sqrt(P2_bin - P_bin**2) -S_sigma_bin = np.sqrt(S2_bin - S_bin**2) -u_sigma_bin = np.sqrt(u2_bin - u_bin**2) +r_bin_edge = np.arange(0.0, 1.0, 0.02) +r_bin = 0.5 * (r_bin_edge[1:] + r_bin_edge[:-1]) +rho_bin, _, _ = stats.binned_statistic(r, rho, statistic="mean", bins=r_bin_edge) +v_bin, _, _ = stats.binned_statistic(r, v_phi, statistic="mean", bins=r_bin_edge) +P_bin, _, _ = stats.binned_statistic(r, P, statistic="mean", bins=r_bin_edge) +S_bin, _, _ = stats.binned_statistic(r, S, statistic="mean", bins=r_bin_edge) +u_bin, _, _ = stats.binned_statistic(r, u, statistic="mean", bins=r_bin_edge) +rho2_bin, _, _ = stats.binned_statistic(r, rho ** 2, statistic="mean", bins=r_bin_edge) +v2_bin, _, _ = stats.binned_statistic(r, v_phi ** 2, statistic="mean", bins=r_bin_edge) +P2_bin, _, _ = stats.binned_statistic(r, P ** 2, statistic="mean", bins=r_bin_edge) +S2_bin, _, _ = stats.binned_statistic(r, S ** 2, statistic="mean", bins=r_bin_edge) +u2_bin, _, _ = stats.binned_statistic(r, u ** 2, statistic="mean", bins=r_bin_edge) +rho_sigma_bin = np.sqrt(rho2_bin - rho_bin ** 2) +v_sigma_bin = np.sqrt(v2_bin - v_bin ** 2) +P_sigma_bin = np.sqrt(P2_bin - P_bin ** 2) +S_sigma_bin = np.sqrt(S2_bin - S_bin ** 2) +u_sigma_bin = np.sqrt(u2_bin - u_bin ** 2) # Plot the interesting quantities -figure() +figure(figsize=(7, 7 / 1.6)) + +line_color = "C4" +binned_color = "C2" +binned_marker_size = 4 +scatter_props = dict( + marker=".", + ms=1, + markeredgecolor="none", + alpha=0.5, + zorder=-1, + rasterized=True, + linestyle="none", +) + +errorbar_props = dict(color=binned_color, ms=binned_marker_size, fmt=".", lw=1.2) # Azimuthal velocity profile ----------------------------- subplot(231) -plot(r, v_phi, '.', color='r', ms=0.5) -plot(solution_r, solution_v_phi, '--', color='k', alpha=0.8, lw=1.2) -errorbar(r_bin, v_bin, yerr=v_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) -plot([0.2, 0.2], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -plot([0.4, 0.4], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -xlabel("${\\rm{Radius}}~r$", labelpad=0) -ylabel("${\\rm{Azimuthal~velocity}}~v_\\phi$", labelpad=0) -xlim(0,R_max) +plot(r, v_phi, **scatter_props) +plot(solution_r, solution_v_phi, "--", color=line_color, alpha=0.8, lw=1.2) +errorbar(r_bin, v_bin, yerr=v_sigma_bin, **errorbar_props) +plot([0.2, 0.2], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +plot([0.4, 0.4], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +xlabel("Radius $r$") +ylabel("Azimuthal velocity $v_\\phi$") +xlim(0, R_max) ylim(-0.1, 1.2) # Radial density profile -------------------------------- subplot(232) -plot(r, rho, '.', color='r', ms=0.5) -plot(solution_r, solution_rho, '--', color='k', alpha=0.8, lw=1.2) -errorbar(r_bin, rho_bin, yerr=rho_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) -plot([0.2, 0.2], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -plot([0.4, 0.4], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -xlabel("${\\rm{Radius}}~r$", labelpad=0) -ylabel("${\\rm{Density}}~\\rho$", labelpad=0) -xlim(0,R_max) -ylim(rho0-0.3, rho0 + 0.3) -#yticks([-0.2, -0.1, 0., 0.1, 0.2]) +plot(r, rho, **scatter_props) +plot(solution_r, solution_rho, "--", color=line_color, alpha=0.8, lw=1.2) +errorbar(r_bin, rho_bin, yerr=rho_sigma_bin, **errorbar_props) +plot([0.2, 0.2], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +plot([0.4, 0.4], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +xlabel("Radius $r$") +ylabel("Density $\\rho$") +xlim(0, R_max) +ylim(rho0 - 0.3, rho0 + 0.3) +# yticks([-0.2, -0.1, 0., 0.1, 0.2]) # Radial pressure profile -------------------------------- subplot(233) -plot(r, P, '.', color='r', ms=0.5) -plot(solution_r, solution_P, '--', color='k', alpha=0.8, lw=1.2) -errorbar(r_bin, P_bin, yerr=P_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) -plot([0.2, 0.2], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -plot([0.4, 0.4], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -xlabel("${\\rm{Radius}}~r$", labelpad=0) -ylabel("${\\rm{Pressure}}~P$", labelpad=0) +plot(r, P, **scatter_props) +plot(solution_r, solution_P, "--", color=line_color, alpha=0.8, lw=1.2) +errorbar(r_bin, P_bin, yerr=P_sigma_bin, **errorbar_props) +plot([0.2, 0.2], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +plot([0.4, 0.4], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +xlabel("Radius $r$") +ylabel("Pressure $P$") xlim(0, R_max) ylim(4.9 + P0, P0 + 6.1) # Internal energy profile -------------------------------- subplot(234) -plot(r, u, '.', color='r', ms=0.5) -plot(solution_r, solution_u, '--', color='k', alpha=0.8, lw=1.2) -errorbar(r_bin, u_bin, yerr=u_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) -plot([0.2, 0.2], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -plot([0.4, 0.4], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -xlabel("${\\rm{Radius}}~r$", labelpad=0) -ylabel("${\\rm{Internal~Energy}}~u$", labelpad=0) -xlim(0,R_max) +plot(r, u, **scatter_props) +plot(solution_r, solution_u, "--", color=line_color, alpha=0.8, lw=1.2) +errorbar(r_bin, u_bin, yerr=u_sigma_bin, **errorbar_props) +plot([0.2, 0.2], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +plot([0.4, 0.4], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +xlabel("$Radius $r$") +ylabel("Internal Energy $u$") +xlim(0, R_max) ylim(7.3, 9.1) # Radial entropy profile -------------------------------- subplot(235) -plot(r, S, '.', color='r', ms=0.5) -plot(solution_r, solution_s, '--', color='k', alpha=0.8, lw=1.2) -errorbar(r_bin, S_bin, yerr=S_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) -plot([0.2, 0.2], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -plot([0.4, 0.4], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -xlabel("${\\rm{Radius}}~r$", labelpad=0) -ylabel("${\\rm{Entropy}}~S$", labelpad=0) +plot(r, S, **scatter_props) +plot(solution_r, solution_s, "--", color=line_color, alpha=0.8, lw=1.2) +errorbar(r_bin, S_bin, yerr=S_sigma_bin, **errorbar_props) +plot([0.2, 0.2], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +plot([0.4, 0.4], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +xlabel("Radius $r$") +ylabel("Entropy $S$") xlim(0, R_max) ylim(4.9 + P0, P0 + 6.1) -# Image -------------------------------------------------- -#subplot(234) -#scatter(pos[:,0], pos[:,1], c=v_norm, cmap="PuBu", edgecolors='face', s=4, vmin=0, vmax=1) -#text(0.95, 0.95, "$|v|$", ha="right", va="top") -#xlim(0,1) -#ylim(0,1) -#xlabel("$x$", labelpad=0) -#ylabel("$y$", labelpad=0) - # Information ------------------------------------- subplot(236, frameon=False) -text(-0.49, 0.9, "Gresho-Chan vortex with $\\gamma=%.3f$ at $t=%.2f$"%(gas_gamma,time), fontsize=10) -text(-0.49, 0.8, "Background $\\rho_0=%.3f$"%rho0, fontsize=10) -text(-0.49, 0.7, "Background $P_0=%.3f$"%P0, fontsize=10) -plot([-0.49, 0.1], [0.62, 0.62], 'k-', lw=1) -text(-0.49, 0.5, "$\\textsc{Swift}$ %s"%git, fontsize=10) -text(-0.49, 0.4, scheme, fontsize=10) -text(-0.49, 0.3, kernel, fontsize=10) -text(-0.49, 0.2, "$%.2f$ neighbours ($\\eta=%.3f$)"%(neighbours, eta), fontsize=10) +text_fontsize = 5 + +text( + -0.49, + 0.9, + "Gresho-Chan vortex (2D) with $\\gamma=%.3f$ at $t=%.2f$" % (gas_gamma, time), + fontsize=text_fontsize, +) +text(-0.49, 0.8, "Background $\\rho_0=%.3f$" % rho0, fontsize=text_fontsize) +text(-0.49, 0.7, "Background $P_0=%.3f$" % P0, fontsize=text_fontsize) +plot([-0.49, 0.1], [0.62, 0.62], "k-", lw=1) +text(-0.49, 0.5, "SWIFT %s" % git.decode("utf-8"), fontsize=text_fontsize) +text(-0.49, 0.4, scheme.decode("utf-8"), fontsize=text_fontsize) +text(-0.49, 0.3, kernel.decode("utf-8"), fontsize=text_fontsize) +text( + -0.49, + 0.2, + "$%.2f$ neighbours ($\\eta=%.3f$)" % (neighbours, eta), + fontsize=text_fontsize, +) xlim(-0.5, 0.5) ylim(0, 1) xticks([]) yticks([]) -savefig("GreshoVortex.png", dpi=200) +tight_layout() + +savefig("GreshoVortex.png") diff --git a/examples/HydroTests/GreshoVortex_3D/plotSolution.py b/examples/HydroTests/GreshoVortex_3D/plotSolution.py index 20beab7514759c764f5ca7c379183506b764a819..d3be5a404f7011d3dea24c992ef4ca93b4c4988c 100644 --- a/examples/HydroTests/GreshoVortex_3D/plotSolution.py +++ b/examples/HydroTests/GreshoVortex_3D/plotSolution.py @@ -22,43 +22,23 @@ # answer # Parameters -gas_gamma = 5./3. # Gas adiabatic index -rho0 = 1 # Gas density -P0 = 0. # Constant additional pressure (should have no impact on the - # dynamics) +gas_gamma = 5.0 / 3.0 # Gas adiabatic index +rho0 = 1 # Gas density +P0 = 0.0 # Constant additional pressure (should have no impact on the +# dynamics) # --------------------------------------------------------------- # Don't touch anything after this. # --------------------------------------------------------------- import matplotlib + matplotlib.use("Agg") from pylab import * from scipy import stats import h5py -# Plot parameters -params = {'axes.labelsize': 10, -'axes.titlesize': 10, -'font.size': 12, -'legend.fontsize': 12, -'xtick.labelsize': 10, -'ytick.labelsize': 10, -'text.usetex': True, - 'figure.figsize' : (9.90,6.45), -'figure.subplot.left' : 0.045, -'figure.subplot.right' : 0.99, -'figure.subplot.bottom' : 0.05, -'figure.subplot.top' : 0.99, -'figure.subplot.wspace' : 0.15, -'figure.subplot.hspace' : 0.12, -'lines.markersize' : 6, -'lines.linewidth' : 3., -'text.latex.unicode': True -} -rcParams.update(params) -rc('font',**{'family':'sans-serif','sans-serif':['Times']}) - +style.use("../../../tools/stylesheets/mnras.mplstyle") snap = int(sys.argv[1]) @@ -72,21 +52,27 @@ solution_v_r = zeros(N) for i in range(N): if solution_r[i] < 0.2: - solution_P[i] = P0 + 5. + 12.5*solution_r[i]**2 - solution_v_phi[i] = 5.*solution_r[i] + solution_P[i] = P0 + 5.0 + 12.5 * solution_r[i] ** 2 + solution_v_phi[i] = 5.0 * solution_r[i] elif solution_r[i] < 0.4: - solution_P[i] = P0 + 9. + 12.5*solution_r[i]**2 - 20.*solution_r[i] + 4.*log(solution_r[i]/0.2) - solution_v_phi[i] = 2. -5.*solution_r[i] + solution_P[i] = ( + P0 + + 9.0 + + 12.5 * solution_r[i] ** 2 + - 20.0 * solution_r[i] + + 4.0 * log(solution_r[i] / 0.2) + ) + solution_v_phi[i] = 2.0 - 5.0 * solution_r[i] else: - solution_P[i] = P0 + 3. + 4.*log(2.) - solution_v_phi[i] = 0. + solution_P[i] = P0 + 3.0 + 4.0 * log(2.0) + solution_v_phi[i] = 0.0 solution_rho = ones(N) * rho0 -solution_s = solution_P / solution_rho**gas_gamma -solution_u = solution_P /((gas_gamma - 1.)*solution_rho) +solution_s = solution_P / solution_rho ** gas_gamma +solution_u = solution_P / ((gas_gamma - 1.0) * solution_rho) # Read the simulation data -sim = h5py.File("gresho_%04d.hdf5"%snap, "r") +sim = h5py.File("gresho_%04d.hdf5" % snap, "r") boxSize = sim["/Header"].attrs["BoxSize"][0] time = sim["/Header"].attrs["Time"][0] scheme = sim["/HydroScheme"].attrs["Scheme"] @@ -95,14 +81,14 @@ neighbours = sim["/HydroScheme"].attrs["Kernel target N_ngb"] eta = sim["/HydroScheme"].attrs["Kernel eta"] git = sim["Code"].attrs["Git Revision"] -pos = sim["/PartType0/Coordinates"][:,:] -x = pos[:,0] - boxSize / 2 -y = pos[:,1] - boxSize / 2 -vel = sim["/PartType0/Velocities"][:,:] -r = sqrt(x**2 + y**2) -v_r = (x * vel[:,0] + y * vel[:,1]) / r -v_phi = (-y * vel[:,0] + x * vel[:,1]) / r -v_norm = sqrt(vel[:,0]**2 + vel[:,1]**2) +pos = sim["/PartType0/Coordinates"][:, :] +x = pos[:, 0] - boxSize / 2 +y = pos[:, 1] - boxSize / 2 +vel = sim["/PartType0/Velocities"][:, :] +r = sqrt(x ** 2 + y ** 2) +v_r = (x * vel[:, 0] + y * vel[:, 1]) / r +v_phi = (-y * vel[:, 0] + x * vel[:, 1]) / r +v_norm = sqrt(vel[:, 0] ** 2 + vel[:, 1] ** 2) rho = sim["/PartType0/Densities"][:] u = sim["/PartType0/InternalEnergies"][:] S = sim["/PartType0/Entropies"][:] @@ -121,144 +107,183 @@ except: plot_viscosity = False # Bin te data -r_bin_edge = np.arange(0., 1., 0.02) -r_bin = 0.5*(r_bin_edge[1:] + r_bin_edge[:-1]) -rho_bin,_,_ = stats.binned_statistic(r, rho, statistic='mean', bins=r_bin_edge) -v_bin,_,_ = stats.binned_statistic(r, v_phi, statistic='mean', bins=r_bin_edge) -P_bin,_,_ = stats.binned_statistic(r, P, statistic='mean', bins=r_bin_edge) -S_bin,_,_ = stats.binned_statistic(r, S, statistic='mean', bins=r_bin_edge) -u_bin,_,_ = stats.binned_statistic(r, u, statistic='mean', bins=r_bin_edge) -rho2_bin,_,_ = stats.binned_statistic(r, rho**2, statistic='mean', bins=r_bin_edge) -v2_bin,_,_ = stats.binned_statistic(r, v_phi**2, statistic='mean', bins=r_bin_edge) -P2_bin,_,_ = stats.binned_statistic(r, P**2, statistic='mean', bins=r_bin_edge) -S2_bin,_,_ = stats.binned_statistic(r, S**2, statistic='mean', bins=r_bin_edge) -u2_bin,_,_ = stats.binned_statistic(r, u**2, statistic='mean', bins=r_bin_edge) -rho_sigma_bin = np.sqrt(rho2_bin - rho_bin**2) -v_sigma_bin = np.sqrt(v2_bin - v_bin**2) -P_sigma_bin = np.sqrt(P2_bin - P_bin**2) -S_sigma_bin = np.sqrt(S2_bin - S_bin**2) -u_sigma_bin = np.sqrt(u2_bin - u_bin**2) +r_bin_edge = np.arange(0.0, 1.0, 0.02) +r_bin = 0.5 * (r_bin_edge[1:] + r_bin_edge[:-1]) +rho_bin, _, _ = stats.binned_statistic(r, rho, statistic="mean", bins=r_bin_edge) +v_bin, _, _ = stats.binned_statistic(r, v_phi, statistic="mean", bins=r_bin_edge) +P_bin, _, _ = stats.binned_statistic(r, P, statistic="mean", bins=r_bin_edge) +S_bin, _, _ = stats.binned_statistic(r, S, statistic="mean", bins=r_bin_edge) +u_bin, _, _ = stats.binned_statistic(r, u, statistic="mean", bins=r_bin_edge) +rho2_bin, _, _ = stats.binned_statistic(r, rho ** 2, statistic="mean", bins=r_bin_edge) +v2_bin, _, _ = stats.binned_statistic(r, v_phi ** 2, statistic="mean", bins=r_bin_edge) +P2_bin, _, _ = stats.binned_statistic(r, P ** 2, statistic="mean", bins=r_bin_edge) +S2_bin, _, _ = stats.binned_statistic(r, S ** 2, statistic="mean", bins=r_bin_edge) +u2_bin, _, _ = stats.binned_statistic(r, u ** 2, statistic="mean", bins=r_bin_edge) +rho_sigma_bin = np.sqrt(rho2_bin - rho_bin ** 2) +v_sigma_bin = np.sqrt(v2_bin - v_bin ** 2) +P_sigma_bin = np.sqrt(P2_bin - P_bin ** 2) +S_sigma_bin = np.sqrt(S2_bin - S_bin ** 2) +u_sigma_bin = np.sqrt(u2_bin - u_bin ** 2) if plot_diffusion: - alpha_diff_bin,_,_ = stats.binned_statistic(r, diffusion, statistic='mean', bins=r_bin_edge) - alpha2_diff_bin,_,_ = stats.binned_statistic(r, diffusion**2, statistic='mean', bins=r_bin_edge) - alpha_diff_sigma_bin = np.sqrt(alpha2_diff_bin - alpha_diff_bin**2) + alpha_diff_bin, _, _ = stats.binned_statistic( + r, diffusion, statistic="mean", bins=r_bin_edge + ) + alpha2_diff_bin, _, _ = stats.binned_statistic( + r, diffusion ** 2, statistic="mean", bins=r_bin_edge + ) + alpha_diff_sigma_bin = np.sqrt(alpha2_diff_bin - alpha_diff_bin ** 2) if plot_viscosity: - alpha_visc_bin,_,_ = stats.binned_statistic(r, viscosity, statistic='mean', bins=r_bin_edge) - alpha2_visc_bin,_,_ = stats.binned_statistic(r, viscosity**2, statistic='mean', bins=r_bin_edge) - alpha_visc_sigma_bin = np.sqrt(alpha2_visc_bin - alpha_visc_bin**2) + alpha_visc_bin, _, _ = stats.binned_statistic( + r, viscosity, statistic="mean", bins=r_bin_edge + ) + alpha2_visc_bin, _, _ = stats.binned_statistic( + r, viscosity ** 2, statistic="mean", bins=r_bin_edge + ) + alpha_visc_sigma_bin = np.sqrt(alpha2_visc_bin - alpha_visc_bin ** 2) # Plot the interesting quantities -figure() +figure(figsize=(7, 7 / 1.6)) + +line_color = "C4" +binned_color = "C2" +binned_marker_size = 4 + +scatter_props = dict( + marker=".", + ms=1, + markeredgecolor="none", + alpha=0.1, + zorder=-1, + rasterized=True, + linestyle="none", +) + +errorbar_props = dict(color=binned_color, ms=binned_marker_size, fmt=".", lw=1.2) # Azimuthal velocity profile ----------------------------- subplot(231) -plot(r, v_phi, '.', color='r', ms=0.5) -plot(solution_r, solution_v_phi, '--', color='k', alpha=0.8, lw=1.2) -errorbar(r_bin, v_bin, yerr=v_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) -plot([0.2, 0.2], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -plot([0.4, 0.4], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -xlabel("${\\rm{Radius}}~r$", labelpad=0) -ylabel("${\\rm{Azimuthal~velocity}}~v_\\phi$", labelpad=0) -xlim(0,R_max) +plot(r, v_phi, **scatter_props) +plot(solution_r, solution_v_phi, "--", color=line_color, alpha=0.8, lw=1.2) +errorbar(r_bin, v_bin, yerr=v_sigma_bin, **errorbar_props) +plot([0.2, 0.2], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +plot([0.4, 0.4], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +xlabel("Radius $r$") +ylabel("Azimuthal velocity $v_\\phi$") +xlim(0, R_max) ylim(-0.1, 1.2) # Radial density profile -------------------------------- subplot(232) -plot(r, rho, '.', color='r', ms=0.5) -plot(solution_r, solution_rho, '--', color='k', alpha=0.8, lw=1.2) -errorbar(r_bin, rho_bin, yerr=rho_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) -plot([0.2, 0.2], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -plot([0.4, 0.4], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -xlabel("${\\rm{Radius}}~r$", labelpad=0) -ylabel("${\\rm{Density}}~\\rho$", labelpad=0) -xlim(0,R_max) -ylim(rho0-0.3, rho0 + 0.3) -#yticks([-0.2, -0.1, 0., 0.1, 0.2]) +plot(r, rho, **scatter_props) +plot(solution_r, solution_rho, "--", color=line_color, alpha=0.8, lw=1.2) +errorbar(r_bin, rho_bin, yerr=rho_sigma_bin, **errorbar_props) +plot([0.2, 0.2], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +plot([0.4, 0.4], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +xlabel("Radius $r$") +ylabel("Density $\\rho$") +xlim(0, R_max) +ylim(rho0 - 0.3, rho0 + 0.3) +# yticks([-0.2, -0.1, 0., 0.1, 0.2]) # Radial pressure profile -------------------------------- subplot(233) -plot(r, P, '.', color='r', ms=0.5) -plot(solution_r, solution_P, '--', color='k', alpha=0.8, lw=1.2) -errorbar(r_bin, P_bin, yerr=P_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) -plot([0.2, 0.2], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -plot([0.4, 0.4], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -xlabel("${\\rm{Radius}}~r$", labelpad=0) -ylabel("${\\rm{Pressure}}~P$", labelpad=0) +plot(r, P, **scatter_props) +plot(solution_r, solution_P, "--", color=line_color, alpha=0.8, lw=1.2) +errorbar(r_bin, P_bin, yerr=P_sigma_bin, **errorbar_props) +plot([0.2, 0.2], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +plot([0.4, 0.4], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +xlabel("Radius $r$") +ylabel("Pressure $P$") xlim(0, R_max) ylim(4.9 + P0, P0 + 6.1) # Internal energy profile -------------------------------- subplot(234) -plot(r, u, '.', color='r', ms=0.5) -plot(solution_r, solution_u, '--', color='k', alpha=0.8, lw=1.2) -errorbar(r_bin, u_bin, yerr=u_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) -plot([0.2, 0.2], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -plot([0.4, 0.4], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) -xlabel("${\\rm{Radius}}~r$", labelpad=0) -ylabel("${\\rm{Internal~Energy}}~u$", labelpad=0) -xlim(0,R_max) +plot(r, u, **scatter_props) +plot(solution_r, solution_u, "--", color=line_color, alpha=0.8, lw=1.2) +errorbar(r_bin, u_bin, yerr=u_sigma_bin, **errorbar_props) +plot([0.2, 0.2], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +plot([0.4, 0.4], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) +xlabel("Radius $r$") +ylabel("Internal Energy $u$") +xlim(0, R_max) ylim(7.3, 9.1) # Radial entropy profile -------------------------------- subplot(235) -xlabel("${\\rm{Radius}}~r$", labelpad=0) - +xlabel("Radius $r$") xlim(0, R_max) -xlabel("${\\rm{Radius}}~r$", labelpad=0) - if plot_diffusion or plot_viscosity: if plot_diffusion: - plot(r, diffusion, ".", color='r', ms=0.5, alpha=0.2) - errorbar(r_bin, alpha_diff_bin, yerr=alpha_diff_sigma_bin, fmt=".", ms=8.0, color='b', lw=1.2, label="Diffusion") + plot(r, diffusion, **scatter_props) + errorbar( + r_bin, + alpha_diff_bin, + yerr=alpha_diff_sigma_bin, + **errorbar_props, + label="Diffusion", + ) if plot_viscosity: - plot(r, viscosity, ".", color='g', ms=0.5, alpha=0.2) - errorbar(r_bin, alpha_visc_bin, yerr=alpha_visc_sigma_bin, fmt=".", ms=8.0, color='y', lw=1.2, label="Viscosity") - - ylabel("${\\rm{Rate~Coefficient}}~\\alpha$", labelpad=0) + plot(r, viscosity, **{**scatter_props, "color": "C3"}) + errorbar( + r_bin, + alpha_visc_bin, + yerr=alpha_visc_sigma_bin, + **{**errorbar_props, "color": "C4"}, + label="Viscosity", + ) + + ylabel("Rate Coefficient $\\alpha$") legend() else: - plot(r, S, '.', color='r', ms=0.5) - plot(solution_r, solution_s, '--', color='k', alpha=0.8, lw=1.2) - errorbar(r_bin, S_bin, yerr=S_sigma_bin, fmt='.', ms=8.0, color='b', lw=1.2) - plot([0.2, 0.2], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) - plot([0.4, 0.4], [-100, 100], ':', color='k', alpha=0.4, lw=1.2) - ylabel("${\\rm{Entropy}}~S$", labelpad=0) + plot(r, S, **scatter_props) + plot(solution_r, solution_s, "--", color=line_color, alpha=0.8, lw=1.2) + errorbar(r_bin, S_bin, yerr=S_sigma_bin, **errorbar_props) + plot([0.2, 0.2], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) + plot([0.4, 0.4], [-100, 100], ":", color=line_color, alpha=0.4, lw=1.2) + ylabel("Entropy $S$") ylim(4.9 + P0, P0 + 6.1) -# Image -------------------------------------------------- -#subplot(234) -#scatter(pos[:,0], pos[:,1], c=v_norm, cmap="PuBu", edgecolors='face', s=4, vmin=0, vmax=1) -#text(0.95, 0.95, "$|v|$", ha="right", va="top") -#xlim(0,1) -#ylim(0,1) -#xlabel("$x$", labelpad=0) -#ylabel("$y$", labelpad=0) # Information ------------------------------------- subplot(236, frameon=False) -text(-0.49, 0.9, "Gresho-Chan vortex with $\\gamma=%.3f$ at $t=%.2f$"%(gas_gamma,time), fontsize=10) -text(-0.49, 0.8, "Background $\\rho_0=%.3f$"%rho0, fontsize=10) -text(-0.49, 0.7, "Background $P_0=%.3f$"%P0, fontsize=10) -plot([-0.49, 0.1], [0.62, 0.62], 'k-', lw=1) -text(-0.49, 0.5, "$\\textsc{Swift}$ %s"%git, fontsize=10) -text(-0.49, 0.4, scheme, fontsize=10) -text(-0.49, 0.3, kernel, fontsize=10) -text(-0.49, 0.2, "$%.2f$ neighbours ($\\eta=%.3f$)"%(neighbours, eta), fontsize=10) +text_fontsize = 5 + +text( + -0.49, + 0.9, + "Gresho-Chan vortex (3D) with $\\gamma=%.3f$ at $t=%.2f$" % (gas_gamma, time), + fontsize=text_fontsize, +) +text(-0.49, 0.8, "Background $\\rho_0=%.3f$" % rho0, fontsize=text_fontsize) +text(-0.49, 0.7, "Background $P_0=%.3f$" % P0, fontsize=text_fontsize) +plot([-0.49, 0.1], [0.62, 0.62], "k-", lw=1) +text(-0.49, 0.5, "SWIFT %s" % git.decode("utf-8"), fontsize=text_fontsize) +text(-0.49, 0.4, scheme.decode("utf-8"), fontsize=text_fontsize) +text(-0.49, 0.3, kernel.decode("utf-8"), fontsize=text_fontsize) +text( + -0.49, + 0.2, + "$%.2f$ neighbours ($\\eta=%.3f$)" % (neighbours, eta), + fontsize=text_fontsize, +) xlim(-0.5, 0.5) ylim(0, 1) xticks([]) yticks([]) -savefig("GreshoVortex.png", dpi=200) +tight_layout() + +savefig("GreshoVortex.png") diff --git a/examples/main.c b/examples/main.c index 9a61f0bae1271f7a34d770a4b6868c2721e9db93..95784341ab3ad363b90c5a33bed9ffc0885499ce 100644 --- a/examples/main.c +++ b/examples/main.c @@ -1244,13 +1244,15 @@ int main(int argc, char *argv[]) { error("Failed to generate restart filename"); /* dump the parameters as used. */ + if (!restart && myrank == 0) { - /* used parameters */ - parser_write_params_to_file(params, "used_parameters.yml", 1); - /* unused parameters */ - parser_write_params_to_file(params, "unused_parameters.yml", 0); + /* used parameters */ + parser_write_params_to_file(params, "used_parameters.yml", /*used=*/1); + /* unused parameters */ + parser_write_params_to_file(params, "unused_parameters.yml", /*used=*/0); + } - /* Dump memory use report if collected for the 0 step. */ + /* Dump memory use report if collected for the 0 step. */ #ifdef SWIFT_MEMUSE_REPORTS { char dumpfile[40]; @@ -1296,7 +1298,7 @@ int main(int argc, char *argv[]) { } /* Did we exceed the maximal runtime? */ - if (clocks_get_hours_since_start() > restart_max_hours_runtime) { + if (e.runtime > restart_max_hours_runtime) { force_stop = 1; message("Runtime limit reached, dumping restart files..."); if (resubmit_after_max_hours) resubmit = 1; diff --git a/examples/main_fof.c b/examples/main_fof.c index 4f12cba5a792cc538c9c096cdd481b92d2613610..280d2d3998569a574b536105afe4a9c4696c2cb1 100644 --- a/examples/main_fof.c +++ b/examples/main_fof.c @@ -673,9 +673,9 @@ int main(int argc, char *argv[]) { #endif // SWIFT_DEBUG_THREADPOOL /* used parameters */ - parser_write_params_to_file(params, "fof_used_parameters.yml", 1); + parser_write_params_to_file(params, "fof_used_parameters.yml", /*used=*/1); /* unused parameters */ - parser_write_params_to_file(params, "fof_unused_parameters.yml", 0); + parser_write_params_to_file(params, "fof_unused_parameters.yml", /*used=*/0); /* Dump memory use report */ #ifdef SWIFT_MEMUSE_REPORTS diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index 4d43ae30f5bcd0df4970168591f03f97c092b21e..cabc94d2bba38b462b1974c01f416c40df10ef9f 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -292,6 +292,7 @@ SineWavePotential: timestep_limit: 1. # Time-step dimensionless pre-factor. growth_time: 0. # (Optional) Time for the potential to grow to its final size. + # Parameters related to entropy floors ---------------------------------------------- EAGLEEntropyFloor: @@ -303,12 +304,14 @@ EAGLEEntropyFloor: Cool_over_density_threshold: 10. # Overdensity above which the EAGLE Cool limiter entropy floor can kick in. Cool_temperature_norm_K: 8000 # Temperature of the EAGLE Cool limiter entropy floor at the density threshold expressed in Kelvin. Cool_gamma_effective: 1. # Slope the of the EAGLE Cool limiter entropy floor - + + # Parameters related to pressure floors ---------------------------------------------- GEARPressureFloor: jeans_factor: 10. # Number of particles required to suppose a resolved clump and avoid the pressure floor. + # Parameters related to cooling function ---------------------------------------------- # Constant du/dt cooling function @@ -326,6 +329,7 @@ LambdaCooling: EAGLECooling: dir_name: ./coolingtables/ # Location of the Wiersma+08 cooling tables H_reion_z: 8.5 # Redshift of Hydrogen re-ionization + H_reion_eV_p_H: 2.0 # Energy inject by Hydrogen re-ionization in electron-volt per Hydrogen atom He_reion_z_centre: 3.5 # Redshift of the centre of the Helium re-ionization Gaussian He_reion_z_sigma: 0.5 # Spread in redshift of the Helium re-ionization Gaussian He_reion_eV_p_H: 2.0 # Energy inject by Helium re-ionization in electron-volt per Hydrogen atom @@ -336,16 +340,17 @@ EAGLECooling: # Cooling with Grackle 3.0 GrackleCooling: cloudy_table: CloudyData_UVB=HM2012.h5 # Name of the Cloudy Table (available on the grackle bitbucket repository) - with_UV_background: 1 # Enable or not the UV background - redshift: 0 # Redshift to use (-1 means time based redshift) - with_metal_cooling: 1 # Enable or not the metal cooling - provide_volumetric_heating_rates: 0 # (optional) User provide volumetric heating rates - provide_specific_heating_rates: 0 # (optional) User provide specific heating rates + with_UV_background: 1 # Enable or not the UV background + redshift: 0 # Redshift to use (-1 means time based redshift) + with_metal_cooling: 1 # Enable or not the metal cooling + provide_volumetric_heating_rates: 0 # (optional) User provide volumetric heating rates + provide_specific_heating_rates: 0 # (optional) User provide specific heating rates max_steps: 10000 # (optional) Max number of step when computing the initial composition convergence_limit: 1e-2 # (optional) Convergence threshold (relative) for initial composition thermal_time_myr: 5 # (optional) Time (in Myr) for adiabatic cooling after a feedback event. - self_shielding_method: -1 # (optional) Grackle (1->3 for Grackle's ones, 0 for none and -1 for GEAR) - self_shielding_threshold_atom_per_cm3: 0.007 # Required only with GEAR's self shielding. Density threshold of the self shielding + self_shielding_method: -1 # (optional) Grackle (1->3 for Grackle's ones, 0 for none and -1 for GEAR) + self_shielding_threshold_atom_per_cm3: 0.007 # Required only with GEAR's self shielding. Density threshold of the self shielding + # Parameters related to chemistry models ----------------------------------------------- @@ -365,16 +370,11 @@ EAGLEChemistry: # GEAR chemistry model (Revaz and Jablonka 2018) GEARChemistry: initial_metallicity: 1 # Initial metallicity of the gas (mass fraction) - scale_initial_metallicity: 1 # Should we scale the initial metallicity with the solar one? + scale_initial_metallicity: 1 # Should we scale the initial metallicity with the solar one? # Parameters related to star formation models ----------------------------------------------- -# GEAR star formation model (Revaz and Jablonka 2018) -GEARStarFormation: - star_formation_efficiency: 0.01 # star formation efficiency (c_*) - maximal_temperature: 3e4 # Upper limit to the temperature of a star forming particle - # EAGLE star formation model (Schaye and Dalla Vecchia 2008) EAGLEStarFormation: EOS_density_norm_H_p_cm3: 0.1 # Physical density used for the normalisation of the EOS assumed for the star-forming gas in Hydrogen atoms per cm^3. @@ -392,7 +392,13 @@ EAGLEStarFormation: threshold_Z0: 0.002 # Reference metallicity (metal mass fraction) for the metal-dependant threshold for star formation. threshold_slope: -0.64 # Slope of the metal-dependant star formation threshold threshold_max_density_H_p_cm3: 10.0 # Maximal density of the metal-dependant density threshold for star formation in Hydrogen atoms per cm^3. - + +# GEAR star formation model (Revaz and Jablonka 2018) +GEARStarFormation: + star_formation_efficiency: 0.01 # star formation efficiency (c_*) + maximal_temperature: 3e4 # Upper limit to the temperature of a star forming particle + + # Parameters related to feedback models ----------------------------------------------- # EAGLE feedback model @@ -438,9 +444,10 @@ EAGLEFeedback: # GEAR feedback model GEARFeedback: - supernovae_energy_erg: 0.1e51 # Energy released by a single supernovae. + supernovae_energy_erg: 0.1e51 # Energy released by a single supernovae. yields_table: chemistry-AGB+OMgSFeZnSrYBaEu-16072013.h5 # Table containing the yields. - discrete_yields: 0 # Should we use discrete yields or the IMF integrated one? + discrete_yields: 0 # Should we use discrete yields or the IMF integrated one? + # Parameters related to AGN models ----------------------------------------------- diff --git a/src/Makefile.am b/src/Makefile.am index 7144496b68a9e6789d0eaf0fb0dd19ff014e6d4b..7678220bbf0a05fab3af60248245493c3be9886b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -77,7 +77,7 @@ if HAVEGRACKLECOOLING GRACKLE_COOLING_SOURCES += cooling/grackle/cooling.c endif -# source files for GRACKLE cooling +# source files for GEAR feedback GEAR_FEEDBACK_SOURCES = if HAVEGEARFEEDBACK GEAR_FEEDBACK_SOURCES += feedback/GEAR/stellar_evolution.c feedback/GEAR/feedback.c \ @@ -102,8 +102,8 @@ AM_SOURCES = space.c runner_main.c runner_doiact_hydro.c runner_doiact_limiter.c chemistry.c cosmology.c restart.c mesh_gravity.c velociraptor_interface.c \ outputlist.c velociraptor_dummy.c logger_io.c memuse.c mpiuse.c memuse_rnodes.c fof.c \ hashmap.c pressure_floor.c \ - $(EAGLE_COOLING_SOURCES) $(EAGLE_FEEDBACK_SOURCES) $(GRACKLE_COOLING_SOURCES) \ - $(GEAR_FEEDBACK_SOURCES) + $(EAGLE_COOLING_SOURCES) $(EAGLE_FEEDBACK_SOURCES) \ + $(GRACKLE_COOLING_SOURCES) $(GEAR_FEEDBACK_SOURCES) # Include files for distribution, not installation. nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ diff --git a/src/black_holes/Default/black_holes_io.h b/src/black_holes/Default/black_holes_io.h index b2c3374aec6650a5cf29dc80a7485067de38fefb..6ca174b7fef2545af0f3191bba3037ed2b31805a 100644 --- a/src/black_holes/Default/black_holes_io.h +++ b/src/black_holes/Default/black_holes_io.h @@ -126,8 +126,9 @@ INLINE static void black_holes_write_particles(const struct bpart* bparts, list[2] = io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, 0.f, bparts, mass, "Masses of the particles"); - list[3] = io_make_output_field("ParticleIDs", LONGLONG, 1, UNIT_CONV_NO_UNITS, - 0.f, bparts, id, "Unique ID of the particles"); + list[3] = + io_make_output_field("ParticleIDs", ULONGLONG, 1, UNIT_CONV_NO_UNITS, 0.f, + bparts, id, "Unique ID of the particles"); list[4] = io_make_output_field( "SmoothingLengths", FLOAT, 1, UNIT_CONV_LENGTH, 1.f, bparts, h, diff --git a/src/black_holes/EAGLE/black_holes_io.h b/src/black_holes/EAGLE/black_holes_io.h index 7461d16958436e9b529779797f7b3a0b88c0bfaf..690c76d38a573ba2bdb3834d573a523a324772d8 100644 --- a/src/black_holes/EAGLE/black_holes_io.h +++ b/src/black_holes/EAGLE/black_holes_io.h @@ -130,8 +130,9 @@ INLINE static void black_holes_write_particles(const struct bpart* bparts, io_make_output_field("DynamicalMasses", FLOAT, 1, UNIT_CONV_MASS, 0.f, bparts, mass, "Dynamical masses of the particles"); - list[3] = io_make_output_field("ParticleIDs", LONGLONG, 1, UNIT_CONV_NO_UNITS, - 0.f, bparts, id, "Unique ID of the particles"); + list[3] = + io_make_output_field("ParticleIDs", ULONGLONG, 1, UNIT_CONV_NO_UNITS, 0.f, + bparts, id, "Unique ID of the particles"); list[4] = io_make_output_field( "SmoothingLengths", FLOAT, 1, UNIT_CONV_LENGTH, 1.f, bparts, h, diff --git a/src/black_holes_struct.h b/src/black_holes_struct.h index 5535caf23eb9d6eaed3774faa96e75f6f9f73625..fdc70ebb7e73dab16bcf86b099a8ea3b61f51e6a 100644 --- a/src/black_holes_struct.h +++ b/src/black_holes_struct.h @@ -27,6 +27,8 @@ /* Config parameters. */ #include "../config.h" +#include "inline.h" + /* Import the right black holes definition */ #if defined(BLACK_HOLES_NONE) #include "./black_holes/Default/black_holes_struct.h" diff --git a/src/cell.c b/src/cell.c index b6d998ee39bfb053d5c6667e20d83c4be382534a..d5b0499d0005ca389e98a9ef44461e9a2c0ee126 100644 --- a/src/cell.c +++ b/src/cell.c @@ -409,6 +409,37 @@ int cell_link_foreign_gparts(struct cell *c, struct gpart *gparts) { #endif } +/** + * @brief Recursively nullify all the particle pointers in a cell hierarchy. + * + * Should only be used on foreign cells! + * + * This will make any task or action running on these cells likely crash. + * Recreating the foreign links will be necessary. + * + * @param c The #cell to act on. + */ +void cell_unlink_foreign_particles(struct cell *c) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) + error("Unlinking foreign particles in a local cell!"); +#endif + + c->grav.parts = NULL; + c->hydro.parts = NULL; + c->stars.parts = NULL; + c->black_holes.parts = NULL; + + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + cell_unlink_foreign_particles(c->progeny[k]); + } + } + } +} + /** * @brief Recursively count the number of #part in foreign cells that * are in cells with hydro-related tasks. diff --git a/src/cell.h b/src/cell.h index df6949643b35455e55125474acfc9dfc5c13b3fc..b4242bf1f5812135f87d37d30f5ac7c3a405def1 100644 --- a/src/cell.h +++ b/src/cell.h @@ -94,8 +94,11 @@ struct pcell { /*! Hydro variables */ struct { + /*! Number of #part in this cell. */ + int count; + /*! Maximal smoothing length. */ - double h_max; + float h_max; /*! Minimal integer end-of-timestep in this cell for hydro tasks */ integertime_t ti_end_min; @@ -109,9 +112,6 @@ struct pcell { /*! Integer time of the last drift of the #part in this cell */ integertime_t ti_old_part; - /*! Number of #part in this cell. */ - int count; - } hydro; /*! Gravity variables */ @@ -159,7 +159,7 @@ struct pcell { int count; /*! Maximal smoothing length. */ - double h_max; + float h_max; /*! Minimal integer end-of-timestep in this cell for stars tasks */ integertime_t ti_end_min; @@ -179,7 +179,7 @@ struct pcell { int count; /*! Maximal smoothing length. */ - double h_max; + float h_max; /*! Minimal integer end-of-timestep in this cell for black hole tasks */ integertime_t ti_end_min; @@ -386,9 +386,6 @@ struct cell { /*! Task for sorting the stars again after a SF event */ struct task *stars_resort; - /*! Max smoothing length in this cell. */ - double h_max; - /*! Last (integer) time the cell's part were drifted forward in time. */ integertime_t ti_old_part; @@ -405,6 +402,9 @@ struct cell { /*! Spin lock for various uses (#part case). */ swift_lock_type lock; + /*! Max smoothing length in this cell. */ + float h_max; + /*! Maximum part movement in this cell since last construction. */ float dx_max_part; @@ -569,9 +569,6 @@ struct cell { /*! Implicit tasks marking the exit of the stellar physics block of tasks */ struct task *stars_out; - /*! Max smoothing length in this cell. */ - double h_max; - /*! Last (integer) time the cell's spart were drifted forward in time. */ integertime_t ti_old_part; @@ -587,6 +584,9 @@ struct cell { /*! Nr of #spart this cell can hold after addition of new #spart. */ int count_total; + /*! Max smoothing length in this cell. */ + float h_max; + /*! Values of h_max before the drifts, used for sub-cell tasks. */ float h_max_old; @@ -678,9 +678,6 @@ struct cell { /*! Linked list of the tasks computing this cell's BH feedback. */ struct link *feedback; - /*! Max smoothing length in this cell. */ - double h_max; - /*! Last (integer) time the cell's bpart were drifted forward in time. */ integertime_t ti_old_part; @@ -693,6 +690,9 @@ struct cell { /*! Nr of #bpart this cell can hold after addition of new #bpart. */ int count_total; + /*! Max smoothing length in this cell. */ + float h_max; + /*! Values of h_max before the drifts, used for sub-cell tasks. */ float h_max_old; @@ -860,6 +860,7 @@ int cell_link_sparts(struct cell *c, struct spart *sparts); int cell_link_bparts(struct cell *c, struct bpart *bparts); int cell_link_foreign_parts(struct cell *c, struct part *parts); int cell_link_foreign_gparts(struct cell *c, struct gpart *gparts); +void cell_unlink_foreign_particles(struct cell *c); int cell_count_parts_for_tasks(const struct cell *c); int cell_count_gparts_for_tasks(const struct cell *c); void cell_clean_links(struct cell *c, void *data); diff --git a/src/clocks.c b/src/clocks.c index 16af01938d8f4e6cb21490af3288fd64e1a93876..0ac2d97e18658195f9140857f8ee6fef4bd1fe1d 100644 --- a/src/clocks.c +++ b/src/clocks.c @@ -276,22 +276,24 @@ double clocks_get_hours_since_start(void) { } /** - * @brief return the cpu time used. + * @brief return the cpu times used. * - * Uses the times(2) function to access the user cpu times and returns the sum - * of these for the process tree, i.e. current process plus "waited-for" - * children. This may be pthread implementation specific as to what that - * exactly means. Note we do not include the system time as that includes - * spin times and we don't want to give credit for that. + * Uses the times(2) function to access the user and system cpu times and + * returns the sum of these for the process tree, i.e. current process plus + * "waited-for" children. This may be pthread implementation specific as to + * what that exactly means. * - * @result cpu time used in sysconf(_SC_CLK_TCK) ticks, usually 100/s not our - * usual ticks. + * Note cpu times are reported in sysconf(_SC_CLK_TCK) ticks, usually 100/s + * not our usual ticks. + * + * @param usertime the user time. + * @param systime the system time. */ -double clocks_get_cputime_used(void) { - +void clocks_get_cputimes_used(double *usertime, double *systime) { struct tms tmstic; times(&tmstic); - return (double)(tmstic.tms_utime + tmstic.tms_cutime); + *usertime = (tmstic.tms_utime + tmstic.tms_cutime); + *systime = (tmstic.tms_stime + tmstic.tms_cstime); } /** diff --git a/src/clocks.h b/src/clocks.h index d306268674fc85c722e71a6bf8c0095341ba4e1a..c9c23807587e1ad4b519651911e1c37c9c1a1d6b 100644 --- a/src/clocks.h +++ b/src/clocks.h @@ -52,7 +52,7 @@ double clocks_diff_ticks(ticks tic, ticks toc); const char *clocks_get_timesincestart(void); double clocks_get_hours_since_start(void); -double clocks_get_cputime_used(void); +void clocks_get_cputimes_used(double *usertime, double *systime); int clocks_random_seed(void); #endif /* SWIFT_CLOCKS_H */ diff --git a/src/collectgroup.c b/src/collectgroup.c index 6b096416a3f334617a4567e51b4be04c92432eef..6aa120a5708df486229773c8ff9dde4f24180ccf 100644 --- a/src/collectgroup.c +++ b/src/collectgroup.c @@ -56,6 +56,7 @@ struct mpicollectgroup1 { long long total_nr_tasks; float tasks_per_cell_max; struct star_formation_history sfh; + float runtime; }; /* Forward declarations. */ @@ -125,6 +126,8 @@ void collectgroup1_apply(const struct collectgroup1 *grp1, struct engine *e) { e->tasks_per_cell_max = grp1->tasks_per_cell_max; star_formation_logger_add_to_accumulator(&e->sfh, &grp1->sfh); + + e->runtime = grp1->runtime; } /** @@ -174,6 +177,7 @@ void collectgroup1_apply(const struct collectgroup1 *grp1, struct engine *e) { * @param total_nr_tasks total number of tasks on rank. * @param tasks_per_cell the used number of tasks per cell. * @param sfh The star formation history logger + * @param runtime The runtime of rank in hours. */ void collectgroup1_init( struct collectgroup1 *grp1, size_t updated, size_t g_updated, @@ -186,7 +190,7 @@ void collectgroup1_init( integertime_t ti_black_holes_end_min, integertime_t ti_black_holes_end_max, integertime_t ti_black_holes_beg_max, int forcerebuild, long long total_nr_cells, long long total_nr_tasks, float tasks_per_cell, - const struct star_formation_history sfh) { + const struct star_formation_history sfh, float runtime) { grp1->updated = updated; grp1->g_updated = g_updated; @@ -213,6 +217,7 @@ void collectgroup1_init( grp1->total_nr_tasks = total_nr_tasks; grp1->tasks_per_cell_max = tasks_per_cell; grp1->sfh = sfh; + grp1->runtime = runtime; } /** @@ -254,6 +259,7 @@ void collectgroup1_reduce(struct collectgroup1 *grp1) { mpigrp11.total_nr_tasks = grp1->total_nr_tasks; mpigrp11.tasks_per_cell_max = grp1->tasks_per_cell_max; mpigrp11.sfh = grp1->sfh; + mpigrp11.runtime = grp1->runtime; struct mpicollectgroup1 mpigrp12; if (MPI_Allreduce(&mpigrp11, &mpigrp12, 1, mpicollectgroup1_type, @@ -286,6 +292,7 @@ void collectgroup1_reduce(struct collectgroup1 *grp1) { grp1->total_nr_tasks = mpigrp12.total_nr_tasks; grp1->tasks_per_cell_max = mpigrp12.tasks_per_cell_max; grp1->sfh = mpigrp12.sfh; + grp1->runtime = mpigrp12.runtime; #endif } @@ -357,6 +364,9 @@ static void doreduce1(struct mpicollectgroup1 *mpigrp11, /* Star formation history */ star_formation_logger_add(&mpigrp11->sfh, &mpigrp12->sfh); + + /* Use the maximum runtime as the global runtime. */ + mpigrp11->runtime = max(mpigrp11->runtime, mpigrp12->runtime); } /** diff --git a/src/collectgroup.h b/src/collectgroup.h index 9e973142953beec552e8c76fdbb789dc6325587a..fcb49fe0596b23125525c944fa9bfa80418f5b80 100644 --- a/src/collectgroup.h +++ b/src/collectgroup.h @@ -60,6 +60,9 @@ struct collectgroup1 { /* Maximum value of actual tasks per cell across all ranks. */ float tasks_per_cell_max; + + /* Global runtime of application in hours. */ + float runtime; }; void collectgroup_init(void); @@ -75,7 +78,7 @@ void collectgroup1_init( integertime_t ti_black_holes_end_min, integertime_t ti_black_holes_end_max, integertime_t ti_black_holes_beg_max, int forcerebuild, long long total_nr_cells, long long total_nr_tasks, float tasks_per_cell, - const struct star_formation_history sfh); + const struct star_formation_history sfh, float runtime); void collectgroup1_reduce(struct collectgroup1 *grp1); #ifdef WITH_MPI void mpicollect_free_MPI_type(void); diff --git a/src/common_io.c b/src/common_io.c index 460fda26699a3f4f45944f18c1fe65b2e9274dfa..458fe8229321f9fbd40b1b2c5a5bc70ebfe3417f 100644 --- a/src/common_io.c +++ b/src/common_io.c @@ -1580,7 +1580,7 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, io_copy_mapper, temp_c, - N, copySize, 0, (void*)&props); + N, copySize, threadpool_auto_chunk_size, (void*)&props); } else { /* Converting particle to data */ @@ -1593,8 +1593,8 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, - io_convert_part_f_mapper, temp_f, N, copySize, 0, - (void*)&props); + io_convert_part_f_mapper, temp_f, N, copySize, + threadpool_auto_chunk_size, (void*)&props); } else if (props.convert_part_i != NULL) { @@ -1605,8 +1605,8 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, - io_convert_part_i_mapper, temp_i, N, copySize, 0, - (void*)&props); + io_convert_part_i_mapper, temp_i, N, copySize, + threadpool_auto_chunk_size, (void*)&props); } else if (props.convert_part_d != NULL) { @@ -1617,8 +1617,8 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, - io_convert_part_d_mapper, temp_d, N, copySize, 0, - (void*)&props); + io_convert_part_d_mapper, temp_d, N, copySize, + threadpool_auto_chunk_size, (void*)&props); } else if (props.convert_part_l != NULL) { @@ -1629,8 +1629,8 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, - io_convert_part_l_mapper, temp_l, N, copySize, 0, - (void*)&props); + io_convert_part_l_mapper, temp_l, N, copySize, + threadpool_auto_chunk_size, (void*)&props); } else if (props.convert_gpart_f != NULL) { @@ -1641,8 +1641,8 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, - io_convert_gpart_f_mapper, temp_f, N, copySize, 0, - (void*)&props); + io_convert_gpart_f_mapper, temp_f, N, copySize, + threadpool_auto_chunk_size, (void*)&props); } else if (props.convert_gpart_i != NULL) { @@ -1653,8 +1653,8 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, - io_convert_gpart_i_mapper, temp_i, N, copySize, 0, - (void*)&props); + io_convert_gpart_i_mapper, temp_i, N, copySize, + threadpool_auto_chunk_size, (void*)&props); } else if (props.convert_gpart_d != NULL) { @@ -1665,8 +1665,8 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, - io_convert_gpart_d_mapper, temp_d, N, copySize, 0, - (void*)&props); + io_convert_gpart_d_mapper, temp_d, N, copySize, + threadpool_auto_chunk_size, (void*)&props); } else if (props.convert_gpart_l != NULL) { @@ -1677,8 +1677,8 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, - io_convert_gpart_l_mapper, temp_l, N, copySize, 0, - (void*)&props); + io_convert_gpart_l_mapper, temp_l, N, copySize, + threadpool_auto_chunk_size, (void*)&props); } else if (props.convert_spart_f != NULL) { @@ -1689,8 +1689,8 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, - io_convert_spart_f_mapper, temp_f, N, copySize, 0, - (void*)&props); + io_convert_spart_f_mapper, temp_f, N, copySize, + threadpool_auto_chunk_size, (void*)&props); } else if (props.convert_spart_i != NULL) { @@ -1701,8 +1701,8 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, - io_convert_spart_i_mapper, temp_i, N, copySize, 0, - (void*)&props); + io_convert_spart_i_mapper, temp_i, N, copySize, + threadpool_auto_chunk_size, (void*)&props); } else if (props.convert_spart_d != NULL) { @@ -1713,8 +1713,8 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, - io_convert_spart_d_mapper, temp_d, N, copySize, 0, - (void*)&props); + io_convert_spart_d_mapper, temp_d, N, copySize, + threadpool_auto_chunk_size, (void*)&props); } else if (props.convert_spart_l != NULL) { @@ -1725,8 +1725,8 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, - io_convert_spart_l_mapper, temp_l, N, copySize, 0, - (void*)&props); + io_convert_spart_l_mapper, temp_l, N, copySize, + threadpool_auto_chunk_size, (void*)&props); } else if (props.convert_bpart_f != NULL) { @@ -1737,8 +1737,8 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, - io_convert_bpart_f_mapper, temp_f, N, copySize, 0, - (void*)&props); + io_convert_bpart_f_mapper, temp_f, N, copySize, + threadpool_auto_chunk_size, (void*)&props); } else if (props.convert_bpart_i != NULL) { @@ -1749,8 +1749,8 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, - io_convert_bpart_i_mapper, temp_i, N, copySize, 0, - (void*)&props); + io_convert_bpart_i_mapper, temp_i, N, copySize, + threadpool_auto_chunk_size, (void*)&props); } else if (props.convert_bpart_d != NULL) { @@ -1761,8 +1761,8 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, - io_convert_bpart_d_mapper, temp_d, N, copySize, 0, - (void*)&props); + io_convert_bpart_d_mapper, temp_d, N, copySize, + threadpool_auto_chunk_size, (void*)&props); } else if (props.convert_bpart_l != NULL) { @@ -1773,8 +1773,8 @@ void io_copy_temp_buffer(void* temp, const struct engine* e, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, - io_convert_bpart_l_mapper, temp_l, N, copySize, 0, - (void*)&props); + io_convert_bpart_l_mapper, temp_l, N, copySize, + threadpool_auto_chunk_size, (void*)&props); } else { error("Missing conversion function"); @@ -1832,7 +1832,7 @@ void io_prepare_dm_gparts(struct threadpool* tp, struct gpart* const gparts, size_t Ndm) { threadpool_map(tp, io_prepare_dm_gparts_mapper, gparts, Ndm, - sizeof(struct gpart), 0, NULL); + sizeof(struct gpart), threadpool_auto_chunk_size, NULL); } void io_prepare_dm_background_gparts_mapper(void* restrict data, int Ndm, @@ -1868,7 +1868,7 @@ void io_prepare_dm_background_gparts(struct threadpool* tp, struct gpart* const gparts, size_t Ndm) { threadpool_map(tp, io_prepare_dm_background_gparts_mapper, gparts, Ndm, - sizeof(struct gpart), 0, NULL); + sizeof(struct gpart), threadpool_auto_chunk_size, NULL); } size_t io_count_dm_background_gparts(const struct gpart* const gparts, @@ -1950,7 +1950,7 @@ void io_duplicate_hydro_gparts(struct threadpool* tp, struct part* const parts, data.Ndm = Ndm; threadpool_map(tp, io_duplicate_hydro_gparts_mapper, parts, Ngas, - sizeof(struct part), 0, &data); + sizeof(struct part), threadpool_auto_chunk_size, &data); } void io_duplicate_stars_gparts_mapper(void* restrict data, int Nstars, @@ -2008,7 +2008,7 @@ void io_duplicate_stars_gparts(struct threadpool* tp, data.Ndm = Ndm; threadpool_map(tp, io_duplicate_stars_gparts_mapper, sparts, Nstars, - sizeof(struct spart), 0, &data); + sizeof(struct spart), threadpool_auto_chunk_size, &data); } void io_duplicate_black_holes_gparts_mapper(void* restrict data, @@ -2068,7 +2068,8 @@ void io_duplicate_black_holes_gparts(struct threadpool* tp, data.Ndm = Ndm; threadpool_map(tp, io_duplicate_black_holes_gparts_mapper, bparts, - Nblackholes, sizeof(struct bpart), 0, &data); + Nblackholes, sizeof(struct bpart), threadpool_auto_chunk_size, + &data); } /** diff --git a/src/cooling/Compton/cooling.h b/src/cooling/Compton/cooling.h index a594cdca6916c361b737763e0360ed32d459ba99..78318560220781fedf700dfab07b4930e536a6e8 100644 --- a/src/cooling/Compton/cooling.h +++ b/src/cooling/Compton/cooling.h @@ -136,9 +136,10 @@ __attribute__((always_inline)) INLINE static double Compton_cooling_rate_cgs( * @param cooling The #cooling_function_data used in the run. * @param p Pointer to the particle data. * @param xp Pointer to the particle' extended data. - * @param time The current time. * @param dt The time-step of this particle. * @param dt_therm The time-step operator used for thermal quantities. + * @param time Time since Big Bang (or start of the simulation) in internal + * units. */ __attribute__((always_inline)) INLINE static void cooling_cool_part( const struct phys_const* restrict phys_const, @@ -147,8 +148,8 @@ __attribute__((always_inline)) INLINE static void cooling_cool_part( const struct hydro_props* hydro_props, const struct entropy_floor_properties* floor_props, const struct cooling_function_data* restrict cooling, - struct part* restrict p, struct xpart* restrict xp, const double time, - const float dt, const float dt_therm) { + struct part* restrict p, struct xpart* restrict xp, const float dt, + const float dt_therm, const double time) { /* Nothing to do here? */ if (dt == 0.) return; diff --git a/src/cooling/EAGLE/cooling.c b/src/cooling/EAGLE/cooling.c index 3a6fb523b83ba34d7fea2ff981a5c899e9f04a1c..9d5a5786230d661dd39877154358bce094d9c6a6 100644 --- a/src/cooling/EAGLE/cooling.c +++ b/src/cooling/EAGLE/cooling.c @@ -372,10 +372,10 @@ INLINE static double bisection_iter( * @param cooling The #cooling_function_data used in the run. * @param p Pointer to the particle data. * @param xp Pointer to the extended particle data. - * @param time The current time (since the Big Bang or start of the run) in - * internal units. * @param dt The cooling time-step of this particle. * @param dt_therm The hydro time-step of this particle. + * @param time The current time (since the Big Bang or start of the run) in + * internal units. */ void cooling_cool_part(const struct phys_const *phys_const, const struct unit_system *us, @@ -384,8 +384,8 @@ void cooling_cool_part(const struct phys_const *phys_const, const struct entropy_floor_properties *floor_props, const struct cooling_function_data *cooling, struct part *restrict p, struct xpart *restrict xp, - const double time, const float dt, - const float dt_therm) { + const float dt, const float dt_therm, + const double time) { /* No cooling happens over zero time */ if (dt == 0.) return; diff --git a/src/cooling/EAGLE/cooling.h b/src/cooling/EAGLE/cooling.h index 90c1c01e82c7eec6d6615546866083afdfc1e328..760e455a05e518027bb8ffd66954531d406591be 100644 --- a/src/cooling/EAGLE/cooling.h +++ b/src/cooling/EAGLE/cooling.h @@ -44,7 +44,7 @@ void cooling_cool_part(const struct phys_const *phys_const, const struct entropy_floor_properties *floor_props, const struct cooling_function_data *cooling, struct part *restrict p, struct xpart *restrict xp, - const double time, const float dt, const float dt_therm); + const float dt, const float dt_therm, const double time); float cooling_timestep(const struct cooling_function_data *restrict cooling, const struct phys_const *restrict phys_const, diff --git a/src/cooling/const_du/cooling.h b/src/cooling/const_du/cooling.h index de5164e59498a1a17c62fecfed0b0f61bcf2a2c2..69424fa331b60964fd7448b08ecc59d7751a492d 100644 --- a/src/cooling/const_du/cooling.h +++ b/src/cooling/const_du/cooling.h @@ -76,9 +76,10 @@ INLINE static void cooling_update(const struct cosmology* cosmo, * @param cooling The #cooling_function_data used in the run. * @param p Pointer to the particle data. * @param xp Pointer to the extended particle data. - * @param time The current time. * @param dt The time-step of this particle. * @param dt_therm The time-step operator used for thermal quantities. + * @param time Time since Big Bang (or start of the simulation) in internal + * units. */ __attribute__((always_inline)) INLINE static void cooling_cool_part( const struct phys_const* restrict phys_const, @@ -87,8 +88,8 @@ __attribute__((always_inline)) INLINE static void cooling_cool_part( const struct hydro_props* hydro_props, const struct entropy_floor_properties* floor_props, const struct cooling_function_data* restrict cooling, - struct part* restrict p, struct xpart* restrict xp, const double time, - const float dt, const float dt_therm) { + struct part* restrict p, struct xpart* restrict xp, const float dt, + const float dt_therm, const double time) { /* Internal energy floor */ const float u_floor = cooling->min_energy; diff --git a/src/cooling/const_lambda/cooling.h b/src/cooling/const_lambda/cooling.h index ef56b6e3575d11f767af8061e303f6ffe9248d7a..1597687bf6feec49f88f8a2f2b4bba05ecbf5939 100644 --- a/src/cooling/const_lambda/cooling.h +++ b/src/cooling/const_lambda/cooling.h @@ -106,9 +106,10 @@ __attribute__((always_inline)) INLINE static double cooling_rate_cgs( * @param cooling The #cooling_function_data used in the run. * @param p Pointer to the particle data. * @param xp Pointer to the particle' extended data. - * @param time The current time. * @param dt The time-step of this particle. * @param dt_therm The time-step operator used for thermal quantities. + * @param time Time since Big Bang (or start of the simulation) in internal + * units. */ __attribute__((always_inline)) INLINE static void cooling_cool_part( const struct phys_const* restrict phys_const, @@ -117,8 +118,8 @@ __attribute__((always_inline)) INLINE static void cooling_cool_part( const struct hydro_props* hydro_props, const struct entropy_floor_properties* floor_props, const struct cooling_function_data* restrict cooling, - struct part* restrict p, struct xpart* restrict xp, const double time, - const float dt, const float dt_therm) { + struct part* restrict p, struct xpart* restrict xp, const float dt, + const float dt_therm, const double time) { /* Nothing to do here? */ if (dt == 0.) return; diff --git a/src/cooling/grackle/cooling.c b/src/cooling/grackle/cooling.c index 6b74b72aefa0bb64717f83d9b5a54d497e6ae260..b51afd83f0be3fee94e10f1e03ea24303b621a26 100644 --- a/src/cooling/grackle/cooling.c +++ b/src/cooling/grackle/cooling.c @@ -748,9 +748,10 @@ void cooling_apply(struct part* restrict p, struct xpart* restrict xp, * @param cooling The #cooling_function_data used in the run. * @param p Pointer to the particle data. * @param xp Pointer to the particle' extended data. - * @param time The current time. * @param dt The time-step of this particle. * @param dt_therm The time-step operator used for thermal quantities. + * @param time The current time (since the Big Bang or start of the run) in + * internal units. */ void cooling_cool_part(const struct phys_const* restrict phys_const, const struct unit_system* restrict us, @@ -759,8 +760,8 @@ void cooling_cool_part(const struct phys_const* restrict phys_const, const struct entropy_floor_properties* floor_props, const struct cooling_function_data* restrict cooling, struct part* restrict p, struct xpart* restrict xp, - const double time, const double dt, - const double dt_therm) { + const double dt, const double dt_therm, + const double time) { /* Nothing to do here? */ if (dt == 0.) return; diff --git a/src/cooling/grackle/cooling.h b/src/cooling/grackle/cooling.h index 209b986c6f39595a7f60b64539c97e3292e79571..c5230c077b29bfdce39b34931c9f0107c490ee14 100644 --- a/src/cooling/grackle/cooling.h +++ b/src/cooling/grackle/cooling.h @@ -115,8 +115,8 @@ void cooling_cool_part(const struct phys_const* restrict phys_const, const struct entropy_floor_properties* floor_props, const struct cooling_function_data* restrict cooling, struct part* restrict p, struct xpart* restrict xp, - const double time, const double dt, - const double dt_therm); + const double dt, const double dt_therm, + const double time); float cooling_get_temperature( const struct phys_const* restrict phys_const, diff --git a/src/cooling/none/cooling.h b/src/cooling/none/cooling.h index 107ea4b88a24d2505b8cceb346672f855df7b9da..abd29bfb84d23ec1b701f19479dbd71eac3a1555 100644 --- a/src/cooling/none/cooling.h +++ b/src/cooling/none/cooling.h @@ -63,9 +63,10 @@ INLINE static void cooling_update(const struct cosmology* cosmo, * @param cooling The #cooling_function_data used in the run. * @param p Pointer to the particle data. * @param xp Pointer to the extended particle data. - * @param time The current time. * @param dt The time-step of this particle. * @param dt_therm The time-step operator used for thermal quantities. + * @param time The current time (since the Big Bang or start of the run) in + * internal units. */ __attribute__((always_inline)) INLINE static void cooling_cool_part( const struct phys_const* restrict phys_const, @@ -74,8 +75,8 @@ __attribute__((always_inline)) INLINE static void cooling_cool_part( const struct hydro_props* hydro_props, const struct entropy_floor_properties* floor_props, const struct cooling_function_data* restrict cooling, - struct part* restrict p, struct xpart* restrict xp, const double time, - const float dt, const float dt_therm) {} + struct part* restrict p, struct xpart* restrict xp, const float dt, + const float dt_therm, const double time) {} /** * @brief Computes the cooling time-step. diff --git a/src/debug.c b/src/debug.c index fba8db46661e9624f9332c74d0d76c435cb9a6d2..1bfe5d2377ef6f6f3f8f4733d244a76635c60e6f 100644 --- a/src/debug.c +++ b/src/debug.c @@ -375,31 +375,11 @@ static void dumpCells_map(struct cell *c, void *data) { size_t *ldata = (size_t *)data; FILE *file = (FILE *)ldata[0]; struct engine *e = (struct engine *)ldata[1]; - float ntasks = c->nr_tasks; int super = (int)ldata[2]; int active = (int)ldata[3]; int mpiactive = (int)ldata[4]; int pactive = (int)ldata[5]; -#if SWIFT_DEBUG_CHECKS - /* The c->nr_tasks field does not include all the tasks. So let's check this - * the hard way. Note pairs share the task 50/50 with the other cell. */ - ntasks = 0.0f; - struct task *tasks = e->sched.tasks; - int nr_tasks = e->sched.nr_tasks; - for (int k = 0; k < nr_tasks; k++) { - if (tasks[k].cj == NULL) { - if (c == tasks[k].ci) { - ntasks = ntasks + 1.0f; - } - } else { - if (c == tasks[k].ci || c == tasks[k].cj) { - ntasks = ntasks + 0.5f; - } - } - } -#endif - /* Only cells with particles are dumped. */ if (c->hydro.count > 0 || c->grav.count > 0 || c->stars.count > 0) { @@ -429,6 +409,36 @@ static void dumpCells_map(struct cell *c, void *data) { (!super || ((super && c->super == c) || (c->parent == NULL))) && active && mpiactive) { + /* The c->nr_tasks field does not include all the tasks. So let's check + * this the hard way. Note pairs share the task 50/50 with the other + * cell. Also accumulate all the time used by tasks of this cell and + * form some idea of the effective task depth. */ + float ntasks = 0.0f; + struct task *tasks = e->sched.tasks; + int nr_tasks = e->sched.nr_tasks; + double ticsum = 0.0; /* Sum of work for this cell. */ + double dsum = 0.0; + for (int k = 0; k < nr_tasks; k++) { + if (tasks[k].cj == NULL) { + if (tasks[k].ci != NULL) { + if (c == tasks[k].ci || c == tasks[k].ci->super) { + ntasks = ntasks + 1.0f; + ticsum += (tasks[k].toc - tasks[k].tic); + dsum += tasks[k].ci->depth; + } + } + } else { + if (c == tasks[k].ci || c == tasks[k].ci->super || c == tasks[k].cj || + c == tasks[k].cj->super) { + ntasks = ntasks + 0.5f; + ticsum += 0.5 * (tasks[k].toc - tasks[k].tic); + if (tasks[k].ci != NULL) dsum += (tasks[k].ci->depth * 0.5); + dsum += (tasks[k].cj->depth * 0.5); + } + } + } + dsum /= (double)ntasks; + /* If requested we work out how many particles are active in this cell. */ int pactcount = 0; if (pactive) { @@ -443,15 +453,16 @@ static void dumpCells_map(struct cell *c, void *data) { if (spart_is_active(&sparts[k], e)) pactcount++; } - fprintf(file, - " %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6d %6d %6d %6d %6d %6d " - "%6.1f %20lld %6d %6d %6d %6d %6d %6d %6d\n", - c->loc[0], c->loc[1], c->loc[2], c->width[0], c->width[1], - c->width[2], e->step, c->hydro.count, c->grav.count, - c->stars.count, pactcount, c->depth, ntasks, c->hydro.ti_end_min, - get_time_bin(c->hydro.ti_end_min), (c->super == c), - (c->parent == NULL), cell_is_active_hydro(c, e), c->nodeID, - c->nodeID == e->nodeID, ismpiactive); + fprintf( + file, + " %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6d %6d %6d %6d %6d %6d %6d " + "%6.1f %20lld %6d %6d %6d %6d %6d %6d %6d %f %f\n", + c->loc[0], c->loc[1], c->loc[2], c->width[0], c->width[1], + c->width[2], e->step, c->hydro.count, c->grav.count, c->stars.count, + pactcount, c->depth, c->maxdepth, ntasks, c->hydro.ti_end_min, + get_time_bin(c->hydro.ti_end_min), (c->super == c), + (c->parent == NULL), cell_is_active_hydro(c, e), c->nodeID, + c->nodeID == e->nodeID, ismpiactive, ticsum, dsum); } } } @@ -483,11 +494,12 @@ void dumpCells(const char *prefix, int super, int active, int mpiactive, /* Header. */ fprintf(file, - "# %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s " - "%20s %6s %6s %6s %6s %6s %6s %6s\n", + "# %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s " + "%20s %6s %6s %6s %6s %6s %6s %6s %6s %6s\n", "x", "y", "z", "xw", "yw", "zw", "step", "count", "gcount", "scount", - "actcount", "depth", "tasks", "ti_end_min", "timebin", "issuper", - "istop", "active", "rank", "local", "mpiactive"); + "actcount", "depth", "maxdepth", "tasks", "ti_end_min", "timebin", + "issuper", "istop", "active", "rank", "local", "mpiactive", "ticsum", + "avedepth"); size_t data[6]; data[0] = (size_t)file; diff --git a/src/engine.c b/src/engine.c index c1e294a442b7a78091ba0bf0af6e9f965a57d024..4d7d054de41d19af2c16fc9cda5b10f637ac3729 100644 --- a/src/engine.c +++ b/src/engine.c @@ -65,6 +65,7 @@ #include "equation_of_state.h" #include "error.h" #include "feedback.h" +#include "fof.h" #include "gravity.h" #include "gravity_cache.h" #include "hydro.h" @@ -73,6 +74,7 @@ #include "map.h" #include "memuse.h" #include "minmax.h" +#include "mpiuse.h" #include "outputlist.h" #include "parallel_io.h" #include "part.h" @@ -210,8 +212,9 @@ void engine_repartition(struct engine *e) { /* Task arrays. */ scheduler_free_tasks(&e->sched); - /* Foreign parts. */ - space_free_foreign_parts(e->s); + /* Foreign parts. (no need to nullify the cell pointers as the cells + * will be regenerated) */ + space_free_foreign_parts(e->s, /*clear_cell_pointers=*/0); /* Now comes the tricky part: Exchange particles between all nodes. This is done in two steps, first allreducing a matrix of @@ -253,79 +256,169 @@ void engine_repartition_trigger(struct engine *e) { #ifdef WITH_MPI const ticks tic = getticks(); + static int opened = 0; /* Do nothing if there have not been enough steps since the last repartition * as we don't want to repeat this too often or immediately after a - * repartition step. Also nothing to do when requested. */ - if (e->step - e->last_repartition >= 2 && - e->reparttype->type != REPART_NONE) { + * repartition step. We attempt all this even when we are not repartitioning + * as the balance logs can still be interesting. */ + if (e->step - e->last_repartition >= 2) { /* If we have fixed costs available and this is step 2 or we are forcing - * repartitioning then we do a fixed costs one now. */ - if (e->reparttype->trigger > 1 || - (e->step == 2 && e->reparttype->use_fixed_costs)) { + * repartitioning then we do a forced fixed costs repartition regardless. */ + int forced = 0; + if (e->reparttype->type != REPART_NONE) { + if (e->reparttype->trigger > 1 || + (e->step == 2 && e->reparttype->use_fixed_costs)) { + if (e->reparttype->trigger > 1) { + if ((e->step % (int)e->reparttype->trigger) == 0) e->forcerepart = 1; + } else { + e->forcerepart = 1; + } + e->reparttype->use_ticks = 0; + forced = 1; + } + } - if (e->reparttype->trigger > 1) { - if ((e->step % (int)e->reparttype->trigger) == 0) e->forcerepart = 1; + /* We only check the CPU loads when we have processed a significant number + * of all particles as we require all tasks to have timings or are + * interested in the various balances logs. */ + if ((e->updates > 1 && + e->updates >= e->total_nr_parts * e->reparttype->minfrac) || + (e->g_updates > 1 && + e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) { + + /* Are we using the task tick timings or fixed costs? */ + if (e->reparttype->use_fixed_costs > 1) { + e->reparttype->use_ticks = 0; } else { - e->forcerepart = 1; + e->reparttype->use_ticks = 1; } - e->reparttype->use_ticks = 0; - } else { + /* Get the resident size of the process for the memory logs. */ + long size, resident, shared, text, library, data, dirty; + memuse_use(&size, &resident, &shared, &text, &data, &library, &dirty); + + /* Gather it together with the CPU times used by the tasks in the last + * step. */ + double timemem[3] = {e->usertime_last_step, e->systime_last_step, + (double)resident}; + double timemems[e->nr_nodes * 3]; + MPI_Gather(&timemem, 3, MPI_DOUBLE, timemems, 3, MPI_DOUBLE, 0, + MPI_COMM_WORLD); + if (e->nodeID == 0) { + + /* Get the range and mean of the two CPU times and memory. */ + double umintime = timemems[0]; + double umaxtime = timemems[0]; + + double smintime = timemems[1]; + double smaxtime = timemems[1]; + + double minmem = timemems[2]; + double maxmem = timemems[2]; + + double tmintime = umintime + smintime; + double tmaxtime = umaxtime + smaxtime; + + double usum = timemems[0]; + double ssum = timemems[1]; + double tsum = usum + ssum; + + double msum = timemems[2]; + + for (int k = 3; k < e->nr_nodes * 3; k += 3) { + if (timemems[k] > umaxtime) umaxtime = timemems[k]; + if (timemems[k] < umintime) umintime = timemems[k]; + + if (timemems[k + 1] > smaxtime) smaxtime = timemems[k + 1]; + if (timemems[k + 1] < smintime) smintime = timemems[k + 1]; + + double total = timemems[k] + timemems[k + 1]; + if (total > tmaxtime) tmaxtime = total; + if (total < tmintime) tmintime = total; + + usum += timemems[k]; + ssum += timemems[k + 1]; + tsum += total; + + if (timemems[k + 2] > maxmem) maxmem = timemems[k + 2]; + if (timemems[k + 2] < minmem) minmem = timemems[k + 2]; + msum += timemems[k + 2]; + } + double umean = usum / (double)e->nr_nodes; + double smean = ssum / (double)e->nr_nodes; + double tmean = tsum / (double)e->nr_nodes; + double mmean = msum / (double)e->nr_nodes; + + /* Are we out of balance and need to repartition? */ + /* ---------------------------------------------- */ + double abs_trigger = fabs(e->reparttype->trigger); + double balance = (umaxtime - umintime) / umean; + if (e->reparttype->type != REPART_NONE) { + + /* When forced we don't care about the balance. */ + if (!forced) { + if (balance > abs_trigger) { + if (e->verbose) + message("trigger fraction %.3f > %.3f will repartition", + balance, abs_trigger); + e->forcerepart = 1; + } else { + if (e->verbose && e->reparttype->type != REPART_NONE) + message("trigger fraction %.3f =< %.3f will not repartition", + balance, abs_trigger); + } + } - /* It is only worth checking the CPU loads when we have processed a - * significant number of all particles as we require all tasks to have - * timings. */ - if ((e->updates > 1 && - e->updates >= e->total_nr_parts * e->reparttype->minfrac) || - (e->g_updates > 1 && - e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) { - - /* Should we are use the task timings or fixed costs. */ - if (e->reparttype->use_fixed_costs > 1) { - e->reparttype->use_ticks = 0; } else { - e->reparttype->use_ticks = 1; + /* Not repartitioning, would that have been done otherwise? */ + if (e->verbose) + message("trigger fraction %.3f > %.3f would have repartitioned", + balance, abs_trigger); } - /* Get CPU time used since the last call to this function. */ - double elapsed_cputime = - clocks_get_cputime_used() - e->cputime_last_step; - - /* Gather the elapsed CPU times from all ranks for the last step. */ - double elapsed_cputimes[e->nr_nodes]; - MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1, - MPI_DOUBLE, 0, MPI_COMM_WORLD); - if (e->nodeID == 0) { - - /* Get the range and mean of cputimes. */ - double mintime = elapsed_cputimes[0]; - double maxtime = elapsed_cputimes[0]; - double sum = elapsed_cputimes[0]; - for (int k = 1; k < e->nr_nodes; k++) { - if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k]; - if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k]; - sum += elapsed_cputimes[k]; - } - double mean = sum / (double)e->nr_nodes; - - /* Are we out of balance? */ - double abs_trigger = fabs(e->reparttype->trigger); - if (((maxtime - mintime) / mean) > abs_trigger) { - if (e->verbose) - message("trigger fraction %.3f > %.3f will repartition", - (maxtime - mintime) / mean, abs_trigger); - e->forcerepart = 1; - } else { - if (e->verbose) - message("trigger fraction %.3f =< %.3f will not repartition", - (maxtime - mintime) / mean, abs_trigger); - } + /* Keep logs of all CPU times and resident memory size for debugging + * load issues. */ + FILE *timelog = NULL; + FILE *memlog = NULL; + if (!opened) { + timelog = fopen("rank_cpu_balance.log", "w"); + fprintf(timelog, "# step rank user sys sum\n"); + + memlog = fopen("rank_memory_balance.log", "w"); + fprintf(memlog, "# step rank resident\n"); + + opened = 1; + } else { + timelog = fopen("rank_cpu_balance.log", "a"); + memlog = fopen("rank_memory_balance.log", "a"); + } + + for (int k = 0; k < e->nr_nodes * 3; k += 3) { + + fprintf(timelog, "%d %d %f %f %f\n", e->step, k / 3, timemems[k], + timemems[k + 1], timemems[k] + timemems[k + 1]); + + fprintf(memlog, "%d %d %ld\n", e->step, k / 3, (long)timemems[k + 2]); } + + fprintf(timelog, "# %d mean times: %f %f %f\n", e->step, umean, smean, + tmean); + if (abs_trigger > 1.f) abs_trigger = 0.f; /* Not relevant. */ + fprintf(timelog, + "# %d balance: %f, expected: %f (sys: %f, total: %f)\n", + e->step, balance, abs_trigger, (smaxtime - smintime) / smean, + (tmaxtime - tmintime) / tmean); + + fclose(timelog); + + fprintf(memlog, "# %d mean resident memory: %f, balance: %f\n", e->step, + mmean, (maxmem - minmem) / mmean); + fclose(memlog); } - /* All nodes do this together. */ + /* All nodes do this together, so send to other ranks. */ MPI_Bcast(&e->forcerepart, 1, MPI_INT, 0, MPI_COMM_WORLD); } @@ -333,10 +426,6 @@ void engine_repartition_trigger(struct engine *e) { if (e->forcerepart) e->last_repartition = e->step; } - /* We always reset CPU time for next check, unless it will not be used. */ - if (e->reparttype->type != REPART_NONE) - e->cputime_last_step = clocks_get_cputime_used(); - if (e->verbose) message("took %.3f %s", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -1282,7 +1371,7 @@ void engine_print_task_counts(const struct engine *e) { for (int k = 0; k <= task_type_count; k++) counts[k] = 0; threadpool_map((struct threadpool *)&e->threadpool, engine_do_tasks_count_mapper, (void *)tasks, nr_tasks, - sizeof(struct task), 0, counts); + sizeof(struct task), threadpool_auto_chunk_size, counts); #ifdef WITH_MPI printf("[%04i] %s engine_print_task_counts: task counts are [ %s=%i", @@ -2340,11 +2429,29 @@ void engine_step(struct engine *e) { gravity_exact_force_compute(e->s, e); #endif + /* Get current CPU times.*/ +#ifdef WITH_MPI + double start_usertime = 0.0; + double start_systime = 0.0; + if (e->reparttype->type != REPART_NONE) { + clocks_get_cputimes_used(&start_usertime, &start_systime); + } +#endif + /* Start all the tasks. */ TIMER_TIC; engine_launch(e, "tasks"); TIMER_TOC(timer_runners); + /* Now record the CPU times used by the tasks. */ +#ifdef WITH_MPI + double end_usertime = 0.0; + double end_systime = 0.0; + clocks_get_cputimes_used(&end_usertime, &end_systime); + e->usertime_last_step = end_usertime - start_usertime; + e->systime_last_step = end_systime - start_systime; +#endif + /* Since the time-steps may have changed because of the limiter's * action, we need to communicate the new time-step sizes */ if ((e->policy & engine_policy_timestep_sync) || @@ -2715,7 +2822,8 @@ void engine_reconstruct_multipoles(struct engine *e) { #endif threadpool_map(&e->threadpool, engine_do_reconstruct_multipoles_mapper, - e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 0, e); + e->s->cells_top, e->s->nr_cells, sizeof(struct cell), + threadpool_auto_chunk_size, e); if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -3105,17 +3213,10 @@ void engine_split(struct engine *e, struct partition *initial_partition) { swift_free("gparts", s->gparts); s->gparts = gparts_new; - /* Re-link the parts. */ - if (s->nr_parts > 0 && s->nr_gparts > 0) - part_relink_parts_to_gparts(s->gparts, s->nr_gparts, s->parts); - - /* Re-link the sparts. */ - if (s->nr_sparts > 0 && s->nr_gparts > 0) - part_relink_sparts_to_gparts(s->gparts, s->nr_gparts, s->sparts); - - /* Re-link the bparts. */ - if (s->nr_bparts > 0 && s->nr_gparts > 0) - part_relink_bparts_to_gparts(s->gparts, s->nr_gparts, s->bparts); + /* Re-link everything to the gparts. */ + if (s->nr_gparts > 0) + part_relink_all_parts_to_gparts(s->gparts, s->nr_gparts, s->parts, + s->sparts, s->bparts, &e->threadpool); #ifdef SWIFT_DEBUG_CHECKS @@ -3383,6 +3484,72 @@ void engine_unpin(void) { #endif } +#ifdef SWIFT_DUMPER_THREAD +/** + * @brief dumper thread action, checks got the existence of the .dump file + * every 5 seconds and does the dump if found. + * + * @param p the #engine + */ +static void *engine_dumper_poll(void *p) { + struct engine *e = (struct engine *)p; + while (1) { + if (access(".dump", F_OK) == 0) { + + /* OK, do our work. */ + message("Dumping engine tasks in step: %d", e->step); + task_dump_active(e); + +#ifdef SWIFT_MEMUSE_REPORTS + /* Dump the currently logged memory. */ + message("Dumping memory use report"); + memuse_log_dump_error(e->nodeID); +#endif + +#if defined(SWIFT_MPIUSE_REPORTS) && defined(WITH_MPI) + /* Dump the MPI interactions in the step. */ + mpiuse_log_dump_error(e->nodeID); +#endif + + /* Add more interesting diagnostics. */ + scheduler_dump_queues(e); + + /* Delete the file. */ + unlink(".dump"); + message("Dumping completed"); + fflush(stdout); + } + + /* Take a breath. */ + sleep(5); + } + return NULL; +} +#endif /* SWIFT_DUMPER_THREAD */ + +#ifdef SWIFT_DUMPER_THREAD +/** + * @brief creates the dumper thread. + * + * This watches for the creation of a ".dump" file in the current directory + * and if found dumps the current state of the tasks and memory use (if also + * configured). + * + * @param e the #engine + * + */ +static void engine_dumper_init(struct engine *e) { + pthread_t dumper; + + /* Make sure the .dump file is not present, that is bad when starting up. */ + struct stat buf; + if (stat(".dump", &buf) == 0) unlink(".dump"); + + /* Thread does not exit, so nothing to do but create it. */ + pthread_create(&dumper, NULL, &engine_dumper_poll, e); +} +#endif /* SWIFT_DUMPER_THREAD */ + /** * @brief init an engine struct with the necessary properties for the * simulation. @@ -3512,7 +3679,8 @@ void engine_init(struct engine *e, struct space *s, struct swift_params *params, e->parameter_file = params; e->stf_this_timestep = 0; #ifdef WITH_MPI - e->cputime_last_step = 0; + e->usertime_last_step = 0.0; + e->systime_last_step = 0.0; e->last_repartition = 0; #endif e->total_nr_cells = 0; @@ -4283,6 +4451,12 @@ void engine_config(int restart, int fof, struct engine *e, } #endif +#ifdef SWIFT_DUMPER_THREAD + + /* Start the dumper thread.*/ + engine_dumper_init(e); +#endif + /* Wait for the runner threads to be in place. */ swift_barrier_wait(&e->wait_barrier); } diff --git a/src/engine.h b/src/engine.h index a30d8943b8476f91915687988b42b0c074572397..f4de5d6a76205bda533e97790e4954a1d801e08b 100644 --- a/src/engine.h +++ b/src/engine.h @@ -360,8 +360,9 @@ struct engine { ticks tic_step, toc_step; #ifdef WITH_MPI - /* CPU time of the last step. */ - double cputime_last_step; + /* CPU times that the tasks used in the last step. */ + double usertime_last_step; + double systime_last_step; /* Step of last repartition. */ int last_repartition; @@ -476,6 +477,9 @@ struct engine { /* Maximum number of tasks needed for restarting. */ int restart_max_tasks; + /* The globally agreed runtime, in hours. */ + float runtime; + /* Label of the run */ char run_name[PARSER_MAX_LINE_SIZE]; diff --git a/src/engine_collect_end_of_step.c b/src/engine_collect_end_of_step.c index 458a0a7a40e9e433f0241b04ff67fa1fbc37bbf0..0973399c11146baef3b3be60b3635426021b23cb 100644 --- a/src/engine_collect_end_of_step.c +++ b/src/engine_collect_end_of_step.c @@ -44,6 +44,7 @@ struct end_of_step_data { ti_black_holes_beg_max; struct engine *e; struct star_formation_history sfh; + float runtime; }; /** @@ -455,13 +456,16 @@ void engine_collect_end_of_step(struct engine *e, int apply) { data.ti_black_holes_end_max = 0, data.ti_black_holes_beg_max = 0; data.e = e; + /* Need to use a consistent check of the hours since we started. */ + data.runtime = clocks_get_hours_since_start(); + /* Initialize the total SFH of the simulation to zero */ star_formation_logger_init(&data.sfh); /* Collect information from the local top-level cells */ threadpool_map(&e->threadpool, engine_collect_end_of_step_mapper, s->local_cells_with_tasks_top, s->nr_local_cells_with_tasks, - sizeof(int), 0, &data); + sizeof(int), threadpool_auto_chunk_size, &data); /* Get the number of inhibited particles from the space-wide counters * since these have been updated atomically during the time-steps. */ @@ -480,7 +484,8 @@ void engine_collect_end_of_step(struct engine *e, int apply) { data.ti_stars_beg_max, data.ti_black_holes_end_min, data.ti_black_holes_end_max, data.ti_black_holes_beg_max, e->forcerebuild, e->s->tot_cells, e->sched.nr_tasks, - (float)e->sched.nr_tasks / (float)e->s->tot_cells, data.sfh); + (float)e->sched.nr_tasks / (float)e->s->tot_cells, data.sfh, + data.runtime); /* Aggregate collective data from the different nodes for this step. */ #ifdef WITH_MPI diff --git a/src/engine_drift.c b/src/engine_drift.c index fee9ed1f11de2d5e372bd70554bd053b8313ac80..26f8160200c5bdc51f7dab51b56e9a4d771d3d39 100644 --- a/src/engine_drift.c +++ b/src/engine_drift.c @@ -293,28 +293,28 @@ void engine_drift_all(struct engine *e, const int drift_mpoles) { if (e->s->nr_parts > 0) { threadpool_map(&e->threadpool, engine_do_drift_all_part_mapper, e->s->local_cells_top, e->s->nr_local_cells, sizeof(int), - /* default chunk */ 0, e); + threadpool_auto_chunk_size, e); } if (e->s->nr_gparts > 0) { threadpool_map(&e->threadpool, engine_do_drift_all_gpart_mapper, e->s->local_cells_top, e->s->nr_local_cells, sizeof(int), - /* default chunk */ 0, e); + threadpool_auto_chunk_size, e); } if (e->s->nr_sparts > 0) { threadpool_map(&e->threadpool, engine_do_drift_all_spart_mapper, e->s->local_cells_top, e->s->nr_local_cells, sizeof(int), - /* default chunk */ 0, e); + threadpool_auto_chunk_size, e); } if (e->s->nr_bparts > 0) { threadpool_map(&e->threadpool, engine_do_drift_all_bpart_mapper, e->s->local_cells_top, e->s->nr_local_cells, sizeof(int), - /* default chunk */ 0, e); + threadpool_auto_chunk_size, e); } if (drift_mpoles && (e->policy & engine_policy_self_gravity)) { threadpool_map(&e->threadpool, engine_do_drift_all_multipole_mapper, e->s->local_cells_with_tasks_top, e->s->nr_local_cells_with_tasks, sizeof(int), - /* default chunk */ 0, e); + threadpool_auto_chunk_size, e); } } else { @@ -325,27 +325,27 @@ void engine_drift_all(struct engine *e, const int drift_mpoles) { if (e->s->nr_parts > 0) { threadpool_map(&e->threadpool, engine_do_drift_all_part_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), - /* default chunk */ 0, e); + threadpool_auto_chunk_size, e); } if (e->s->nr_sparts > 0) { threadpool_map(&e->threadpool, engine_do_drift_all_spart_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), - /* default chunk */ 0, e); + threadpool_auto_chunk_size, e); } if (e->s->nr_bparts > 0) { threadpool_map(&e->threadpool, engine_do_drift_all_bpart_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), - /* default chunk */ 0, e); + threadpool_auto_chunk_size, e); } if (e->s->nr_gparts > 0) { threadpool_map(&e->threadpool, engine_do_drift_all_gpart_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), - /* default chunk */ 0, e); + threadpool_auto_chunk_size, e); } if (e->policy & engine_policy_self_gravity) { threadpool_map(&e->threadpool, engine_do_drift_all_multipole_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), - /* default chunk */ 0, e); + threadpool_auto_chunk_size, e); } } @@ -401,7 +401,8 @@ void engine_drift_top_multipoles(struct engine *e) { const ticks tic = getticks(); threadpool_map(&e->threadpool, engine_do_drift_top_multipoles_mapper, - e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 0, e); + e->s->cells_top, e->s->nr_cells, sizeof(struct cell), + threadpool_auto_chunk_size, e); #ifdef SWIFT_DEBUG_CHECKS /* Check that all cells have been drifted to the current time. */ diff --git a/src/engine_fof.c b/src/engine_fof.c index 961b7e966ecc33250c3d005d3a31d59200f98fb0..bbecc54a9dd5a5aa9cbdb62b15a0fc621791ebb5 100644 --- a/src/engine_fof.c +++ b/src/engine_fof.c @@ -25,6 +25,9 @@ /* This object's header. */ #include "engine.h" +/* Local headers. */ +#include "fof.h" + /** * @brief Activate all the #gpart communications in preparation * fof a call to FOF. diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c index 5d8d3a5fed14330776ccd835664804427d1cb370..9f907f33b4b0173a32292c58e9b1a1fd8bad3312 100644 --- a/src/engine_maketasks.c +++ b/src/engine_maketasks.c @@ -3123,7 +3123,7 @@ void engine_make_fof_tasks(struct engine *e) { /* Construct a FOF loop over neighbours */ if (e->policy & engine_policy_fof) threadpool_map(&e->threadpool, engine_make_fofloop_tasks_mapper, NULL, - s->nr_cells, 1, 0, e); + s->nr_cells, 1, threadpool_auto_chunk_size, e); if (e->verbose) message("Making FOF tasks took %.3f %s.", @@ -3180,7 +3180,7 @@ void engine_maketasks(struct engine *e) { /* Construct the first hydro loop over neighbours */ if (e->policy & engine_policy_hydro) threadpool_map(&e->threadpool, engine_make_hydroloop_tasks_mapper, NULL, - s->nr_cells, 1, 0, e); + s->nr_cells, 1, threadpool_auto_chunk_size, e); if (e->verbose) message("Making hydro tasks took %.3f %s.", @@ -3191,7 +3191,7 @@ void engine_maketasks(struct engine *e) { /* Add the self gravity tasks. */ if (e->policy & engine_policy_self_gravity) { threadpool_map(&e->threadpool, engine_make_self_gravity_tasks_mapper, NULL, - s->nr_cells, 1, 0, e); + s->nr_cells, 1, threadpool_auto_chunk_size, e); } if (e->verbose) @@ -3242,7 +3242,8 @@ void engine_maketasks(struct engine *e) { store the density tasks in each cell, and make each sort depend on the sorts of its progeny. */ threadpool_map(&e->threadpool, engine_count_and_link_tasks_mapper, - sched->tasks, sched->nr_tasks, sizeof(struct task), 0, e); + sched->tasks, sched->nr_tasks, sizeof(struct task), + threadpool_auto_chunk_size, e); if (e->verbose) message("Counting and linking tasks took %.3f %s.", @@ -3259,7 +3260,7 @@ void engine_maketasks(struct engine *e) { /* Now that the self/pair tasks are at the right level, set the super * pointers. */ threadpool_map(&e->threadpool, cell_set_super_mapper, cells, nr_cells, - sizeof(struct cell), 0, e); + sizeof(struct cell), threadpool_auto_chunk_size, e); if (e->verbose) message("Setting super-pointers took %.3f %s.", @@ -3267,7 +3268,7 @@ void engine_maketasks(struct engine *e) { /* Append hierarchical tasks to each cell. */ threadpool_map(&e->threadpool, engine_make_hierarchical_tasks_mapper, cells, - nr_cells, sizeof(struct cell), 0, e); + nr_cells, sizeof(struct cell), threadpool_auto_chunk_size, e); tic2 = getticks(); @@ -3276,7 +3277,8 @@ void engine_maketasks(struct engine *e) { of its super-cell. */ if (e->policy & engine_policy_hydro) threadpool_map(&e->threadpool, engine_make_extra_hydroloop_tasks_mapper, - sched->tasks, sched->nr_tasks, sizeof(struct task), 0, e); + sched->tasks, sched->nr_tasks, sizeof(struct task), + threadpool_auto_chunk_size, e); if (e->verbose) message("Making extra hydroloop tasks took %.3f %s.", @@ -3325,8 +3327,8 @@ void engine_maketasks(struct engine *e) { threadpool_map(&e->threadpool, engine_addtasks_send_mapper, send_cell_type_pairs, num_send_cells, - sizeof(struct cell_type_pair), - /*chunk=*/0, e); + sizeof(struct cell_type_pair), threadpool_auto_chunk_size, + e); free(send_cell_type_pairs); @@ -3366,8 +3368,8 @@ void engine_maketasks(struct engine *e) { } threadpool_map(&e->threadpool, engine_addtasks_recv_mapper, recv_cell_type_pairs, num_recv_cells, - sizeof(struct cell_type_pair), - /*chunk=*/0, e); + sizeof(struct cell_type_pair), threadpool_auto_chunk_size, + e); free(recv_cell_type_pairs); if (e->verbose) diff --git a/src/engine_marktasks.c b/src/engine_marktasks.c index e7da143404849799911d7ae4f20fd8532d04b5b8..d3d17bcdd6dbf09fb6586cd584c7fdfd6ebe21fd 100644 --- a/src/engine_marktasks.c +++ b/src/engine_marktasks.c @@ -1004,7 +1004,7 @@ int engine_marktasks(struct engine *e) { /* Run through the tasks and mark as skip or not. */ size_t extra_data[3] = {(size_t)e, (size_t)rebuild_space, (size_t)&e->sched}; threadpool_map(&e->threadpool, engine_marktasks_mapper, s->tasks, s->nr_tasks, - sizeof(struct task), 0, extra_data); + sizeof(struct task), threadpool_auto_chunk_size, extra_data); rebuild_space = extra_data[1]; if (e->verbose) diff --git a/src/engine_redistribute.c b/src/engine_redistribute.c index e2c3ea056e57e5097785a9014e4e0d2500d52a52..9e9ce4842a88a65f446f12a74de5039912247516 100644 --- a/src/engine_redistribute.c +++ b/src/engine_redistribute.c @@ -664,7 +664,8 @@ void engine_redistribute(struct engine *e) { redist_data.base = (void *)parts; threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_part, parts, - nr_parts, sizeof(struct part), 0, &redist_data); + nr_parts, sizeof(struct part), threadpool_auto_chunk_size, + &redist_data); /* Sort the particles according to their cell index. */ if (nr_parts > 0) @@ -711,7 +712,8 @@ void engine_redistribute(struct engine *e) { savelink_data.parts = (void *)parts; savelink_data.nodeID = nodeID; threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_part, - nodes, nr_nodes, sizeof(int), 0, &savelink_data); + nodes, nr_nodes, sizeof(int), threadpool_auto_chunk_size, + &savelink_data); } swift_free("dest", dest); @@ -729,7 +731,8 @@ void engine_redistribute(struct engine *e) { redist_data.base = (void *)sparts; threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_spart, sparts, - nr_sparts, sizeof(struct spart), 0, &redist_data); + nr_sparts, sizeof(struct spart), threadpool_auto_chunk_size, + &redist_data); /* Sort the particles according to their cell index. */ if (nr_sparts > 0) @@ -775,7 +778,8 @@ void engine_redistribute(struct engine *e) { savelink_data.parts = (void *)sparts; savelink_data.nodeID = nodeID; threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_spart, - nodes, nr_nodes, sizeof(int), 0, &savelink_data); + nodes, nr_nodes, sizeof(int), threadpool_auto_chunk_size, + &savelink_data); } swift_free("s_dest", s_dest); @@ -793,7 +797,8 @@ void engine_redistribute(struct engine *e) { redist_data.base = (void *)bparts; threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_bpart, bparts, - nr_bparts, sizeof(struct bpart), 0, &redist_data); + nr_bparts, sizeof(struct bpart), threadpool_auto_chunk_size, + &redist_data); /* Sort the particles according to their cell index. */ if (nr_bparts > 0) @@ -839,7 +844,8 @@ void engine_redistribute(struct engine *e) { savelink_data.parts = (void *)bparts; savelink_data.nodeID = nodeID; threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_bpart, - nodes, nr_nodes, sizeof(int), 0, &savelink_data); + nodes, nr_nodes, sizeof(int), threadpool_auto_chunk_size, + &savelink_data); } swift_free("b_dest", b_dest); @@ -857,7 +863,8 @@ void engine_redistribute(struct engine *e) { redist_data.base = (void *)gparts; threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_gpart, gparts, - nr_gparts, sizeof(struct gpart), 0, &redist_data); + nr_gparts, sizeof(struct gpart), threadpool_auto_chunk_size, + &redist_data); /* Sort the gparticles according to their cell index. */ if (nr_gparts > 0) diff --git a/src/engine_split_particles.c b/src/engine_split_particles.c index 58d7f905588c79c6339d9c2e1ac264ec63095c3c..2a9fae7efb5d55dd0f3308af85829241c5031a7a 100644 --- a/src/engine_split_particles.c +++ b/src/engine_split_particles.c @@ -36,6 +36,201 @@ const int particle_split_factor = 2; const double displacement_factor = 0.2; +/** + * @brief Data structure used by the counter mapper function + */ +struct data_count { + const struct engine *const e; + const float mass_threshold; + size_t counter; +}; + +/** + * @brief Data structure used by the split mapper function + */ +struct data_split { + const struct engine *const e; + const float mass_threshold; + size_t *const k_parts; + size_t *const k_gparts; + swift_lock_type lock; +}; + +/** + * @brief Mapper function to count the number of #part above the mass threshold + * for splitting. + */ +void engine_split_gas_particle_count_mapper(void *restrict map_data, int count, + void *restrict extra_data) { + + /* Unpack the data */ + struct part *parts = (struct part *)map_data; + struct data_count *data = (struct data_count *)extra_data; + const struct engine *e = data->e; + const float mass_threshold = data->mass_threshold; + + size_t counter = 0; + + for (int i = 0; i < count; ++i) { + + const struct part *p = &parts[i]; + + /* Ignore inhibited particles */ + if (part_is_inhibited(p, e)) continue; + + /* Is the mass of this particle larger than the threshold? */ + const float gas_mass = hydro_get_mass(p); + if (gas_mass > mass_threshold) ++counter; + } + + /* Increment the global counter */ + atomic_add(&data->counter, counter); +} + +/** + * @brief Mapper function to split the #part above the mass threshold. + */ +void engine_split_gas_particle_split_mapper(void *restrict map_data, int count, + void *restrict extra_data) { + + /* Unpack the data */ + struct part *parts = (struct part *)map_data; + struct data_split *data = (struct data_split *)extra_data; + const float mass_threshold = data->mass_threshold; + const struct engine *e = data->e; + const int with_gravity = (e->policy & engine_policy_self_gravity) || + (e->policy & engine_policy_external_gravity); + + /* Additional thread-global particle arrays */ + const struct space *s = e->s; + struct part *global_parts = s->parts; + struct xpart *global_xparts = s->xparts; + struct gpart *global_gparts = s->gparts; + + /* xpart array corresponding to the thread-local particle array */ + const ptrdiff_t offset = parts - global_parts; + struct xpart *xparts = global_xparts + offset; + + /* Loop over the chunk of the part array assigned to this thread */ + for (int i = 0; i < count; ++i) { + + /* Is the mass of this particle larger than the threshold? */ + struct part *p = &parts[i]; + + /* Ignore inhibited particles */ + if (part_is_inhibited(p, e)) continue; + + const float gas_mass = hydro_get_mass(p); + const float h = p->h; + + /* Found a particle to split */ + if (gas_mass > mass_threshold) { + + /* Make sure only one thread is here */ + lock_lock(&data->lock); + + /* Where should we put the new particle in the array? */ + const size_t k_parts = *data->k_parts; + const size_t k_gparts = *data->k_gparts; + + /* Make the next slot available for the next particle */ + (*data->k_parts)++; + if (with_gravity) (*data->k_gparts)++; + + /* Release the lock and continue in parallel */ + if (lock_unlock(&data->lock) != 0) + error("Impossible to unlock particle splitting"); + + /* We now know where to put the new particle we are going to create. */ + + /* Current other fields associated to this particle */ + struct xpart *xp = &xparts[i]; + struct gpart *gp = p->gpart; + + /* Start by copying over the particles */ + memcpy(&global_parts[k_parts], p, sizeof(struct part)); + memcpy(&global_xparts[k_parts], xp, sizeof(struct xpart)); + + if (with_gravity) { + memcpy(&global_gparts[k_gparts], gp, sizeof(struct gpart)); + } + + /* Update the IDs */ + global_parts[k_parts].id += 2; + + /* Re-link everything */ + if (with_gravity) { + global_parts[k_parts].gpart = &global_gparts[k_gparts]; + global_gparts[k_gparts].id_or_neg_offset = -k_parts; + } + + /* Displacement unit vector */ + const double delta_x = random_unit_interval(p->id, e->ti_current, + (enum random_number_type)0); + const double delta_y = random_unit_interval(p->id, e->ti_current, + (enum random_number_type)1); + const double delta_z = random_unit_interval(p->id, e->ti_current, + (enum random_number_type)2); + + /* Displace the old particle */ + p->x[0] += delta_x * displacement_factor * h; + p->x[1] += delta_y * displacement_factor * h; + p->x[2] += delta_z * displacement_factor * h; + + if (with_gravity) { + gp->x[0] += delta_x * displacement_factor * h; + gp->x[1] += delta_y * displacement_factor * h; + gp->x[2] += delta_z * displacement_factor * h; + } + + /* Displace the new particle */ + global_parts[k_parts].x[0] -= delta_x * displacement_factor * h; + global_parts[k_parts].x[1] -= delta_y * displacement_factor * h; + global_parts[k_parts].x[2] -= delta_z * displacement_factor * h; + + if (with_gravity) { + global_gparts[k_gparts].x[0] -= delta_x * displacement_factor * h; + global_gparts[k_gparts].x[1] -= delta_y * displacement_factor * h; + global_gparts[k_gparts].x[2] -= delta_z * displacement_factor * h; + } + + /* Divide the mass */ + const double new_mass = gas_mass * 0.5; + hydro_set_mass(p, new_mass); + hydro_set_mass(&global_parts[k_parts], new_mass); + + if (with_gravity) { + gp->mass = new_mass; + global_gparts[k_gparts].mass = new_mass; + } + + /* Split the chemistry fields */ + chemistry_split_part(p, particle_split_factor); + chemistry_split_part(&global_parts[k_parts], particle_split_factor); + + /* Split the cooling fields */ + cooling_split_part(p, xp, particle_split_factor); + cooling_split_part(&global_parts[k_parts], &global_xparts[k_parts], + particle_split_factor); + + /* Split the star formation fields */ + star_formation_split_part(p, xp, particle_split_factor); + star_formation_split_part(&global_parts[k_parts], &global_xparts[k_parts], + particle_split_factor); + + /* Split the tracer fields */ + tracers_split_part(p, xp, particle_split_factor); + tracers_split_part(&global_parts[k_parts], &global_xparts[k_parts], + particle_split_factor); + + /* Mark the particles as not having been swallowed */ + black_holes_mark_part_as_not_swallowed(&p->black_holes_data); + black_holes_mark_part_as_not_swallowed( + &global_parts[k_parts].black_holes_data); + } + } +} + /** * @brief Identify all the gas particles above a given mass threshold * and split them into 2. @@ -45,12 +240,15 @@ const double displacement_factor = 0.2; * the local array of particles. In case of reallocations, it may * also have to loop over the gravity and other arrays. * + * This is done on a node-by-node basis. No MPI required here. + * * @param e The #engine. */ void engine_split_gas_particles(struct engine *e) { /* Abort if we are not doing any splitting */ if (!e->hydro_properties->particle_splitting) return; + if (e->s->nr_parts == 0) return; /* Time this */ const ticks tic = getticks(); @@ -59,8 +257,9 @@ void engine_split_gas_particles(struct engine *e) { struct space *s = e->s; const int with_gravity = (e->policy & engine_policy_self_gravity) || (e->policy & engine_policy_external_gravity); - const double mass_threshold = + const float mass_threshold = e->hydro_properties->particle_splitting_mass_threshold; + const size_t nr_parts_old = s->nr_parts; /* Quick check to avoid problems */ if (particle_split_factor != 2) { @@ -68,23 +267,12 @@ void engine_split_gas_particles(struct engine *e) { "Invalid splitting factor. Can currently only split particles into 2!"); } - /* Start by counting how many particles are above the threshold for splitting - */ - - size_t counter = 0; - const size_t nr_parts_old = s->nr_parts; - const struct part *parts_old = s->parts; - for (size_t i = 0; i < nr_parts_old; ++i) { - - const struct part *p = &parts_old[i]; - - /* Ignore inhibited particles */ - if (part_is_inhibited(p, e)) continue; - - /* Is the mass of this particle larger than the threshold? */ - const float gas_mass = hydro_get_mass(p); - if (gas_mass > mass_threshold) ++counter; - } + /* Start by counting how many particles are above the threshold + * for splitting (this is done in parallel over the threads) */ + struct data_count data_count = {e, mass_threshold, 0}; + threadpool_map(&e->threadpool, engine_split_gas_particle_count_mapper, + s->parts, nr_parts_old, sizeof(struct part), 0, &data_count); + const size_t counter = data_count.counter; /* Early abort? */ if (counter == 0) { @@ -109,17 +297,29 @@ void engine_split_gas_particles(struct engine *e) { if (e->verbose) message("Reallocating the part array!"); + /* Allocate a larger array and copy over */ struct part *parts_new = NULL; if (swift_memalign("parts", (void **)&parts_new, part_align, sizeof(struct part) * s->size_parts) != 0) error("Failed to allocate new part data."); + + /* Tell the compiler that the arrays are aligned */ + swift_align_information(struct part, s->parts, part_align); + swift_align_information(struct part, parts_new, part_align); + memcpy(parts_new, s->parts, sizeof(struct part) * s->nr_parts); swift_free("parts", s->parts); + /* Allocate a larger array and copy over */ struct xpart *xparts_new = NULL; if (swift_memalign("xparts", (void **)&xparts_new, xpart_align, sizeof(struct xpart) * s->size_parts) != 0) error("Failed to allocate new part data."); + + /* Tell the compiler that the arrays are aligned */ + swift_align_information(struct xpart, s->xparts, xpart_align); + swift_align_information(struct xpart, xparts_new, xpart_align); + memcpy(xparts_new, s->xparts, sizeof(struct xpart) * s->nr_parts); swift_free("xparts", s->xparts); @@ -135,18 +335,23 @@ void engine_split_gas_particles(struct engine *e) { if (e->verbose) message("Reallocating the gpart array!"); + /* Allocate a larger array and copy over */ struct gpart *gparts_new = NULL; if (swift_memalign("gparts", (void **)&gparts_new, gpart_align, sizeof(struct gpart) * s->size_gparts) != 0) error("Failed to allocate new gpart data."); + /* Tell the compiler that the arrays are aligned */ + swift_align_information(struct gpart, s->gparts, gpart_align); + swift_align_information(struct gpart, gparts_new, gpart_align); + /* Copy the particles */ memcpy(gparts_new, s->gparts, sizeof(struct gpart) * s->nr_gparts); swift_free("gparts", s->gparts); /* We now need to correct all the pointers of the other particle arrays */ part_relink_all_parts_to_gparts(gparts_new, s->nr_gparts, s->parts, - s->sparts, s->bparts); + s->sparts, s->bparts, &e->threadpool); s->gparts = gparts_new; } @@ -157,115 +362,18 @@ void engine_split_gas_particles(struct engine *e) { s->nr_gparts, s->nr_sparts, s->nr_bparts, e->verbose); #endif + /* We now have enough memory in the part array to accomodate the new + * particles. We can start the splitting procedure */ + size_t k_parts = s->nr_parts; size_t k_gparts = s->nr_gparts; /* Loop over the particles again to split them */ - struct part *parts = s->parts; - struct xpart *xparts = s->xparts; - struct gpart *gparts = s->gparts; - for (size_t i = 0; i < nr_parts_old; ++i) { - - /* Is the mass of this particle larger than the threshold? */ - struct part *p = &parts[i]; - - /* Ignore inhibited particles */ - if (part_is_inhibited(p, e)) continue; - - const float gas_mass = hydro_get_mass(p); - const float h = p->h; - - /* Found a particle to split */ - if (gas_mass > mass_threshold) { - - struct xpart *xp = &xparts[i]; - struct gpart *gp = p->gpart; - - /* Start by copying over the particles */ - memcpy(&parts[k_parts], p, sizeof(struct part)); - memcpy(&xparts[k_parts], xp, sizeof(struct xpart)); - - if (with_gravity) { - memcpy(&gparts[k_gparts], gp, sizeof(struct gpart)); - } - - /* Update the IDs */ - parts[k_parts].id += 2; - - /* Re-link everything */ - if (with_gravity) { - parts[k_parts].gpart = &gparts[k_gparts]; - gparts[k_gparts].id_or_neg_offset = -k_parts; - } - - /* Displacement unit vector */ - const double delta_x = random_unit_interval(p->id, e->ti_current, - (enum random_number_type)0); - const double delta_y = random_unit_interval(p->id, e->ti_current, - (enum random_number_type)1); - const double delta_z = random_unit_interval(p->id, e->ti_current, - (enum random_number_type)2); - - /* Displace the old particle */ - p->x[0] += delta_x * displacement_factor * h; - p->x[1] += delta_y * displacement_factor * h; - p->x[2] += delta_z * displacement_factor * h; - - if (with_gravity) { - gp->x[0] += delta_x * displacement_factor * h; - gp->x[1] += delta_y * displacement_factor * h; - gp->x[2] += delta_z * displacement_factor * h; - } - - /* Displace the new particle */ - parts[k_parts].x[0] -= delta_x * displacement_factor * h; - parts[k_parts].x[1] -= delta_y * displacement_factor * h; - parts[k_parts].x[2] -= delta_z * displacement_factor * h; - - if (with_gravity) { - gparts[k_gparts].x[0] -= delta_x * displacement_factor * h; - gparts[k_gparts].x[1] -= delta_y * displacement_factor * h; - gparts[k_gparts].x[2] -= delta_z * displacement_factor * h; - } - - /* Divide the mass */ - const double new_mass = gas_mass * 0.5; - hydro_set_mass(p, new_mass); - hydro_set_mass(&parts[k_parts], new_mass); - - if (with_gravity) { - gp->mass = new_mass; - gparts[k_gparts].mass = new_mass; - } - - /* Split the chemistry fields */ - chemistry_split_part(p, particle_split_factor); - chemistry_split_part(&parts[k_parts], particle_split_factor); - - /* Split the cooling fields */ - cooling_split_part(p, xp, particle_split_factor); - cooling_split_part(&parts[k_parts], &xparts[k_parts], - particle_split_factor); - - /* Split the star formation fields */ - star_formation_split_part(p, xp, particle_split_factor); - star_formation_split_part(&parts[k_parts], &xparts[k_parts], - particle_split_factor); - - /* Split the tracer fields */ - tracers_split_part(p, xp, particle_split_factor); - tracers_split_part(&parts[k_parts], &xparts[k_parts], - particle_split_factor); - - /* Mark the particles as not having been swallowed */ - black_holes_mark_part_as_not_swallowed(&p->black_holes_data); - black_holes_mark_part_as_not_swallowed(&parts[k_parts].black_holes_data); - - /* Move to the next free slot */ - k_parts++; - if (with_gravity) k_gparts++; - } - } + struct data_split data_split = {e, mass_threshold, &k_parts, &k_gparts, 0}; + lock_init(&data_split.lock); + threadpool_map(&e->threadpool, engine_split_gas_particle_split_mapper, + s->parts, nr_parts_old, sizeof(struct part), 0, &data_split); + if (lock_destroy(&data_split.lock) != 0) error("Error destroying lock"); /* Update the local counters */ s->nr_parts = k_parts; diff --git a/src/engine_unskip.c b/src/engine_unskip.c index 22bc55f34e0bc51711f770d4d7a0d2b2a629f778..9142ff447ac023d0428d466029110233cef37149 100644 --- a/src/engine_unskip.c +++ b/src/engine_unskip.c @@ -383,7 +383,8 @@ void engine_unskip(struct engine *e) { /* Activate all the regular tasks */ threadpool_map(&e->threadpool, engine_do_unskip_mapper, local_active_cells, - num_active_cells * multiplier, sizeof(int), 1, &data); + num_active_cells * multiplier, sizeof(int), /*chunk=*/1, + &data); #ifdef WITH_PROFILER ProfilerStop(); @@ -444,7 +445,8 @@ void engine_unskip_timestep_communications(struct engine *e) { /* Activate all the part and gpart ti_end tasks */ threadpool_map(&e->threadpool, engine_unskip_timestep_communications_mapper, - tasks, nr_tasks, sizeof(struct task), 0, s); + tasks, nr_tasks, sizeof(struct task), + threadpool_auto_chunk_size, s); if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), diff --git a/src/fof.c b/src/fof.c index 51f35227d4441971e25f46dbc6e57ccfaddb43b7..ff15262bab336f336af47d3cc2e3710b1bee355a 100644 --- a/src/fof.c +++ b/src/fof.c @@ -324,8 +324,8 @@ void fof_allocate(const struct space *s, const long long total_nr_DM_particles, /* Set initial group index */ threadpool_map(&s->e->threadpool, fof_set_initial_group_index_mapper, - props->group_index, s->nr_gparts, sizeof(size_t), 0, - props->group_index); + props->group_index, s->nr_gparts, sizeof(size_t), + threadpool_auto_chunk_size, props->group_index); if (verbose) message("Setting initial group index took: %.3f %s.", @@ -335,7 +335,8 @@ void fof_allocate(const struct space *s, const long long total_nr_DM_particles, /* Set initial group sizes */ threadpool_map(&s->e->threadpool, fof_set_initial_group_size_mapper, - props->group_size, s->nr_gparts, sizeof(size_t), 0, NULL); + props->group_size, s->nr_gparts, sizeof(size_t), + threadpool_auto_chunk_size, NULL); if (verbose) message("Setting initial group sizes took: %.3f %s.", @@ -1759,7 +1760,8 @@ void fof_calc_group_mass(struct fof_props *props, const struct space *s, /* Increment the group mass for groups above min_group_size. */ threadpool_map(&s->e->threadpool, fof_calc_group_mass_mapper, gparts, - nr_gparts, sizeof(struct gpart), 0, (struct space *)s); + nr_gparts, sizeof(struct gpart), threadpool_auto_chunk_size, + (struct space *)s); /* Loop over particles and find the densest particle in each group. */ /* JSW TODO: Parallelise with threadpool*/ @@ -2278,7 +2280,8 @@ void fof_search_foreign_cells(struct fof_props *props, const struct space *s) { data.nr_gparts = nr_gparts; data.space_gparts = s->gparts; threadpool_map(&e->threadpool, fof_set_outgoing_root_mapper, local_cells, - num_cells_out, sizeof(struct cell **), 0, &data); + num_cells_out, sizeof(struct cell **), + threadpool_auto_chunk_size, &data); if (verbose) message( @@ -2636,7 +2639,8 @@ void fof_search_tree(struct fof_props *props, ticks tic_calc_group_size = getticks(); threadpool_map(&s->e->threadpool, fof_calc_group_size_mapper, gparts, - nr_gparts, sizeof(struct gpart), 0, s); + nr_gparts, sizeof(struct gpart), threadpool_auto_chunk_size, + s); if (verbose) message("FOF calc group size took (FOF SCALING): %.3f %s.", clocks_from_ticks(getticks() - tic_calc_group_size), @@ -2778,7 +2782,7 @@ void fof_search_tree(struct fof_props *props, /* Set default group ID for all particles */ threadpool_map(&s->e->threadpool, fof_set_initial_group_id_mapper, s->gparts, - s->nr_gparts, sizeof(struct gpart), 0, + s->nr_gparts, sizeof(struct gpart), threadpool_auto_chunk_size, (void *)&group_id_default); if (verbose) diff --git a/src/gravity.c b/src/gravity.c index 53ab6b816f964e4e2b071df1e3192d972c5567de..da68796e68f748cfdcdf37688911aa03ca11f003 100644 --- a/src/gravity.c +++ b/src/gravity.c @@ -36,6 +36,7 @@ /* Local headers. */ #include "active.h" #include "error.h" +#include "threadpool.h" #include "version.h" struct exact_force_data { @@ -615,7 +616,8 @@ void gravity_exact_force_compute(struct space *s, const struct engine *e) { data.const_G = e->physical_constants->const_newton_G; threadpool_map(&s->e->threadpool, gravity_exact_force_compute_mapper, - s->gparts, s->nr_gparts, sizeof(struct gpart), 0, &data); + s->gparts, s->nr_gparts, sizeof(struct gpart), + threadpool_auto_chunk_size, &data); message("Computed exact gravity for %d gparts (took %.3f %s). ", data.counter_global, clocks_from_ticks(getticks() - tic), diff --git a/src/logger_io.c b/src/logger_io.c index e227db3b169767ad4a88514d136a9593b476b64d..8921304c062b75b67344d06ef3c4203a931eba13 100644 --- a/src/logger_io.c +++ b/src/logger_io.c @@ -54,6 +54,7 @@ #include "serial_io.h" #include "single_io.h" #include "stars_io.h" +#include "threadpool.h" #include "tracers_io.h" #include "units.h" #include "version.h" @@ -128,7 +129,7 @@ void writeIndexArray(const struct engine* e, FILE* f, struct io_props* props, /* Copy the whole thing into a buffer */ threadpool_map((struct threadpool*)&e->threadpool, logger_io_copy_mapper, - temp, N, typeSize, 0, props); + temp, N, typeSize, threadpool_auto_chunk_size, props); /* Write data to file */ fwrite(temp, typeSize, num_elements, f); diff --git a/src/memuse.c b/src/memuse.c index 00a2a5f879a994d96e0747eba55c56a3161a6b86..c3eefbf045b84918ace42d917e8a9bccb3a0f02a 100644 --- a/src/memuse.c +++ b/src/memuse.c @@ -34,6 +34,11 @@ #include <sys/types.h> #include <unistd.h> +#ifndef SWIFT_MEMUSE_STATM +#include <sys/resource.h> +#include <sys/time.h> +#endif + /* Local defines. */ #include "memuse.h" @@ -408,7 +413,8 @@ void memuse_log_dump_error(int rank) { /** * @brief parse the process /proc/self/statm file to get the process - * memory use (in KB). Top field in (). + * memory use (in KB). Top field in (). If this file is not + * available only the resident field will be returned. * * @param size total virtual memory (VIRT/VmSize) * @param resident resident non-swapped memory (RES/VmRSS) @@ -423,6 +429,8 @@ void memuse_log_dump_error(int rank) { void memuse_use(long *size, long *resident, long *shared, long *text, long *data, long *library, long *dirty) { +#ifdef SWIFT_MEMUSE_STATM + /* Open the file. */ FILE *file = fopen("/proc/self/statm", "r"); if (file != NULL) { @@ -432,29 +440,38 @@ void memuse_use(long *size, long *resident, long *shared, long *text, if (nscan == 7) { /* Convert pages into bytes. Usually 4096, but could be 512 on some * systems so take care in conversion to KB. */ - long sz = sysconf(_SC_PAGESIZE); - *size *= sz; - *resident *= sz; - *shared *= sz; - *text *= sz; - *library *= sz; - *data *= sz; - *dirty *= sz; - - *size /= 1024; - *resident /= 1024; - *shared /= 1024; - *text /= 1024; - *library /= 1024; - *data /= 1024; - *dirty /= 1024; + uint64_t sz = sysconf(_SC_PAGESIZE); + *size = (*size) * sz / 1024; + *resident = (*resident) * sz / 1024; + *shared = (*shared) * sz / 1024; + *text = (*text) * sz / 1024; + *library = (*library) * sz / 1024; + *data = (*data) * sz / 1024; + *dirty = (*dirty) * sz / 1024; + } else { error("Failed to read sufficient fields from /proc/self/statm"); } fclose(file); + } else { error("Failed to open /proc/self/statm"); } +#else + + /* Not a Linux compatible OS, try to use very limited POSIX call instead. + * Linux only claims to support ru_maxrss, and POSIX only ru_utime and + * ru_stime, so this may still fail. */ + struct rusage usage; + getrusage(RUSAGE_SELF, &usage); + *size = 0; + *resident = usage.ru_maxrss; + *shared = usage.ru_ixrss; + *text = 0; + *library = 0; + *data = usage.ru_isrss; + *dirty = 0; +#endif } /** @@ -469,13 +486,8 @@ void memuse_use(long *size, long *resident, long *shared, long *text, */ const char *memuse_process(int inmb) { static char buffer[256]; - long size; - long resident; - long shared; - long text; - long library; - long data; - long dirty; + + long size, resident, shared, text, library, data, dirty; memuse_use(&size, &resident, &shared, &text, &data, &library, &dirty); if (inmb) { diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c index 9efd8c03966cdfb230f48bedaeb2e4cfab3c1368..0efec0bc80fc381d9bd6ff733cc546e1885c1d9e 100644 --- a/src/mesh_gravity.c +++ b/src/mesh_gravity.c @@ -37,6 +37,7 @@ #include "part.h" #include "runner.h" #include "space.h" +#include "threadpool.h" #ifdef HAVE_FFTW @@ -51,10 +52,9 @@ * @param k Index along z. * @param N Size of the array along one axis. */ -__attribute__((always_inline)) INLINE static int row_major_id_periodic(int i, - int j, - int k, - int N) { +__attribute__((always_inline, const)) INLINE static int row_major_id_periodic( + const int i, const int j, const int k, const int N) { + return (((i + N) % N) * N * N + ((j + N) % N) * N + ((k + N) % N)); } @@ -72,9 +72,10 @@ __attribute__((always_inline)) INLINE static int row_major_id_periodic(int i, * @param dy Second CIC coefficient along y * @param dz Second CIC coefficient along z */ -__attribute__((always_inline)) INLINE static double CIC_get( - double mesh[6][6][6], int i, int j, int k, double tx, double ty, double tz, - double dx, double dy, double dz) { +__attribute__((always_inline, const)) INLINE static double CIC_get( + double mesh[6][6][6], const int i, const int j, const int k, + const double tx, const double ty, const double tz, const double dx, + const double dy, const double dz) { double temp; temp = mesh[i + 0][j + 0][k + 0] * tx * ty * tz; @@ -106,8 +107,9 @@ __attribute__((always_inline)) INLINE static double CIC_get( * @param value The value to interpolate. */ __attribute__((always_inline)) INLINE static void CIC_set( - double* mesh, int N, int i, int j, int k, double tx, double ty, double tz, - double dx, double dy, double dz, double value) { + double* mesh, const int N, const int i, const int j, const int k, + const double tx, const double ty, const double tz, const double dx, + const double dy, const double dz, const double value) { /* Classic CIC interpolation */ atomic_add_d(&mesh[row_major_id_periodic(i + 0, j + 0, k + 0, N)], @@ -137,8 +139,9 @@ __attribute__((always_inline)) INLINE static void CIC_set( * @param fac The width of a mesh cell. * @param dim The dimensions of the simulation box. */ -INLINE static void gpart_to_mesh_CIC(const struct gpart* gp, double* rho, int N, - double fac, const double dim[3]) { +INLINE static void gpart_to_mesh_CIC(const struct gpart* gp, double* rho, + const int N, const double fac, + const double dim[3]) { /* Box wrap the multipole's position */ const double pos_x = box_wrap(gp->x[0], 0., dim[0]); @@ -183,8 +186,9 @@ INLINE static void gpart_to_mesh_CIC(const struct gpart* gp, double* rho, int N, * @param fac The width of a mesh cell. * @param dim The dimensions of the simulation box. */ -void cell_gpart_to_mesh_CIC(const struct cell* c, double* rho, int N, - double fac, const double dim[3]) { +void cell_gpart_to_mesh_CIC(const struct cell* c, double* rho, const int N, + const double fac, const double dim[3]) { + const int gcount = c->grav.count; const struct gpart* gparts = c->grav.parts; @@ -253,8 +257,8 @@ void cell_gpart_to_mesh_CIC_mapper(void* map_data, int num, void* extra) { * @param fac width of a mesh cell. * @param dim The dimensions of the simulation box. */ -void mesh_to_gparts_CIC(struct gpart* gp, const double* pot, int N, double fac, - const double dim[3]) { +void mesh_to_gparts_CIC(struct gpart* gp, const double* pot, const int N, + const double fac, const double dim[3]) { /* Box wrap the gpart's position */ const double pos_x = box_wrap(gp->x[0], 0., dim[0]); @@ -342,6 +346,139 @@ void mesh_to_gparts_CIC(struct gpart* gp, const double* pot, int N, double fac, #endif } +/** + * @brief Shared information about the Green function to be used by all the + * threads in the pool. + */ +struct Green_function_data { + + int N; + fftw_complex* frho; + double green_fac; + double a_smooth2; + double k_fac; +}; + +/** + * @brief Mapper function for the application of the Green function. + * + * @param map_data The array of the density field Fourier transform. + * @param num The number of elements to iterate on (along the x-axis). + * @param extra The properties of the Green function. + */ +void mesh_apply_Green_function_mapper(void* map_data, const int num, + void* extra) { + + struct Green_function_data* data = (struct Green_function_data*)extra; + + /* Unpack the array */ + fftw_complex* const frho = data->frho; + const int N = data->N; + const int N_half = N / 2; + + /* Unpack the Green function properties */ + const double green_fac = data->green_fac; + const double a_smooth2 = data->a_smooth2; + const double k_fac = data->k_fac; + + /* Range handled by this call */ + const int i_start = (fftw_complex*)map_data - frho; + const int i_end = i_start + num; + + /* Loop over the x range corresponding to this thread */ + for (int i = i_start; i < i_end; ++i) { + + /* kx component of vector in Fourier space and 1/sinc(kx) */ + const int kx = (i > N_half ? i - N : i); + const double kx_d = (double)kx; + const double fx = k_fac * kx_d; + const double sinc_kx_inv = (kx != 0) ? fx / sin(fx) : 1.; + + for (int j = 0; j < N; ++j) { + + /* ky component of vector in Fourier space and 1/sinc(ky) */ + const int ky = (j > N_half ? j - N : j); + const double ky_d = (double)ky; + const double fy = k_fac * ky_d; + const double sinc_ky_inv = (ky != 0) ? fy / sin(fy) : 1.; + + for (int k = 0; k < N_half + 1; ++k) { + + /* kz component of vector in Fourier space and 1/sinc(kz) */ + const int kz = (k > N_half ? k - N : k); + const double kz_d = (double)kz; + const double fz = k_fac * kz_d; + const double sinc_kz_inv = (kz != 0) ? fz / (sin(fz) + FLT_MIN) : 1.; + + /* Norm of vector in Fourier space */ + const double k2 = (kx_d * kx_d + ky_d * ky_d + kz_d * kz_d); + + /* Avoid FPEs... */ + if (k2 == 0.) continue; + + /* Green function */ + double W = 1.; + fourier_kernel_long_grav_eval(k2 * a_smooth2, &W); + const double green_cor = green_fac * W / (k2 + FLT_MIN); + + /* Deconvolution of CIC */ + const double CIC_cor = sinc_kx_inv * sinc_ky_inv * sinc_kz_inv; + const double CIC_cor2 = CIC_cor * CIC_cor; + const double CIC_cor4 = CIC_cor2 * CIC_cor2; + + /* Combined correction */ + const double total_cor = green_cor * CIC_cor4; + + /* Apply to the mesh */ + const int index = N * (N_half + 1) * i + (N_half + 1) * j + k; + frho[index][0] *= total_cor; + frho[index][1] *= total_cor; + } + } + } +} + +/** + * @brief Apply the Green function in Fourier space to the density + * array to get the potential. + * + * Also deconvolves the CIC kernel. + * + * @param tp The threadpool. + * @param frho The NxNx(N/2) complex array of the Fourier transform of the + * density field. + * @param N The dimension of the array. + * @param r_s The Green function smoothing scale. + * @param box_size The physical size of the simulation box. + */ +void mesh_apply_Green_function(struct threadpool* tp, fftw_complex* frho, + const int N, const double r_s, + const double box_size) { + + /* Some common factors */ + struct Green_function_data data; + data.frho = frho; + data.N = N; + data.green_fac = -1. / (M_PI * box_size); + data.a_smooth2 = 4. * M_PI * M_PI * r_s * r_s / (box_size * box_size); + data.k_fac = M_PI / (double)N; + + /* Parallelize the Green function application using the threadpool + to split the x-axis loop over the threads. + The array is N x N x (N/2). We use the thread to each deal with + a range [i_min, i_max[ x N x (N/2) */ + if (N < 32) { + mesh_apply_Green_function_mapper(frho, N, &data); + } else { + threadpool_map(tp, mesh_apply_Green_function_mapper, frho, N, + sizeof(fftw_complex), threadpool_auto_chunk_size, &data); + } + + /* Correct singularity at (0,0,0) */ + frho[0][0] = 0.; + frho[0][1] = 0.; +} + #endif /** @@ -359,7 +496,7 @@ void mesh_to_gparts_CIC(struct gpart* gp, const double* pot, int N, double fac, * @param verbose Are we talkative? */ void pm_mesh_compute_potential(struct pm_mesh* mesh, const struct space* s, - struct threadpool* tp, int verbose) { + struct threadpool* tp, const int verbose) { #ifdef HAVE_FFTW @@ -416,7 +553,8 @@ void pm_mesh_compute_potential(struct pm_mesh* mesh, const struct space* s, /* Do a parallel CIC mesh assignment of the gparts but only using the local top-level cells */ threadpool_map(tp, cell_gpart_to_mesh_CIC_mapper, (void*)local_cells, - nr_local_cells, sizeof(int), 0, (void*)&data); + nr_local_cells, sizeof(int), threadpool_auto_chunk_size, + (void*)&data); if (verbose) message("Gpart assignment took %.3f %s.", @@ -432,7 +570,7 @@ void pm_mesh_compute_potential(struct pm_mesh* mesh, const struct space* s, MPI_COMM_WORLD); if (verbose) - message("Mesh comunication took %.3f %s.", + message("Mesh communication took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); #endif @@ -453,66 +591,8 @@ void pm_mesh_compute_potential(struct pm_mesh* mesh, const struct space* s, tic = getticks(); - /* Some common factors */ - const double green_fac = -1. / (M_PI * box_size); - const double a_smooth2 = 4. * M_PI * M_PI * r_s * r_s / (box_size * box_size); - const double k_fac = M_PI / (double)N; - /* Now de-convolve the CIC kernel and apply the Green function */ - for (int i = 0; i < N; ++i) { - - /* kx component of vector in Fourier space and 1/sinc(kx) */ - const int kx = (i > N_half ? i - N : i); - const double kx_d = (double)kx; - const double fx = k_fac * kx_d; - const double sinc_kx_inv = (kx != 0) ? fx / sin(fx) : 1.; - - for (int j = 0; j < N; ++j) { - - /* ky component of vector in Fourier space and 1/sinc(ky) */ - const int ky = (j > N_half ? j - N : j); - const double ky_d = (double)ky; - const double fy = k_fac * ky_d; - const double sinc_ky_inv = (ky != 0) ? fy / sin(fy) : 1.; - - for (int k = 0; k < N_half + 1; ++k) { - - /* kz component of vector in Fourier space and 1/sinc(kz) */ - const int kz = (k > N_half ? k - N : k); - const double kz_d = (double)kz; - const double fz = k_fac * kz_d; - const double sinc_kz_inv = (kz != 0) ? fz / (sin(fz) + FLT_MIN) : 1.; - - /* Norm of vector in Fourier space */ - const double k2 = (kx_d * kx_d + ky_d * ky_d + kz_d * kz_d); - - /* Avoid FPEs... */ - if (k2 == 0.) continue; - - /* Green function */ - double W = 1.; - fourier_kernel_long_grav_eval(k2 * a_smooth2, &W); - const double green_cor = green_fac * W / (k2 + FLT_MIN); - - /* Deconvolution of CIC */ - const double CIC_cor = sinc_kx_inv * sinc_ky_inv * sinc_kz_inv; - const double CIC_cor2 = CIC_cor * CIC_cor; - const double CIC_cor4 = CIC_cor2 * CIC_cor2; - - /* Combined correction */ - const double total_cor = green_cor * CIC_cor4; - - /* Apply to the mesh */ - const int index = N * (N_half + 1) * i + (N_half + 1) * j + k; - frho[index][0] *= total_cor; - frho[index][1] *= total_cor; - } - } - } - - /* Correct singularity at (0,0,0) */ - frho[0][0] = 0.; - frho[0][1] = 0.; + mesh_apply_Green_function(tp, frho, N, r_s, box_size); if (verbose) message("Applying Green function took %.3f %s.", @@ -534,7 +614,7 @@ void pm_mesh_compute_potential(struct pm_mesh* mesh, const struct space* s, mesh->potential = rho; /* message("\n\n\n POTENTIAL"); */ - /* print_array(potential, N); */ + /* print_array(mesh->potential, N); */ /* Clean-up the mess */ fftw_destroy_plan(forward_plan); diff --git a/src/mpiuse.c b/src/mpiuse.c index f32d9a069e680e0cffc110e288b816ca6475b276..0688affa1b17b0298b5cfe5dafe8ce28f5e16032 100644 --- a/src/mpiuse.c +++ b/src/mpiuse.c @@ -351,4 +351,16 @@ void mpiuse_log_dump(const char *filename, ticks stepticks) { // clocks_getunit()); } +/** + * @brief dump the log for using the given rank to generate a standard + * name for the output. Used when exiting in error. + * + * @param rank the rank exiting in error. + */ +void mpiuse_log_dump_error(int rank) { + char filename[60]; + sprintf(filename, "mpiuse-error-report-rank%d.txt", rank); + mpiuse_log_dump(filename, clocks_start_ticks); +} + #endif /* defined(SWIFT_MPIUSE_REPORTS) && defined(WITH_MPI) */ diff --git a/src/mpiuse.h b/src/mpiuse.h index 20c6d05e1cc2d6b3995fbfc1dc2f69809ed093c9..812af6a78ea98d138f45638e442f100fe23d8596 100644 --- a/src/mpiuse.h +++ b/src/mpiuse.h @@ -33,6 +33,7 @@ void mpiuse_log_dump(const char *filename, ticks stepticks); void mpiuse_log_allocation(int type, int subtype, void *ptr, int activation, size_t size, int otherrank, int tag); +void mpiuse_log_dump_error(int rank); #else /* No-op when not reporting. */ diff --git a/src/part.c b/src/part.c index cb73aba1afa958e7a5e65919af819687ba9df863..f172708c4b43ccfb3647f743811b3afeb6149be2 100644 --- a/src/part.c +++ b/src/part.c @@ -32,6 +32,7 @@ #include "error.h" #include "hydro.h" #include "part.h" +#include "threadpool.h" /** * @brief Re-link the #gpart%s associated with the list of #part%s. @@ -40,8 +41,8 @@ * @param N The number of particles to re-link; * @param offset The offset of #part%s relative to the global parts list. */ -void part_relink_gparts_to_parts(struct part *parts, size_t N, - ptrdiff_t offset) { +void part_relink_gparts_to_parts(struct part *parts, const size_t N, + const ptrdiff_t offset) { for (size_t k = 0; k < N; k++) { if (parts[k].gpart) { parts[k].gpart->id_or_neg_offset = -(k + offset); @@ -56,8 +57,8 @@ void part_relink_gparts_to_parts(struct part *parts, size_t N, * @param N The number of s-particles to re-link; * @param offset The offset of #spart%s relative to the global sparts list. */ -void part_relink_gparts_to_sparts(struct spart *sparts, size_t N, - ptrdiff_t offset) { +void part_relink_gparts_to_sparts(struct spart *sparts, const size_t N, + const ptrdiff_t offset) { for (size_t k = 0; k < N; k++) { if (sparts[k].gpart) { sparts[k].gpart->id_or_neg_offset = -(k + offset); @@ -72,8 +73,8 @@ void part_relink_gparts_to_sparts(struct spart *sparts, size_t N, * @param N The number of s-particles to re-link; * @param offset The offset of #bpart%s relative to the global bparts list. */ -void part_relink_gparts_to_bparts(struct bpart *bparts, size_t N, - ptrdiff_t offset) { +void part_relink_gparts_to_bparts(struct bpart *bparts, const size_t N, + const ptrdiff_t offset) { for (size_t k = 0; k < N; k++) { if (bparts[k].gpart) { bparts[k].gpart->id_or_neg_offset = -(k + offset); @@ -88,7 +89,7 @@ void part_relink_gparts_to_bparts(struct bpart *bparts, size_t N, * @param N The number of particles to re-link; * @param parts The global #part array in which to find the #gpart offsets. */ -void part_relink_parts_to_gparts(struct gpart *gparts, size_t N, +void part_relink_parts_to_gparts(struct gpart *gparts, const size_t N, struct part *parts) { for (size_t k = 0; k < N; k++) { if (gparts[k].type == swift_type_gas) { @@ -104,7 +105,7 @@ void part_relink_parts_to_gparts(struct gpart *gparts, size_t N, * @param N The number of particles to re-link; * @param sparts The global #spart array in which to find the #gpart offsets. */ -void part_relink_sparts_to_gparts(struct gpart *gparts, size_t N, +void part_relink_sparts_to_gparts(struct gpart *gparts, const size_t N, struct spart *sparts) { for (size_t k = 0; k < N; k++) { if (gparts[k].type == swift_type_stars) { @@ -120,7 +121,7 @@ void part_relink_sparts_to_gparts(struct gpart *gparts, size_t N, * @param N The number of particles to re-link; * @param bparts The global #bpart array in which to find the #gpart offsets. */ -void part_relink_bparts_to_gparts(struct gpart *gparts, size_t N, +void part_relink_bparts_to_gparts(struct gpart *gparts, const size_t N, struct bpart *bparts) { for (size_t k = 0; k < N; k++) { if (gparts[k].type == swift_type_black_hole) { @@ -130,19 +131,34 @@ void part_relink_bparts_to_gparts(struct gpart *gparts, size_t N, } /** - * @brief Re-link both the #part%s and #spart%s associated with the list of - * #gpart%s. + * @brief Helper structure to pass data to the liking mapper functions. + */ +struct relink_data { + struct part *const parts; + struct gpart *const garts; + struct spart *const sparts; + struct bpart *const bparts; +}; + +/** + * @brief #threadpool mapper function for the linking of all particle types + * to the #gpart array. * - * @param gparts The list of #gpart. - * @param N The number of particles to re-link; - * @param parts The global #part array in which to find the #gpart offsets. - * @param sparts The global #spart array in which to find the #gpart offsets. - * @param bparts The global #bpart array in which to find the #gpart offsets. + * @brief map_data The array of #gpart. + * @brief count The number of #gpart. + * @brief extra_data the #relink_data containing pointer to the other arrays. */ -void part_relink_all_parts_to_gparts(struct gpart *gparts, size_t N, - struct part *parts, struct spart *sparts, - struct bpart *bparts) { - for (size_t k = 0; k < N; k++) { +void part_relink_all_parts_to_gparts_mapper(void *restrict map_data, int count, + void *restrict extra_data) { + + /* Un-pack the data */ + struct relink_data *data = (struct relink_data *)extra_data; + struct part *const parts = data->parts; + struct spart *const sparts = data->sparts; + struct bpart *const bparts = data->bparts; + struct gpart *const gparts = (struct gpart *)map_data; + + for (int k = 0; k < count; k++) { if (gparts[k].type == swift_type_gas) { parts[-gparts[k].id_or_neg_offset].gpart = &gparts[k]; } else if (gparts[k].type == swift_type_stars) { @@ -153,6 +169,31 @@ void part_relink_all_parts_to_gparts(struct gpart *gparts, size_t N, } } +/** + * @brief Re-link both the #part%s, #spart%s and #bpart%s associated + * with the list of #gpart%s. + * + * This function uses thread parallelism and should not be called inside + * an already threaded section (unlike the functions linking individual arrays + * that are designed to be called in thread-parallel code). + * + * @param gparts The list of #gpart. + * @param N The number of particles to re-link; + * @param parts The global #part array in which to find the #gpart offsets. + * @param sparts The global #spart array in which to find the #gpart offsets. + * @param bparts The global #bpart array in which to find the #gpart offsets. + * @param tp The #threadpool object. + */ +void part_relink_all_parts_to_gparts(struct gpart *gparts, const size_t N, + struct part *parts, struct spart *sparts, + struct bpart *bparts, + struct threadpool *tp) { + + struct relink_data data = {parts, /*gparts=*/NULL, sparts, bparts}; + threadpool_map(tp, part_relink_all_parts_to_gparts_mapper, gparts, N, + sizeof(struct gpart), 0, &data); +} + /** * @brief Verifies that the #gpart, #part and #spart are correctly linked * together diff --git a/src/part.h b/src/part.h index d0ff99089526c223f148d32058003edaf474126d..3f2d6371156e5e259f095f88ac744be50be845eb 100644 --- a/src/part.h +++ b/src/part.h @@ -32,9 +32,10 @@ /* Local headers. */ #include "align.h" -#include "fof.h" #include "part_type.h" -#include "timeline.h" + +/* Pre-declarations */ +struct threadpool; /* Some constants. */ #define part_align 128 @@ -119,21 +120,22 @@ #error "Invalid choice of black hole particle" #endif -void part_relink_gparts_to_parts(struct part *parts, size_t N, - ptrdiff_t offset); -void part_relink_gparts_to_sparts(struct spart *sparts, size_t N, - ptrdiff_t offset); -void part_relink_gparts_to_bparts(struct bpart *bparts, size_t N, - ptrdiff_t offset); -void part_relink_parts_to_gparts(struct gpart *gparts, size_t N, +void part_relink_gparts_to_parts(struct part *parts, const size_t N, + const ptrdiff_t offset); +void part_relink_gparts_to_sparts(struct spart *sparts, const size_t N, + const ptrdiff_t offset); +void part_relink_gparts_to_bparts(struct bpart *bparts, const size_t N, + const ptrdiff_t offset); +void part_relink_parts_to_gparts(struct gpart *gparts, const size_t N, struct part *parts); -void part_relink_sparts_to_gparts(struct gpart *gparts, size_t N, +void part_relink_sparts_to_gparts(struct gpart *gparts, const size_t N, struct spart *sparts); -void part_relink_bparts_to_gparts(struct gpart *gparts, size_t N, +void part_relink_bparts_to_gparts(struct gpart *gparts, const size_t N, struct bpart *bparts); -void part_relink_all_parts_to_gparts(struct gpart *gparts, size_t N, +void part_relink_all_parts_to_gparts(struct gpart *gparts, const size_t N, struct part *parts, struct spart *sparts, - struct bpart *bparts); + struct bpart *bparts, + struct threadpool *tp); void part_verify_links(struct part *parts, struct gpart *gparts, struct spart *sparts, struct bpart *bparts, size_t nr_parts, size_t nr_gparts, size_t nr_sparts, diff --git a/src/partition.c b/src/partition.c index 2ee321c872e6c34ff4ea82a9369c4637d56bb1d9..9004adb2adb54e980174608ffb30d546c69911ed 100644 --- a/src/partition.c +++ b/src/partition.c @@ -60,13 +60,15 @@ #include "partition.h" #include "restart.h" #include "space.h" +#include "threadpool.h" #include "tools.h" /* Simple descriptions of initial partition types for reports. */ const char *initial_partition_name[] = { "axis aligned grids of cells", "vectorized point associated cells", "memory balanced, using particle weighted cells", - "similar sized regions, using unweighted cells"}; + "similar sized regions, using unweighted cells", + "memory and edge balanced cells using particle weights"}; /* Simple descriptions of repartition types for reports. */ const char *repartition_name[] = { @@ -399,55 +401,158 @@ static void ACCUMULATE_SIZES_MAPPER(gpart); */ static void ACCUMULATE_SIZES_MAPPER(spart); +/* qsort support. */ +static int ptrcmp(const void *p1, const void *p2) { + const double *v1 = *(const double **)p1; + const double *v2 = *(const double **)p2; + return (*v1) - (*v2); +} + /** * @brief Accumulate total memory size in particles per cell. * * @param s the space containing the cells. + * @param verbose whether to report any clipped cell counts. * @param counts the number of bytes in particles per cell. Should be * allocated as size s->nr_cells. */ -static void accumulate_sizes(struct space *s, double *counts) { +static void accumulate_sizes(struct space *s, int verbose, double *counts) { bzero(counts, sizeof(double) * s->nr_cells); struct counts_mapper_data mapper_data; - mapper_data.counts = counts; mapper_data.s = s; + double gsize = 0.0; + double *gcounts = NULL; + double hsize = 0.0; + double ssize = 0.0; - double hsize = (double)sizeof(struct part); - if (s->nr_parts > 0) { - mapper_data.size = hsize; - threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_part, s->parts, - s->nr_parts, sizeof(struct part), space_splitsize, - &mapper_data); - } - - double gsize = (double)sizeof(struct gpart); if (s->nr_gparts > 0) { + /* Self-gravity gets more efficient with density (see gitlab issue #640) + * so we end up with too much weight in cells with larger numbers of + * gparts, to suppress this we fix a upper weight limit based on a + * percentile clip to on the numbers of cells. Should be relatively + * harmless when not really needed. */ + if ((gcounts = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL) + error("Failed to allocate gcounts buffer."); + bzero(gcounts, sizeof(double) * s->nr_cells); + gsize = (double)sizeof(struct gpart); + + mapper_data.counts = gcounts; mapper_data.size = gsize; threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_gpart, s->gparts, s->nr_gparts, sizeof(struct gpart), space_splitsize, &mapper_data); + + /* Get all the counts from all the nodes. */ + if (MPI_Allreduce(MPI_IN_PLACE, gcounts, s->nr_cells, MPI_DOUBLE, MPI_SUM, + MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to allreduce particle cell gpart weights."); + + /* Now we need to sort... */ + double **ptrs = NULL; + if ((ptrs = (double **)malloc(sizeof(double *) * s->nr_cells)) == NULL) + error("Failed to allocate pointers buffer."); + for (int k = 0; k < s->nr_cells; k++) { + ptrs[k] = &gcounts[k]; + } + + /* Sort pointers, not counts... */ + qsort(ptrs, s->nr_cells, sizeof(double *), ptrcmp); + + /* Percentile cut keeps 99.8% of cells and clips above. */ + int cut = ceil(s->nr_cells * 0.998); + + /* And clip. */ + int nadj = 0; + double clip = *ptrs[cut]; + for (int k = cut + 1; k < s->nr_cells; k++) { + *ptrs[k] = clip; + nadj++; + } + if (verbose) message("clipped gravity part counts of %d cells", nadj); + free(ptrs); + } + + /* Other particle types are assumed to correlate with processing time. */ + if (s->nr_parts > 0) { + mapper_data.counts = counts; + hsize = (double)sizeof(struct part); + mapper_data.size = hsize; + threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_part, s->parts, + s->nr_parts, sizeof(struct part), space_splitsize, + &mapper_data); } - double ssize = (double)sizeof(struct spart); if (s->nr_sparts > 0) { + ssize = (double)sizeof(struct spart); mapper_data.size = ssize; threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_spart, s->sparts, s->nr_sparts, sizeof(struct spart), space_splitsize, &mapper_data); } + /* Merge the counts arrays across all nodes, if needed. Doesn't include any + * gparts. */ + if (s->nr_parts > 0 || s->nr_sparts > 0) { + if (MPI_Allreduce(MPI_IN_PLACE, counts, s->nr_cells, MPI_DOUBLE, MPI_SUM, + MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to allreduce particle cell weights."); + } + + /* And merge with gravity counts. */ + double sum = 0.0; + if (s->nr_gparts > 0) { + for (int k = 0; k < s->nr_cells; k++) { + counts[k] += gcounts[k]; + sum += counts[k]; + } + free(gcounts); + } else { + for (int k = 0; k < s->nr_cells; k++) { + sum += counts[k]; + } + } + /* Keep the sum of particles across all ranks in the range of IDX_MAX. */ - if ((s->e->total_nr_parts * hsize + s->e->total_nr_gparts * gsize + - s->e->total_nr_sparts * ssize) > (double)IDX_MAX) { - double vscale = - (double)(IDX_MAX - 1000) / - (double)(s->e->total_nr_parts * hsize + s->e->total_nr_gparts * gsize + - s->e->total_nr_sparts * ssize); + if (sum > (double)(IDX_MAX - 10000)) { + double vscale = (double)(IDX_MAX - 10000) / sum; for (int k = 0; k < s->nr_cells; k++) counts[k] *= vscale; } } + +/** + * @brief Make edge weights from the accumulated particle sizes per cell. + * + * @param s the space containing the cells. + * @param counts the number of bytes in particles per cell. + * @param edges weights for the edges of these regions. Should be 26 * counts. + */ +static void sizes_to_edges(struct space *s, double *counts, double *edges) { + + bzero(edges, sizeof(double) * s->nr_cells * 26); + + for (int l = 0; l < s->nr_cells; l++) { + int p = 0; + for (int i = -1; i <= 1; i++) { + int isid = ((i < 0) ? 0 : ((i > 0) ? 2 : 1)); + for (int j = -1; j <= 1; j++) { + int jsid = isid * 3 + ((j < 0) ? 0 : ((j > 0) ? 2 : 1)); + for (int k = -1; k <= 1; k++) { + int ksid = jsid * 3 + ((k < 0) ? 0 : ((k > 0) ? 2 : 1)); + + /* If not self, we work out the sort indices to get the expected + * fractional weight and add that. Scale to keep sum less than + * counts and a bit of tuning... */ + if (i || j || k) { + edges[l * 26 + p] = counts[l] * sid_scale[sortlistID[ksid]] / 26.0; + p++; + } + } + } + } + } +} #endif #if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) @@ -464,7 +569,7 @@ static void split_metis(struct space *s, int nregions, int *celllist) { /* To check or visualise the partition dump all the cells. */ /*if (engine_rank == 0) dumpCellRanks("metis_partition", s->cells_top, - * s->nr_cells);*/ + s->nr_cells);*/ } #endif @@ -780,7 +885,7 @@ static void pick_parmetis(int nodeID, struct space *s, int nregions, /* Dump graphs to disk files for testing. */ /*dumpMETISGraph("parmetis_graph", ncells, 1, std_xadj, full_adjncy, - full_weights_v, NULL, full_weights_e);*/ + full_weights_v, NULL, full_weights_e); */ /* xadj is set for each rank, different to serial version in that each * rank starts with 0, so we need to re-offset. */ @@ -1213,7 +1318,7 @@ static void pick_metis(int nodeID, struct space *s, int nregions, /* Dump graph in METIS format */ /*dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy, weights_v, - NULL, weights_e);*/ + NULL, weights_e);*/ if (METIS_PartGraphKway(&idx_ncells, &one, xadj, adjncy, weights_v, NULL, weights_e, &idx_nregions, NULL, NULL, options, @@ -1482,7 +1587,8 @@ static void repart_edge_metis(int vweights, int eweights, int timebins, ticks tic = getticks(); threadpool_map(&s->e->threadpool, partition_gather_weights, tasks, nr_tasks, - sizeof(struct task), 0, &weights_data); + sizeof(struct task), threadpool_auto_chunk_size, + &weights_data); if (s->e->verbose) message("weight mapper took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -1536,22 +1642,22 @@ static void repart_edge_metis(int vweights, int eweights, int timebins, if (vweights && eweights) { if (vsum > esum) { if (vsum > (double)IDX_MAX) { - vscale = (double)(IDX_MAX - 1000) / vsum; + vscale = (double)(IDX_MAX - 10000) / vsum; escale = vscale; } } else { if (esum > (double)IDX_MAX) { - escale = (double)(IDX_MAX - 1000) / esum; + escale = (double)(IDX_MAX - 10000) / esum; vscale = escale; } } } else if (vweights) { if (vsum > (double)IDX_MAX) { - vscale = (double)(IDX_MAX - 1000) / vsum; + vscale = (double)(IDX_MAX - 10000) / vsum; } } else if (eweights) { if (esum > (double)IDX_MAX) { - escale = (double)(IDX_MAX - 1000) / esum; + escale = (double)(IDX_MAX - 10000) / esum; } } @@ -1655,17 +1761,11 @@ static void repart_memory_metis(struct repartition *repartition, int nodeID, double *weights = NULL; if ((weights = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL) error("Failed to allocate cell weights buffer."); - bzero(weights, sizeof(double) * s->nr_cells); /* Check each particle and accumulate the sizes per cell. */ - accumulate_sizes(s, weights); + accumulate_sizes(s, s->e->verbose, weights); - /* Get all the counts from all the nodes. */ - if (MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_DOUBLE, MPI_SUM, - MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to allreduce particle cell weights."); - - /* Allocate cell list for the partition. If not already done. */ + /* Allocate cell list for the partition. If not already done. */ #ifdef HAVE_PARMETIS int refine = 1; #endif @@ -1839,27 +1939,38 @@ void partition_initial_partition(struct partition *initial_partition, } } else if (initial_partition->type == INITPART_METIS_WEIGHT || + initial_partition->type == INITPART_METIS_WEIGHT_EDGE || initial_partition->type == INITPART_METIS_NOWEIGHT) { #if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS)) /* Simple k-way partition selected by METIS using cell particle * counts as weights or not. Should be best when starting with a * inhomogeneous dist. */ - - /* Space for particles sizes per cell, which will be used as weights. */ - double *weights = NULL; + double *weights_v = NULL; + double *weights_e = NULL; if (initial_partition->type == INITPART_METIS_WEIGHT) { - if ((weights = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL) - error("Failed to allocate weights buffer."); - bzero(weights, sizeof(double) * s->nr_cells); + /* Particles sizes per cell, which will be used as weights. */ + if ((weights_v = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL) + error("Failed to allocate weights_v buffer."); + + /* Check each particle and accumulate the sizes per cell. */ + accumulate_sizes(s, s->e->verbose, weights_v); + + } else if (initial_partition->type == INITPART_METIS_WEIGHT_EDGE) { - /* Check each particle and accumilate the sizes per cell. */ - accumulate_sizes(s, weights); + /* Particle sizes also counted towards the edges. */ - /* Get all the counts from all the nodes. */ - if (MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_DOUBLE, MPI_SUM, - MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to allreduce particle cell weights."); + if ((weights_v = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL) + error("Failed to allocate weights_v buffer."); + if ((weights_e = (double *)malloc(sizeof(double) * s->nr_cells * 26)) == + NULL) + error("Failed to allocate weights_e buffer."); + + /* Check each particle and accumulate the sizes per cell. */ + accumulate_sizes(s, s->e->verbose, weights_v); + + /* Spread these into edge weights. */ + sizes_to_edges(s, weights_v, weights_e); } /* Do the calculation. */ @@ -1868,12 +1979,13 @@ void partition_initial_partition(struct partition *initial_partition, error("Failed to allocate celllist"); #ifdef HAVE_PARMETIS if (initial_partition->usemetis) { - pick_metis(nodeID, s, nr_nodes, weights, NULL, celllist); + pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, celllist); } else { - pick_parmetis(nodeID, s, nr_nodes, weights, NULL, 0, 0, 0.0f, celllist); + pick_parmetis(nodeID, s, nr_nodes, weights_v, weights_e, 0, 0, 0.0f, + celllist); } #else - pick_metis(nodeID, s, nr_nodes, weights, NULL, celllist); + pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, celllist); #endif /* And apply to our cells */ @@ -1888,7 +2000,8 @@ void partition_initial_partition(struct partition *initial_partition, partition_initial_partition(initial_partition, nodeID, nr_nodes, s); } - if (weights != NULL) free(weights); + if (weights_v != NULL) free(weights_v); + if (weights_e != NULL) free(weights_e); free(celllist); #else error("SWIFT was not compiled with METIS or ParMETIS support"); @@ -1943,7 +2056,7 @@ void partition_init(struct partition *partition, /* Defaults make use of METIS if available */ #if defined(HAVE_METIS) || defined(HAVE_PARMETIS) const char *default_repart = "fullcosts"; - const char *default_part = "memory"; + const char *default_part = "edgememory"; #else const char *default_repart = "none"; const char *default_part = "grid"; @@ -1974,10 +2087,13 @@ void partition_init(struct partition *partition, case 'm': partition->type = INITPART_METIS_WEIGHT; break; + case 'e': + partition->type = INITPART_METIS_WEIGHT_EDGE; + break; default: message("Invalid choice of initial partition type '%s'.", part_type); error( - "Permitted values are: 'grid', 'region', 'memory' or " + "Permitted values are: 'grid', 'region', 'memory', 'edgememory' or " "'vectorized'"); #else default: diff --git a/src/partition.h b/src/partition.h index de0d95a5e343f1aa85a03c2cda49019f2fd08037..8f6dbbd148d510650578ff0a857f9fd29c3e3c25 100644 --- a/src/partition.h +++ b/src/partition.h @@ -28,7 +28,8 @@ enum partition_type { INITPART_GRID = 0, INITPART_VECTORIZE, INITPART_METIS_WEIGHT, - INITPART_METIS_NOWEIGHT + INITPART_METIS_NOWEIGHT, + INITPART_METIS_WEIGHT_EDGE }; /* Simple descriptions of types for reports. */ diff --git a/src/proxy.c b/src/proxy.c index 338f1beaa8734168d8139902a2746cd00250bffd..4b8fdfcbd8f6e96a0d32cad55d563cc809471d20 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -43,6 +43,7 @@ #include "error.h" #include "memuse.h" #include "space.h" +#include "threadpool.h" #ifdef WITH_MPI /* MPI data type for the communications */ @@ -394,7 +395,7 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies, /* Run through the cells and get the size of the ones that will be sent off. */ threadpool_map(&s->e->threadpool, proxy_cells_count_mapper, s->cells_top, - s->nr_cells, sizeof(struct cell), /*chunk=*/0, + s->nr_cells, sizeof(struct cell), threadpool_auto_chunk_size, /*extra_data=*/NULL); int count_out = 0; int *offset = @@ -421,7 +422,8 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies, /* Pack the cells. */ struct pack_mapper_data data = {s, offset, pcells, with_gravity}; threadpool_map(&s->e->threadpool, proxy_cells_pack_mapper, s->cells_top, - s->nr_cells, sizeof(struct cell), /*chunk=*/0, &data); + s->nr_cells, sizeof(struct cell), threadpool_auto_chunk_size, + &data); if (s->e->verbose) message("Packing cells took %.3f %s.", clocks_from_ticks(getticks() - tic2), @@ -429,7 +431,7 @@ void proxy_cells_exchange(struct proxy *proxies, int num_proxies, /* Launch the first part of the exchange. */ threadpool_map(&s->e->threadpool, proxy_cells_exchange_first_mapper, proxies, - num_proxies, sizeof(struct proxy), /*chunk=*/0, + num_proxies, sizeof(struct proxy), threadpool_auto_chunk_size, /*extra_data=*/NULL); for (int k = 0; k < num_proxies; k++) { reqs_in[k] = proxies[k].req_cells_count_in; diff --git a/src/queue.c b/src/queue.c index dcb8d450500110dab7d7d5b11bae7e402cd683c2..8f532a6f24d833dcad7fa1136a1d5400b01de430 100644 --- a/src/queue.c +++ b/src/queue.c @@ -308,3 +308,33 @@ void queue_clean(struct queue *q) { free(q->entries); free(q->tid_incoming); } + +/** + * @brief Dump a formatted list of tasks in the queue to the given file stream. + * + * @param nodeID the node id of this rank. + * @param index a number for this queue, added to the output. + * @param file the FILE stream, should opened for write. + * @param q The task #queue. + */ +void queue_dump(int nodeID, int index, FILE *file, struct queue *q) { + + swift_lock_type *qlock = &q->lock; + + /* Grab the queue lock. */ + if (lock_lock(qlock) != 0) error("Locking the qlock failed.\n"); + + /* Fill any tasks from the incoming DEQ. */ + queue_get_incoming(q); + + /* Loop over the queue entries. */ + for (int k = 0; k < q->count; k++) { + struct task *t = &q->tasks[q->tid[k]]; + + fprintf(file, "%d %d %d %s %s %.2f\n", nodeID, index, k, + taskID_names[t->type], subtaskID_names[t->subtype], t->weight); + } + + /* Release the task lock. */ + if (lock_unlock(qlock) != 0) error("Unlocking the qlock failed.\n"); +} diff --git a/src/queue.h b/src/queue.h index bcc3134610590f43f27163eb52e7e171ea5287fc..0576403bef8ed66dd408d40748435530155a7901 100644 --- a/src/queue.h +++ b/src/queue.h @@ -84,4 +84,6 @@ void queue_init(struct queue *q, struct task *tasks); void queue_insert(struct queue *q, struct task *t); void queue_clean(struct queue *q); +void queue_dump(int nodeID, int index, FILE *file, struct queue *q); + #endif /* SWIFT_QUEUE_H */ diff --git a/src/runner_ghost.c b/src/runner_ghost.c index 2aaf172ea46aa285603270ca5f8fdb1d2b10c1c7..a1f38c9936578e63a31097cdd1b577c8fc6f762b 100644 --- a/src/runner_ghost.c +++ b/src/runner_ghost.c @@ -472,7 +472,7 @@ void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) { * Therefore we need to update h_max between the super- and top-levels */ if (c->stars.ghost) { for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { - atomic_max_d(&tmp->stars.h_max, h_max); + atomic_max_f(&tmp->stars.h_max, h_max); } } @@ -783,7 +783,7 @@ void runner_do_black_holes_density_ghost(struct runner *r, struct cell *c, * Therefore we need to update h_max between the super- and top-levels */ if (c->black_holes.density_ghost) { for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { - atomic_max_d(&tmp->black_holes.h_max, h_max); + atomic_max_f(&tmp->black_holes.h_max, h_max); } } @@ -809,7 +809,8 @@ void runner_do_black_holes_swallow_ghost(struct runner *r, struct cell *c, TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active_hydro(c, e)) return; + if (c->black_holes.count == 0) return; + if (!cell_is_active_black_holes(c, e)) return; /* Recurse? */ if (c->split) { @@ -1378,7 +1379,7 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { * Therefore we need to update h_max between the super- and top-levels */ if (c->hydro.ghost) { for (struct cell *tmp = c->parent; tmp != NULL; tmp = tmp->parent) { - atomic_max_d(&tmp->hydro.h_max, h_max); + atomic_max_f(&tmp->hydro.h_max, h_max); } } diff --git a/src/runner_others.c b/src/runner_others.c index 2f7a82e2ff0158e99987b25c3bc9c6e7f3c15dec..6dcc92d8a492621346f92e455bbb02fae88e1fb0 100644 --- a/src/runner_others.c +++ b/src/runner_others.c @@ -45,6 +45,7 @@ #include "engine.h" #include "error.h" #include "feedback.h" +#include "fof.h" #include "gravity.h" #include "hydro.h" #include "logger.h" @@ -145,6 +146,7 @@ void runner_do_grav_mesh(struct runner *r, struct cell *c, int timer) { * @param timer 1 if the time is to be recorded. */ void runner_do_cooling(struct runner *r, struct cell *c, int timer) { + const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; const int with_cosmology = (e->policy & engine_policy_cosmology); @@ -202,8 +204,8 @@ void runner_do_cooling(struct runner *r, struct cell *c, int timer) { /* Let's cool ! */ cooling_cool_part(constants, us, cosmo, hydro_props, - entropy_floor_props, cooling_func, p, xp, time, - dt_cool, dt_therm); + entropy_floor_props, cooling_func, p, xp, dt_cool, + dt_therm, time); /* Apply the effects of feedback on this particle * (Note: Only used in schemes that have a delayed feedback mechanism diff --git a/src/scheduler.c b/src/scheduler.c index 28482ae281cfd0ca4c6d1da1b5134ab77441d466..24efe915a8a5e1f7a9cf066fd356d54e1dfc7b0d 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -53,6 +53,7 @@ #include "space.h" #include "space_getsid.h" #include "task.h" +#include "threadpool.h" #include "timers.h" #include "version.h" @@ -1059,12 +1060,14 @@ void scheduler_splittasks(struct scheduler *s, const int fof_tasks, if (fof_tasks) { /* Call the mapper on each current task. */ threadpool_map(s->threadpool, scheduler_splittasks_fof_mapper, s->tasks, - s->nr_tasks, sizeof(struct task), 0, s); + s->nr_tasks, sizeof(struct task), threadpool_auto_chunk_size, + s); } else { /* Call the mapper on each current task. */ threadpool_map(s->threadpool, scheduler_splittasks_mapper, s->tasks, - s->nr_tasks, sizeof(struct task), 0, s); + s->nr_tasks, sizeof(struct task), threadpool_auto_chunk_size, + s); } } @@ -1363,21 +1366,35 @@ void scheduler_reweight(struct scheduler *s, int verbose) { (sizeof(int) * 8 - intrinsics_clz(t->ci->stars.count)); break; + case task_type_stars_resort: + cost = wscale * intrinsics_popcount(t->flags) * scount_i * + (sizeof(int) * 8 - intrinsics_clz(t->ci->stars.count)); + break; + case task_type_self: if (t->subtype == task_subtype_grav) { cost = 1.f * (wscale * gcount_i) * gcount_i; } else if (t->subtype == task_subtype_external_grav) cost = 1.f * wscale * gcount_i; - else if (t->subtype == task_subtype_stars_density) - cost = 1.f * wscale * scount_i * count_i; - else if (t->subtype == task_subtype_stars_feedback) + else if (t->subtype == task_subtype_stars_density || + t->subtype == task_subtype_stars_feedback) cost = 1.f * wscale * scount_i * count_i; - else if (t->subtype == task_subtype_bh_density) - cost = 1.f * wscale * bcount_i * count_i; - else if (t->subtype == task_subtype_bh_feedback) + else if (t->subtype == task_subtype_bh_density || + t->subtype == task_subtype_bh_swallow || + t->subtype == task_subtype_bh_feedback) cost = 1.f * wscale * bcount_i * count_i; - else // hydro loops + else if (t->subtype == task_subtype_do_gas_swallow) + cost = 1.f * wscale * count_i; + else if (t->subtype == task_subtype_do_bh_swallow) + cost = 1.f * wscale * bcount_i; + else if (t->subtype == task_subtype_density || + t->subtype == task_subtype_gradient || + t->subtype == task_subtype_force || + t->subtype == task_subtype_limiter) cost = 1.f * (wscale * count_i) * count_i; + else + error("Untreated sub-type for selfs: %s", + subtaskID_names[t->subtype]); break; case task_type_pair: @@ -1398,6 +1415,7 @@ void scheduler_reweight(struct scheduler *s, int verbose) { sid_scale[t->flags]; } else if (t->subtype == task_subtype_bh_density || + t->subtype == task_subtype_bh_swallow || t->subtype == task_subtype_bh_feedback) { if (t->ci->nodeID != nodeID) cost = 3.f * wscale * count_i * bcount_j * sid_scale[t->flags]; @@ -1407,11 +1425,24 @@ void scheduler_reweight(struct scheduler *s, int verbose) { cost = 2.f * wscale * (bcount_i * count_j + bcount_j * count_i) * sid_scale[t->flags]; - } else { // hydro loops + } else if (t->subtype == task_subtype_do_gas_swallow) { + cost = 1.f * wscale * (count_i + count_j); + + } else if (t->subtype == task_subtype_do_bh_swallow) { + cost = 1.f * wscale * (bcount_i + bcount_j); + + } else if (t->subtype == task_subtype_density || + t->subtype == task_subtype_gradient || + t->subtype == task_subtype_force || + t->subtype == task_subtype_limiter) { if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) cost = 3.f * (wscale * count_i) * count_j * sid_scale[t->flags]; else cost = 2.f * (wscale * count_i) * count_j * sid_scale[t->flags]; + + } else { + error("Untreated sub-type for pairs: %s", + subtaskID_names[t->subtype]); } break; @@ -1431,6 +1462,7 @@ void scheduler_reweight(struct scheduler *s, int verbose) { } } else if (t->subtype == task_subtype_bh_density || + t->subtype == task_subtype_bh_swallow || t->subtype == task_subtype_bh_feedback) { if (t->ci->nodeID != nodeID) { cost = 3.f * (wscale * count_i) * bcount_j * sid_scale[t->flags]; @@ -1441,26 +1473,48 @@ void scheduler_reweight(struct scheduler *s, int verbose) { sid_scale[t->flags]; } - } else { // hydro loops + } else if (t->subtype == task_subtype_do_gas_swallow) { + cost = 1.f * wscale * (count_i + count_j); + + } else if (t->subtype == task_subtype_do_bh_swallow) { + cost = 1.f * wscale * (bcount_i + bcount_j); + + } else if (t->subtype == task_subtype_density || + t->subtype == task_subtype_gradient || + t->subtype == task_subtype_force || + t->subtype == task_subtype_limiter) { if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) { cost = 3.f * (wscale * count_i) * count_j * sid_scale[t->flags]; } else { cost = 2.f * (wscale * count_i) * count_j * sid_scale[t->flags]; } + + } else { + error("Untreated sub-type for sub-pairs: %s", + subtaskID_names[t->subtype]); } break; case task_type_sub_self: - if (t->subtype == task_subtype_stars_density) { - cost = 1.f * (wscale * scount_i) * count_i; - } else if (t->subtype == task_subtype_stars_feedback) { + if (t->subtype == task_subtype_stars_density || + t->subtype == task_subtype_stars_feedback) { cost = 1.f * (wscale * scount_i) * count_i; - } else if (t->subtype == task_subtype_bh_density) { - cost = 1.f * (wscale * bcount_i) * count_i; - } else if (t->subtype == task_subtype_bh_feedback) { + } else if (t->subtype == task_subtype_bh_density || + t->subtype == task_subtype_bh_swallow || + t->subtype == task_subtype_bh_feedback) { cost = 1.f * (wscale * bcount_i) * count_i; - } else { + } else if (t->subtype == task_subtype_do_gas_swallow) { + cost = 1.f * wscale * count_i; + } else if (t->subtype == task_subtype_do_bh_swallow) { + cost = 1.f * wscale * bcount_i; + } else if (t->subtype == task_subtype_density || + t->subtype == task_subtype_gradient || + t->subtype == task_subtype_force || + t->subtype == task_subtype_limiter) { cost = 1.f * (wscale * count_i) * count_i; + } else { + error("Untreated sub-type for sub-selfs: %s", + subtaskID_names[t->subtype]); } break; case task_type_ghost: @@ -1475,6 +1529,9 @@ void scheduler_reweight(struct scheduler *s, int verbose) { case task_type_bh_density_ghost: if (t->ci == t->ci->hydro.super) cost = wscale * bcount_i; break; + case task_type_bh_swallow_ghost2: + if (t->ci == t->ci->hydro.super) cost = wscale * bcount_i; + break; case task_type_drift_part: cost = wscale * count_i; break; @@ -1496,6 +1553,9 @@ void scheduler_reweight(struct scheduler *s, int verbose) { case task_type_grav_long_range: cost = wscale * gcount_i; break; + case task_type_grav_mesh: + cost = wscale * gcount_i; + break; case task_type_grav_mm: cost = wscale * (gcount_i + gcount_j); break; @@ -1520,6 +1580,12 @@ void scheduler_reweight(struct scheduler *s, int verbose) { case task_type_timestep: cost = wscale * (count_i + gcount_i + scount_i + bcount_i); break; + case task_type_timestep_limiter: + cost = wscale * count_i; + break; + case task_type_timestep_sync: + cost = wscale * count_i; + break; case task_type_send: if (count_i < 1e5) cost = 10.f * (wscale * count_i) * count_i; @@ -1610,7 +1676,7 @@ void scheduler_start(struct scheduler *s) { /* Re-wait the tasks. */ if (s->active_count > 1000) { threadpool_map(s->threadpool, scheduler_rewait_mapper, s->tid_active, - s->active_count, sizeof(int), 0, s); + s->active_count, sizeof(int), threadpool_auto_chunk_size, s); } else { scheduler_rewait_mapper(s->tid_active, s->active_count, s); } @@ -1618,7 +1684,7 @@ void scheduler_start(struct scheduler *s) { /* Loop over the tasks and enqueue whoever is ready. */ if (s->active_count > 1000) { threadpool_map(s->threadpool, scheduler_enqueue_mapper, s->tid_active, - s->active_count, sizeof(int), 0, s); + s->active_count, sizeof(int), threadpool_auto_chunk_size, s); } else { scheduler_enqueue_mapper(s->tid_active, s->active_count, s); } @@ -2274,3 +2340,29 @@ void scheduler_write_task_level(const struct scheduler *s) { fclose(f); free(count); } +/** + * @brief dump all the active queues of all the known schedulers into files. + * + * @param e the #scheduler + */ +void scheduler_dump_queues(struct engine *e) { + + struct scheduler *s = &e->sched; + char dumpfile[35]; + +#ifdef WITH_MPI + /* Open a file per rank and write the header. Use per rank to avoid MPI + * calls that can interact with other blocking ones. */ + snprintf(dumpfile, sizeof(dumpfile), "queue_dump_MPI-step%d.dat_%d", e->step, + e->nodeID); +#else + snprintf(dumpfile, sizeof(dumpfile), "queue_dump-step%d.dat", e->step); +#endif + + FILE *file_thread = fopen(dumpfile, "w"); + fprintf(file_thread, "# rank queue index type subtype weight\n"); + for (int l = 0; l < s->nr_queues; l++) { + queue_dump(engine_rank, l, file_thread, &s->queues[l]); + } + fclose(file_thread); +} diff --git a/src/scheduler.h b/src/scheduler.h index 694a88017b863d29ad9d15a4ae7d5acac5cf3223..23cb39afbd7ac890d2d3c7ecacef3bffe01b0128 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -202,5 +202,6 @@ void scheduler_clean(struct scheduler *s); void scheduler_free_tasks(struct scheduler *s); void scheduler_write_dependencies(struct scheduler *s, int verbose); void scheduler_write_task_level(const struct scheduler *s); +void scheduler_dump_queues(struct engine *e); #endif /* SWIFT_SCHEDULER_H */ diff --git a/src/space.c b/src/space.c index 3fffafc4b7cd27827d6a24e116676581c8ae9413..9d652042fd4361ea1fd76e09eeef4556c772c887 100644 --- a/src/space.c +++ b/src/space.c @@ -57,6 +57,7 @@ #include "minmax.h" #include "multipole.h" #include "pressure_floor.h" +#include "proxy.h" #include "restart.h" #include "sort_part.h" #include "star_formation.h" @@ -301,7 +302,8 @@ void space_free_cells(struct space *s) { ticks tic = getticks(); threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper, s->cells_top, - s->nr_cells, sizeof(struct cell), 0, s); + s->nr_cells, sizeof(struct cell), threadpool_auto_chunk_size, + s); s->maxdepth = 0; if (s->e->verbose) @@ -313,8 +315,10 @@ void space_free_cells(struct space *s) { * @brief Free any memory in use for foreign particles. * * @param s The #space. + * @param clear_cell_pointers Are we also setting all the foreign cell particle + * pointers to NULL? */ -void space_free_foreign_parts(struct space *s) { +void space_free_foreign_parts(struct space *s, const int clear_cell_pointers) { #ifdef WITH_MPI if (s->parts_foreign != NULL) { @@ -337,6 +341,13 @@ void space_free_foreign_parts(struct space *s) { s->size_bparts_foreign = 0; s->bparts_foreign = NULL; } + if (clear_cell_pointers) { + for (int k = 0; k < s->e->nr_proxies; k++) { + for (int j = 0; j < s->e->proxies[k].nr_cells_in; j++) { + cell_unlink_foreign_particles(s->e->proxies[k].cells_in[j]); + } + } + } #endif } @@ -1996,7 +2007,8 @@ void space_split(struct space *s, int verbose) { threadpool_map(&s->e->threadpool, space_split_mapper, s->local_cells_with_particles_top, - s->nr_local_cells_with_particles, sizeof(int), 0, s); + s->nr_local_cells_with_particles, sizeof(int), + threadpool_auto_chunk_size, s); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -2056,17 +2068,20 @@ void space_reorder_extras(struct space *s, int verbose) { /* Re-order the gas particles */ if (space_extra_parts) threadpool_map(&s->e->threadpool, space_reorder_extra_parts_mapper, - s->local_cells_top, s->nr_local_cells, sizeof(int), 0, s); + s->local_cells_top, s->nr_local_cells, sizeof(int), + threadpool_auto_chunk_size, s); /* Re-order the gravity particles */ if (space_extra_gparts) threadpool_map(&s->e->threadpool, space_reorder_extra_gparts_mapper, - s->local_cells_top, s->nr_local_cells, sizeof(int), 0, s); + s->local_cells_top, s->nr_local_cells, sizeof(int), + threadpool_auto_chunk_size, s); /* Re-order the star particles */ if (space_extra_sparts) threadpool_map(&s->e->threadpool, space_reorder_extra_sparts_mapper, - s->local_cells_top, s->nr_local_cells, sizeof(int), 0, s); + s->local_cells_top, s->nr_local_cells, sizeof(int), + threadpool_auto_chunk_size, s); /* Re-order the black hole particles */ if (space_extra_bparts) @@ -2100,7 +2115,8 @@ void space_sanitize(struct space *s) { if (s->e->nodeID == 0) message("Cleaning up unreasonable values of h"); threadpool_map(&s->e->threadpool, space_sanitize_mapper, s->cells_top, - s->nr_cells, sizeof(struct cell), 0, NULL); + s->nr_cells, sizeof(struct cell), threadpool_auto_chunk_size, + /*extra_data=*/NULL); } /** @@ -2684,7 +2700,8 @@ void space_parts_get_cell_index(struct space *s, int *ind, int *cell_counts, data.count_extra_bpart = 0; threadpool_map(&s->e->threadpool, space_parts_get_cell_index_mapper, s->parts, - s->nr_parts, sizeof(struct part), 0, &data); + s->nr_parts, sizeof(struct part), threadpool_auto_chunk_size, + &data); *count_inhibited_parts = data.count_inhibited_part; *count_extra_parts = data.count_extra_part; @@ -2732,7 +2749,8 @@ void space_gparts_get_cell_index(struct space *s, int *gind, int *cell_counts, data.count_extra_bpart = 0; threadpool_map(&s->e->threadpool, space_gparts_get_cell_index_mapper, - s->gparts, s->nr_gparts, sizeof(struct gpart), 0, &data); + s->gparts, s->nr_gparts, sizeof(struct gpart), + threadpool_auto_chunk_size, &data); *count_inhibited_gparts = data.count_inhibited_gpart; *count_extra_gparts = data.count_extra_gpart; @@ -2780,7 +2798,8 @@ void space_sparts_get_cell_index(struct space *s, int *sind, int *cell_counts, data.count_extra_bpart = 0; threadpool_map(&s->e->threadpool, space_sparts_get_cell_index_mapper, - s->sparts, s->nr_sparts, sizeof(struct spart), 0, &data); + s->sparts, s->nr_sparts, sizeof(struct spart), + threadpool_auto_chunk_size, &data); *count_inhibited_sparts = data.count_inhibited_spart; *count_extra_sparts = data.count_extra_spart; @@ -2828,7 +2847,8 @@ void space_bparts_get_cell_index(struct space *s, int *bind, int *cell_counts, data.count_extra_bpart = 0; threadpool_map(&s->e->threadpool, space_bparts_get_cell_index_mapper, - s->bparts, s->nr_bparts, sizeof(struct bpart), 0, &data); + s->bparts, s->nr_bparts, sizeof(struct bpart), + threadpool_auto_chunk_size, &data); *count_inhibited_bparts = data.count_inhibited_bpart; *count_extra_bparts = data.count_extra_bpart; @@ -4184,15 +4204,18 @@ void space_synchronize_particle_positions(struct space *s) { if (s->nr_gparts > 0 && s->nr_parts > 0) threadpool_map(&s->e->threadpool, space_synchronize_part_positions_mapper, - s->parts, s->nr_parts, sizeof(struct part), 0, (void *)s); + s->parts, s->nr_parts, sizeof(struct part), + threadpool_auto_chunk_size, (void *)s); if (s->nr_gparts > 0 && s->nr_sparts > 0) threadpool_map(&s->e->threadpool, space_synchronize_spart_positions_mapper, - s->sparts, s->nr_sparts, sizeof(struct spart), 0, NULL); + s->sparts, s->nr_sparts, sizeof(struct spart), + threadpool_auto_chunk_size, /*extra_data=*/NULL); if (s->nr_gparts > 0 && s->nr_bparts > 0) threadpool_map(&s->e->threadpool, space_synchronize_bpart_positions_mapper, - s->bparts, s->nr_bparts, sizeof(struct bpart), 0, NULL); + s->bparts, s->nr_bparts, sizeof(struct bpart), + threadpool_auto_chunk_size, /*extra_data=*/NULL); if (s->e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -4320,7 +4343,8 @@ void space_first_init_parts(struct space *s, int verbose) { const ticks tic = getticks(); if (s->nr_parts > 0) threadpool_map(&s->e->threadpool, space_first_init_parts_mapper, s->parts, - s->nr_parts, sizeof(struct part), 0, s); + s->nr_parts, sizeof(struct part), threadpool_auto_chunk_size, + s); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -4381,7 +4405,8 @@ void space_first_init_gparts(struct space *s, int verbose) { const ticks tic = getticks(); if (s->nr_gparts > 0) threadpool_map(&s->e->threadpool, space_first_init_gparts_mapper, s->gparts, - s->nr_gparts, sizeof(struct gpart), 0, s); + s->nr_gparts, sizeof(struct gpart), + threadpool_auto_chunk_size, s); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -4473,7 +4498,8 @@ void space_first_init_sparts(struct space *s, int verbose) { const ticks tic = getticks(); if (s->nr_sparts > 0) threadpool_map(&s->e->threadpool, space_first_init_sparts_mapper, s->sparts, - s->nr_sparts, sizeof(struct spart), 0, s); + s->nr_sparts, sizeof(struct spart), + threadpool_auto_chunk_size, s); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -4555,7 +4581,8 @@ void space_first_init_bparts(struct space *s, int verbose) { const ticks tic = getticks(); if (s->nr_bparts > 0) threadpool_map(&s->e->threadpool, space_first_init_bparts_mapper, s->bparts, - s->nr_bparts, sizeof(struct bpart), 0, s); + s->nr_bparts, sizeof(struct bpart), + threadpool_auto_chunk_size, s); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -4597,7 +4624,8 @@ void space_init_parts(struct space *s, int verbose) { if (s->nr_parts > 0) threadpool_map(&s->e->threadpool, space_init_parts_mapper, s->parts, - s->nr_parts, sizeof(struct part), 0, s->e); + s->nr_parts, sizeof(struct part), threadpool_auto_chunk_size, + s->e); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -4623,7 +4651,8 @@ void space_init_gparts(struct space *s, int verbose) { if (s->nr_gparts > 0) threadpool_map(&s->e->threadpool, space_init_gparts_mapper, s->gparts, - s->nr_gparts, sizeof(struct gpart), 0, NULL); + s->nr_gparts, sizeof(struct gpart), + threadpool_auto_chunk_size, /*extra_data=*/NULL); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -4649,7 +4678,8 @@ void space_init_sparts(struct space *s, int verbose) { if (s->nr_sparts > 0) threadpool_map(&s->e->threadpool, space_init_sparts_mapper, s->sparts, - s->nr_sparts, sizeof(struct spart), 0, NULL); + s->nr_sparts, sizeof(struct spart), + threadpool_auto_chunk_size, /*extra_data=*/NULL); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -4675,7 +4705,8 @@ void space_init_bparts(struct space *s, int verbose) { if (s->nr_bparts > 0) threadpool_map(&s->e->threadpool, space_init_bparts_mapper, s->bparts, - s->nr_bparts, sizeof(struct bpart), 0, NULL); + s->nr_bparts, sizeof(struct bpart), + threadpool_auto_chunk_size, /*extra_data=*/NULL); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -4710,7 +4741,8 @@ void space_convert_quantities(struct space *s, int verbose) { if (s->nr_parts > 0) threadpool_map(&s->e->threadpool, space_convert_quantities_mapper, s->parts, - s->nr_parts, sizeof(struct part), 0, s); + s->nr_parts, sizeof(struct part), threadpool_auto_chunk_size, + s); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -5599,10 +5631,12 @@ void space_check_swallow(struct space *s) { #ifdef SWIFT_DEBUG_CHECKS threadpool_map(&s->e->threadpool, space_check_part_swallow_mapper, s->parts, - s->nr_parts, sizeof(struct part), 0, NULL); + s->nr_parts, sizeof(struct part), threadpool_auto_chunk_size, + /*extra_data=*/NULL); threadpool_map(&s->e->threadpool, space_check_bpart_swallow_mapper, s->bparts, - s->nr_bparts, sizeof(struct bpart), 0, NULL); + s->nr_bparts, sizeof(struct bpart), threadpool_auto_chunk_size, + /*extra_data=*/NULL); #else error("Calling debugging code without debugging flag activated."); #endif @@ -5874,7 +5908,10 @@ void space_struct_restore(struct space *s, FILE *stream) { NULL, "bparts"); } - /* Need to reconnect the gravity parts to their hydro and stars particles. */ + /* Need to reconnect the gravity parts to their hydro, star and BH particles. + * Note that we can't use the threadpool here as we have not restored it yet. + */ + /* Re-link the parts. */ if (s->nr_parts > 0 && s->nr_gparts > 0) part_relink_parts_to_gparts(s->gparts, s->nr_gparts, s->parts); diff --git a/src/space.h b/src/space.h index 5ce9a3429939edec57def1e551d7db09bedcbcf5..885f9ba85e2249976236a108c4357ec9815f43bf 100644 --- a/src/space.h +++ b/src/space.h @@ -379,7 +379,7 @@ void space_reset_task_counters(struct space *s); void space_clean(struct space *s); void space_free_cells(struct space *s); -void space_free_foreign_parts(struct space *s); +void space_free_foreign_parts(struct space *s, const int clear_cell_pointers); void space_struct_dump(struct space *s, FILE *stream); void space_struct_restore(struct space *s, FILE *stream); diff --git a/src/stars/Default/stars_io.h b/src/stars/Default/stars_io.h index ebabc3e0f555a8055f67b402674945e28dae6747..88444726329bad9baf797aaa8437c48d4fbc1516 100644 --- a/src/stars/Default/stars_io.h +++ b/src/stars/Default/stars_io.h @@ -125,8 +125,9 @@ INLINE static void stars_write_particles(const struct spart *sparts, list[2] = io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, 0.f, sparts, mass, "Masses of the particles"); - list[3] = io_make_output_field("ParticleIDs", LONGLONG, 1, UNIT_CONV_NO_UNITS, - 0.f, sparts, id, "Unique ID of the particles"); + list[3] = + io_make_output_field("ParticleIDs", ULONGLONG, 1, UNIT_CONV_NO_UNITS, 0.f, + sparts, id, "Unique ID of the particles"); list[4] = io_make_output_field( "SmoothingLengths", FLOAT, 1, UNIT_CONV_LENGTH, 1.f, sparts, h, diff --git a/src/stars/EAGLE/stars_io.h b/src/stars/EAGLE/stars_io.h index a7fd91f51355245ec9164df9fd1aa9366ff6e917..1669a8b45b366a6682b8d73fc0056d1f0ddc53ec 100644 --- a/src/stars/EAGLE/stars_io.h +++ b/src/stars/EAGLE/stars_io.h @@ -132,8 +132,9 @@ INLINE static void stars_write_particles(const struct spart *sparts, "Masses of the particles at the current point " "in time (i.e. after stellar losses"); - list[3] = io_make_output_field("ParticleIDs", LONGLONG, 1, UNIT_CONV_NO_UNITS, - 0.f, sparts, id, "Unique ID of the particles"); + list[3] = + io_make_output_field("ParticleIDs", ULONGLONG, 1, UNIT_CONV_NO_UNITS, 0.f, + sparts, id, "Unique ID of the particles"); list[4] = io_make_output_field( "SmoothingLengths", FLOAT, 1, UNIT_CONV_LENGTH, 1.f, sparts, h, diff --git a/src/statistics.c b/src/statistics.c index 11b64acecb79d9466176ef91e4b9cd70e2ebb743..3a2e2831a1e2b436ae072a285259128740dbb5e5 100644 --- a/src/statistics.c +++ b/src/statistics.c @@ -325,12 +325,14 @@ void stats_collect(const struct space *s, struct statistics *stats) { /* Run parallel collection of statistics for parts */ if (s->nr_parts > 0) threadpool_map(&s->e->threadpool, stats_collect_part_mapper, s->parts, - s->nr_parts, sizeof(struct part), 0, &extra_data); + s->nr_parts, sizeof(struct part), threadpool_auto_chunk_size, + &extra_data); /* Run parallel collection of statistics for gparts */ if (s->nr_gparts > 0) threadpool_map(&s->e->threadpool, stats_collect_gpart_mapper, s->gparts, - s->nr_gparts, sizeof(struct gpart), 0, &extra_data); + s->nr_gparts, sizeof(struct gpart), + threadpool_auto_chunk_size, &extra_data); } /** diff --git a/src/swift.h b/src/swift.h index d8221080b1179b8bddc5441cdd4ae19a3fca5b74..c7a5f8d5acbcc17dd467cbcfb914eac025894dae 100644 --- a/src/swift.h +++ b/src/swift.h @@ -43,6 +43,7 @@ #include "error.h" #include "feedback.h" #include "feedback_properties.h" +#include "fof.h" #include "gravity.h" #include "gravity_derivatives.h" #include "gravity_properties.h" diff --git a/src/swift_velociraptor_part.h b/src/swift_velociraptor_part.h index 700842ac5a13e5bee4af15cc0d8726fc668ce421..d499cedebf98b06df14e259882b286b859bd7d04 100644 --- a/src/swift_velociraptor_part.h +++ b/src/swift_velociraptor_part.h @@ -39,8 +39,10 @@ struct swift_vel_part { /*! Particle velocity. */ float v[3]; +#ifndef HAVE_VELOCIRAPTOR_WITH_NOMASS /*! Particle mass. */ float mass; +#endif /*! Gravitational potential */ float potential; diff --git a/src/task.c b/src/task.c index 1ba295588769c70ad48a4f713a8ed96c3defbe01..30f6a9621916615d9aa6e9e315e30d222acd8f84 100644 --- a/src/task.c +++ b/src/task.c @@ -905,7 +905,7 @@ void task_free_mpi_comms(void) { * * Dumps the information to a file "thread_info-stepn.dat" where n is the * given step value, or "thread_info_MPI-stepn.dat", if we are running - * under MPI. Note if running under MPIU all the ranks are dumped into this + * under MPI. Note if running under MPI all the ranks are dumped into this * one file, which has an additional field to identify the rank. * * @param e the #engine @@ -1172,3 +1172,66 @@ void task_dump_stats(const char *dumpfile, struct engine *e, int header, } #endif } + +/** + * @brief dump all the active tasks of all the known engines into files. + * + * Dumps the information into file "task_dump-stepn.dat" where n is the given + * step value, or files "task_dump_MPI-stepn.dat_rank", if we are running + * under MPI. Note if running under MPI all the ranks are dumped into separate + * files to avoid interaction with other MPI calls that may be blocking at the + * time. Very similar to task_dump_all() except for the additional fields used + * in task debugging and we record tasks that have not ran (i.e !skip, but toc + * == 0) and how many waits are still active. + * + * @param e the #engine + */ +void task_dump_active(struct engine *e) { + + /* Need this to convert ticks to seconds. */ + unsigned long long cpufreq = clocks_get_cpufreq(); + char dumpfile[35]; + +#ifdef WITH_MPI + snprintf(dumpfile, sizeof(dumpfile), "task_dump_MPI-step%d.dat_%d", e->step, + e->nodeID); +#else + snprintf(dumpfile, sizeof(dumpfile), "task_dump-step%d.dat", e->step); +#endif + + FILE *file_thread = fopen(dumpfile, "w"); + fprintf(file_thread, + "# rank otherrank type subtype waits pair tic toc" + " ci.hydro.count cj.hydro.count ci.grav.count cj.grav.count" + " flags\n"); + + /* Add some information to help with the plots and conversion of ticks to + * seconds. */ + fprintf(file_thread, "%i 0 none none -1 0 %lld %lld %lld %lld %lld 0 %lld\n", + engine_rank, (long long int)e->tic_step, (long long int)e->toc_step, + e->updates, e->g_updates, e->s_updates, cpufreq); + int count = 0; + for (int l = 0; l < e->sched.nr_tasks; l++) { + struct task *t = &e->sched.tasks[l]; + + /* Not implicit and not skipped. */ + if (!t->implicit && !t->skip) { + + /* Get destination rank of MPI requests. */ + int paired = (t->cj != NULL); + int otherrank = t->ci->nodeID; + if (paired) otherrank = t->cj->nodeID; + + fprintf(file_thread, "%i %i %s %s %i %i %lli %lli %i %i %i %i %lli\n", + engine_rank, otherrank, taskID_names[t->type], + subtaskID_names[t->subtype], t->wait, paired, + (long long int)t->tic, (long long int)t->toc, + (t->ci != NULL) ? t->ci->hydro.count : 0, + (t->cj != NULL) ? t->cj->hydro.count : 0, + (t->ci != NULL) ? t->ci->grav.count : 0, + (t->cj != NULL) ? t->cj->grav.count : 0, t->flags); + } + count++; + } + fclose(file_thread); +} diff --git a/src/task.h b/src/task.h index d3024d4d5dfc93e9dcdee2346127265b102ea9c7..02e1c5ca385749c3dff347b44d8e1f0e984fadd0 100644 --- a/src/task.h +++ b/src/task.h @@ -239,6 +239,7 @@ void task_print(const struct task *t); void task_dump_all(struct engine *e, int step); void task_dump_stats(const char *dumpfile, struct engine *e, int header, int allranks); +void task_dump_active(struct engine *e); void task_get_full_name(int type, int subtype, char *name); void task_get_group_name(int type, int subtype, char *cluster); diff --git a/src/threadpool.c b/src/threadpool.c index 0ef69f9895d3d127a330b105ec385d97975fc82f..c4b6698e9eeb9a657ad66e7a1a7566626b9dc875 100644 --- a/src/threadpool.c +++ b/src/threadpool.c @@ -252,7 +252,7 @@ void threadpool_init(struct threadpool *tp, int num_threads) { * @param N Number of elements in @c map_data. * @param stride Size, in bytes, of each element of @c map_data. * @param chunk Number of map data elements to pass to the function at a time, - * or zero to choose the number automatically. + * or #threadpool_auto_chunk_size to choose the number automatically. * @param extra_data Addtitional pointer that will be passed to the mapping * function, may contain additional data. */ @@ -279,9 +279,10 @@ void threadpool_map(struct threadpool *tp, threadpool_map_function map_function, tp->map_data_size = N; tp->map_data_count = 0; tp->map_data_chunk = - chunk ? chunk - : max((int)(N / (tp->num_threads * threadpool_default_chunk_ratio)), - 1); + (chunk == threadpool_auto_chunk_size) + ? max((int)(N / (tp->num_threads * threadpool_default_chunk_ratio)), + 1) + : chunk; tp->map_function = map_function; tp->map_data = map_data; tp->map_extra_data = extra_data; diff --git a/src/threadpool.h b/src/threadpool.h index 9d19b56836330f59093933bbed5900727142f4a2..a4964ffcbb387a1e3930c3ba172cd783dd6e8123 100644 --- a/src/threadpool.h +++ b/src/threadpool.h @@ -32,6 +32,7 @@ /* Local defines. */ #define threadpool_log_initial_size 1000 #define threadpool_default_chunk_ratio 7 +#define threadpool_auto_chunk_size 0 /* Function type for mappings. */ typedef void (*threadpool_map_function)(void *map_data, int num_elements, diff --git a/src/velociraptor_interface.c b/src/velociraptor_interface.c index 6f6f76bc12ea9a78fd661fe8664c054e500c2576..adfe49366d60d3c5796865beb7d51467b831e1ae 100644 --- a/src/velociraptor_interface.c +++ b/src/velociraptor_interface.c @@ -35,12 +35,16 @@ #include "engine.h" #include "hydro.h" #include "swift_velociraptor_part.h" +#include "threadpool.h" #include "velociraptor_struct.h" #ifdef HAVE_VELOCIRAPTOR /** * @brief Structure for passing cosmological information to VELOCIraptor. + * + * This should match the structure cosmoinfo in the file src/swiftinterface.h + * in the VELOCIraptor code. */ struct cosmoinfo { @@ -77,6 +81,9 @@ struct cosmoinfo { /** * @brief Structure for passing unit information to VELOCIraptor. + * + * This should match the structure unitinfo in the file src/swiftinterface.h + * in the VELOCIraptor code. */ struct unitinfo { @@ -111,6 +118,9 @@ struct cell_loc { /** * @brief Structure for passing simulation information to VELOCIraptor for a * given call. + * + * This should match the structure siminfo in the file src/swiftinterface.h + * in the VELOCIraptor code. */ struct siminfo { @@ -129,6 +139,9 @@ struct siminfo { /*! Number of top-level cells. */ int numcells; + /*! Number of top-level cells. */ + int numcellsperdim; + /*! Locations of top-level cells. */ struct cell_loc *cell_loc; @@ -161,6 +174,11 @@ struct siminfo { /*! Do we have other particles? */ int iother; + +#ifdef HAVE_VELOCIRAPTOR_WITH_NOMASS + /*! Mass of the DM particles */ + double mass_uniform_box; +#endif }; /** @@ -228,10 +246,14 @@ void velociraptor_convert_particles_mapper(void *map_data, int nr_gparts, const struct cooling_function_data *cool_func = e->cooling_func; const float a_inv = e->cosmology->a_inv; + const int periodic = s->periodic; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + const double pos_dithering[3] = {s->pos_dithering[0], s->pos_dithering[1], + s->pos_dithering[2]}; /* Convert particle properties into VELOCIraptor units. * VELOCIraptor wants: - * - Co-moving positions, + * - Un-dithered co-moving positions, * - Peculiar velocities, * - Co-moving potential, * - Physical internal energy (for the gas), @@ -239,15 +261,27 @@ void velociraptor_convert_particles_mapper(void *map_data, int nr_gparts, */ for (int i = 0; i < nr_gparts; i++) { - swift_parts[i].x[0] = gparts[i].x[0]; - swift_parts[i].x[1] = gparts[i].x[1]; - swift_parts[i].x[2] = gparts[i].x[2]; + if (periodic) { + swift_parts[i].x[0] = + box_wrap(gparts[i].x[0] - pos_dithering[0], 0.0, dim[0]); + swift_parts[i].x[1] = + box_wrap(gparts[i].x[1] - pos_dithering[1], 0.0, dim[1]); + swift_parts[i].x[2] = + box_wrap(gparts[i].x[2] - pos_dithering[2], 0.0, dim[2]); + } else { + swift_parts[i].x[0] = gparts[i].x[0]; + swift_parts[i].x[1] = gparts[i].x[1]; + swift_parts[i].x[2] = gparts[i].x[2]; + } swift_parts[i].v[0] = gparts[i].v_full[0] * a_inv; swift_parts[i].v[1] = gparts[i].v_full[1] * a_inv; swift_parts[i].v[2] = gparts[i].v_full[2] * a_inv; +#ifndef HAVE_VELOCIRAPTOR_WITH_NOMASS swift_parts[i].mass = gravity_get_mass(&gparts[i]); +#endif + swift_parts[i].potential = gravity_get_comoving_potential(&gparts[i]); swift_parts[i].type = gparts[i].type; @@ -408,6 +442,12 @@ void velociraptor_invoke(struct engine *e, const int linked_with_snap) { const int nr_cells = s->nr_cells; const struct cell *cells_top = s->cells_top; + /* Start by freeing some of the unnecessary memory to give VR some breathing + space */ +#ifdef WITH_MPI + space_free_foreign_parts(e->s, /*clear_cell_pointers=*/1); +#endif + /* Allow thread to run on any core for the duration of the call to * VELOCIraptor so that when OpenMP threads are spawned * they can run on any core on the processor. */ @@ -509,6 +549,20 @@ void velociraptor_invoke(struct engine *e, const int linked_with_snap) { sim_info.interparticlespacing = -1.; } +#ifdef HAVE_VELOCIRAPTOR_WITH_NOMASS + /* Assume all particles have the same mass */ + double DM_mass = 0.; + for (size_t i = 0; i < e->s->nr_gparts; ++i) { + const struct gpart *gp = &e->s->gparts[i]; + if (gp->time_bin != time_bin_inhibited && + gp->time_bin != time_bin_not_created) { + DM_mass = gp->mass; + break; + } + } + sim_info.mass_uniform_box = DM_mass; +#endif + /* Set the spatial extent of the simulation volume */ sim_info.spacedimension[0] = s->dim[0]; sim_info.spacedimension[1] = s->dim[1]; @@ -516,6 +570,9 @@ void velociraptor_invoke(struct engine *e, const int linked_with_snap) { /* Store number of top-level cells */ sim_info.numcells = s->nr_cells; + sim_info.numcellsperdim = s->cdim[0]; /* We assume a cubic box! */ + if (s->cdim[0] != s->cdim[1] || s->cdim[0] != s->cdim[2]) + error("Trying to run VR on a non-cubic number of top-level cells"); /* Size and inverse size of the top-level cells in VELOCIraptor units */ sim_info.cellwidth[0] = s->cells_top[0].width[0]; @@ -539,9 +596,18 @@ void velociraptor_invoke(struct engine *e, const int linked_with_snap) { for (int i = 0; i < s->nr_cells; i++) { cell_node_ids[i] = cells_top[i].nodeID; - sim_info.cell_loc[i].loc[0] = cells_top[i].loc[0]; - sim_info.cell_loc[i].loc[1] = cells_top[i].loc[1]; - sim_info.cell_loc[i].loc[2] = cells_top[i].loc[2]; + if (s->periodic) { + sim_info.cell_loc[i].loc[0] = + box_wrap(cells_top[i].loc[0] - s->pos_dithering[0], 0.0, s->dim[0]); + sim_info.cell_loc[i].loc[1] = + box_wrap(cells_top[i].loc[1] - s->pos_dithering[1], 0.0, s->dim[1]); + sim_info.cell_loc[i].loc[2] = + box_wrap(cells_top[i].loc[2] - s->pos_dithering[2], 0.0, s->dim[2]); + } else { + sim_info.cell_loc[i].loc[0] = cells_top[i].loc[0]; + sim_info.cell_loc[i].loc[1] = cells_top[i].loc[1]; + sim_info.cell_loc[i].loc[2] = cells_top[i].loc[2]; + } } if (e->verbose) { @@ -628,7 +694,8 @@ void velociraptor_invoke(struct engine *e, const int linked_with_snap) { struct velociraptor_copy_data copy_data = {e, swift_parts}; threadpool_map(&e->threadpool, velociraptor_convert_particles_mapper, - s->gparts, nr_gparts, sizeof(struct gpart), 0, ©_data); + s->gparts, nr_gparts, sizeof(struct gpart), + threadpool_auto_chunk_size, ©_data); /* Report timing */ if (e->verbose) @@ -697,6 +764,12 @@ void velociraptor_invoke(struct engine *e, const int linked_with_snap) { /* Record we have ran stf this timestep */ e->stf_this_timestep = 1; + /* Reallocate the memory that was freed earlier */ +#ifdef WITH_MPI + + engine_allocate_foreign_particles(e); +#endif + #else error("SWIFT not configure to run with VELOCIraptor."); #endif /* HAVE_VELOCIRAPTOR */ diff --git a/tools/analyse_runtime.py b/tools/analyse_runtime.py index 6d6c61be463bbc398c612cc9838c0453587e6193..52caaef25d0184a4d46f105d619d31790f2d74c0 100755 --- a/tools/analyse_runtime.py +++ b/tools/analyse_runtime.py @@ -56,7 +56,7 @@ num_files = len(sys.argv) - 1 labels = [ ["engine_split_gas_particles:", 1], ["Gpart assignment", 1], - ["Mesh comunication", 1], + ["Mesh communication", 1], ["Forward Fourier transform", 1], ["Green function", 1], ["Backwards Fourier transform", 1], diff --git a/tools/check_mpireports.py b/tools/check_mpireports.py new file mode 100755 index 0000000000000000000000000000000000000000..ce555751431c087840b1a4c96b58d6f5a1c213c9 --- /dev/null +++ b/tools/check_mpireports.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python +""" +Usage: + check_mpireportx.py [options] mpi-report... + +Check any mpi reports from a step for any requests which are not fulfilled, +that any sends or receives that are not matched to a successful completion. +Note that a report with a final sum of memory equal to zero will not have any +of these! + +This file is part of SWIFT. + +Copyright (C) 2020 Peter W. Draper (p.w.draper@durham.ac.uk) +All Rights Reserved. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +""" + +import sys +import argparse + +# Handle the command line. +parser = argparse.ArgumentParser(description="Check MPI report") + +parser.add_argument("input", + nargs="+", + metavar="mpi-report", + help="MPI report") +parser.add_argument( + "-v", + "--verbose", + dest="verbose", + help="Verbose output", + default=False, + action="store_true", +) +args = parser.parse_args() +infiles = args.input + +# Indices for words in a line. +sticcol=0 +eticcol=1 +dticcol=2 +stepcol=3 +rankcol=4 +otherrankcol=5 +typecol=6 +itypecol=7 +subtypecol=8 +isubtypecol=9 +activationcol=10 +tagcol=11 +sizecol=12 +sum=13 + +# Keyed lines. +isends = {} +irecvs = {} +esends = {} +erecvs = {} + +# Gather keys from input file. We created dicts with matchable keys +# for the sends and recvs, when they start and complete. +for f in infiles: + if args.verbose: + print "Processing: " + f + with open(f, "r") as fp: + for line in fp: + if line[0] == '#': + continue + words = line.split() + if words[activationcol] == "1": + key = words[otherrankcol] + "/" + \ + words[rankcol] + "/" + \ + words[subtypecol] + "/" + \ + words[tagcol] + "/" + \ + words[sizecol] + if words[typecol] == "send": + isends[key] = [line[:-1]] + else: + irecvs[key] = [line[:-1]] + + else: + # Size will be negative. + key = words[otherrankcol] + "/" + \ + words[rankcol] + "/" + \ + words[subtypecol] + "/" + \ + words[tagcol] + "/" + \ + words[sizecol][1:] + if words[typecol] == "send": + esends[key] = [line[:-1]] + else: + erecvs[key] = [line[:-1]] + +# Now report any uncompleted sends or receives. +print "# stic etic dtic step rank otherrank type itype subtype isubtype activation tag size sum" +for key in isends: + if not key in esends: + print isends[key][0] +for key in irecvs: + if not key in erecvs: + print irecvs[key][0] + +sys.exit(0) diff --git a/tools/task_plots/analyse_tasks.py b/tools/task_plots/analyse_tasks.py index c093d18c2dba6bed0a22e9f7279285b45817445d..4a87ebad3cf13bdedd39e329509f003e858a0a1b 100755 --- a/tools/task_plots/analyse_tasks.py +++ b/tools/task_plots/analyse_tasks.py @@ -93,6 +93,7 @@ TASKTYPES = [ "kick2", "timestep", "timestep_limiter", + "timestep_sync", "send", "recv", "grav_long_range", @@ -116,6 +117,9 @@ TASKTYPES = [ "bh_in", "bh_out", "bh_ghost", + "bh_swallow_ghost1", + "bh_swallow_ghost2", + "bh_swallow_ghost3", "fof_self", "fof_pair", "count", @@ -135,16 +139,21 @@ SUBTYPES = [ "tend_bpart", "xv", "rho", + "part_swallow", + "bpart_merger", "gpart", "multipole", "spart", "stars_density", "stars_feedback", - "sf_counts" - "bpart", + "sf_counts", + "bpart_rho", + "bpart_swallow", + "bpart_feedback", "bh_density", "bh_swallow", - "do_swallow", + "do_gas_swallow", + "do_bh_swallow", "bh_feedback", "count", ] diff --git a/tools/task_plots/plot_tasks.py b/tools/task_plots/plot_tasks.py index 64d5529cc19499d3e4ee7d521e5afcbccb38c019..b0b9f0e511dbdeff42a6c1885740593f8d7dbc42 100755 --- a/tools/task_plots/plot_tasks.py +++ b/tools/task_plots/plot_tasks.py @@ -170,6 +170,7 @@ TASKTYPES = [ "kick2", "timestep", "timestep_limiter", + "timestep_sync", "send", "recv", "grav_long_range", @@ -193,6 +194,9 @@ TASKTYPES = [ "bh_in", "bh_out", "bh_ghost", + "bh_swallow_ghost1", + "bh_swallow_ghost2", + "bh_swallow_ghost3", "fof_self", "fof_pair", "count", @@ -212,16 +216,21 @@ SUBTYPES = [ "tend_bpart", "xv", "rho", + "part_swallow", + "bpart_merger", "gpart", "multipole", "spart", "stars_density", "stars_feedback", "sf_counts", - "bpart", + "bpart_rho", + "bpart_swallow", + "bpart_feedback", "bh_density", "bh_swallow", - "do_swallow", + "do_gas_swallow", + "do_bh_swallow", "bh_feedback", "count", ]