diff --git a/examples/DwarfGalaxy/README b/examples/DwarfGalaxy/README new file mode 100644 index 0000000000000000000000000000000000000000..7a9167694a24c088997316180233b28b9126f298 --- /dev/null +++ b/examples/DwarfGalaxy/README @@ -0,0 +1,7 @@ +This example is a galaxy extracted from the example "ZoomIn". It allows +to test SWIFT on a smaller problem. See the README in "ZoomIn" for more +information. + + +MD5 check-sum of the ICS: +ae2af84d88f30011b6a8af3f37d140cf dwarf_galaxy.hdf5 \ No newline at end of file diff --git a/examples/DwarfGalaxy/dwarf_galaxy.yml b/examples/DwarfGalaxy/dwarf_galaxy.yml new file mode 100644 index 0000000000000000000000000000000000000000..655d41a92827ee56c08e977fe895d28ea8541d9e --- /dev/null +++ b/examples/DwarfGalaxy/dwarf_galaxy.yml @@ -0,0 +1,71 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun in grams + UnitLength_in_cgs: 3.08567758e21 # kpc in centimeters + UnitVelocity_in_cgs: 1e5 # km/s in centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Structure finding options +StructureFinding: + config_file_name: stf_input.cfg # Name of the STF config file. + basename: ./stf # Common part of the name of output files. + output_time_format: 0 # Specifies the frequency format of structure finding. 0 for simulation steps (delta_step) and 1 for simulation time intervals (delta_time). + scale_factor_first: 0.92 # Scale-factor of the first snaphot (cosmological run) + time_first: 0.01 # Time of the first structure finding output (in internal units). + delta_step: 1000 # Time difference between consecutive structure finding outputs (in internal units) in simulation steps. + delta_time: 1.10 # Time difference between consecutive structure finding outputs (in internal units) in simulation time intervals. + +# Cosmological parameters +Cosmology: + h: 0.673 # Reduced Hubble constant + a_begin: 0.9873046739 # Initial scale-factor of the simulation + a_end: 1.0 # Final scale factor of the simulation + Omega_m: 0.315 # Matter density parameter + Omega_lambda: 0.685 # Dark-energy density parameter + Omega_b: 0.0486 # Baryon density parameter + +Scheduler: + max_top_level_cells: 8 + cell_split_size: 400 # (Optional) Maximal number of particles per cell (this is the default value). + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 1. # The end time of the simulation (in internal units). + dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). + +# Parameters governing the snapshots +Snapshots: + basename: dwarf_galaxy # Common part of the name of output files + time_first: 0. # Time of the first output (non-cosmological run) (in internal units) + delta_time: 0.02 # Time difference between consecutive outputs (in internal units) + compression: 1 + +# Parameters governing the conserved quantities statistics +Statistics: + scale_factor_first: 0.987345 # Scale-factor of the first stat dump (cosmological run) + time_first: 0.01 # Time of the first stat dump (non-cosmological run) (in internal units) + delta_time: 1.05 # Time between statistics output + +# Parameters for the self-gravity scheme +Gravity: + eta: 0.025 # Constant dimensionless multiplier for time integration. + theta: 0.7 # Opening angle (Multipole acceptance criterion) + comoving_softening: 0.05 # Comoving softening length (in internal units). + max_physical_softening: 0.01 # Physical softening length (in internal units). + mesh_side_length: 16 + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + minimal_temperature: 100 # (internal units) + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./dwarf_galaxy.hdf5 # The file to read + cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget + cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget + diff --git a/examples/DwarfGalaxy/getIC.sh b/examples/DwarfGalaxy/getIC.sh new file mode 100644 index 0000000000000000000000000000000000000000..92f4cd3939845d57a61683e95135163b8639371f --- /dev/null +++ b/examples/DwarfGalaxy/getIC.sh @@ -0,0 +1,2 @@ +#!/bin/bash +wget https://obswww.unige.ch/~lhausamm/swift/IC/DwarfGalaxy/dwarf_galaxy.hdf5 diff --git a/examples/DwarfGalaxy/run.sh b/examples/DwarfGalaxy/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..4cc87880215a37c9eac59b19e584f4887cba2c38 --- /dev/null +++ b/examples/DwarfGalaxy/run.sh @@ -0,0 +1,11 @@ +#!/bin/bash + + # Generate the initial conditions if they are not present. +if [ ! -e dwarf_galaxy.hdf5 ] +then + echo "Fetching initial conditions for the dwarf galaxy example..." + ./getIC.sh +fi + +../swift -b -G -s -S -t 8 dwarf_galaxy.yml 2>&1 | tee output.log + diff --git a/examples/ZoomIn/README b/examples/ZoomIn/README new file mode 100644 index 0000000000000000000000000000000000000000..cffc275f2ae1046156d392f8725a7b542c80471a --- /dev/null +++ b/examples/ZoomIn/README @@ -0,0 +1,16 @@ +Initial conditions for a zoom in cosmological simulation of dwarf +galaxies. These have been generated by MUSIC and ran up to z=0 with +GEAR (see Revaz and Jablonka 2018 for more details on the simulation). + +The cosmology is taken from Planck 2015. + +The initial conditions have been cleaned to contain only the required +fields. The ICs have been created for Gadget and the positions and box +size are hence expressed in h-full units (e.g. box size of 32 / h Mpc). +Similarly, the peculiar velocitites contain an extra sqrt(a) factor. + +We will use SWIFT to cancel the h- and a-factors from the ICs. Gas +particles will be generated at startup. + +MD5 check-sum of the ICS: +9aafe154438478ed435e88664c1c5dba zoom_in.hdf5 diff --git a/examples/ZoomIn/getIC.sh b/examples/ZoomIn/getIC.sh new file mode 100644 index 0000000000000000000000000000000000000000..6cdfaec981af515249578faa72798c53448e7ecb --- /dev/null +++ b/examples/ZoomIn/getIC.sh @@ -0,0 +1,2 @@ +#!/bin/bash +wget https://obswww.unige.ch/~lhausamm/swift/IC/ZoomIn/zoom_in.hdf5 diff --git a/examples/ZoomIn/run.sh b/examples/ZoomIn/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..99eda1cfc779c1958d19d0c7ae234b6c211f8127 --- /dev/null +++ b/examples/ZoomIn/run.sh @@ -0,0 +1,11 @@ +#!/bin/bash + + # Generate the initial conditions if they are not present. +if [ ! -e zoom_in.hdf5 ] +then + echo "Fetching initial conditions for the zoom in example..." + ./getIC.sh +fi + +../swift -b -c -G -s -S -t 8 zoom_in.yml 2>&1 | tee output.log + diff --git a/examples/ZoomIn/zoom_in.yml b/examples/ZoomIn/zoom_in.yml new file mode 100644 index 0000000000000000000000000000000000000000..5f0fa4958b63150d94ff06df042a03af6f221038 --- /dev/null +++ b/examples/ZoomIn/zoom_in.yml @@ -0,0 +1,61 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1.98848e43 # 10^10 M_sun in grams + UnitLength_in_cgs: 3.08567758e21 # kpc in centimeters + UnitVelocity_in_cgs: 1e5 # km/s in centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Cosmological parameters +Cosmology: + h: 0.673 # Reduced Hubble constant + a_begin: 0.9873046739 # Initial scale-factor of the simulation + a_end: 1.0 # Final scale factor of the simulation + Omega_m: 0.315 # Matter density parameter + Omega_lambda: 0.685 # Dark-energy density parameter + Omega_b: 0.0486 # Baryon density parameter + +Scheduler: + max_top_level_cells: 8 + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 1e-2 # The end time of the simulation (in internal units). + dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). + +# Parameters governing the snapshots +Snapshots: + basename: zoom_in # Common part of the name of output files + scale_factor_first: 0.987345 # Scale-factor of the first snaphot (cosmological run) + time_first: 0.01 # Time of the first output (non-cosmological run) (in internal units) + delta_time: 1.01 # Time difference between consecutive outputs (in internal units) + compression: 1 + +# Parameters governing the conserved quantities statistics +Statistics: + scale_factor_first: 0.987345 # Scale-factor of the first stat dump (cosmological run) + time_first: 0.01 # Time of the first stat dump (non-cosmological run) (in internal units) + delta_time: 1.05 # Time between statistics output + +# Parameters for the self-gravity scheme +Gravity: + eta: 0.025 # Constant dimensionless multiplier for time integration. + theta: 0.7 # Opening angle (Multipole acceptance criterion) + comoving_softening: 0.05 # Comoving softening length (in internal units). + max_physical_softening: 0.01 # Physical softening length (in internal units). + mesh_side_length: 16 + +# Parameters for the hydrodynamics scheme +SPH: + resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). + CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. + minimal_temperature: 100 # (internal units) + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./zoom_in.hdf5 # The file to read + cleanup_h_factors: 1 # Remove the h-factors inherited from Gadget + cleanup_velocity_factors: 1 # Remove the sqrt(a) factor in the velocities inherited from Gadget + diff --git a/examples/plot_task_level.py b/examples/plot_task_level.py new file mode 100644 index 0000000000000000000000000000000000000000..2d51f4829692bc71826ec6c4b93c025f7106828f --- /dev/null +++ b/examples/plot_task_level.py @@ -0,0 +1,43 @@ +#!/usr/bin/python +""" +Usage: + ./plot_task_level.py task_level.txt + +Description: + Plot the number of tasks for each depth level and each type of task. +""" + + +import pandas as pd +import matplotlib.pyplot as plt +import sys + +# get filename +filename = sys.argv[-1] + +# Column names +names = ["type", "subtype", "depth", "count"] + +# read file +data = pd.read_csv(filename, sep=' ', comment="#", names=names) + +# generate color map +cmap = plt.get_cmap("hsv") +N = data["depth"].max() + 5 + +# plot data +for i in range(data["depth"].max()): + ind = data["depth"] == i + label = "depth = %i" % i + c = cmap(i / N) + plt.plot(data["type"][ind] + "_" + data["subtype"][ind], + data["count"][ind], ".", label=label, + color=c) + +# modify figure parameters and show it +plt.gca().set_yscale("log") +plt.xticks(rotation=45) +plt.ylabel("Number of Tasks") +plt.gcf().subplots_adjust(bottom=0.15) +plt.legend() +plt.show() diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py index a8d8c49a7ca8676278ac1ab50c7b6cdf3755445c..b84f981f2ce538d459f9e3448b1df6ee44b04f57 100755 --- a/examples/plot_tasks.py +++ b/examples/plot_tasks.py @@ -157,11 +157,11 @@ for task in SUBTYPES: # For fiddling with colours... if args.verbose: - print "#Selected colours:" + print("#Selected colours:") for task in sorted(TASKCOLOURS.keys()): - print "# " + task + ": " + TASKCOLOURS[task] + print("# " + task + ": " + TASKCOLOURS[task]) for task in sorted(SUBCOLOURS.keys()): - print "# " + task + ": " + SUBCOLOURS[task] + print("# " + task + ": " + SUBCOLOURS[task]) # Read input. data = pl.loadtxt( infile ) @@ -169,11 +169,11 @@ data = pl.loadtxt( infile ) # Do we have an MPI file? full_step = data[0,:] if full_step.size == 13: - print "# MPI mode" + print("# MPI mode") mpimode = True if ranks == None: ranks = range(int(max(data[:,0])) + 1) - print "# Number of ranks:", len(ranks) + print("# Number of ranks:", len(ranks)) rankcol = 0 threadscol = 1 taskcol = 2 @@ -181,7 +181,7 @@ if full_step.size == 13: ticcol = 5 toccol = 6 else: - print "# non MPI mode" + print("# non MPI mode") ranks = [0] mpimode = False rankcol = -1 @@ -194,10 +194,10 @@ else: # Get CPU_CLOCK to convert ticks into milliseconds. CPU_CLOCK = float(full_step[-1]) / 1000.0 if args.verbose: - print "# CPU frequency:", CPU_CLOCK * 1000.0 + print("# CPU frequency:", CPU_CLOCK * 1000.0) nthread = int(max(data[:,threadscol])) + 1 -print "# Number of threads:", nthread +print("# Number of threads:", nthread) # Avoid start and end times of zero. sdata = data[data[:,ticcol] != 0] @@ -224,24 +224,24 @@ if delta_t == 0: dt = toc_step - tic_step if dt > delta_t: delta_t = dt - print "# Data range: ", delta_t / CPU_CLOCK, "ms" + print("# Data range: ", delta_t / CPU_CLOCK, "ms") # Once more doing the real gather and plots this time. for rank in ranks: - print "# Processing rank: ", rank + print("# Processing rank: ", rank) if mpimode: data = sdata[sdata[:,rankcol] == rank] full_step = data[0,:] tic_step = int(full_step[ticcol]) toc_step = int(full_step[toccol]) - print "# Min tic = ", tic_step + print("# Min tic = ", tic_step) data = data[1:,:] typesseen = [] nethread = 0 # Dummy image for ranks that have no tasks. if data.size == 0: - print "# Rank ", rank, " has no tasks" + print("# Rank ", rank, " has no tasks") fig = pl.figure() ax = fig.add_subplot(1,1,1) ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK) @@ -367,6 +367,6 @@ for rank in ranks: else: outpng = outbase + ".png" pl.savefig(outpng) - print "Graphics done, output written to", outpng + print("Graphics done, output written to", outpng) sys.exit(0) diff --git a/src/cell.c b/src/cell.c index 1854f7f24cd1fe19e0c364a43215ca5a10dc57d8..ff76d9b3ee50879184f92efe849b9dad0427654b 100644 --- a/src/cell.c +++ b/src/cell.c @@ -337,6 +337,7 @@ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c, temp->split = 0; temp->hydro.dx_max_part = 0.f; temp->hydro.dx_max_sort = 0.f; + temp->stars.dx_max_part = 0.f; temp->nodeID = c->nodeID; temp->parent = c; c->progeny[k] = temp; @@ -410,6 +411,7 @@ int cell_pack_end_step(struct cell *restrict c, pcells[0].grav.ti_end_min = c->grav.ti_end_min; pcells[0].grav.ti_end_max = c->grav.ti_end_max; pcells[0].hydro.dx_max_part = c->hydro.dx_max_part; + pcells[0].stars.dx_max_part = c->stars.dx_max_part; /* Fill in the progeny, depth-first recursion. */ int count = 1; @@ -446,6 +448,7 @@ int cell_unpack_end_step(struct cell *restrict c, c->grav.ti_end_min = pcells[0].grav.ti_end_min; c->grav.ti_end_max = pcells[0].grav.ti_end_max; c->hydro.dx_max_part = pcells[0].hydro.dx_max_part; + c->stars.dx_max_part = pcells[0].stars.dx_max_part; /* Fill in the progeny, depth-first recursion. */ int count = 1; @@ -1156,8 +1159,11 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, ptrdiff_t sparts_offset, void cell_sanitize(struct cell *c, int treated) { const int count = c->hydro.count; + const int scount = c->stars.count; struct part *parts = c->hydro.parts; + struct spart *sparts = c->stars.parts; float h_max = 0.f; + float stars_h_max = 0.f; /* Treat cells will <1000 particles */ if (count < 1000 && !treated) { @@ -1170,6 +1176,10 @@ void cell_sanitize(struct cell *c, int treated) { if (parts[i].h == 0.f || parts[i].h > upper_h_max) parts[i].h = upper_h_max; } + for (int i = 0; i < scount; ++i) { + if (sparts[i].h == 0.f || sparts[i].h > upper_h_max) + sparts[i].h = upper_h_max; + } } /* Recurse and gather the new h_max values */ @@ -1183,16 +1193,20 @@ void cell_sanitize(struct cell *c, int treated) { /* And collect */ h_max = max(h_max, c->progeny[k]->hydro.h_max); + stars_h_max = max(stars_h_max, c->progeny[k]->stars.h_max); } } } else { /* Get the new value of h_max */ for (int i = 0; i < count; ++i) h_max = max(h_max, parts[i].h); + for (int i = 0; i < scount; ++i) + stars_h_max = max(stars_h_max, sparts[i].h); } /* Record the change */ c->hydro.h_max = h_max; + c->stars.h_max = stars_h_max; } /** @@ -1621,6 +1635,13 @@ void cell_activate_drift_gpart(struct cell *c, struct scheduler *s) { } } +/** + * @brief Activate the #spart drifts on the given cell. + */ +void cell_activate_drift_spart(struct cell *c, struct scheduler *s) { + cell_activate_drift_gpart(c, s); +} + /** * @brief Activate the sorts up a cell hierarchy. */ @@ -1692,6 +1713,7 @@ void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj, /* Store the current dx_max and h_max values. */ ci->hydro.dx_max_part_old = ci->hydro.dx_max_part; ci->hydro.h_max_old = ci->hydro.h_max; + if (cj != NULL) { cj->hydro.dx_max_part_old = cj->hydro.dx_max_part; cj->hydro.h_max_old = cj->hydro.h_max; @@ -2042,7 +2064,353 @@ void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj, */ void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj, struct scheduler *s) { - // LOIC: to implement + const struct engine *e = s->space->e; + + /* Store the current dx_max and h_max values. */ + ci->stars.dx_max_part_old = ci->stars.dx_max_part; + ci->stars.h_max_old = ci->stars.h_max; + + if (cj != NULL) { + cj->stars.dx_max_part_old = cj->stars.dx_max_part; + cj->stars.h_max_old = cj->stars.h_max; + } + + /* Self interaction? */ + if (cj == NULL) { + + /* Do anything? */ + if (ci->stars.count == 0 || !cell_is_active_stars(ci, e)) return; + + /* Recurse? */ + if (cell_can_recurse_in_self_stars_task(ci)) { + + /* Loop over all progenies and pairs of progenies */ + for (int j = 0; j < 8; j++) { + if (ci->progeny[j] != NULL) { + cell_activate_subcell_stars_tasks(ci->progeny[j], NULL, s); + for (int k = j + 1; k < 8; k++) + if (ci->progeny[k] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[j], ci->progeny[k], + s); + } + } + } else { + + /* We have reached the bottom of the tree: activate drift */ + cell_activate_drift_spart(ci, s); + cell_activate_drift_part(ci, s); + } + } + + /* Otherwise, pair interation */ + else { + + /* Should we even bother? */ + if (!cell_is_active_stars(ci, e) && !cell_is_active_stars(cj, e)) return; + if (ci->stars.count == 0 || cj->stars.count == 0) return; + + /* Get the orientation of the pair. */ + double shift[3]; + int sid = space_getsid(s->space, &ci, &cj, shift); + + /* recurse? */ + if (cell_can_recurse_in_pair_stars_task(ci) && + cell_can_recurse_in_pair_stars_task(cj)) { + + /* Different types of flags. */ + switch (sid) { + + /* Regular sub-cell interactions of a single cell. */ + case 0: /* ( 1 , 1 , 1 ) */ + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[0], + s); + break; + + case 1: /* ( 1 , 1 , 0 ) */ + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[0], + s); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[1], + s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[0], + s); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[1], + s); + break; + + case 2: /* ( 1 , 1 , -1 ) */ + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[1], + s); + break; + + case 3: /* ( 1 , 0 , 1 ) */ + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[0], + s); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[2], + s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[0], + s); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[2], + s); + break; + + case 4: /* ( 1 , 0 , 0 ) */ + if (ci->progeny[4] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[0], + s); + if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[1], + s); + if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[2], + s); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[3], + s); + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[0], + s); + if (ci->progeny[5] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[1], + s); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[2], + s); + if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[3], + s); + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[0], + s); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[1], + s); + if (ci->progeny[6] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[2], + s); + if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[3], + s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[0], + s); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[1], + s); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[2], + s); + if (ci->progeny[7] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[3], + s); + break; + + case 5: /* ( 1 , 0 , -1 ) */ + if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[1], + s); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[3], + s); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[1], + s); + if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[3], + s); + break; + + case 6: /* ( 1 , -1 , 1 ) */ + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[2], + s); + break; + + case 7: /* ( 1 , -1 , 0 ) */ + if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[2], + s); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[3], + s); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[2], + s); + if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[3], + s); + break; + + case 8: /* ( 1 , -1 , -1 ) */ + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[4], cj->progeny[3], + s); + break; + + case 9: /* ( 0 , 1 , 1 ) */ + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[0], + s); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[4], + s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[0], + s); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[4], + s); + break; + + case 10: /* ( 0 , 1 , 0 ) */ + if (ci->progeny[2] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[2], cj->progeny[0], + s); + if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[2], cj->progeny[1], + s); + if (ci->progeny[2] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[2], cj->progeny[4], + s); + if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[2], cj->progeny[5], + s); + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[0], + s); + if (ci->progeny[3] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[1], + s); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[4], + s); + if (ci->progeny[3] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[5], + s); + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[0], + s); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[1], + s); + if (ci->progeny[6] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[4], + s); + if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[5], + s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[0], + s); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[1], + s); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[4], + s); + if (ci->progeny[7] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[5], + s); + break; + + case 11: /* ( 0 , 1 , -1 ) */ + if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[2], cj->progeny[1], + s); + if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[2], cj->progeny[5], + s); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[1], + s); + if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[6], cj->progeny[5], + s); + break; + + case 12: /* ( 0 , 0 , 1 ) */ + if (ci->progeny[1] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[1], cj->progeny[0], + s); + if (ci->progeny[1] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[1], cj->progeny[2], + s); + if (ci->progeny[1] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[1], cj->progeny[4], + s); + if (ci->progeny[1] != NULL && cj->progeny[6] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[1], cj->progeny[6], + s); + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[0], + s); + if (ci->progeny[3] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[2], + s); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[4], + s); + if (ci->progeny[3] != NULL && cj->progeny[6] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[3], cj->progeny[6], + s); + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[0], + s); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[2], + s); + if (ci->progeny[5] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[4], + s); + if (ci->progeny[5] != NULL && cj->progeny[6] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[5], cj->progeny[6], + s); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[0], + s); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[2], + s); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[4], + s); + if (ci->progeny[7] != NULL && cj->progeny[6] != NULL) + cell_activate_subcell_stars_tasks(ci->progeny[7], cj->progeny[6], + s); + break; + } + + } + + /* Otherwise, activate the sorts and drifts. */ + else if (cell_is_active_stars(ci, e) || cell_is_active_stars(cj, e)) { + + /* We are going to interact this pair, so store some values. */ + atomic_or(&ci->hydro.requires_sorts, 1 << sid); + atomic_or(&cj->hydro.requires_sorts, 1 << sid); + ci->hydro.dx_max_sort_old = ci->hydro.dx_max_sort; + cj->hydro.dx_max_sort_old = cj->hydro.dx_max_sort; + + /* Activate the drifts if the cells are local. */ + if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); + if (ci->nodeID == engine_rank) cell_activate_drift_spart(ci, s); + if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s); + if (cj->nodeID == engine_rank) cell_activate_drift_spart(cj, s); + + /* Do we need to sort the cells? */ + cell_activate_sorts(ci, sid, s); + cell_activate_sorts(cj, sid, s); + } + } /* Otherwise, pair interation */ } /** @@ -2988,11 +3356,15 @@ void cell_drift_part(struct cell *c, const struct engine *e, int force) { */ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { + const float stars_h_max = e->stars_properties->h_max; const integertime_t ti_old_gpart = c->grav.ti_old_part; const integertime_t ti_current = e->ti_current; struct gpart *const gparts = c->grav.parts; struct spart *const sparts = c->stars.parts; + float dx_max = 0.f, dx2_max = 0.f; + float cell_h_max = 0.f; + /* Drift irrespective of cell flags? */ force |= c->grav.do_drift; @@ -3014,9 +3386,17 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { /* Recurse */ cell_drift_gpart(cp, e, force); + + /* Update */ + dx_max = max(dx_max, cp->stars.dx_max_part); + cell_h_max = max(cell_h_max, cp->stars.h_max); } } + /* Store the values */ + c->stars.h_max = cell_h_max; + c->stars.dx_max_part = dx_max; + /* Update the time of the last drift */ c->grav.ti_old_part = ti_current; @@ -3094,8 +3474,26 @@ void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { } #endif - /* Note: no need to compute dx_max as all spart have a gpart */ - } + /* Limit h to within the allowed range */ + sp->h = min(sp->h, stars_h_max); + + /* Compute (square of) motion since last cell construction */ + const float dx2 = sp->x_diff[0] * sp->x_diff[0] + + sp->x_diff[1] * sp->x_diff[1] + + sp->x_diff[2] * sp->x_diff[2]; + dx2_max = max(dx2_max, dx2); + + /* Maximal smoothing length */ + cell_h_max = max(cell_h_max, sp->h); + + } /* Note: no need to compute dx_max as all spart have a gpart */ + + /* Now, get the maximal particle motion from its square */ + dx_max = sqrtf(dx2_max); + + /* Store the values */ + c->stars.h_max = cell_h_max; + c->stars.dx_max_part = dx_max; /* Update the time of the last drift */ c->grav.ti_old_part = ti_current; diff --git a/src/cell.h b/src/cell.h index c273ecf56a67852a6e587aff692f96a8001cc37f..d0bcf1592bc9fc01b828517b6b0b560b7ec4f1cc 100644 --- a/src/cell.h +++ b/src/cell.h @@ -144,6 +144,9 @@ struct pcell { /*! Number of #spart in this cell. */ int count; + /*! Maximal smoothing length. */ + double h_max; + } stars; /*! Relative indices of the cell's progeny. */ @@ -188,6 +191,10 @@ struct pcell_step { /*! Stars variables */ struct { + + /*! Maximal distance any #part has travelled since last rebuild */ + float dx_max_part; + } stars; }; @@ -426,6 +433,18 @@ struct cell { /*! Nr of #spart in this cell. */ int count; + /*! Max smoothing length in this cell. */ + double h_max; + + /*! Values of h_max before the drifts, used for sub-cell tasks. */ + float h_max_old; + + /*! Maximum part movement in this cell since last construction. */ + float dx_max_part; + + /*! Values of dx_max before the drifts, used for sub-cell tasks. */ + float dx_max_part_old; + /*! Dependency implicit task for the star ghost (in->ghost->out)*/ struct task *ghost_in; @@ -662,8 +681,12 @@ cell_can_recurse_in_self_hydro_task(const struct cell *c) { __attribute__((always_inline)) INLINE static int cell_can_recurse_in_pair_stars_task(const struct cell *c) { - // LOIC: To implement - return 0; + /* Is the cell split ? */ + /* If so, is the cut-off radius plus the max distance the parts have moved */ + /* smaller than the sub-cell sizes ? */ + /* Note: We use the _old values as these might have been updated by a drift */ + return c->split && ((kernel_gamma * c->stars.h_max_old + + c->stars.dx_max_part_old) < 0.5f * c->dmin); } /** @@ -675,8 +698,8 @@ cell_can_recurse_in_pair_stars_task(const struct cell *c) { __attribute__((always_inline)) INLINE static int cell_can_recurse_in_self_stars_task(const struct cell *c) { - // LOIC: To implement - return 0; + /* Is the cell split and not smaller than the smoothing length? */ + return c->split && (kernel_gamma * c->stars.h_max_old < 0.5f * c->dmin); } /** @@ -724,8 +747,13 @@ __attribute__((always_inline)) INLINE static int cell_can_split_self_hydro_task( __attribute__((always_inline)) INLINE static int cell_can_split_pair_stars_task( const struct cell *c) { - // LOIC: To implement - return 0; + /* Is the cell split ? */ + /* If so, is the cut-off radius with some leeway smaller than */ + /* the sub-cell sizes ? */ + /* Note that since tasks are create after a rebuild no need to take */ + /* into account any part motion (i.e. dx_max == 0 here) */ + return c->split && + (space_stretch * kernel_gamma * c->stars.h_max < 0.5f * c->dmin); } /** @@ -737,8 +765,13 @@ __attribute__((always_inline)) INLINE static int cell_can_split_pair_stars_task( __attribute__((always_inline)) INLINE static int cell_can_split_self_stars_task( const struct cell *c) { - // LOIC: To implement - return 0; + /* Is the cell split ? */ + /* If so, is the cut-off radius with some leeway smaller than */ + /* the sub-cell sizes ? */ + /* Note: No need for more checks here as all the sub-pairs and sub-self */ + /* tasks will be created. So no need to check for h_max */ + return c->split && + (space_stretch * kernel_gamma * c->stars.h_max < 0.5f * c->dmin); } /** diff --git a/src/drift.h b/src/drift.h index ff0fea744012b7143afed2a05b286d4646cdd69a..351b15381dde9a95af908003b1c8df01b56610fa 100644 --- a/src/drift.h +++ b/src/drift.h @@ -137,6 +137,12 @@ __attribute__((always_inline)) INLINE static void drift_spart( sp->x[0] += sp->v[0] * dt_drift; sp->x[1] += sp->v[1] * dt_drift; sp->x[2] += sp->v[2] * dt_drift; + + /* Compute offsets since last cell construction */ + for (int k = 0; k < 3; k++) { + const float dx = sp->v[k] * dt_drift; + sp->x_diff[k] -= dx; + } } #endif /* SWIFT_DRIFT_H */ diff --git a/src/engine.c b/src/engine.c index 8bd453e088f735713f73d239c4fcc8e224a546c2..17010a1104b42dd854df1aeb764b8eeb994f9a0c 100644 --- a/src/engine.c +++ b/src/engine.c @@ -3329,14 +3329,11 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, } /** - * @brief Duplicates the first hydro loop and construct all the - * dependencies for the stars + * @brief Creates all the task dependencies for the stars * - * This is done by looping over all the previously constructed tasks - * and adding another task involving the same cells but this time - * corresponding to the second hydro loop over neighbours. - * With all the relevant tasks for a given cell available, we construct - * all the dependencies for that cell. + * @param map_data The tasks + * @param num_elements number of tasks + * @param extra_data The #engine */ void engine_link_stars_tasks_mapper(void *map_data, int num_elements, void *extra_data) { @@ -3358,8 +3355,9 @@ void engine_link_stars_tasks_mapper(void *map_data, int num_elements, /* Now, build all the dependencies for the stars */ engine_make_stars_loops_dependencies(sched, t, t->ci); - scheduler_addunlock(sched, t->ci->stars.ghost_out, - t->ci->super->end_force); + if (t->ci == t->ci->super) + scheduler_addunlock(sched, t->ci->super->stars.ghost_out, + t->ci->super->end_force); } /* Otherwise, pair interaction? */ @@ -5062,6 +5060,7 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, #endif if (e->nodeID == 0) scheduler_write_dependencies(&e->sched, e->verbose); + if (e->nodeID == 0) scheduler_write_task_level(&e->sched); /* Run the 0th time-step */ TIMER_TIC2; @@ -5143,6 +5142,20 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, } } + if (s->cells_top != NULL && s->nr_sparts > 0) { + for (int i = 0; i < s->nr_cells; i++) { + struct cell *c = &s->cells_top[i]; + if (c->nodeID == engine_rank && c->stars.count > 0) { + float spart_h_max = c->stars.parts[0].h; + for (int k = 1; k < c->stars.count; k++) { + if (c->stars.parts[k].h > spart_h_max) + spart_h_max = c->stars.parts[k].h; + } + c->stars.h_max = max(spart_h_max, c->stars.h_max); + } + } + } + clocks_gettime(&time2); #ifdef SWIFT_DEBUG_CHECKS diff --git a/src/map.c b/src/map.c index 6b693885b08e1c103faa0b1b33d53cdc996c8877..68c3618fcdb10a618a97e5d1a2565d58db677cdb 100644 --- a/src/map.c +++ b/src/map.c @@ -191,6 +191,13 @@ void map_h_max(struct part *p, struct cell *c, void *data) { if (p->h > (*p2)->h) *p2 = p; } +void map_stars_h_max(struct spart *p, struct cell *c, void *data) { + + struct spart **p2 = (struct spart **)data; + + if (p->h > (*p2)->h) *p2 = p; +} + /** * @brief Mapping function for neighbour count. */ diff --git a/src/map.h b/src/map.h index 950a5fd96ebdc7177b41912b1565163f33de8701..6ad05e30df0644e1ee37b1b912bc11681ccf837c 100644 --- a/src/map.h +++ b/src/map.h @@ -34,6 +34,7 @@ void map_wcount_min(struct part *p, struct cell *c, void *data); void map_wcount_max(struct part *p, struct cell *c, void *data); void map_h_min(struct part *p, struct cell *c, void *data); void map_h_max(struct part *p, struct cell *c, void *data); +void map_stars_h_max(struct spart *p, struct cell *c, void *data); void map_icount(struct part *p, struct cell *c, void *data); void map_dump(struct part *p, struct cell *c, void *data); diff --git a/src/runner.c b/src/runner.c index 33aaeb03ceebcae18440830882d75796e97234db..424f6e526ae0c9a5b2be4283b69f34b2bd8e11be 100644 --- a/src/runner.c +++ b/src/runner.c @@ -283,7 +283,7 @@ void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) { for (struct cell *finger = c; finger != NULL; finger = finger->parent) { /* Run through this cell's density interactions. */ - for (struct link *l = finger->hydro.density; l != NULL; l = l->next) { + for (struct link *l = finger->stars.density; l != NULL; l = l->next) { #ifdef SWIFT_DEBUG_CHECKS if (l->t->ti_run < r->e->ti_current) @@ -2322,6 +2322,8 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { const size_t nr_sparts = c->stars.count; const integertime_t ti_current = r->e->ti_current; + error("Need to add h_max computation"); + TIMER_TIC; integertime_t ti_gravity_end_min = max_nr_timesteps; diff --git a/src/runner_doiact_stars.h b/src/runner_doiact_stars.h index 5e656e1420134026ea6bc1d5c9c3df27887a4797..e696e4fd10853536008d9d9fafc90e6475fd291a 100644 --- a/src/runner_doiact_stars.h +++ b/src/runner_doiact_stars.h @@ -963,7 +963,7 @@ void runner_doself_branch_stars_density(struct runner *r, struct cell *c) { if (!cell_is_active_stars(c, e)) return; /* Did we mess up the recursion? */ - if (c->hydro.h_max_old * kernel_gamma > c->dmin) + if (c->stars.h_max_old * kernel_gamma > c->dmin) error("Cell smaller than smoothing length"); runner_doself_stars_density(r, c, 1); diff --git a/src/scheduler.c b/src/scheduler.c index 46be9eb4de6ddae7b00ee937bacea486a614e896..4c1c46a2c8cc6fa0697ac557e7560a1bcb128d2c 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -876,8 +876,6 @@ static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) { */ static void scheduler_splittask_stars(struct task *t, struct scheduler *s) { - // LOIC: This is un-tested. Need to verify that it works. - /* Iterate on this task until we're done with it. */ int redo = 1; while (redo) { @@ -1829,6 +1827,7 @@ void scheduler_reweight(struct scheduler *s, int verbose) { const float gcount_i = (t->ci != NULL) ? t->ci->grav.count : 0.f; const float gcount_j = (t->cj != NULL) ? t->cj->grav.count : 0.f; const float scount_i = (t->ci != NULL) ? t->ci->stars.count : 0.f; + const float scount_j = (t->cj != NULL) ? t->cj->stars.count : 0.f; switch (t->type) { case task_type_sort: @@ -1837,22 +1836,30 @@ void scheduler_reweight(struct scheduler *s, int verbose) { break; case task_type_self: - // LOIC: Need to do something for stars here if (t->subtype == task_subtype_grav) cost = 1.f * (wscale * gcount_i) * gcount_i; else if (t->subtype == task_subtype_external_grav) cost = 1.f * wscale * gcount_i; + else if (t->subtype == task_subtype_stars_density) + cost = 1.f * wscale * scount_i * count_i; else cost = 1.f * (wscale * count_i) * count_i; break; case task_type_pair: - // LOIC: Need to do something for stars here if (t->subtype == task_subtype_grav) { if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) cost = 3.f * (wscale * gcount_i) * gcount_j; else cost = 2.f * (wscale * gcount_i) * gcount_j; + } else if (t->subtype == task_subtype_stars_density) { + if (t->ci->nodeID != nodeID) + cost = 3.f * wscale * count_i * scount_j * sid_scale[t->flags]; + else if (t->cj->nodeID != nodeID) + cost = 3.f * wscale * scount_i * count_j * sid_scale[t->flags]; + else + cost = 2.f * wscale * (scount_i * count_j + scount_j * count_i) * + sid_scale[t->flags]; } else { if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) cost = 3.f * (wscale * count_i) * count_j * sid_scale[t->flags]; @@ -1862,23 +1869,34 @@ void scheduler_reweight(struct scheduler *s, int verbose) { break; case task_type_sub_pair: - // LOIC: Need to do something for stars here - if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) { #ifdef SWIFT_DEBUG_CHECKS - if (t->flags < 0) error("Negative flag value!"); + if (t->flags < 0) error("Negative flag value!"); #endif - cost = 3.f * (wscale * count_i) * count_j * sid_scale[t->flags]; + if (t->subtype == task_subtype_stars_density) { + if (t->ci->nodeID != nodeID) { + cost = 3.f * (wscale * count_i) * scount_j * sid_scale[t->flags]; + } else if (t->cj->nodeID != nodeID) { + cost = 3.f * (wscale * scount_i) * count_j * sid_scale[t->flags]; + } else { + cost = 2.f * wscale * (scount_i * count_j + scount_j * count_i) * + sid_scale[t->flags]; + } + } else { -#ifdef SWIFT_DEBUG_CHECKS - if (t->flags < 0) error("Negative flag value!"); -#endif - cost = 2.f * (wscale * count_i) * count_j * sid_scale[t->flags]; + if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) { + cost = 3.f * (wscale * count_i) * count_j * sid_scale[t->flags]; + } else { + cost = 2.f * (wscale * count_i) * count_j * sid_scale[t->flags]; + } } break; case task_type_sub_self: - // LOIC: Need to do something for stars here - cost = 1.f * (wscale * count_i) * count_i; + if (t->subtype == task_subtype_stars_density) { + cost = 1.f * (wscale * scount_i) * count_i; + } else { + cost = 1.f * (wscale * count_i) * count_i; + } break; case task_type_ghost: if (t->ci == t->ci->hydro.super) cost = wscale * count_i; @@ -2526,3 +2544,61 @@ void scheduler_free_tasks(struct scheduler *s) { } s->size = 0; } + +/** + * @brief write down each task level + */ +void scheduler_write_task_level(const struct scheduler *s) { + /* init */ + const int max_depth = 30; + const struct task *tasks = s->tasks; + int nr_tasks = s->nr_tasks; + + /* Init counter */ + int size = task_type_count * task_subtype_count * max_depth; + int *count = (int *)malloc(size * sizeof(int)); + if (count == NULL) error("Failed to allocate memory"); + + for (int i = 0; i < size; i++) count[i] = 0; + + /* Count tasks */ + for (int i = 0; i < nr_tasks; i++) { + const struct task *t = &tasks[i]; + if (t->ci) { + + if ((int)t->ci->depth >= max_depth) + error("Cell is too deep, you need to increase max_depth"); + + int ind = t->type * task_subtype_count * max_depth; + ind += t->subtype * max_depth; + ind += (int)t->ci->depth; + + count[ind] += 1; + } + } + + /* Open file */ + char filename[200] = "task_level.txt"; + FILE *f = fopen(filename, "w"); + if (f == NULL) error("Error opening task level file."); + + /* Print header */ + fprintf(f, "# task_type, task_subtype, depth, count\n"); + + /* Print tasks level */ + for (int i = 0; i < size; i++) { + if (count[i] == 0) continue; + + int type = i / (task_subtype_count * max_depth); + int subtype = i - task_subtype_count * max_depth * type; + subtype /= max_depth; + int depth = i - task_subtype_count * max_depth * type; + depth -= subtype * max_depth; + fprintf(f, "%s %s %i %i\n", taskID_names[type], subtaskID_names[subtype], + depth, count[i]); + } + + /* clean up */ + fclose(f); + free(count); +} diff --git a/src/scheduler.h b/src/scheduler.h index 1a75544de12b8402e553e3ae2b84e2d8a65c56e8..f1e130c6ce2a8538b0126e86ee0cbd79cf5a0e0d 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -173,5 +173,6 @@ void scheduler_print_tasks(const struct scheduler *s, const char *fileName); void scheduler_clean(struct scheduler *s); void scheduler_free_tasks(struct scheduler *s); void scheduler_write_dependencies(struct scheduler *s, int verbose); +void scheduler_write_task_level(const struct scheduler *s); #endif /* SWIFT_SCHEDULER_H */ diff --git a/src/space.c b/src/space.c index e6c78d782ac6bd276414502b3d6f517d694d200e..5d75e06027b4cbd63bd49446b783a37c03fefa0c 100644 --- a/src/space.c +++ b/src/space.c @@ -174,6 +174,7 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->grav.mm = NULL; c->hydro.dx_max_part = 0.0f; c->hydro.dx_max_sort = 0.0f; + c->stars.dx_max_part = 0.f; c->hydro.sorted = 0; c->hydro.count = 0; c->grav.count = 0; @@ -266,6 +267,7 @@ void space_free_cells(struct space *s) { void space_regrid(struct space *s, int verbose) { const size_t nr_parts = s->nr_parts; + const size_t nr_sparts = s->nr_sparts; const ticks tic = getticks(); const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0; @@ -279,6 +281,9 @@ void space_regrid(struct space *s, int verbose) { if (c->hydro.h_max > h_max) { h_max = s->cells_top[k].hydro.h_max; } + if (c->stars.h_max > h_max) { + h_max = s->cells_top[k].stars.h_max; + } } } else if (s->cells_top != NULL) { for (int k = 0; k < s->nr_cells; k++) { @@ -286,11 +291,17 @@ void space_regrid(struct space *s, int verbose) { if (c->nodeID == engine_rank && c->hydro.h_max > h_max) { h_max = s->cells_top[k].hydro.h_max; } + if (c->nodeID == engine_rank && c->stars.h_max > h_max) { + h_max = s->cells_top[k].stars.h_max; + } } } else { for (size_t k = 0; k < nr_parts; k++) { if (s->parts[k].h > h_max) h_max = s->parts[k].h; } + for (size_t k = 0; k < nr_sparts; k++) { + if (s->sparts[k].h > h_max) h_max = s->sparts[k].h; + } } } @@ -1787,6 +1798,7 @@ void space_split_recursive(struct space *s, struct cell *c, const int depth = c->depth; int maxdepth = 0; float h_max = 0.0f; + float stars_h_max = 0.f; integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, ti_hydro_beg_max = 0; integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, @@ -1877,6 +1889,8 @@ void space_split_recursive(struct space *s, struct cell *c, cp->hydro.h_max = 0.f; cp->hydro.dx_max_part = 0.f; cp->hydro.dx_max_sort = 0.f; + cp->stars.h_max = 0.f; + cp->stars.dx_max_part = 0.f; cp->nodeID = c->nodeID; cp->parent = c; cp->super = NULL; @@ -1925,6 +1939,7 @@ void space_split_recursive(struct space *s, struct cell *c, /* Update the cell-wide properties */ h_max = max(h_max, cp->hydro.h_max); + stars_h_max = max(h_max, cp->stars.h_max); ti_hydro_end_min = min(ti_hydro_end_min, cp->hydro.ti_end_min); ti_hydro_end_max = max(ti_hydro_end_max, cp->hydro.ti_end_max); ti_hydro_beg_max = max(ti_hydro_beg_max, cp->hydro.ti_beg_max); @@ -2094,6 +2109,12 @@ void space_split_recursive(struct space *s, struct cell *c, #endif gravity_time_bin_min = min(gravity_time_bin_min, sparts[k].time_bin); gravity_time_bin_max = max(gravity_time_bin_max, sparts[k].time_bin); + stars_h_max = max(stars_h_max, sparts[k].h); + + /* Reset x_diff */ + sparts[k].x_diff[0] = 0.f; + sparts[k].x_diff[1] = 0.f; + sparts[k].x_diff[2] = 0.f; } /* Convert into integer times */ @@ -2142,6 +2163,7 @@ void space_split_recursive(struct space *s, struct cell *c, c->grav.ti_end_min = ti_gravity_end_min; c->grav.ti_end_max = ti_gravity_end_max; c->grav.ti_beg_max = ti_gravity_beg_max; + c->stars.h_max = stars_h_max; c->maxdepth = maxdepth; /* Set ownership according to the start of the parts array. */ diff --git a/src/stars/Default/stars_part.h b/src/stars/Default/stars_part.h index fe5be3c7f6f190adea2c81ee14c0cad7a10fbd2c..1922cc0e260a3c0f2052649a87252019514e637f 100644 --- a/src/stars/Default/stars_part.h +++ b/src/stars/Default/stars_part.h @@ -38,6 +38,9 @@ struct spart { /*! Particle position. */ double x[3]; + /* Offset between current position and position at last tree rebuild. */ + float x_diff[3]; + /*! Particle velocity. */ float v[3];