diff --git a/examples/EAGLE_12/eagle_12.yml b/examples/EAGLE_12/eagle_12.yml index cc5d87ca025fbb950b8b5c46c62831c5ec5bb003..53eb99765d859a26dcdeb8c9148fb74c058c1544 100644 --- a/examples/EAGLE_12/eagle_12.yml +++ b/examples/EAGLE_12/eagle_12.yml @@ -27,7 +27,7 @@ Statistics: Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. epsilon: 0.001 # Softening length (in internal units). - theta: 0.7 # Opening angle (Multipole acceptance criterion) + theta: 0.85 # Opening angle (Multipole acceptance criterion) # Parameters for the hydrodynamics scheme SPH: diff --git a/examples/EAGLE_25/eagle_25.yml b/examples/EAGLE_25/eagle_25.yml index 5cced70ed7b94ab572492eb57eb929925bb3157a..c01abfe94b98e8366ac05f1e3bce50a3931738c5 100644 --- a/examples/EAGLE_25/eagle_25.yml +++ b/examples/EAGLE_25/eagle_25.yml @@ -13,6 +13,9 @@ TimeIntegration: dt_min: 1e-10 # The minimal time-step size of the simulation (in internal units). dt_max: 1e-4 # The maximal time-step size of the simulation (in internal units). +Scheduler: + max_top_level_cells: 20 + # Parameters governing the snapshots Snapshots: basename: eagle # Common part of the name of output files @@ -26,8 +29,8 @@ Statistics: # Parameters for the self-gravity scheme Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. - epsilon: 0.0001 # Softening length (in internal units). - theta: 0.7 # Opening angle (Multipole acceptance criterion) + epsilon: 0.001 # Softening length (in internal units). + theta: 0.85 # Opening angle (Multipole acceptance criterion) # Parameters for the hydrodynamics scheme SPH: diff --git a/examples/EAGLE_50/eagle_50.yml b/examples/EAGLE_50/eagle_50.yml index 05199ae8e7e36164e592ad94ac608540381b485f..2a6b1ef156f220ebc63bded8bbd80b21fcf6cde2 100644 --- a/examples/EAGLE_50/eagle_50.yml +++ b/examples/EAGLE_50/eagle_50.yml @@ -26,8 +26,8 @@ Statistics: # Parameters for the self-gravity scheme Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. - epsilon: 0.0001 # Softening length (in internal units). - theta: 0.7 # Opening angle (Multipole acceptance criterion) + epsilon: 0.001 # Softening length (in internal units). + theta: 0.85 # Opening angle (Multipole acceptance criterion) # Parameters for the hydrodynamics scheme SPH: diff --git a/examples/EAGLE_6/eagle_6.yml b/examples/EAGLE_6/eagle_6.yml index cf00a17694e9848825d96e1e5267961908dbbbb9..6586d0d5bb5467ae55d18c43f83f01b4c715ec86 100644 --- a/examples/EAGLE_6/eagle_6.yml +++ b/examples/EAGLE_6/eagle_6.yml @@ -6,6 +6,10 @@ InternalUnitSystem: UnitCurrent_in_cgs: 1 # Amperes UnitTemp_in_cgs: 1 # Kelvin +Scheduler: + max_top_level_cells: 8 + tasks_per_cell: 50 + # Parameters governing the time integration TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). @@ -26,8 +30,8 @@ Statistics: # Parameters for the self-gravity scheme Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.85 # Opening angle (Multipole acceptance criterion) epsilon: 0.001 # Softening length (in internal units). + theta: 0.85 # Opening angle (Multipole acceptance criterion) # Parameters for the hydrodynamics scheme SPH: diff --git a/examples/UniformDMBox/uniformBox.yml b/examples/UniformDMBox/uniformBox.yml index e59d677b308ca70f212f74c7e4d8b79f015c77a9..1abb256671f1cc8c87daa711bd63f7ea6abdbbab 100644 --- a/examples/UniformDMBox/uniformBox.yml +++ b/examples/UniformDMBox/uniformBox.yml @@ -9,9 +9,9 @@ InternalUnitSystem: # Parameters governing the time integration TimeIntegration: time_begin: 0. # The starting time of the simulation (in internal units). - time_end: 1. # The end time of the simulation (in internal units). + time_end: 100. # The end time of the simulation (in internal units). dt_min: 1e-6 # The minimal time-step size of the simulation (in internal units). - dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). + dt_max: 1. # The maximal time-step size of the simulation (in internal units). Scheduler: max_top_level_cells: 8 @@ -21,18 +21,18 @@ Scheduler: Snapshots: basename: uniformDMBox # Common part of the name of output files time_first: 0. # Time of the first output (in internal units) - delta_time: 0.01 # Time difference between consecutive outputs (in internal units) + delta_time: 10. # Time difference between consecutive outputs (in internal units) # Parameters for the self-gravity scheme Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. - theta: 0.7 # Opening angle (Multipole acceptance criterion) - epsilon: 0.00001 # Softening length (in internal units). + theta: 0.8 # Opening angle (Multipole acceptance criterion) + epsilon: 0.01 # Softening length (in internal units). # Parameters governing the conserved quantities statistics Statistics: - delta_time: 1e-2 # Time between statistics output + delta_time: 5. # Time between statistics output # Parameters related to the initial conditions InitialConditions: - file_name: ./uniformDMBox_50.hdf5 # The file to read + file_name: ./uniformDMBox_16.hdf5 # The file to read diff --git a/examples/analyse_tasks.py b/examples/analyse_tasks.py index a689f007d4a6001b73ccfac9d55d47d2863cbf5c..bd20d0f796598d8a9f0b3e906bbb6048ac03b866 100755 --- a/examples/analyse_tasks.py +++ b/examples/analyse_tasks.py @@ -52,11 +52,10 @@ infile = args.input # Tasks and subtypes. Indexed as in tasks.h. TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", - "init_grav", "ghost", "extra_ghost", "drift_part", - "drift_gpart", "kick1", "kick2", "timestep", "send", "recv", - "grav_top_level", "grav_long_range", "grav_mm", "grav_down", - "cooling", "sourceterms", "count"] - + "init_grav", "ghost_in", "ghost", "ghost_out", "extra_ghost", "drift_part", "drift_gpart", + "kick1", "kick2", "timestep", "send", "recv", "grav_top_level", + "grav_long_range", "grav_ghost_in", "grav_ghost_out", "grav_mm", "grav_down", "cooling", + "sourceterms", "count"] SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav", "tend", "xv", "rho", "gpart", "multipole", "spart", "count"] diff --git a/examples/check_ngbs.py b/examples/check_ngbs.py index 6b403c6a4555c442e3004921748c57890cdb2e0f..fca975e76457445480eb7b19e60e8f9174dee941 100644 --- a/examples/check_ngbs.py +++ b/examples/check_ngbs.py @@ -18,8 +18,6 @@ inputFile2 = "" # Check list of density neighbours and check that they are correct. def check_density_neighbours(pids, ngb_ids_naive, ngb_ids_sort, mask, pos, h, num_invalid, acc): - error_val = False - for k in range(0,num_invalid): # Filter neighbour lists for valid particle ids @@ -28,7 +26,21 @@ def check_density_neighbours(pids, ngb_ids_naive, ngb_ids_sort, mask, pos, h, nu # Check neighbour lists for differences id_list = set(filter_neigh_naive).symmetric_difference(set(filter_neigh_sort)) + + # Check for duplicate IDs + duplicate_check_naive = len(filter_neigh_naive) != len(set(filter_neigh_naive)) + duplicate_check_sort = len(filter_neigh_sort) != len(set(filter_neigh_sort)) + + if duplicate_check_naive: + print "Duplicate neighbour ID found in: ", inputFile1 + print filter_neigh_naive + return True + if duplicate_check_sort: + print "Duplicate neighbour ID found in: ", inputFile2 + print filter_neigh_sort + return True + pid = pids[mask][k] # Loop over discrepancies and check if they are actually neighbours @@ -53,9 +65,9 @@ def check_density_neighbours(pids, ngb_ids_naive, ngb_ids_sort, mask, pos, h, nu if diff < acc * hig2: print "Missing interaction due to precision issue will be ignored." else: - error_val = True + return True - return error_val + return False # Check list of force neighbours and check that they are correct. def check_force_neighbours(pids, ngb_ids_naive, ngb_ids_sort, mask, pos, h, num_invalid, acc): diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py index 6f9dca798b5a72a7adea3c20ae74c1071fe30d40..60c70cf9f8167ed44829765bd5cf476c2a800f84 100755 --- a/examples/plot_tasks.py +++ b/examples/plot_tasks.py @@ -110,9 +110,9 @@ pl.rcParams.update(PLOT_PARAMS) # Tasks and subtypes. Indexed as in tasks.h. TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", - "init_grav", "ghost", "extra_ghost", "drift_part", "drift_gpart", + "init_grav", "ghost_in", "ghost", "ghost_out", "extra_ghost", "drift_part", "drift_gpart", "kick1", "kick2", "timestep", "send", "recv", "grav_top_level", - "grav_long_range", "grav_mm", "grav_down", "cooling", + "grav_long_range", "grav_ghost_in", "grav_ghost_out", "grav_mm", "grav_down", "cooling", "sourceterms", "count"] SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav", @@ -123,7 +123,7 @@ FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force", "sub_self/density", "pair/force", "pair/density", "pair/grav", "sub_pair/force", "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho", - "recv/tend", "send/tend"] + "recv/tend", "send/tend", "recv/gpart", "send/gpart"] # A number of colours for the various types. Recycled when there are # more task types than colours... diff --git a/src/active.h b/src/active.h index f9ed4a0d40127f1544c744a61b05515a19857b05..9eb4123392f4b1b6bca2f68d59991ff90b92614d 100644 --- a/src/active.h +++ b/src/active.h @@ -82,19 +82,19 @@ __attribute__((always_inline)) INLINE static int cell_are_gpart_drifted( * @param e The #engine containing information about the current time. * @return 1 if the #cell contains at least an active particle, 0 otherwise. */ -__attribute__((always_inline)) INLINE static int cell_is_active( +__attribute__((always_inline)) INLINE static int cell_is_active_hydro( const struct cell *c, const struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS - if (c->ti_end_min < e->ti_current) + if (c->ti_hydro_end_min < e->ti_current) error( "cell in an impossible time-zone! c->ti_end_min=%lld (t=%e) and " "e->ti_current=%lld (t=%e)", - c->ti_end_min, c->ti_end_min * e->timeBase, e->ti_current, + c->ti_hydro_end_min, c->ti_hydro_end_min * e->timeBase, e->ti_current, e->ti_current * e->timeBase); #endif - return (c->ti_end_min == e->ti_current); + return (c->ti_hydro_end_min == e->ti_current); } /** @@ -104,18 +104,61 @@ __attribute__((always_inline)) INLINE static int cell_is_active( * @param e The #engine containing information about the current time. * @return 1 if all particles in a #cell are active, 0 otherwise. */ -__attribute__((always_inline)) INLINE static int cell_is_all_active( +__attribute__((always_inline)) INLINE static int cell_is_all_active_hydro( const struct cell *c, const struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS - if (c->ti_end_max < e->ti_current) + if (c->ti_hydro_end_max < e->ti_current) error( "cell in an impossible time-zone! c->ti_end_max=%lld " "e->ti_current=%lld", - c->ti_end_max, e->ti_current); + c->ti_hydro_end_max, e->ti_current); #endif - return (c->ti_end_max == e->ti_current); + return (c->ti_hydro_end_max == e->ti_current); +} + +/** + * @brief Does a cell contain any g-particle finishing their time-step now ? + * + * @param c The #cell. + * @param e The #engine containing information about the current time. + * @return 1 if the #cell contains at least an active particle, 0 otherwise. + */ +__attribute__((always_inline)) INLINE static int cell_is_active_gravity( + const struct cell *c, const struct engine *e) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->ti_gravity_end_min < e->ti_current) + error( + "cell in an impossible time-zone! c->ti_end_min=%lld (t=%e) and " + "e->ti_current=%lld (t=%e)", + c->ti_gravity_end_min, c->ti_gravity_end_min * e->timeBase, + e->ti_current, e->ti_current * e->timeBase); +#endif + + return (c->ti_gravity_end_min == e->ti_current); +} + +/** + * @brief Are *all* g-particles in a cell finishing their time-step now ? + * + * @param c The #cell. + * @param e The #engine containing information about the current time. + * @return 1 if all particles in a #cell are active, 0 otherwise. + */ +__attribute__((always_inline)) INLINE static int cell_is_all_active_gravity( + const struct cell *c, const struct engine *e) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->ti_gravity_end_max < e->ti_current) + error( + "cell in an impossible time-zone! c->ti_end_max=%lld " + "e->ti_current=%lld", + c->ti_gravity_end_max, e->ti_current); +#endif + + return (c->ti_gravity_end_max == e->ti_current); } /** @@ -215,19 +258,41 @@ __attribute__((always_inline)) INLINE static int spart_is_active( * @param e The #engine containing information about the current time. * @return 1 if the #cell contains at least an active particle, 0 otherwise. */ -__attribute__((always_inline)) INLINE static int cell_is_starting( +__attribute__((always_inline)) INLINE static int cell_is_starting_hydro( const struct cell *c, const struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS - if (c->ti_beg_max > e->ti_current) + if (c->ti_hydro_beg_max > e->ti_current) error( "cell in an impossible time-zone! c->ti_beg_max=%lld (t=%e) and " "e->ti_current=%lld (t=%e)", - c->ti_beg_max, c->ti_beg_max * e->timeBase, e->ti_current, + c->ti_hydro_beg_max, c->ti_hydro_beg_max * e->timeBase, e->ti_current, e->ti_current * e->timeBase); #endif - return (c->ti_beg_max == e->ti_current); + return (c->ti_hydro_beg_max == e->ti_current); +} + +/** + * @brief Does a cell contain any g-particle starting their time-step now ? + * + * @param c The #cell. + * @param e The #engine containing information about the current time. + * @return 1 if the #cell contains at least an active particle, 0 otherwise. + */ +__attribute__((always_inline)) INLINE static int cell_is_starting_gravity( + const struct cell *c, const struct engine *e) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->ti_gravity_beg_max > e->ti_current) + error( + "cell in an impossible time-zone! c->ti_beg_max=%lld (t=%e) and " + "e->ti_current=%lld (t=%e)", + c->ti_gravity_beg_max, c->ti_gravity_beg_max * e->timeBase, + e->ti_current, e->ti_current * e->timeBase); +#endif + + return (c->ti_gravity_beg_max == e->ti_current); } /** diff --git a/src/cell.c b/src/cell.c index 9c5d1bd3aae8a0982db79c8fd370354c5c840dac..9aa2e80636d98a829371fed06a1170f28cb85aa8 100644 --- a/src/cell.c +++ b/src/cell.c @@ -173,14 +173,20 @@ int cell_pack(struct cell *restrict c, struct pcell *restrict pc) { /* Start by packing the data of the current cell. */ pc->h_max = c->h_max; - pc->ti_end_min = c->ti_end_min; - pc->ti_end_max = c->ti_end_max; + pc->ti_hydro_end_min = c->ti_hydro_end_min; + pc->ti_hydro_end_max = c->ti_hydro_end_max; + pc->ti_gravity_end_min = c->ti_gravity_end_min; + pc->ti_gravity_end_max = c->ti_gravity_end_max; pc->ti_old_part = c->ti_old_part; pc->ti_old_gpart = c->ti_old_gpart; + pc->ti_old_multipole = c->ti_old_multipole; pc->count = c->count; pc->gcount = c->gcount; pc->scount = c->scount; c->tag = pc->tag = atomic_inc(&cell_next_tag) % cell_max_tag; +#ifdef SWIFT_DEBUG_CHECKS + pc->cellID = c->cellID; +#endif /* Fill in the progeny, depth-first recursion. */ int count = 1; @@ -217,14 +223,20 @@ int cell_unpack(struct pcell *restrict pc, struct cell *restrict c, /* Unpack the current pcell. */ c->h_max = pc->h_max; - c->ti_end_min = pc->ti_end_min; - c->ti_end_max = pc->ti_end_max; + c->ti_hydro_end_min = pc->ti_hydro_end_min; + c->ti_hydro_end_max = pc->ti_hydro_end_max; + c->ti_gravity_end_min = pc->ti_gravity_end_min; + c->ti_gravity_end_max = pc->ti_gravity_end_max; c->ti_old_part = pc->ti_old_part; c->ti_old_gpart = pc->ti_old_gpart; + c->ti_old_multipole = pc->ti_old_multipole; c->count = pc->count; c->gcount = pc->gcount; c->scount = pc->scount; c->tag = pc->tag; +#ifdef SWIFT_DEBUG_CHECKS + c->cellID = pc->cellID; +#endif /* Number of new cells created. */ int count = 1; @@ -283,7 +295,10 @@ int cell_pack_end_step(struct cell *restrict c, #ifdef WITH_MPI /* Pack this cell's data. */ - pcells[0].ti_end_min = c->ti_end_min; + pcells[0].ti_hydro_end_min = c->ti_hydro_end_min; + pcells[0].ti_hydro_end_max = c->ti_hydro_end_max; + pcells[0].ti_gravity_end_min = c->ti_gravity_end_min; + pcells[0].ti_gravity_end_max = c->ti_gravity_end_max; pcells[0].dx_max_part = c->dx_max_part; pcells[0].dx_max_gpart = c->dx_max_gpart; @@ -317,7 +332,10 @@ int cell_unpack_end_step(struct cell *restrict c, #ifdef WITH_MPI /* Unpack this cell's data. */ - c->ti_end_min = pcells[0].ti_end_min; + c->ti_hydro_end_min = pcells[0].ti_hydro_end_min; + c->ti_hydro_end_max = pcells[0].ti_hydro_end_max; + c->ti_gravity_end_min = pcells[0].ti_gravity_end_min; + c->ti_gravity_end_max = pcells[0].ti_gravity_end_max; c->dx_max_part = pcells[0].dx_max_part; c->dx_max_gpart = pcells[0].dx_max_gpart; @@ -337,6 +355,71 @@ int cell_unpack_end_step(struct cell *restrict c, #endif } +/** + * @brief Pack the multipole information of the given cell and all it's + * sub-cells. + * + * @param c The #cell. + * @param pcells (output) The multipole information we pack into + * + * @return The number of packed cells. + */ +int cell_pack_multipoles(struct cell *restrict c, + struct gravity_tensors *restrict pcells) { + +#ifdef WITH_MPI + + /* Pack this cell's data. */ + pcells[0] = *c->multipole; + + /* Fill in the progeny, depth-first recursion. */ + int count = 1; + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) { + count += cell_pack_multipoles(c->progeny[k], &pcells[count]); + } + + /* Return the number of packed values. */ + return count; + +#else + error("SWIFT was not compiled with MPI support."); + return 0; +#endif +} + +/** + * @brief Unpack the multipole information of a given cell and its sub-cells. + * + * @param c The #cell + * @param pcells The multipole information to unpack + * + * @return The number of cells created. + */ +int cell_unpack_multipoles(struct cell *restrict c, + struct gravity_tensors *restrict pcells) { + +#ifdef WITH_MPI + + /* Unpack this cell's data. */ + *c->multipole = pcells[0]; + + /* Fill in the progeny, depth-first recursion. */ + int count = 1; + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) { + count += cell_unpack_multipoles(c->progeny[k], &pcells[count]); + } + + /* Return the number of packed values. */ + return count; + +#else + error("SWIFT was not compiled with MPI support."); + return 0; +#endif +} + /** * @brief Lock a cell for access to its array of #part and hold its parents. * @@ -1108,9 +1191,6 @@ void cell_check_multipole_drift_point(struct cell *c, void *data) { const integertime_t ti_drift = *(integertime_t *)data; - /* Only check local cells */ - if (c->nodeID != engine_rank) return; - if (c->ti_old_multipole != ti_drift) error( "Cell multipole in an incorrect time-zone! c->ti_old_multipole=%lld " @@ -1321,13 +1401,13 @@ void cell_activate_drift_part(struct cell *c, struct scheduler *s) { /* Set the do_sub_drifts all the way up and activate the super drift if this has not yet been done. */ - if (c == c->super) { + if (c == c->super_hydro) { scheduler_activate(s, c->drift_part); } else { for (struct cell *parent = c->parent; parent != NULL && !parent->do_sub_drift; parent = parent->parent) { parent->do_sub_drift = 1; - if (parent == c->super) { + if (parent == c->super_hydro) { scheduler_activate(s, parent->drift_part); break; } @@ -1348,14 +1428,14 @@ void cell_activate_drift_gpart(struct cell *c, struct scheduler *s) { /* Set the do_grav_sub_drifts all the way up and activate the super drift if this has not yet been done. */ - if (c == c->super) { + if (c == c->super_gravity) { scheduler_activate(s, c->drift_gpart); } else { for (struct cell *parent = c->parent; parent != NULL && !parent->do_grav_sub_drift; parent = parent->parent) { parent->do_grav_sub_drift = 1; - if (parent == c->super) { + if (parent == c->super_gravity) { scheduler_activate(s, parent->drift_gpart); break; } @@ -1367,14 +1447,14 @@ void cell_activate_drift_gpart(struct cell *c, struct scheduler *s) { * @brief Activate the sorts up a cell hierarchy. */ void cell_activate_sorts_up(struct cell *c, struct scheduler *s) { - if (c == c->super) { + if (c == c->super_hydro) { scheduler_activate(s, c->sorts); if (c->nodeID == engine_rank) cell_activate_drift_part(c, s); } else { for (struct cell *parent = c->parent; parent != NULL && !parent->do_sub_sort; parent = parent->parent) { parent->do_sub_sort = 1; - if (parent == c->super) { + if (parent == c->super_hydro) { scheduler_activate(s, parent->sorts); if (parent->nodeID == engine_rank) cell_activate_drift_part(parent, s); break; @@ -1417,8 +1497,8 @@ void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s) { * @param cj The second #cell we recurse in. * @param s The task #scheduler. */ -void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj, - struct scheduler *s) { +void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj, + struct scheduler *s) { const struct engine *e = s->space->e; /* Store the current dx_max and h_max values. */ @@ -1432,7 +1512,7 @@ void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj, /* Self interaction? */ if (cj == NULL) { /* Do anything? */ - if (!cell_is_active(ci, e)) return; + if (!cell_is_active_hydro(ci, e)) return; /* Recurse? */ if (cell_can_recurse_in_self_task(ci)) { @@ -1440,10 +1520,11 @@ void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj, /* Loop over all progenies and pairs of progenies */ for (int j = 0; j < 8; j++) { if (ci->progeny[j] != NULL) { - cell_activate_subcell_tasks(ci->progeny[j], NULL, s); + cell_activate_subcell_hydro_tasks(ci->progeny[j], NULL, s); for (int k = j + 1; k < 8; k++) if (ci->progeny[k] != NULL) - cell_activate_subcell_tasks(ci->progeny[j], ci->progeny[k], s); + cell_activate_subcell_hydro_tasks(ci->progeny[j], ci->progeny[k], + s); } } } else { @@ -1467,200 +1548,200 @@ void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj, /* Regular sub-cell interactions of a single cell. */ case 0: /* ( 1 , 1 , 1 ) */ if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[0], s); break; case 1: /* ( 1 , 1 , 0 ) */ if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[0], s); if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s); + cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[1], s); if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[0], s); if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[1], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[1], s); break; case 2: /* ( 1 , 1 , -1 ) */ if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s); + cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[1], s); break; case 3: /* ( 1 , 0 , 1 ) */ if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[0], s); if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) - cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[2], s); + cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[2], s); if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[0], s); if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[2], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[2], s); break; case 4: /* ( 1 , 0 , 0 ) */ if (ci->progeny[4] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[0], s); if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) - cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[1], s); + cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[1], s); if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) - cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[2], s); + cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[2], s); if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) - cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[3], s); + cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[3], s); if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[0], s); if (ci->progeny[5] != NULL && cj->progeny[1] != NULL) - cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[1], s); + cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[1], s); if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) - cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[2], s); + cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[2], s); if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) - cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[3], s); + cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[3], s); if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[0], s); if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s); + cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[1], s); if (ci->progeny[6] != NULL && cj->progeny[2] != NULL) - cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[2], s); + cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[2], s); if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) - cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[3], s); + cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[3], s); if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[0], s); if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[1], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[1], s); if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[2], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[2], s); if (ci->progeny[7] != NULL && cj->progeny[3] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[3], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[3], s); break; case 5: /* ( 1 , 0 , -1 ) */ if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) - cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[1], s); + cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[1], s); if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) - cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[3], s); + cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[3], s); if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s); + cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[1], s); if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) - cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[3], s); + cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[3], s); break; case 6: /* ( 1 , -1 , 1 ) */ if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) - cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[2], s); + cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[2], s); break; case 7: /* ( 1 , -1 , 0 ) */ if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) - cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[2], s); + cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[2], s); if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) - cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[3], s); + cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[3], s); if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) - cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[2], s); + cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[2], s); if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) - cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[3], s); + cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[3], s); break; case 8: /* ( 1 , -1 , -1 ) */ if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) - cell_activate_subcell_tasks(ci->progeny[4], cj->progeny[3], s); + cell_activate_subcell_hydro_tasks(ci->progeny[4], cj->progeny[3], s); break; case 9: /* ( 0 , 1 , 1 ) */ if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[0], s); if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) - cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[4], s); + cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[4], s); if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[0], s); if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[4], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[4], s); break; case 10: /* ( 0 , 1 , 0 ) */ if (ci->progeny[2] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[2], cj->progeny[0], s); if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) - cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[1], s); + cell_activate_subcell_hydro_tasks(ci->progeny[2], cj->progeny[1], s); if (ci->progeny[2] != NULL && cj->progeny[4] != NULL) - cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[4], s); + cell_activate_subcell_hydro_tasks(ci->progeny[2], cj->progeny[4], s); if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) - cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[5], s); + cell_activate_subcell_hydro_tasks(ci->progeny[2], cj->progeny[5], s); if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[0], s); if (ci->progeny[3] != NULL && cj->progeny[1] != NULL) - cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[1], s); + cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[1], s); if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) - cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[4], s); + cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[4], s); if (ci->progeny[3] != NULL && cj->progeny[5] != NULL) - cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[5], s); + cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[5], s); if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[0], s); if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s); + cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[1], s); if (ci->progeny[6] != NULL && cj->progeny[4] != NULL) - cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[4], s); + cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[4], s); if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) - cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[5], s); + cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[5], s); if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[0], s); if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[1], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[1], s); if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[4], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[4], s); if (ci->progeny[7] != NULL && cj->progeny[5] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[5], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[5], s); break; case 11: /* ( 0 , 1 , -1 ) */ if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) - cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[1], s); + cell_activate_subcell_hydro_tasks(ci->progeny[2], cj->progeny[1], s); if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) - cell_activate_subcell_tasks(ci->progeny[2], cj->progeny[5], s); + cell_activate_subcell_hydro_tasks(ci->progeny[2], cj->progeny[5], s); if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[1], s); + cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[1], s); if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) - cell_activate_subcell_tasks(ci->progeny[6], cj->progeny[5], s); + cell_activate_subcell_hydro_tasks(ci->progeny[6], cj->progeny[5], s); break; case 12: /* ( 0 , 0 , 1 ) */ if (ci->progeny[1] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[1], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[1], cj->progeny[0], s); if (ci->progeny[1] != NULL && cj->progeny[2] != NULL) - cell_activate_subcell_tasks(ci->progeny[1], cj->progeny[2], s); + cell_activate_subcell_hydro_tasks(ci->progeny[1], cj->progeny[2], s); if (ci->progeny[1] != NULL && cj->progeny[4] != NULL) - cell_activate_subcell_tasks(ci->progeny[1], cj->progeny[4], s); + cell_activate_subcell_hydro_tasks(ci->progeny[1], cj->progeny[4], s); if (ci->progeny[1] != NULL && cj->progeny[6] != NULL) - cell_activate_subcell_tasks(ci->progeny[1], cj->progeny[6], s); + cell_activate_subcell_hydro_tasks(ci->progeny[1], cj->progeny[6], s); if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[0], s); if (ci->progeny[3] != NULL && cj->progeny[2] != NULL) - cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[2], s); + cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[2], s); if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) - cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[4], s); + cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[4], s); if (ci->progeny[3] != NULL && cj->progeny[6] != NULL) - cell_activate_subcell_tasks(ci->progeny[3], cj->progeny[6], s); + cell_activate_subcell_hydro_tasks(ci->progeny[3], cj->progeny[6], s); if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[0], s); if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) - cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[2], s); + cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[2], s); if (ci->progeny[5] != NULL && cj->progeny[4] != NULL) - cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[4], s); + cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[4], s); if (ci->progeny[5] != NULL && cj->progeny[6] != NULL) - cell_activate_subcell_tasks(ci->progeny[5], cj->progeny[6], s); + cell_activate_subcell_hydro_tasks(ci->progeny[5], cj->progeny[6], s); if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[0], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[0], s); if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[2], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[2], s); if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[4], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[4], s); if (ci->progeny[7] != NULL && cj->progeny[6] != NULL) - cell_activate_subcell_tasks(ci->progeny[7], cj->progeny[6], s); + cell_activate_subcell_hydro_tasks(ci->progeny[7], cj->progeny[6], s); break; } } /* Otherwise, activate the sorts and drifts. */ - else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { + else if (cell_is_active_hydro(ci, e) || cell_is_active_hydro(cj, e)) { /* Get the type of pair if not specified explicitly. */ double shift[3]; @@ -1704,7 +1785,7 @@ void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj, if (cj == NULL) { /* Do anything? */ - if (!cell_is_active(ci, e)) return; + if (!cell_is_active_gravity(ci, e)) return; /* Recurse? */ if (ci->split) { @@ -1730,7 +1811,8 @@ void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj, else { /* Anything to do here? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + if (!cell_is_active_gravity(ci, e) && !cell_is_active_gravity(cj, e)) + return; if (ci->ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e); if (cj->ti_old_multipole < e->ti_current) cell_drift_multipole(cj, e); @@ -1764,7 +1846,7 @@ void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj, else if (!ci->split && !cj->split) { /* Activate the drifts if the cells are local. */ - if (cell_is_active(ci, e) || cell_is_active(cj, e)) { + if (cell_is_active_gravity(ci, e) || cell_is_active_gravity(cj, e)) { if (ci->nodeID == engine_rank) cell_activate_drift_gpart(ci, s); if (cj->nodeID == engine_rank) cell_activate_drift_gpart(cj, s); } @@ -1833,7 +1915,7 @@ void cell_activate_subcell_external_grav_tasks(struct cell *ci, const struct engine *e = sp->e; /* Do anything? */ - if (!cell_is_active(ci, e)) return; + if (!cell_is_active_gravity(ci, e)) return; /* Recurse? */ if (ci->split) { @@ -1852,7 +1934,7 @@ void cell_activate_subcell_external_grav_tasks(struct cell *ci, } /** - * @brief Un-skips all the tasks associated with a given cell and checks + * @brief Un-skips all the hydro tasks associated with a given cell and checks * if the space needs to be rebuilt. * * @param c the #cell. @@ -1860,7 +1942,7 @@ void cell_activate_subcell_external_grav_tasks(struct cell *ci, * * @return 1 If the space needs rebuilding. 0 otherwise. */ -int cell_unskip_tasks(struct cell *c, struct scheduler *s) { +int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { struct engine *e = s->space->e; const int nodeID = e->nodeID; @@ -1871,8 +1953,8 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { struct task *t = l->t; struct cell *ci = t->ci; struct cell *cj = t->cj; - const int ci_active = cell_is_active(ci, e); - const int cj_active = (cj != NULL) ? cell_is_active(cj, e) : 0; + const int ci_active = cell_is_active_hydro(ci, e); + const int cj_active = (cj != NULL) ? cell_is_active_hydro(cj, e) : 0; /* Only activate tasks that involve a local active cell. */ if ((ci_active && ci->nodeID == nodeID) || @@ -1902,7 +1984,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { } /* Store current values of dx_max and h_max. */ else if (t->type == task_type_sub_pair || t->type == task_type_sub_self) { - cell_activate_subcell_tasks(t->ci, t->cj, s); + cell_activate_subcell_hydro_tasks(t->ci, t->cj, s); } } @@ -1922,6 +2004,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { scheduler_activate(s, ci->recv_xv); if (ci_active) { scheduler_activate(s, ci->recv_rho); + #ifdef EXTRA_HYDRO_LOOP scheduler_activate(s, ci->recv_gradient); #endif @@ -1960,6 +2043,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { scheduler_activate(s, cj->recv_xv); if (cj_active) { scheduler_activate(s, cj->recv_rho); + #ifdef EXTRA_HYDRO_LOOP scheduler_activate(s, cj->recv_gradient); #endif @@ -1996,15 +2080,54 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { } } + /* Unskip all the other task types. */ + if (c->nodeID == nodeID && cell_is_active_hydro(c, e)) { + + for (struct link *l = c->gradient; l != NULL; l = l->next) + scheduler_activate(s, l->t); + for (struct link *l = c->force; l != NULL; l = l->next) + scheduler_activate(s, l->t); + + if (c->extra_ghost != NULL) scheduler_activate(s, c->extra_ghost); + if (c->ghost_in != NULL) scheduler_activate(s, c->ghost_in); + if (c->ghost_out != NULL) scheduler_activate(s, c->ghost_out); + if (c->ghost != NULL) scheduler_activate(s, c->ghost); + if (c->kick1 != NULL) scheduler_activate(s, c->kick1); + if (c->kick2 != NULL) scheduler_activate(s, c->kick2); + if (c->timestep != NULL) scheduler_activate(s, c->timestep); + if (c->cooling != NULL) scheduler_activate(s, c->cooling); + if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms); + } + + return rebuild; +} + +/** + * @brief Un-skips all the gravity tasks associated with a given cell and checks + * if the space needs to be rebuilt. + * + * @param c the #cell. + * @param s the #scheduler. + * + * @return 1 If the space needs rebuilding. 0 otherwise. + */ +int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s) { + + struct engine *e = s->space->e; + const int nodeID = e->nodeID; + int rebuild = 0; + /* Un-skip the gravity tasks involved with this cell. */ for (struct link *l = c->grav; l != NULL; l = l->next) { struct task *t = l->t; struct cell *ci = t->ci; struct cell *cj = t->cj; + const int ci_active = cell_is_active_gravity(ci, e); + const int cj_active = (cj != NULL) ? cell_is_active_gravity(cj, e) : 0; /* Only activate tasks that involve a local active cell. */ - if ((cell_is_active(ci, e) && ci->nodeID == nodeID) || - (cj != NULL && cell_is_active(cj, e) && cj->nodeID == nodeID)) { + if ((ci_active && ci->nodeID == nodeID) || + (cj_active && cj->nodeID == nodeID)) { scheduler_activate(s, t); /* Set the drifting flags */ @@ -2017,30 +2140,74 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { cell_activate_subcell_grav_tasks(t->ci, t->cj, s); } } + + if (t->type == task_type_pair) { + +#ifdef WITH_MPI + /* Activate the send/recv tasks. */ + if (ci->nodeID != nodeID) { + + /* If the local cell is active, receive data from the foreign cell. */ + if (cj_active) { + scheduler_activate(s, ci->recv_grav); + } + + /* If the foreign cell is active, we want its ti_end values. */ + if (ci_active) scheduler_activate(s, ci->recv_ti); + + /* Is the foreign cell active and will need stuff from us? */ + if (ci_active) { + + scheduler_activate_send(s, cj->send_grav, ci->nodeID); + + /* Drift the cell which will be sent at the level at which it is + sent, i.e. drift the cell specified in the send task (l->t) + itself. */ + cell_activate_drift_gpart(cj, s); + } + + /* If the local cell is active, send its ti_end values. */ + if (cj_active) scheduler_activate_send(s, cj->send_ti, ci->nodeID); + + } else if (cj->nodeID != nodeID) { + + /* If the local cell is active, receive data from the foreign cell. */ + if (ci_active) { + scheduler_activate(s, cj->recv_grav); + } + + /* If the foreign cell is active, we want its ti_end values. */ + if (cj_active) scheduler_activate(s, cj->recv_ti); + + /* Is the foreign cell active and will need stuff from us? */ + if (cj_active) { + + scheduler_activate_send(s, ci->send_grav, cj->nodeID); + + /* Drift the cell which will be sent at the level at which it is + sent, i.e. drift the cell specified in the send task (l->t) + itself. */ + cell_activate_drift_gpart(ci, s); + } + + /* If the local cell is active, send its ti_end values. */ + if (ci_active) scheduler_activate_send(s, ci->send_ti, cj->nodeID); + } +#endif + } } /* Unskip all the other task types. */ - if (c->nodeID == nodeID && cell_is_active(c, e)) { - - for (struct link *l = c->gradient; l != NULL; l = l->next) - scheduler_activate(s, l->t); - for (struct link *l = c->force; l != NULL; l = l->next) - scheduler_activate(s, l->t); + if (c->nodeID == nodeID && cell_is_active_gravity(c, e)) { - if (c->extra_ghost != NULL) scheduler_activate(s, c->extra_ghost); - if (c->ghost_in != NULL) scheduler_activate(s, c->ghost_in); - if (c->ghost_out != NULL) scheduler_activate(s, c->ghost_out); - if (c->ghost != NULL) scheduler_activate(s, c->ghost); if (c->init_grav != NULL) scheduler_activate(s, c->init_grav); + if (c->grav_ghost_in != NULL) scheduler_activate(s, c->grav_ghost_in); + if (c->grav_ghost_out != NULL) scheduler_activate(s, c->grav_ghost_out); if (c->kick1 != NULL) scheduler_activate(s, c->kick1); if (c->kick2 != NULL) scheduler_activate(s, c->kick2); if (c->timestep != NULL) scheduler_activate(s, c->timestep); - if (c->grav_ghost[0] != NULL) scheduler_activate(s, c->grav_ghost[0]); - if (c->grav_ghost[1] != NULL) scheduler_activate(s, c->grav_ghost[1]); if (c->grav_down != NULL) scheduler_activate(s, c->grav_down); if (c->grav_long_range != NULL) scheduler_activate(s, c->grav_long_range); - if (c->cooling != NULL) scheduler_activate(s, c->cooling); - if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms); } return rebuild; @@ -2066,6 +2233,50 @@ void cell_set_super(struct cell *c, struct cell *super) { if (c->progeny[k] != NULL) cell_set_super(c->progeny[k], super); } +/** + * @brief Set the super-cell pointers for all cells in a hierarchy. + * + * @param c The top-level #cell to play with. + * @param super_hydro Pointer to the deepest cell with tasks in this part of the + * tree. + */ +void cell_set_super_hydro(struct cell *c, struct cell *super_hydro) { + + /* Are we in a cell with some kind of self/pair task ? */ + if (super_hydro == NULL && c->density != NULL) super_hydro = c; + + /* Set the super-cell */ + c->super_hydro = super_hydro; + + /* Recurse */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + cell_set_super_hydro(c->progeny[k], super_hydro); +} + +/** + * @brief Set the super-cell pointers for all cells in a hierarchy. + * + * @param c The top-level #cell to play with. + * @param super_gravity Pointer to the deepest cell with tasks in this part of + * the tree. + */ +void cell_set_super_gravity(struct cell *c, struct cell *super_gravity) { + + /* Are we in a cell with some kind of self/pair task ? */ + if (super_gravity == NULL && c->grav != NULL) super_gravity = c; + + /* Set the super-cell */ + c->super_gravity = super_gravity; + + /* Recurse */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + cell_set_super_gravity(c->progeny[k], super_gravity); +} + /** * @brief Mapper function to set the super pointer of the cells. * @@ -2074,8 +2285,15 @@ void cell_set_super(struct cell *c, struct cell *super) { * @param extra_data Unused parameter. */ void cell_set_super_mapper(void *map_data, int num_elements, void *extra_data) { + + const struct engine *e = (const struct engine *)extra_data; + for (int ind = 0; ind < num_elements; ind++) { struct cell *c = &((struct cell *)map_data)[ind]; + if (e->policy & engine_policy_hydro) cell_set_super_hydro(c, NULL); + if (e->policy & + (engine_policy_self_gravity | engine_policy_external_gravity)) + cell_set_super_gravity(c, NULL); cell_set_super(c, NULL); } } @@ -2388,7 +2606,7 @@ void cell_drift_multipole(struct cell *c, const struct engine *e) { void cell_check_timesteps(struct cell *c) { #ifdef SWIFT_DEBUG_CHECKS - if (c->ti_end_min == 0 && c->nr_tasks > 0) + if (c->ti_hydro_end_min == 0 && c->ti_gravity_end_min == 0 && c->nr_tasks > 0) error("Cell without assigned time-step"); if (c->split) { diff --git a/src/cell.h b/src/cell.h index 8b62e4362011617f52c7d8efcde409200098adcd..a9ba34b0935dcd13f9bc06b17a7c6d01e130e0f6 100644 --- a/src/cell.h +++ b/src/cell.h @@ -80,14 +80,23 @@ struct pcell { /*! Maximal smoothing length. */ double h_max; - /*! Minimal integer end-of-timestep in this cell */ - integertime_t ti_end_min; + /*! Minimal integer end-of-timestep in this cell for hydro tasks */ + integertime_t ti_hydro_end_min; - /*! Maximal integer end-of-timestep in this cell */ - integertime_t ti_end_max; + /*! Maximal integer end-of-timestep in this cell for hydro tasks */ + integertime_t ti_hydro_end_max; - /*! Maximal integer beginning-of-timestep in this cell */ - integertime_t ti_beg_max; + /*! Maximal integer beginning-of-timestep in this cell for hydro tasks */ + integertime_t ti_hydro_beg_max; + + /*! Minimal integer end-of-timestep in this cell for gravity tasks */ + integertime_t ti_gravity_end_min; + + /*! Maximal integer end-of-timestep in this cell for gravity tasks */ + integertime_t ti_gravity_end_max; + + /*! Maximal integer beginning-of-timestep in this cell for gravity tasks */ + integertime_t ti_gravity_beg_max; /*! Integer time of the last drift of the #part in this cell */ integertime_t ti_old_part; @@ -95,6 +104,9 @@ struct pcell { /*! Integer time of the last drift of the #gpart in this cell */ integertime_t ti_old_gpart; + /*! Integer time of the last drift of the #multipole in this cell */ + integertime_t ti_old_multipole; + /*! Number of #part in this cell. */ int count; @@ -110,6 +122,11 @@ struct pcell { /*! Relative indices of the cell's progeny. */ int progeny[8]; +#ifdef SWIFT_DEBUG_CHECKS + /* Cell ID (for debugging) */ + int cellID; +#endif + } SWIFT_STRUCT_ALIGN; /** @@ -117,8 +134,17 @@ struct pcell { */ struct pcell_step { - /*! Minimal integer end-of-timestep in this cell */ - integertime_t ti_end_min; + /*! Minimal integer end-of-timestep in this cell (hydro) */ + integertime_t ti_hydro_end_min; + + /*! Minimal integer end-of-timestep in this cell (hydro) */ + integertime_t ti_hydro_end_max; + + /*! Minimal integer end-of-timestep in this cell (gravity) */ + integertime_t ti_gravity_end_min; + + /*! Minimal integer end-of-timestep in this cell (gravity) */ + integertime_t ti_gravity_end_max; /*! Maximal distance any #part has travelled since last rebuild */ float dx_max_part; @@ -170,11 +196,16 @@ struct cell { /*! Parent cell. */ struct cell *parent; - /*! Super cell, i.e. the highest-level parent cell that has pair/self tasks */ + /*! Super cell, i.e. the highest-level parent cell with *any* task */ struct cell *super; - /*! The task computing this cell's sorts. */ - struct task *sorts; + /*! Super cell, i.e. the highest-level parent cell that has a hydro pair/self + * tasks */ + struct cell *super_hydro; + + /*! Super cell, i.e. the highest-level parent cell that has a grav pair/self + * tasks */ + struct cell *super_gravity; /*! Linked list of the tasks computing this cell's hydro density. */ struct link *density; @@ -188,6 +219,9 @@ struct cell { /*! Linked list of the tasks computing this cell's gravity forces. */ struct link *grav; + /*! The task computing this cell's sorts. */ + struct task *sorts; + /*! The multipole initialistation task */ struct task *init_grav; @@ -219,7 +253,7 @@ struct cell { struct task *timestep; /*! Task linking the FFT mesh to the rest of gravity tasks */ - struct task *grav_ghost[2]; + struct task *grav_ghost_in, *grav_ghost_out; /*! Task computing long range non-periodic gravity interactions */ struct task *grav_long_range; @@ -235,27 +269,33 @@ struct cell { #ifdef WITH_MPI - /* Task receiving data (positions). */ + /* Task receiving hydro data (positions). */ struct task *recv_xv; - /* Task receiving data (density). */ + /* Task receiving hydro data (density). */ struct task *recv_rho; - /* Task receiving data (gradient). */ + /* Task receiving hydro data (gradient). */ struct task *recv_gradient; + /* Task receiving gpart data. */ + struct task *recv_grav; + /* Task receiving data (time-step). */ struct task *recv_ti; - /* Linked list for sending data (positions). */ + /* Linked list for sending hydro data (positions). */ struct link *send_xv; - /* Linked list for sending data (density). */ + /* Linked list for sending hydro data (density). */ struct link *send_rho; - /* Linked list for sending data (gradient). */ + /* Linked list for sending hydro data (gradient). */ struct link *send_gradient; + /* Linked list for sending gpart data. */ + struct link *send_grav; + /* Linked list for sending data (time-step). */ struct link *send_ti; @@ -273,14 +313,24 @@ struct cell { #endif - /*! Minimum end of (integer) time step in this cell. */ - integertime_t ti_end_min; + /*! Minimum end of (integer) time step in this cell for hydro tasks. */ + integertime_t ti_hydro_end_min; + + /*! Maximum end of (integer) time step in this cell for hydro tasks. */ + integertime_t ti_hydro_end_max; + + /*! Maximum beginning of (integer) time step in this cell for hydro tasks. */ + integertime_t ti_hydro_beg_max; - /*! Maximum end of (integer) time step in this cell. */ - integertime_t ti_end_max; + /*! Minimum end of (integer) time step in this cell for gravity tasks. */ + integertime_t ti_gravity_end_min; - /*! Maximum beginning of (integer) time step in this cell. */ - integertime_t ti_beg_max; + /*! Maximum end of (integer) time step in this cell for gravity tasks. */ + integertime_t ti_gravity_end_max; + + /*! Maximum beginning of (integer) time step in this cell for gravity tasks. + */ + integertime_t ti_gravity_beg_max; /*! Last (integer) time the cell's part were drifted forward in time. */ integertime_t ti_old_part; @@ -397,6 +447,9 @@ struct cell { char do_sub_sort; #ifdef SWIFT_DEBUG_CHECKS + /* Cell ID (for debugging) */ + int cellID; + /*! Last (integer) time the cell's sort arrays were updated. */ integertime_t ti_sort; @@ -430,6 +483,8 @@ int cell_pack(struct cell *c, struct pcell *pc); int cell_unpack(struct pcell *pc, struct cell *c, struct space *s); int cell_pack_end_step(struct cell *c, struct pcell_step *pcell); int cell_unpack_end_step(struct cell *c, struct pcell_step *pcell); +int cell_pack_multipoles(struct cell *c, struct gravity_tensors *m); +int cell_unpack_multipoles(struct cell *c, struct gravity_tensors *m); int cell_getsize(struct cell *c); int cell_link_parts(struct cell *c, struct part *parts); int cell_link_gparts(struct cell *c, struct gpart *gparts); @@ -443,7 +498,8 @@ void cell_check_part_drift_point(struct cell *c, void *data); void cell_check_gpart_drift_point(struct cell *c, void *data); void cell_check_multipole_drift_point(struct cell *c, void *data); void cell_reset_task_counters(struct cell *c); -int cell_unskip_tasks(struct cell *c, struct scheduler *s); +int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s); +int cell_unskip_gravity_tasks(struct cell *c, struct scheduler *s); void cell_set_super(struct cell *c, struct cell *super); void cell_drift_part(struct cell *c, const struct engine *e, int force); void cell_drift_gpart(struct cell *c, const struct engine *e, int force); @@ -451,8 +507,8 @@ void cell_drift_multipole(struct cell *c, const struct engine *e); void cell_drift_all_multipoles(struct cell *c, const struct engine *e); void cell_check_timesteps(struct cell *c); void cell_store_pre_drift_values(struct cell *c); -void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj, - struct scheduler *s); +void cell_activate_subcell_hydro_tasks(struct cell *ci, struct cell *cj, + struct scheduler *s); void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj, struct scheduler *s); void cell_activate_subcell_external_grav_tasks(struct cell *ci, @@ -492,12 +548,8 @@ __attribute__((always_inline)) INLINE static int cell_can_recurse_in_pair_task( __attribute__((always_inline)) INLINE static int cell_can_recurse_in_self_task( const struct cell *c) { - /* Is the cell split ? */ - /* Note: No need for more checks here as all the sub-pairs and sub-self */ - /* operations will be executed. So no need for the particle to be at exactly - */ - /* the right place. */ - return c->split; + /* Is the cell split and not smaller than the smoothing length? */ + return c->split && (kernel_gamma * c->h_max_old < 0.5f * c->dmin); } /** diff --git a/src/collectgroup.c b/src/collectgroup.c index b7e5486b59a2ec5e47b7b864071a2bb1e5ce1850..b704a0a5ea33cc1c5332fc0575061ad8e38f4d21 100644 --- a/src/collectgroup.c +++ b/src/collectgroup.c @@ -37,7 +37,8 @@ /* Local collections for MPI reduces. */ struct mpicollectgroup1 { size_t updates, g_updates, s_updates; - integertime_t ti_end_min; + integertime_t ti_hydro_end_min; + integertime_t ti_gravity_end_min; int forcerebuild; }; @@ -75,9 +76,15 @@ void collectgroup_init() { * @param e The #engine */ void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e) { - e->ti_end_min = grp1->ti_end_min; - e->ti_end_max = grp1->ti_end_max; - e->ti_beg_max = grp1->ti_beg_max; + e->ti_hydro_end_min = grp1->ti_hydro_end_min; + e->ti_hydro_end_max = grp1->ti_hydro_end_max; + e->ti_hydro_beg_max = grp1->ti_hydro_beg_max; + e->ti_gravity_end_min = grp1->ti_gravity_end_min; + e->ti_gravity_end_max = grp1->ti_gravity_end_max; + e->ti_gravity_beg_max = grp1->ti_gravity_beg_max; + e->ti_end_min = min(e->ti_hydro_end_min, e->ti_gravity_end_min); + e->ti_end_max = max(e->ti_hydro_end_max, e->ti_gravity_end_max); + e->ti_beg_max = max(e->ti_hydro_beg_max, e->ti_gravity_beg_max); e->updates = grp1->updates; e->g_updates = grp1->g_updates; e->s_updates = grp1->s_updates; @@ -92,21 +99,37 @@ void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e) { * @param g_updates the number of updated gravity particles on this node this * step. * @param s_updates the number of updated star particles on this node this step. - * @param ti_end_min the minimum end time for next time step after this step. - * @param ti_end_max the maximum end time for next time step after this step. - * @param ti_beg_max the maximum begin time for next time step after this step. + * @param ti_hydro_end_min the minimum end time for next hydro time step after + * this step. + * @param ti_hydro_end_max the maximum end time for next hydro time step after + * this step. + * @param ti_hydro_beg_max the maximum begin time for next hydro time step after + * this step. + * @param ti_gravity_end_min the minimum end time for next gravity time step + * after this step. + * @param ti_gravity_end_max the maximum end time for next gravity time step + * after this step. + * @param ti_gravity_beg_max the maximum begin time for next gravity time step + * after this step. * @param forcerebuild whether a rebuild is required after this step. */ void collectgroup1_init(struct collectgroup1 *grp1, size_t updates, size_t g_updates, size_t s_updates, - integertime_t ti_end_min, integertime_t ti_end_max, - integertime_t ti_beg_max, int forcerebuild) { + integertime_t ti_hydro_end_min, + integertime_t ti_hydro_end_max, + integertime_t ti_hydro_beg_max, + integertime_t ti_gravity_end_min, + integertime_t ti_gravity_end_max, + integertime_t ti_gravity_beg_max, int forcerebuild) { grp1->updates = updates; grp1->g_updates = g_updates; grp1->s_updates = s_updates; - grp1->ti_end_min = ti_end_min; - grp1->ti_end_max = ti_end_max; - grp1->ti_beg_max = ti_beg_max; + grp1->ti_hydro_end_min = ti_hydro_end_min; + grp1->ti_hydro_end_max = ti_hydro_end_max; + grp1->ti_hydro_beg_max = ti_hydro_beg_max; + grp1->ti_gravity_end_min = ti_gravity_end_min; + grp1->ti_gravity_end_max = ti_gravity_end_max; + grp1->ti_gravity_beg_max = ti_gravity_beg_max; grp1->forcerebuild = forcerebuild; } @@ -127,7 +150,8 @@ void collectgroup1_reduce(struct collectgroup1 *grp1) { mpigrp11.updates = grp1->updates; mpigrp11.g_updates = grp1->g_updates; mpigrp11.s_updates = grp1->s_updates; - mpigrp11.ti_end_min = grp1->ti_end_min; + mpigrp11.ti_hydro_end_min = grp1->ti_hydro_end_min; + mpigrp11.ti_gravity_end_min = grp1->ti_gravity_end_min; mpigrp11.forcerebuild = grp1->forcerebuild; struct mpicollectgroup1 mpigrp12; @@ -139,7 +163,8 @@ void collectgroup1_reduce(struct collectgroup1 *grp1) { grp1->updates = mpigrp12.updates; grp1->g_updates = mpigrp12.g_updates; grp1->s_updates = mpigrp12.s_updates; - grp1->ti_end_min = mpigrp12.ti_end_min; + grp1->ti_hydro_end_min = mpigrp12.ti_hydro_end_min; + grp1->ti_gravity_end_min = mpigrp12.ti_gravity_end_min; grp1->forcerebuild = mpigrp12.forcerebuild; #endif @@ -162,7 +187,10 @@ static void doreduce1(struct mpicollectgroup1 *mpigrp11, mpigrp11->s_updates += mpigrp12->s_updates; /* Minimum end time. */ - mpigrp11->ti_end_min = min(mpigrp11->ti_end_min, mpigrp12->ti_end_min); + mpigrp11->ti_hydro_end_min = + min(mpigrp11->ti_hydro_end_min, mpigrp12->ti_hydro_end_min); + mpigrp11->ti_gravity_end_min = + min(mpigrp11->ti_gravity_end_min, mpigrp12->ti_gravity_end_min); /* Everyone must agree to not rebuild. */ if (mpigrp11->forcerebuild || mpigrp12->forcerebuild) diff --git a/src/collectgroup.h b/src/collectgroup.h index b0cbf0763d94dec65ea1aae2c521c1c4b9f7bf83..f2014ed254fdde7ea293224751061d824782b4a7 100644 --- a/src/collectgroup.h +++ b/src/collectgroup.h @@ -38,7 +38,8 @@ struct collectgroup1 { size_t updates, g_updates, s_updates; /* Times for the time-step */ - integertime_t ti_end_min, ti_end_max, ti_beg_max; + integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max; + integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max; /* Force the engine to rebuild? */ int forcerebuild; @@ -48,8 +49,12 @@ void collectgroup_init(); void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e); void collectgroup1_init(struct collectgroup1 *grp1, size_t updates, size_t g_updates, size_t s_updates, - integertime_t ti_end_min, integertime_t ti_end_max, - integertime_t ti_beg_max, int forcerebuild); + integertime_t ti_hydro_end_min, + integertime_t ti_hydro_end_max, + integertime_t ti_hydro_beg_max, + integertime_t ti_gravity_end_min, + integertime_t ti_gravity_end_max, + integertime_t ti_gravity_beg_max, int forcerebuild); void collectgroup1_reduce(struct collectgroup1 *grp1); #endif /* SWIFT_COLLECTGROUP_H */ diff --git a/src/debug.c b/src/debug.c index 402234a27ef50bbc38c06f61dea96e48ff02c59a..8675355ca3a97eaa29d79a7bc0e064bb577168d3 100644 --- a/src/debug.c +++ b/src/debug.c @@ -317,7 +317,7 @@ static void dumpCells_map(struct cell *c, void *data) { /* Active cells, otherwise all. */ if (active) - active = cell_is_active(c, e); + active = cell_is_active_hydro(c, e); else active = 1; @@ -344,9 +344,9 @@ static void dumpCells_map(struct cell *c, void *data) { "%6.1f %20lld %6d %6d %6d %6d %6d\n", c->loc[0], c->loc[1], c->loc[2], c->width[0], c->width[1], c->width[2], e->step, c->count, c->gcount, c->scount, pactcount, - c->depth, ntasks, c->ti_end_min, get_time_bin(c->ti_end_min), - (c->super == c), cell_is_active(c, e), c->nodeID, - c->nodeID == e->nodeID); + c->depth, ntasks, c->ti_hydro_end_min, + get_time_bin(c->ti_hydro_end_min), (c->super == c), + cell_is_active_hydro(c, e), c->nodeID, c->nodeID == e->nodeID); } } } diff --git a/src/engine.c b/src/engine.c index cd30b7ab1b738d983ac2dfe11aebf89dc5218a18..f21c7cfc8af721b9f8d895cffecb0c3605d34861 100644 --- a/src/engine.c +++ b/src/engine.c @@ -104,7 +104,8 @@ int engine_rank; struct end_of_step_data { int updates, g_updates, s_updates; - integertime_t ti_end_min, ti_end_max, ti_beg_max; + integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max; + integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max; struct engine *e; }; @@ -136,13 +137,18 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) { */ void engine_add_ghosts(struct engine *e, struct cell *c, struct task *ghost_in, struct task *ghost_out) { + + /* If we have reached the leaf OR have to few particles to play with*/ if (!c->split || c->count < engine_max_parts_per_ghost) { + + /* Add the ghost task and its dependencies */ struct scheduler *s = &e->sched; c->ghost = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, 0, c, NULL); scheduler_addunlock(s, ghost_in, c->ghost); scheduler_addunlock(s, c->ghost, ghost_out); } else { + /* Keep recursing */ for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) engine_add_ghosts(e, c->progeny[k], ghost_in, ghost_out); @@ -151,47 +157,26 @@ void engine_add_ghosts(struct engine *e, struct cell *c, struct task *ghost_in, /** * @brief Generate the hydro hierarchical tasks for a hierarchy of cells - - * i.e. all the O(Npart) tasks. + * i.e. all the O(Npart) tasks -- timestep version * * Tasks are only created here. The dependencies will be added later on. * - * Note that there is no need to recurse below the super-cell. + * Note that there is no need to recurse below the super-cell. Note also + * that we only add tasks if the relevant particles are present in the cell. * * @param e The #engine. * @param c The #cell. */ -void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { +void engine_make_hierarchical_tasks_common(struct engine *e, struct cell *c) { struct scheduler *s = &e->sched; - const int periodic = e->s->periodic; - const int is_with_hydro = (e->policy & engine_policy_hydro); - const int is_self_gravity = (e->policy & engine_policy_self_gravity); - const int is_external_gravity = (e->policy & engine_policy_external_gravity); - const int is_with_cooling = (e->policy & engine_policy_cooling); - const int is_with_sourceterms = (e->policy & engine_policy_sourceterms); /* Are we in a super-cell ? */ if (c->super == c) { - /* Add the sort task. */ - if (is_with_hydro) { - c->sorts = scheduler_addtask(s, task_type_sort, task_subtype_none, 0, 0, - c, NULL); - } - /* Local tasks only... */ if (c->nodeID == e->nodeID) { - /* Add the drift tasks corresponding to the policy. */ - if (is_with_hydro) { - c->drift_part = scheduler_addtask(s, task_type_drift_part, - task_subtype_none, 0, 0, c, NULL); - } - if (is_self_gravity || is_external_gravity) { - c->drift_gpart = scheduler_addtask(s, task_type_drift_gpart, - task_subtype_none, 0, 0, c, NULL); - } - /* Add the two half kicks */ c->kick1 = scheduler_addtask(s, task_type_kick1, task_subtype_none, 0, 0, c, NULL); @@ -205,53 +190,71 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { scheduler_addunlock(s, c->kick2, c->timestep); scheduler_addunlock(s, c->timestep, c->kick1); + } - /* Add the self-gravity tasks */ - if (is_self_gravity) { + } else { /* We are above the super-cell so need to go deeper */ - /* Initialisation of the multipoles */ - c->init_grav = scheduler_addtask(s, task_type_init_grav, - task_subtype_none, 0, 0, c, NULL); + /* Recurse. */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_make_hierarchical_tasks_common(e, c->progeny[k]); + } +} - /* Gravity non-neighbouring pm calculations */ - c->grav_long_range = scheduler_addtask( - s, task_type_grav_long_range, task_subtype_none, 0, 0, c, NULL); +/** + * @brief Generate the hydro hierarchical tasks for a hierarchy of cells - + * i.e. all the O(Npart) tasks -- hydro version + * + * Tasks are only created here. The dependencies will be added later on. + * + * Note that there is no need to recurse below the super-cell. Note also + * that we only add tasks if the relevant particles are present in the cell. + * + * @param e The #engine. + * @param c The #cell. + */ +void engine_make_hierarchical_tasks_hydro(struct engine *e, struct cell *c) { - /* Gravity recursive down-pass */ - c->grav_down = scheduler_addtask(s, task_type_grav_down, - task_subtype_none, 0, 0, c, NULL); + struct scheduler *s = &e->sched; + const int is_with_cooling = (e->policy & engine_policy_cooling); + const int is_with_sourceterms = (e->policy & engine_policy_sourceterms); - if (periodic) scheduler_addunlock(s, c->init_grav, c->grav_ghost[0]); - scheduler_addunlock(s, c->init_grav, c->grav_long_range); - scheduler_addunlock(s, c->grav_long_range, c->grav_down); - scheduler_addunlock(s, c->grav_down, c->kick2); - } + /* Are we in a super-cell ? */ + if (c->super_hydro == c) { - /* Add the hydrodynamics tasks */ - if (is_with_hydro) { + /* Add the sort task. */ + c->sorts = + scheduler_addtask(s, task_type_sort, task_subtype_none, 0, 0, c, NULL); - /* Generate the ghost tasks. */ - c->ghost_in = - scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, - /* implicit = */ 1, c, NULL); - c->ghost_out = - scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, - /* implicit = */ 1, c, NULL); - engine_add_ghosts(e, c, c->ghost_in, c->ghost_out); + /* Local tasks only... */ + if (c->nodeID == e->nodeID) { + + /* Add the drift task. */ + c->drift_part = scheduler_addtask(s, task_type_drift_part, + task_subtype_none, 0, 0, c, NULL); + + /* Generate the ghost tasks. */ + c->ghost_in = + scheduler_addtask(s, task_type_ghost_in, task_subtype_none, 0, + /* implicit = */ 1, c, NULL); + c->ghost_out = + scheduler_addtask(s, task_type_ghost_out, task_subtype_none, 0, + /* implicit = */ 1, c, NULL); + engine_add_ghosts(e, c, c->ghost_in, c->ghost_out); #ifdef EXTRA_HYDRO_LOOP - /* Generate the extra ghost task. */ - c->extra_ghost = scheduler_addtask(s, task_type_extra_ghost, - task_subtype_none, 0, 0, c, NULL); + /* Generate the extra ghost task. */ + c->extra_ghost = scheduler_addtask(s, task_type_extra_ghost, + task_subtype_none, 0, 0, c, NULL); #endif - } /* Cooling task */ if (is_with_cooling) { c->cooling = scheduler_addtask(s, task_type_cooling, task_subtype_none, 0, 0, c, NULL); - scheduler_addunlock(s, c->cooling, c->kick2); + scheduler_addunlock(s, c->cooling, c->super->kick2); } /* add source terms */ @@ -263,25 +266,90 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { } else { /* We are above the super-cell so need to go deeper */ -#ifdef SWIFT_DEBUG_CHECKS - if (c->super != NULL) error("Incorrectly set super pointer"); -#endif + /* Recurse. */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_make_hierarchical_tasks_hydro(e, c->progeny[k]); + } +} + +/** + * @brief Generate the hydro hierarchical tasks for a hierarchy of cells - + * i.e. all the O(Npart) tasks -- gravity version + * + * Tasks are only created here. The dependencies will be added later on. + * + * Note that there is no need to recurse below the super-cell. Note also + * that we only add tasks if the relevant particles are present in the cell. + * + * @param e The #engine. + * @param c The #cell. + */ +void engine_make_hierarchical_tasks_gravity(struct engine *e, struct cell *c) { + + struct scheduler *s = &e->sched; + const int periodic = e->s->periodic; + const int is_self_gravity = (e->policy & engine_policy_self_gravity); + + /* Are we in a super-cell ? */ + if (c->super_gravity == c) { + + /* Local tasks only... */ + if (c->nodeID == e->nodeID) { + + c->drift_gpart = scheduler_addtask(s, task_type_drift_gpart, + task_subtype_none, 0, 0, c, NULL); + + if (is_self_gravity) { + + /* Initialisation of the multipoles */ + c->init_grav = scheduler_addtask(s, task_type_init_grav, + task_subtype_none, 0, 0, c, NULL); + + /* Gravity non-neighbouring pm calculations */ + c->grav_long_range = scheduler_addtask( + s, task_type_grav_long_range, task_subtype_none, 0, 0, c, NULL); + + /* Gravity recursive down-pass */ + c->grav_down = scheduler_addtask(s, task_type_grav_down, + task_subtype_none, 0, 0, c, NULL); + + if (periodic) scheduler_addunlock(s, c->init_grav, c->grav_ghost_in); + if (periodic) scheduler_addunlock(s, c->grav_ghost_out, c->grav_down); + scheduler_addunlock(s, c->init_grav, c->grav_long_range); + scheduler_addunlock(s, c->grav_long_range, c->grav_down); + scheduler_addunlock(s, c->grav_down, c->super->kick2); + } + } + + } else { /* We are above the super-cell so need to go deeper */ /* Recurse. */ if (c->split) for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) - engine_make_hierarchical_tasks(e, c->progeny[k]); + engine_make_hierarchical_tasks_gravity(e, c->progeny[k]); } } void engine_make_hierarchical_tasks_mapper(void *map_data, int num_elements, void *extra_data) { struct engine *e = (struct engine *)extra_data; + const int is_with_hydro = (e->policy & engine_policy_hydro); + const int is_with_self_gravity = (e->policy & engine_policy_self_gravity); + const int is_with_external_gravity = + (e->policy & engine_policy_external_gravity); for (int ind = 0; ind < num_elements; ind++) { struct cell *c = &((struct cell *)map_data)[ind]; - engine_make_hierarchical_tasks(e, c); + /* Make the common tasks (time integration) */ + engine_make_hierarchical_tasks_common(e, c); + /* Add the hydro stuff */ + if (is_with_hydro) engine_make_hierarchical_tasks_hydro(e, c); + /* And the gravity stuff */ + if (is_with_self_gravity || is_with_external_gravity) + engine_make_hierarchical_tasks_gravity(e, c); } } @@ -1044,29 +1112,7 @@ void engine_repartition_trigger(struct engine *e) { } /** - * @brief Add up/down gravity tasks to a cell hierarchy. - * - * @param e The #engine. - * @param c The #cell - * @param up The upward gravity #task. - * @param down The downward gravity #task. - */ -void engine_addtasks_grav(struct engine *e, struct cell *c, struct task *up, - struct task *down) { - - /* Link the tasks to this cell. */ - // c->grav_up = up; - c->grav_down = down; - - /* Recurse? */ - if (c->split) - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) - engine_addtasks_grav(e, c->progeny[k], up, down); -} - -/** - * @brief Add send tasks to a hierarchy of cells. + * @brief Add send tasks for the hydro pairs to a hierarchy of cells. * * @param e The #engine. * @param ci The sending #cell. @@ -1074,11 +1120,10 @@ void engine_addtasks_grav(struct engine *e, struct cell *c, struct task *up, * @param t_xv The send_xv #task, if it has already been created. * @param t_rho The send_rho #task, if it has already been created. * @param t_gradient The send_gradient #task, if already created. - * @param t_ti The send_ti #task, if required and has already been created. */ -void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, - struct task *t_xv, struct task *t_rho, - struct task *t_gradient, struct task *t_ti) { +void engine_addtasks_send_hydro(struct engine *e, struct cell *ci, + struct cell *cj, struct task *t_xv, + struct task *t_rho, struct task *t_gradient) { #ifdef WITH_MPI struct link *l = NULL; @@ -1097,49 +1142,45 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, /* Create the tasks and their dependencies? */ if (t_xv == NULL) { - t_xv = scheduler_addtask(s, task_type_send, task_subtype_xv, 4 * ci->tag, - 0, ci, cj); + t_xv = scheduler_addtask(s, task_type_send, task_subtype_xv, + 6 * ci->tag + 0, 0, ci, cj); t_rho = scheduler_addtask(s, task_type_send, task_subtype_rho, - 4 * ci->tag + 1, 0, ci, cj); - t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend, - 4 * ci->tag + 2, 0, ci, cj); + 6 * ci->tag + 1, 0, ci, cj); #ifdef EXTRA_HYDRO_LOOP t_gradient = scheduler_addtask(s, task_type_send, task_subtype_gradient, - 4 * ci->tag + 3, 0, ci, cj); + 6 * ci->tag + 3, 0, ci, cj); #endif #ifdef EXTRA_HYDRO_LOOP scheduler_addunlock(s, t_gradient, ci->super->kick2); - scheduler_addunlock(s, ci->super->extra_ghost, t_gradient); + scheduler_addunlock(s, ci->super_hydro->extra_ghost, t_gradient); - /* The send_rho task should unlock the super-cell's extra_ghost task. */ - scheduler_addunlock(s, t_rho, ci->super->extra_ghost); + /* The send_rho task should unlock the super_hydro-cell's extra_ghost + * task. */ + scheduler_addunlock(s, t_rho, ci->super_hydro->extra_ghost); /* The send_rho task depends on the cell's ghost task. */ - scheduler_addunlock(s, ci->super->ghost_out, t_rho); + scheduler_addunlock(s, ci->super_hydro->ghost_out, t_rho); - /* The send_xv task should unlock the super-cell's ghost task. */ - scheduler_addunlock(s, t_xv, ci->super->ghost_in); + /* The send_xv task should unlock the super_hydro-cell's ghost task. */ + scheduler_addunlock(s, t_xv, ci->super_hydro->ghost_in); #else - /* The send_rho task should unlock the super-cell's kick task. */ + /* The send_rho task should unlock the super_hydro-cell's kick task. */ scheduler_addunlock(s, t_rho, ci->super->kick2); /* The send_rho task depends on the cell's ghost task. */ - scheduler_addunlock(s, ci->super->ghost_out, t_rho); + scheduler_addunlock(s, ci->super_hydro->ghost_out, t_rho); - /* The send_xv task should unlock the super-cell's ghost task. */ - scheduler_addunlock(s, t_xv, ci->super->ghost_in); + /* The send_xv task should unlock the super_hydro-cell's ghost task. */ + scheduler_addunlock(s, t_xv, ci->super_hydro->ghost_in); #endif /* Drift before you send */ - scheduler_addunlock(s, ci->super->drift_part, t_xv); - - /* The super-cell's timestep task should unlock the send_ti task. */ - scheduler_addunlock(s, ci->super->timestep, t_ti); + scheduler_addunlock(s, ci->super_hydro->drift_part, t_xv); } /* Add them to the local cell. */ @@ -1148,6 +1189,116 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, #ifdef EXTRA_HYDRO_LOOP engine_addlink(e, &ci->send_gradient, t_gradient); #endif + } + + /* Recurse? */ + if (ci->split) + for (int k = 0; k < 8; k++) + if (ci->progeny[k] != NULL) + engine_addtasks_send_hydro(e, ci->progeny[k], cj, t_xv, t_rho, + t_gradient); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Add send tasks for the gravity pairs to a hierarchy of cells. + * + * @param e The #engine. + * @param ci The sending #cell. + * @param cj Dummy cell containing the nodeID of the receiving node. + * @param t_grav The send_grav #task, if it has already been created. + */ +void engine_addtasks_send_gravity(struct engine *e, struct cell *ci, + struct cell *cj, struct task *t_grav) { + +#ifdef WITH_MPI + struct link *l = NULL; + struct scheduler *s = &e->sched; + const int nodeID = cj->nodeID; + + /* Check if any of the gravity tasks are for the target node. */ + for (l = ci->grav; l != NULL; l = l->next) + if (l->t->ci->nodeID == nodeID || + (l->t->cj != NULL && l->t->cj->nodeID == nodeID)) + break; + + /* If so, attach send tasks. */ + if (l != NULL) { + + /* Create the tasks and their dependencies? */ + if (t_grav == NULL) { + + t_grav = scheduler_addtask(s, task_type_send, task_subtype_gpart, + 6 * ci->tag + 4, 0, ci, cj); + + /* The sends should unlock the down pass. */ + scheduler_addunlock(s, t_grav, ci->super_gravity->grav_down); + + /* Drift before you send */ + scheduler_addunlock(s, ci->super_gravity->drift_gpart, t_grav); + } + + /* Add them to the local cell. */ + engine_addlink(e, &ci->send_grav, t_grav); + } + + /* Recurse? */ + if (ci->split) + for (int k = 0; k < 8; k++) + if (ci->progeny[k] != NULL) + engine_addtasks_send_gravity(e, ci->progeny[k], cj, t_grav); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Add send tasks for the time-step to a hierarchy of cells. + * + * @param e The #engine. + * @param ci The sending #cell. + * @param cj Dummy cell containing the nodeID of the receiving node. + * @param t_ti The send_ti #task, if it has already been created. + */ +void engine_addtasks_send_timestep(struct engine *e, struct cell *ci, + struct cell *cj, struct task *t_ti) { + +#ifdef WITH_MPI + struct link *l = NULL; + struct scheduler *s = &e->sched; + const int nodeID = cj->nodeID; + + /* Check if any of the gravity tasks are for the target node. */ + for (l = ci->grav; l != NULL; l = l->next) + if (l->t->ci->nodeID == nodeID || + (l->t->cj != NULL && l->t->cj->nodeID == nodeID)) + break; + + /* Check whether instead any of the hydro tasks are for the target node. */ + if (l == NULL) + for (l = ci->density; l != NULL; l = l->next) + if (l->t->ci->nodeID == nodeID || + (l->t->cj != NULL && l->t->cj->nodeID == nodeID)) + break; + + /* If found anything, attach send tasks. */ + if (l != NULL) { + + /* Create the tasks and their dependencies? */ + if (t_ti == NULL) { + + t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend, + 6 * ci->tag + 2, 0, ci, cj); + + /* The super-cell's timestep task should unlock the send_ti task. */ + scheduler_addunlock(s, ci->super->timestep, t_ti); + } + + /* Add them to the local cell. */ engine_addlink(e, &ci->send_ti, t_ti); } @@ -1155,8 +1306,7 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, if (ci->split) for (int k = 0; k < 8; k++) if (ci->progeny[k] != NULL) - engine_addtasks_send(e, ci->progeny[k], cj, t_xv, t_rho, t_gradient, - t_ti); + engine_addtasks_send_timestep(e, ci->progeny[k], cj, t_ti); #else error("SWIFT was not compiled with MPI support."); @@ -1164,76 +1314,139 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, } /** - * @brief Add recv tasks to a hierarchy of cells. + * @brief Add recv tasks for hydro pairs to a hierarchy of cells. * * @param e The #engine. * @param c The foreign #cell. * @param t_xv The recv_xv #task, if it has already been created. * @param t_rho The recv_rho #task, if it has already been created. * @param t_gradient The recv_gradient #task, if it has already been created. - * @param t_ti The recv_ti #task, if required and has already been created. */ -void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv, - struct task *t_rho, struct task *t_gradient, - struct task *t_ti) { +void engine_addtasks_recv_hydro(struct engine *e, struct cell *c, + struct task *t_xv, struct task *t_rho, + struct task *t_gradient) { #ifdef WITH_MPI struct scheduler *s = &e->sched; - /* Do we need to construct a recv task? - Note that since c is a foreign cell, all its density tasks will involve - only the current rank, and thus we don't have to check them.*/ + /* Have we reached a level where there are any hydro tasks ? */ if (t_xv == NULL && c->density != NULL) { /* Create the tasks. */ - t_xv = scheduler_addtask(s, task_type_recv, task_subtype_xv, 4 * c->tag, 0, - c, NULL); + t_xv = scheduler_addtask(s, task_type_recv, task_subtype_xv, 6 * c->tag + 0, + 0, c, NULL); t_rho = scheduler_addtask(s, task_type_recv, task_subtype_rho, - 4 * c->tag + 1, 0, c, NULL); - t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend, - 4 * c->tag + 2, 0, c, NULL); + 6 * c->tag + 1, 0, c, NULL); #ifdef EXTRA_HYDRO_LOOP t_gradient = scheduler_addtask(s, task_type_recv, task_subtype_gradient, - 4 * c->tag + 3, 0, c, NULL); + 6 * c->tag + 3, 0, c, NULL); #endif } + c->recv_xv = t_xv; c->recv_rho = t_rho; c->recv_gradient = t_gradient; - c->recv_ti = t_ti; -/* Add dependencies. */ -#ifdef EXTRA_HYDRO_LOOP + /* Add dependencies. */ + if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts); + for (struct link *l = c->density; l != NULL; l = l->next) { scheduler_addunlock(s, t_xv, l->t); scheduler_addunlock(s, l->t, t_rho); } +#ifdef EXTRA_HYDRO_LOOP for (struct link *l = c->gradient; l != NULL; l = l->next) { scheduler_addunlock(s, t_rho, l->t); scheduler_addunlock(s, l->t, t_gradient); } - for (struct link *l = c->force; l != NULL; l = l->next) { + for (struct link *l = c->force; l != NULL; l = l->next) scheduler_addunlock(s, t_gradient, l->t); - scheduler_addunlock(s, l->t, t_ti); +#else + for (struct link *l = c->force; l != NULL; l = l->next) + scheduler_addunlock(s, t_rho, l->t); +#endif + + /* Recurse? */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_addtasks_recv_hydro(e, c->progeny[k], t_xv, t_rho, t_gradient); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Add recv tasks for gravity pairs to a hierarchy of cells. + * + * @param e The #engine. + * @param c The foreign #cell. + * @param t_grav The recv_gpart #task, if it has already been created. + */ +void engine_addtasks_recv_gravity(struct engine *e, struct cell *c, + struct task *t_grav) { + +#ifdef WITH_MPI + struct scheduler *s = &e->sched; + + /* Have we reached a level where there are any gravity tasks ? */ + if (t_grav == NULL && c->grav != NULL) { + + /* Create the tasks. */ + t_grav = scheduler_addtask(s, task_type_recv, task_subtype_gpart, + 6 * c->tag + 4, 0, c, NULL); } - if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts); + + c->recv_grav = t_grav; + + for (struct link *l = c->grav; l != NULL; l = l->next) + scheduler_addunlock(s, t_grav, l->t); + + /* Recurse? */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) + engine_addtasks_recv_gravity(e, c->progeny[k], t_grav); + #else - for (struct link *l = c->density; l != NULL; l = l->next) { - scheduler_addunlock(s, t_xv, l->t); - scheduler_addunlock(s, l->t, t_rho); + error("SWIFT was not compiled with MPI support."); +#endif +} + +/** + * @brief Add recv tasks for gravity pairs to a hierarchy of cells. + * + * @param e The #engine. + * @param c The foreign #cell. + * @param t_ti The recv_ti #task, if already been created. + */ +void engine_addtasks_recv_timestep(struct engine *e, struct cell *c, + struct task *t_ti) { + +#ifdef WITH_MPI + struct scheduler *s = &e->sched; + + /* Have we reached a level where there are any self/pair tasks ? */ + if (t_ti == NULL && (c->grav != NULL || c->density != NULL)) { + + t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend, + 6 * c->tag + 2, 0, c, NULL); } - for (struct link *l = c->force; l != NULL; l = l->next) { - scheduler_addunlock(s, t_rho, l->t); + + c->recv_ti = t_ti; + + for (struct link *l = c->grav; l != NULL; l = l->next) + scheduler_addunlock(s, l->t, t_ti); + + for (struct link *l = c->force; l != NULL; l = l->next) scheduler_addunlock(s, l->t, t_ti); - } - if (c->sorts != NULL) scheduler_addunlock(s, t_xv, c->sorts); -#endif /* Recurse? */ if (c->split) for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) - engine_addtasks_recv(e, c->progeny[k], t_xv, t_rho, t_gradient, t_ti); + engine_addtasks_recv_timestep(e, c->progeny[k], t_ti); #else error("SWIFT was not compiled with MPI support."); @@ -1257,7 +1470,7 @@ void engine_exchange_cells(struct engine *e) { MPI_Request reqs_in[engine_maxproxies]; MPI_Request reqs_out[engine_maxproxies]; MPI_Status status; - ticks tic = getticks(); + const ticks tic = getticks(); /* Run through the cells and get the size of the ones that will be sent off. */ @@ -1330,8 +1543,10 @@ void engine_exchange_cells(struct engine *e) { size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0; for (int k = 0; k < nr_proxies; k++) for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { - count_parts_in += e->proxies[k].cells_in[j]->count; - count_gparts_in += e->proxies[k].cells_in[j]->gcount; + if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) + count_parts_in += e->proxies[k].cells_in[j]->count; + if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) + count_gparts_in += e->proxies[k].cells_in[j]->gcount; count_sparts_in += e->proxies[k].cells_in[j]->scount; } if (count_parts_in > s->size_parts_foreign) { @@ -1362,11 +1577,18 @@ void engine_exchange_cells(struct engine *e) { struct spart *sparts = s->sparts_foreign; for (int k = 0; k < nr_proxies; k++) { for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { - cell_link_parts(e->proxies[k].cells_in[j], parts); - cell_link_gparts(e->proxies[k].cells_in[j], gparts); + + if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) { + cell_link_parts(e->proxies[k].cells_in[j], parts); + parts = &parts[e->proxies[k].cells_in[j]->count]; + } + + if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) { + cell_link_gparts(e->proxies[k].cells_in[j], gparts); + gparts = &gparts[e->proxies[k].cells_in[j]->gcount]; + } + cell_link_sparts(e->proxies[k].cells_in[j], sparts); - parts = &parts[e->proxies[k].cells_in[j]->count]; - gparts = &gparts[e->proxies[k].cells_in[j]->gcount]; sparts = &sparts[e->proxies[k].cells_in[j]->scount]; } } @@ -1720,12 +1942,235 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, #endif } +/** + * @brief Exchanges the top-level multipoles between all the nodes + * such that every node has a multipole for each top-level cell. + * + * @param e The #engine. + */ +void engine_exchange_top_multipoles(struct engine *e) { + +#ifdef WITH_MPI + +#ifdef SWIFT_DEBUG_CHECKS + for (int i = 0; i < e->s->nr_cells; ++i) { + const struct gravity_tensors *m = &e->s->multipoles_top[i]; + if (e->s->cells_top[i].nodeID == engine_rank) { + if (m->m_pole.M_000 > 0.) { + if (m->CoM[0] < 0. || m->CoM[0] > e->s->dim[0]) + error("Invalid multipole position in X"); + if (m->CoM[1] < 0. || m->CoM[1] > e->s->dim[1]) + error("Invalid multipole position in Y"); + if (m->CoM[2] < 0. || m->CoM[2] > e->s->dim[2]) + error("Invalid multipole position in Z"); + } + } else { + if (m->m_pole.M_000 != 0.) error("Non-zero mass for foreign m-pole"); + if (m->CoM[0] != 0.) error("Non-zero position in X for foreign m-pole"); + if (m->CoM[1] != 0.) error("Non-zero position in Y for foreign m-pole"); + if (m->CoM[2] != 0.) error("Non-zero position in Z for foreign m-pole"); + if (m->m_pole.num_gpart != 0) + error("Non-zero gpart count in foreign m-pole"); + } + } +#endif + + /* Each node (space) has constructed its own top-level multipoles. + * We now need to make sure every other node has a copy of everything. + * + * WARNING: Adult stuff ahead: don't do this at home! + * + * Since all nodes have their top-level multi-poles computed + * and all foreign ones set to 0 (all bytes), we can gather all the m-poles + * by doing a bit-wise OR reduction across all the nodes directly in + * place inside the multi-poles_top array. + * This only works if the foreign m-poles on every nodes are zeroed and no + * multi-pole is present on more than one node (two things guaranteed by the + * domain decomposition). + */ + MPI_Allreduce(MPI_IN_PLACE, e->s->multipoles_top, + e->s->nr_cells * sizeof(struct gravity_tensors), MPI_BYTE, + MPI_BOR, MPI_COMM_WORLD); + +#ifdef SWIFT_DEBUG_CHECKS + long long counter = 0; + + /* Let's check that what we received makes sense */ + for (int i = 0; i < e->s->nr_cells; ++i) { + const struct gravity_tensors *m = &e->s->multipoles_top[i]; + counter += m->m_pole.num_gpart; + if (m->m_pole.M_000 > 0.) { + if (m->CoM[0] < 0. || m->CoM[0] > e->s->dim[0]) + error("Invalid multipole position in X"); + if (m->CoM[1] < 0. || m->CoM[1] > e->s->dim[1]) + error("Invalid multipole position in Y"); + if (m->CoM[2] < 0. || m->CoM[2] > e->s->dim[2]) + error("Invalid multipole position in Z"); + } + } + if (counter != e->total_nr_gparts) + error("Total particles in multipoles inconsistent with engine"); +#endif + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + +void engine_exchange_proxy_multipoles(struct engine *e) { + +#ifdef WITH_MPI + + const ticks tic = getticks(); + + /* Start by counting the number of cells to send and receive */ + int count_send = 0; + int count_recv = 0; + int count_send_requests = 0; + int count_recv_requests = 0; + + /* Loop over the proxies. */ + for (int pid = 0; pid < e->nr_proxies; pid++) { + + /* Get a handle on the proxy. */ + const struct proxy *p = &e->proxies[pid]; + + /* Now collect the number of requests associated */ + count_recv_requests += p->nr_cells_in; + count_send_requests += p->nr_cells_out; + + /* And the actual number of things we are going to ship */ + for (int k = 0; k < p->nr_cells_in; k++) + count_recv += p->cells_in[k]->pcell_size; + + for (int k = 0; k < p->nr_cells_out; k++) + count_send += p->cells_out[k]->pcell_size; + } + + /* Allocate the buffers for the packed data */ + struct gravity_tensors *buffer_send = + malloc(sizeof(struct gravity_tensors) * count_send); + struct gravity_tensors *buffer_recv = + malloc(sizeof(struct gravity_tensors) * count_recv); + if (buffer_send == NULL || buffer_recv == NULL) + error("Unable to allocate memory for multipole transactions"); + + /* Also allocate the MPI requests */ + const int count_requests = count_send_requests + count_recv_requests; + MPI_Request *requests = malloc(sizeof(MPI_Request) * count_requests); + if (requests == NULL) error("Unable to allocate memory for MPI requests"); + + int this_request = 0; + int this_recv = 0; + int this_send = 0; + + /* Loop over the proxies to issue the receives. */ + for (int pid = 0; pid < e->nr_proxies; pid++) { + + /* Get a handle on the proxy. */ + const struct proxy *p = &e->proxies[pid]; + + for (int k = 0; k < p->nr_cells_in; k++) { + + const int num_elements = p->cells_in[k]->pcell_size; + + /* Receive everything */ + MPI_Irecv(&buffer_recv[this_recv], + num_elements * sizeof(struct gravity_tensors), MPI_BYTE, + p->cells_in[k]->nodeID, p->cells_in[k]->tag, MPI_COMM_WORLD, + &requests[this_request]); + + /* Move to the next slot in the buffers */ + this_recv += num_elements; + this_request++; + } + + /* Loop over the proxies to issue the sends. */ + for (int k = 0; k < p->nr_cells_out; k++) { + + /* Number of multipoles in this cell hierarchy */ + const int num_elements = p->cells_out[k]->pcell_size; + + /* Let's pack everything recursively */ + cell_pack_multipoles(p->cells_out[k], &buffer_send[this_send]); + + /* Send everything (note the use of cells_in[0] to get the correct node + * ID. */ + MPI_Isend(&buffer_send[this_send], + num_elements * sizeof(struct gravity_tensors), MPI_BYTE, + p->cells_in[0]->nodeID, p->cells_out[k]->tag, MPI_COMM_WORLD, + &requests[this_request]); + + /* Move to the next slot in the buffers */ + this_send += num_elements; + this_request++; + } + } + + /* Wait for all the requests to arrive home */ + MPI_Status *stats = malloc(count_requests * sizeof(MPI_Status)); + int res; + if ((res = MPI_Waitall(count_requests, requests, stats)) != MPI_SUCCESS) { + for (int k = 0; k < count_requests; ++k) { + char buff[MPI_MAX_ERROR_STRING]; + MPI_Error_string(stats[k].MPI_ERROR, buff, &res); + message("request from source %i, tag %i has error '%s'.", + stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff); + } + error("Failed during waitall for multipole data."); + } + + /* Let's now unpack the multipoles at the right place */ + this_recv = 0; + for (int pid = 0; pid < e->nr_proxies; pid++) { + + /* Get a handle on the proxy. */ + const struct proxy *p = &e->proxies[pid]; + + for (int k = 0; k < p->nr_cells_in; k++) { + + const int num_elements = p->cells_in[k]->pcell_size; + +#ifdef SWIFT_DEBUG_CHECKS + + /* Check that the first element (top-level cell's multipole) matches what + * we received */ + if (p->cells_in[k]->multipole->m_pole.num_gpart != + buffer_recv[this_recv].m_pole.num_gpart) + error("Current: M_000=%e num_gpart=%lld\n New: M_000=%e num_gpart=%lld", + p->cells_in[k]->multipole->m_pole.M_000, + p->cells_in[k]->multipole->m_pole.num_gpart, + buffer_recv[this_recv].m_pole.M_000, + buffer_recv[this_recv].m_pole.num_gpart); +#endif + + /* Unpack recursively */ + cell_unpack_multipoles(p->cells_in[k], &buffer_recv[this_recv]); + + /* Move to the next slot in the buffers */ + this_recv += num_elements; + } + } + + /* Free everything */ + free(stats); + free(buffer_send); + free(buffer_recv); + free(requests); + + /* How much time did this take? */ + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + /** * @brief Constructs the top-level tasks for the short-range gravity * and long-range gravity interactions. * - * - One FTT task per MPI rank. - * - Multiple gravity ghosts for dependencies. * - All top-cells get a self task. * - All pairs within range according to the multipole acceptance * criterion get a pair task. @@ -1773,8 +2218,8 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, const int ghost_id = cell_getid(cdim_ghost, i / 4, j / 4, k / 4); if (ghost_id > n_ghosts) error("Invalid ghost_id"); if (periodic) { - ci->grav_ghost[0] = ghosts[2 * ghost_id + 0]; - ci->grav_ghost[1] = ghosts[2 * ghost_id + 1]; + ci->grav_ghost_in = ghosts[2 * ghost_id + 0]; + ci->grav_ghost_out = ghosts[2 * ghost_id + 1]; } /* Recover the multipole information */ @@ -1790,15 +2235,12 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, const int cjd = cell_getid(cdim, ii, jj, kk); struct cell *cj = &cells[cjd]; - /* Avoid duplicates */ - if (cid <= cjd) continue; + /* Avoid duplicates of local pairs*/ + if (cid <= cjd && cj->nodeID == nodeID) continue; /* Skip cells without gravity particles */ if (cj->gcount == 0) continue; - /* Is that neighbour local ? */ - if (cj->nodeID != nodeID) continue; // MATTHIEU - /* Recover the multipole information */ const struct gravity_tensors *const multi_j = cj->multipole; @@ -1831,11 +2273,10 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, /** * @brief Constructs the top-level tasks for the short-range gravity - * interactions. + * interactions (master function). * - * - All top-cells get a self task. - * - All pairs within range according to the multipole acceptance - * criterion get a pair task. + * - Create the FFT task and the array of gravity ghosts. + * - Call the mapper function to create the other tasks. * * @param e The #engine. */ @@ -1863,9 +2304,9 @@ void engine_make_self_gravity_tasks(struct engine *e) { /* Make the ghosts implicit and add the dependencies */ for (int n = 0; n < n_ghosts / 2; ++n) { ghosts[2 * n + 0] = scheduler_addtask( - sched, task_type_grav_ghost, task_subtype_none, 0, 1, NULL, NULL); + sched, task_type_grav_ghost_in, task_subtype_none, 0, 1, NULL, NULL); ghosts[2 * n + 1] = scheduler_addtask( - sched, task_type_grav_ghost, task_subtype_none, 0, 1, NULL, NULL); + sched, task_type_grav_ghost_out, task_subtype_none, 0, 1, NULL, NULL); scheduler_addunlock(sched, ghosts[2 * n + 0], s->grav_top_level); scheduler_addunlock(sched, s->grav_top_level, ghosts[2 * n + 1]); } @@ -1876,6 +2317,19 @@ void engine_make_self_gravity_tasks(struct engine *e) { threadpool_map(&e->threadpool, engine_make_self_gravity_tasks_mapper, NULL, s->nr_cells, 1, 0, extra_data); +#ifdef SWIFT_DEBUG_CHECKS + if (periodic) + for (int i = 0; i < s->nr_cells; ++i) { + const struct cell *c = &s->cells_top[i]; + if (c->nodeID == engine_rank && + (c->grav_ghost_in == NULL || c->grav_ghost_out == NULL)) + error("Invalid gravity_ghost for local cell"); + if (c->nodeID != engine_rank && + (c->grav_ghost_in != NULL || c->grav_ghost_out != NULL)) + error("Invalid gravity_ghost for foreign cell"); + } +#endif + /* Clean up. */ if (periodic) free(ghosts); } @@ -2085,9 +2539,9 @@ static inline void engine_make_self_gravity_dependencies( struct scheduler *sched, struct task *gravity, struct cell *c) { /* init --> gravity --> grav_down --> kick */ - scheduler_addunlock(sched, c->super->drift_gpart, gravity); - scheduler_addunlock(sched, c->super->init_grav, gravity); - scheduler_addunlock(sched, gravity, c->super->grav_down); + scheduler_addunlock(sched, c->super_gravity->drift_gpart, gravity); + scheduler_addunlock(sched, c->super_gravity->init_grav, gravity); + scheduler_addunlock(sched, gravity, c->super_gravity->grav_down); } /** @@ -2102,7 +2556,7 @@ static inline void engine_make_external_gravity_dependencies( struct scheduler *sched, struct task *gravity, struct cell *c) { /* init --> external gravity --> kick */ - scheduler_addunlock(sched, c->super->drift_gpart, gravity); + scheduler_addunlock(sched, c->super_gravity->drift_gpart, gravity); scheduler_addunlock(sched, gravity, c->super->kick2); } @@ -2116,7 +2570,6 @@ void engine_link_gravity_tasks(struct engine *e) { struct scheduler *sched = &e->sched; const int nodeID = e->nodeID; const int nr_tasks = sched->nr_tasks; - const int periodic = e->s->periodic; for (int k = 0; k < nr_tasks; k++) { @@ -2127,7 +2580,6 @@ void engine_link_gravity_tasks(struct engine *e) { if (t->type == task_type_self && t->subtype == task_subtype_grav) { engine_make_self_gravity_dependencies(sched, t, t->ci); - if (periodic) scheduler_addunlock(sched, t->ci->super->grav_ghost[1], t); } /* Self-interaction for external gravity ? */ @@ -2143,15 +2595,12 @@ void engine_link_gravity_tasks(struct engine *e) { if (t->ci->nodeID == nodeID) { engine_make_self_gravity_dependencies(sched, t, t->ci); - if (periodic && t->ci->super < t->cj->super) - scheduler_addunlock(sched, t->ci->super->grav_ghost[1], t); } - if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { + if (t->cj->nodeID == nodeID && + t->ci->super_gravity != t->cj->super_gravity) { engine_make_self_gravity_dependencies(sched, t, t->cj); - if (periodic && t->ci->super < t->cj->super) - scheduler_addunlock(sched, t->cj->super->grav_ghost[1], t); } } @@ -2180,7 +2629,8 @@ void engine_link_gravity_tasks(struct engine *e) { engine_make_self_gravity_dependencies(sched, t, t->ci); } - if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { + if (t->cj->nodeID == nodeID && + t->ci->super_gravity != t->cj->super_gravity) { engine_make_self_gravity_dependencies(sched, t, t->cj); } @@ -2206,18 +2656,11 @@ static inline void engine_make_hydro_loops_dependencies( /* density loop --> ghost --> gradient loop --> extra_ghost */ /* extra_ghost --> force loop */ - scheduler_addunlock(sched, density, c->super->ghost_in); - scheduler_addunlock(sched, c->super->ghost_out, gradient); - scheduler_addunlock(sched, gradient, c->super->extra_ghost); - scheduler_addunlock(sched, c->super->extra_ghost, force); - - if (with_cooling) { - /* force loop --> cooling (--> kick2) */ - scheduler_addunlock(sched, force, c->super->cooling); - } else { - /* force loop --> kick2 */ - scheduler_addunlock(sched, force, c->super->kick2); - } + scheduler_addunlock(sched, c->super_hydro->sorts, density); + scheduler_addunlock(sched, density, c->super_hydro->ghost_in); + scheduler_addunlock(sched, c->super_hydro->ghost_out, gradient); + scheduler_addunlock(sched, gradient, c->super_hydro->extra_ghost); + scheduler_addunlock(sched, c->super_hydro->extra_ghost, force); } #else @@ -2237,16 +2680,8 @@ static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched, struct cell *c, int with_cooling) { /* density loop --> ghost --> force loop */ - scheduler_addunlock(sched, density, c->super->ghost_in); - scheduler_addunlock(sched, c->super->ghost_out, force); - - if (with_cooling) { - /* force loop --> cooling (--> kick2) */ - scheduler_addunlock(sched, force, c->super->cooling); - } else { - /* force loop --> kick2 */ - scheduler_addunlock(sched, force, c->super->kick2); - } + scheduler_addunlock(sched, density, c->super_hydro->ghost_in); + scheduler_addunlock(sched, c->super_hydro->ghost_out, force); } #endif @@ -2273,14 +2708,14 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, /* Sort tasks depend on the drift of the cell. */ if (t->type == task_type_sort && t->ci->nodeID == engine_rank) { - scheduler_addunlock(sched, t->ci->super->drift_part, t); + scheduler_addunlock(sched, t->ci->super_hydro->drift_part, t); } /* Self-interaction? */ else if (t->type == task_type_self && t->subtype == task_subtype_density) { /* Make the self-density tasks depend on the drift only. */ - scheduler_addunlock(sched, t->ci->super->drift_part, t); + scheduler_addunlock(sched, t->ci->super_hydro->drift_part, t); #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop. */ @@ -2308,6 +2743,7 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, /* Now, build all the dependencies for the hydro */ engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); + scheduler_addunlock(sched, t2, t->ci->super->kick2); #endif } @@ -2316,12 +2752,12 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, /* Make all density tasks depend on the drift and the sorts. */ if (t->ci->nodeID == engine_rank) - scheduler_addunlock(sched, t->ci->super->drift_part, t); - scheduler_addunlock(sched, t->ci->super->sorts, t); - if (t->ci->super != t->cj->super) { + scheduler_addunlock(sched, t->ci->super_hydro->drift_part, t); + scheduler_addunlock(sched, t->ci->super_hydro->sorts, t); + if (t->ci->super_hydro != t->cj->super_hydro) { if (t->cj->nodeID == engine_rank) - scheduler_addunlock(sched, t->cj->super->drift_part, t); - scheduler_addunlock(sched, t->cj->super->sorts, t); + scheduler_addunlock(sched, t->cj->super_hydro->drift_part, t); + scheduler_addunlock(sched, t->cj->super_hydro->sorts, t); } #ifdef EXTRA_HYDRO_LOOP @@ -2338,12 +2774,12 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, engine_addlink(e, &t->cj->force, t3); /* Now, build all the dependencies for the hydro for the cells */ - /* that are local and are not descendant of the same super-cells */ + /* that are local and are not descendant of the same super_hydro-cells */ if (t->ci->nodeID == nodeID) { engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, with_cooling); } - if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { + if (t->cj->nodeID == nodeID && t->ci->super_hydro != t->cj->super_hydro) { engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj, with_cooling); } @@ -2359,12 +2795,17 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, engine_addlink(e, &t->cj->force, t2); /* Now, build all the dependencies for the hydro for the cells */ - /* that are local and are not descendant of the same super-cells */ + /* that are local and are not descendant of the same super_hydro-cells */ if (t->ci->nodeID == nodeID) { engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); + scheduler_addunlock(sched, t2, t->ci->super->kick2); } - if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { - engine_make_hydro_loops_dependencies(sched, t, t2, t->cj, with_cooling); + if (t->cj->nodeID == nodeID) { + if (t->ci->super_hydro != t->cj->super_hydro) + engine_make_hydro_loops_dependencies(sched, t, t2, t->cj, + with_cooling); + if (t->ci->super != t->cj->super) + scheduler_addunlock(sched, t2, t->cj->super->kick2); } #endif @@ -2376,8 +2817,8 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, t->subtype == task_subtype_density) { /* Make all density tasks depend on the drift and sorts. */ - scheduler_addunlock(sched, t->ci->super->drift_part, t); - scheduler_addunlock(sched, t->ci->super->sorts, t); + scheduler_addunlock(sched, t->ci->super_hydro->drift_part, t); + scheduler_addunlock(sched, t->ci->super_hydro->sorts, t); #ifdef EXTRA_HYDRO_LOOP @@ -2394,7 +2835,7 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, engine_addlink(e, &t->ci->force, t3); /* Now, build all the dependencies for the hydro for the cells */ - /* that are local and are not descendant of the same super-cells */ + /* that are local and are not descendant of the same super_hydro-cells */ if (t->ci->nodeID == nodeID) { engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, with_cooling); @@ -2410,10 +2851,12 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, engine_addlink(e, &t->ci->force, t2); /* Now, build all the dependencies for the hydro for the cells */ - /* that are local and are not descendant of the same super-cells */ + /* that are local and are not descendant of the same super_hydro-cells */ if (t->ci->nodeID == nodeID) { engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); - } + scheduler_addunlock(sched, t2, t->ci->super->kick2); + } else + error("oo"); #endif } @@ -2423,12 +2866,12 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, /* Make all density tasks depend on the drift. */ if (t->ci->nodeID == engine_rank) - scheduler_addunlock(sched, t->ci->super->drift_part, t); - scheduler_addunlock(sched, t->ci->super->sorts, t); - if (t->ci->super != t->cj->super) { + scheduler_addunlock(sched, t->ci->super_hydro->drift_part, t); + scheduler_addunlock(sched, t->ci->super_hydro->sorts, t); + if (t->ci->super_hydro != t->cj->super_hydro) { if (t->cj->nodeID == engine_rank) - scheduler_addunlock(sched, t->cj->super->drift_part, t); - scheduler_addunlock(sched, t->cj->super->sorts, t); + scheduler_addunlock(sched, t->cj->super_hydro->drift_part, t); + scheduler_addunlock(sched, t->cj->super_hydro->sorts, t); } #ifdef EXTRA_HYDRO_LOOP @@ -2448,12 +2891,12 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, engine_addlink(e, &t->cj->force, t3); /* Now, build all the dependencies for the hydro for the cells */ - /* that are local and are not descendant of the same super-cells */ + /* that are local and are not descendant of the same super_hydro-cells */ if (t->ci->nodeID == nodeID) { engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->ci, with_cooling); } - if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { + if (t->cj->nodeID == nodeID && t->ci->super_hydro != t->cj->super_hydro) { engine_make_hydro_loops_dependencies(sched, t, t2, t3, t->cj, with_cooling); } @@ -2469,52 +2912,23 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, engine_addlink(e, &t->cj->force, t2); /* Now, build all the dependencies for the hydro for the cells */ - /* that are local and are not descendant of the same super-cells */ + /* that are local and are not descendant of the same super_hydro-cells */ if (t->ci->nodeID == nodeID) { engine_make_hydro_loops_dependencies(sched, t, t2, t->ci, with_cooling); + scheduler_addunlock(sched, t2, t->ci->super->kick2); } - if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { - engine_make_hydro_loops_dependencies(sched, t, t2, t->cj, with_cooling); + if (t->cj->nodeID == nodeID) { + if (t->ci->super_hydro != t->cj->super_hydro) + engine_make_hydro_loops_dependencies(sched, t, t2, t->cj, + with_cooling); + if (t->ci->super != t->cj->super) + scheduler_addunlock(sched, t2, t->cj->super->kick2); } #endif } } } -/** - * @brief Constructs the gravity tasks building the multipoles and propagating - *them to the children - * - * Correct implementation is still lacking here. - * - * @param e The #engine. - */ -void engine_make_gravityrecursive_tasks(struct engine *e) { - - /* struct space *s = e->s; */ - /* struct scheduler *sched = &e->sched; */ - /* const int nodeID = e->nodeID; */ - /* const int nr_cells = s->nr_cells; */ - /* struct cell *cells = s->cells_top; */ - - /* for (int k = 0; k < nr_cells; k++) { */ - - /* /\* Only do this for local cells containing gravity particles *\/ */ - /* if (cells[k].nodeID == nodeID && cells[k].gcount > 0) { */ - - /* /\* Create tasks at top level. *\/ */ - /* struct task *up = NULL; */ - /* struct task *down = NULL; */ - /* /\* scheduler_addtask(sched, task_type_grav_down, - * task_subtype_none, 0, 0, *\/ */ - /* /\* &cells[k], NULL); *\/ */ - - /* /\* Push tasks down the cell hierarchy. *\/ */ - /* engine_addtasks_grav(e, &cells[k], up, down); */ - /* } */ - /* } */ -} - /** * @brief Fill the #space's task list. * @@ -2579,10 +2993,6 @@ void engine_maketasks(struct engine *e) { error("Failed to allocate cell-task links."); e->nr_links = 0; - /* Add the gravity up/down tasks at the top-level cells and push them down. */ - if (e->policy & engine_policy_self_gravity) - engine_make_gravityrecursive_tasks(e); - /* Count the number of tasks associated with each cell and store the density tasks in each cell, and make each sort depend on the sorts of its progeny. */ @@ -2592,7 +3002,7 @@ void engine_maketasks(struct engine *e) { /* Now that the self/pair tasks are at the right level, set the super * pointers. */ threadpool_map(&e->threadpool, cell_set_super_mapper, cells, nr_cells, - sizeof(struct cell), 0, NULL); + sizeof(struct cell), 0, e); /* Append hierarchical tasks to each cell. */ threadpool_map(&e->threadpool, engine_make_hierarchical_tasks_mapper, cells, @@ -2619,16 +3029,43 @@ void engine_maketasks(struct engine *e) { /* Get a handle on the proxy. */ struct proxy *p = &e->proxies[pid]; - /* Loop through the proxy's incoming cells and add the - recv tasks. */ for (int k = 0; k < p->nr_cells_in; k++) - engine_addtasks_recv(e, p->cells_in[k], NULL, NULL, NULL, NULL); + engine_addtasks_recv_timestep(e, p->cells_in[k], NULL); - /* Loop through the proxy's outgoing cells and add the - send tasks. */ for (int k = 0; k < p->nr_cells_out; k++) - engine_addtasks_send(e, p->cells_out[k], p->cells_in[0], NULL, NULL, - NULL, NULL); + engine_addtasks_send_timestep(e, p->cells_out[k], p->cells_in[0], NULL); + + /* Loop through the proxy's incoming cells and add the + recv tasks for the cells in the proxy that have a hydro connection. */ + if (e->policy & engine_policy_hydro) + for (int k = 0; k < p->nr_cells_in; k++) + if (p->cells_in_type[k] & proxy_cell_type_hydro) + engine_addtasks_recv_hydro(e, p->cells_in[k], NULL, NULL, NULL); + + /* Loop through the proxy's incoming cells and add the + recv tasks for the cells in the proxy that have a gravity connection. + */ + if (e->policy & engine_policy_self_gravity) + for (int k = 0; k < p->nr_cells_in; k++) + if (p->cells_in_type[k] & proxy_cell_type_gravity) + engine_addtasks_recv_gravity(e, p->cells_in[k], NULL); + + /* Loop through the proxy's outgoing cells and add the + send tasks for the cells in the proxy that have a hydro connection. */ + if (e->policy & engine_policy_hydro) + for (int k = 0; k < p->nr_cells_out; k++) + if (p->cells_out_type[k] & proxy_cell_type_hydro) + engine_addtasks_send_hydro(e, p->cells_out[k], p->cells_in[0], NULL, + NULL, NULL); + + /* Loop through the proxy's outgoing cells and add the + send tasks for the cells in the proxy that have a gravity connection. + */ + if (e->policy & engine_policy_self_gravity) + for (int k = 0; k < p->nr_cells_out; k++) + if (p->cells_out_type[k] & proxy_cell_type_gravity) + engine_addtasks_send_gravity(e, p->cells_out[k], p->cells_in[0], + NULL); } } #endif @@ -2677,24 +3114,42 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (ci->nodeID != engine_rank) error("Non-local self task found"); - /* Set this task's skip. */ - if (cell_is_active(ci, e)) scheduler_activate(s, t); - /* Activate the hydro drift */ if (t->type == task_type_self && t->subtype == task_subtype_density) { - cell_activate_drift_part(ci, s); - } - /* Activate the gravity drift */ - else if (t->type == task_type_self && t->subtype == task_subtype_grav) { - cell_activate_subcell_grav_tasks(t->ci, NULL, s); + if (cell_is_active_hydro(ci, e)) { + scheduler_activate(s, t); + cell_activate_drift_part(ci, s); + } } + /* Store current values of dx_max and h_max. */ else if (t->type == task_type_sub_self && t->subtype == task_subtype_density) { - cell_activate_subcell_tasks(ci, NULL, s); + if (cell_is_active_hydro(ci, e)) { + scheduler_activate(s, t); + cell_activate_subcell_hydro_tasks(ci, NULL, s); + } + } - } else if (t->type == task_type_sub_self && - t->subtype == task_subtype_grav) { + else if (t->type == task_type_self && t->subtype == task_subtype_force) { + if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t); + } + + else if (t->type == task_type_sub_self && + t->subtype == task_subtype_force) { + if (cell_is_active_hydro(ci, e)) scheduler_activate(s, t); + } + + /* Activate the gravity drift */ + else if (t->type == task_type_self && t->subtype == task_subtype_grav) { + if (cell_is_active_gravity(ci, e)) { + scheduler_activate(s, t); + cell_activate_subcell_grav_tasks(t->ci, NULL, s); + } + } + + else if (t->type == task_type_sub_self && + t->subtype == task_subtype_grav) { error("Invalid task sub-type encountered"); } } @@ -2705,12 +3160,17 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Local pointers. */ struct cell *ci = t->ci; struct cell *cj = t->cj; - const int ci_active = cell_is_active(ci, e); - const int cj_active = cell_is_active(cj, e); + const int ci_active_hydro = cell_is_active_hydro(ci, e); + const int cj_active_hydro = cell_is_active_hydro(cj, e); + const int ci_active_gravity = cell_is_active_gravity(ci, e); + const int cj_active_gravity = cell_is_active_gravity(cj, e); /* Only activate tasks that involve a local active cell. */ - if ((ci_active && ci->nodeID == engine_rank) || - (cj_active && cj->nodeID == engine_rank)) { + if ((t->subtype == task_subtype_density || + t->subtype == task_subtype_force) && + ((ci_active_hydro && ci->nodeID == engine_rank) || + (cj_active_hydro && cj->nodeID == engine_rank))) { + scheduler_activate(s, t); /* Set the correct sorting flags */ @@ -2730,19 +3190,28 @@ void engine_marktasks_mapper(void *map_data, int num_elements, cell_activate_sorts(ci, t->flags, s); cell_activate_sorts(cj, t->flags, s); - } else if (t->type == task_type_pair && - t->subtype == task_subtype_grav) { - /* Activate the gravity drift */ - cell_activate_subcell_grav_tasks(t->ci, t->cj, s); } /* Store current values of dx_max and h_max. */ else if (t->type == task_type_sub_pair && t->subtype == task_subtype_density) { - cell_activate_subcell_tasks(t->ci, t->cj, s); + cell_activate_subcell_hydro_tasks(t->ci, t->cj, s); + } + } + + if ((t->subtype == task_subtype_grav) && + ((ci_active_gravity && ci->nodeID == engine_rank) || + (cj_active_gravity && cj->nodeID == engine_rank))) { + + scheduler_activate(s, t); + + if (t->type == task_type_pair && t->subtype == task_subtype_grav) { + /* Activate the gravity drift */ + cell_activate_subcell_grav_tasks(t->ci, t->cj, s); + } - } else if (t->type == task_type_sub_pair && - t->subtype == task_subtype_grav) { + else if (t->type == task_type_sub_pair && + t->subtype == task_subtype_grav) { error("Invalid task sub-type encountered"); } } @@ -2758,9 +3227,9 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (ci->nodeID != engine_rank) { /* If the local cell is active, receive data from the foreign cell. */ - if (cj_active) { + if (cj_active_hydro) { scheduler_activate(s, ci->recv_xv); - if (ci_active) { + if (ci_active_hydro) { scheduler_activate(s, ci->recv_rho); #ifdef EXTRA_HYDRO_LOOP scheduler_activate(s, ci->recv_gradient); @@ -2769,10 +3238,10 @@ void engine_marktasks_mapper(void *map_data, int num_elements, } /* If the foreign cell is active, we want its ti_end values. */ - if (ci_active) scheduler_activate(s, ci->recv_ti); + if (ci_active_hydro) scheduler_activate(s, ci->recv_ti); /* Is the foreign cell active and will need stuff from us? */ - if (ci_active) { + if (ci_active_hydro) { struct link *l = scheduler_activate_send(s, cj->send_xv, ci->nodeID); @@ -2783,7 +3252,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, cell_activate_drift_part(l->t->ci, s); /* If the local cell is also active, more stuff will be needed. */ - if (cj_active) { + if (cj_active_hydro) { scheduler_activate_send(s, cj->send_rho, ci->nodeID); #ifdef EXTRA_HYDRO_LOOP @@ -2793,14 +3262,15 @@ void engine_marktasks_mapper(void *map_data, int num_elements, } /* If the local cell is active, send its ti_end values. */ - if (cj_active) scheduler_activate_send(s, cj->send_ti, ci->nodeID); + if (cj_active_hydro) + scheduler_activate_send(s, cj->send_ti, ci->nodeID); } else if (cj->nodeID != engine_rank) { /* If the local cell is active, receive data from the foreign cell. */ - if (ci_active) { + if (ci_active_hydro) { scheduler_activate(s, cj->recv_xv); - if (cj_active) { + if (cj_active_hydro) { scheduler_activate(s, cj->recv_rho); #ifdef EXTRA_HYDRO_LOOP scheduler_activate(s, cj->recv_gradient); @@ -2809,10 +3279,10 @@ void engine_marktasks_mapper(void *map_data, int num_elements, } /* If the foreign cell is active, we want its ti_end values. */ - if (cj_active) scheduler_activate(s, cj->recv_ti); + if (cj_active_hydro) scheduler_activate(s, cj->recv_ti); /* Is the foreign cell active and will need stuff from us? */ - if (cj_active) { + if (cj_active_hydro) { struct link *l = scheduler_activate_send(s, ci->send_xv, cj->nodeID); @@ -2823,7 +3293,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, cell_activate_drift_part(l->t->ci, s); /* If the local cell is also active, more stuff will be needed. */ - if (ci_active) { + if (ci_active_hydro) { scheduler_activate_send(s, ci->send_rho, cj->nodeID); @@ -2834,32 +3304,97 @@ void engine_marktasks_mapper(void *map_data, int num_elements, } /* If the local cell is active, send its ti_end values. */ - if (ci_active) scheduler_activate_send(s, ci->send_ti, cj->nodeID); + if (ci_active_hydro) + scheduler_activate_send(s, ci->send_ti, cj->nodeID); + } +#endif + } + + /* Only interested in gravity tasks as of here. */ + if (t->subtype == task_subtype_grav) { + +#ifdef WITH_MPI + /* Activate the send/recv tasks. */ + if (ci->nodeID != engine_rank) { + + /* If the local cell is active, receive data from the foreign cell. */ + if (cj_active_gravity) { + scheduler_activate(s, ci->recv_grav); + } + + /* If the foreign cell is active, we want its ti_end values. */ + if (ci_active_gravity) scheduler_activate(s, ci->recv_ti); + + /* Is the foreign cell active and will need stuff from us? */ + if (ci_active_gravity) { + + struct link *l = + scheduler_activate_send(s, cj->send_grav, ci->nodeID); + + /* Drift the cell which will be sent at the level at which it is + sent, i.e. drift the cell specified in the send task (l->t) + itself. */ + cell_activate_drift_gpart(l->t->ci, s); + } + + /* If the local cell is active, send its ti_end values. */ + if (cj_active_gravity) + scheduler_activate_send(s, cj->send_ti, ci->nodeID); + + } else if (cj->nodeID != engine_rank) { + + /* If the local cell is active, receive data from the foreign cell. */ + if (ci_active_gravity) { + scheduler_activate(s, cj->recv_grav); + } + + /* If the foreign cell is active, we want its ti_end values. */ + if (cj_active_gravity) scheduler_activate(s, cj->recv_ti); + + /* Is the foreign cell active and will need stuff from us? */ + if (cj_active_gravity) { + + struct link *l = + scheduler_activate_send(s, ci->send_grav, cj->nodeID); + + /* Drift the cell which will be sent at the level at which it is + sent, i.e. drift the cell specified in the send task (l->t) + itself. */ + cell_activate_drift_gpart(l->t->ci, s); + } + + /* If the local cell is active, send its ti_end values. */ + if (ci_active_gravity) + scheduler_activate_send(s, ci->send_ti, cj->nodeID); } #endif } } /* Kick/init ? */ - else if (t->type == task_type_kick1 || t->type == task_type_kick2 || - t->type == task_type_init_grav) { - if (cell_is_active(t->ci, e)) scheduler_activate(s, t); + else if (t->type == task_type_kick1 || t->type == task_type_kick2) { + + if (cell_is_active_hydro(t->ci, e) || cell_is_active_gravity(t->ci, e)) + scheduler_activate(s, t); } /* Hydro ghost tasks ? */ - else if (t->type == task_type_ghost || t->type == task_type_extra_ghost) { - if (cell_is_active(t->ci, e)) scheduler_activate(s, t); + else if (t->type == task_type_ghost || t->type == task_type_extra_ghost || + t->type == task_type_ghost_in || t->type == task_type_ghost_out) { + if (cell_is_active_hydro(t->ci, e)) scheduler_activate(s, t); } /* Gravity stuff ? */ else if (t->type == task_type_grav_down || - t->type == task_type_grav_long_range) { - if (cell_is_active(t->ci, e)) scheduler_activate(s, t); + t->type == task_type_grav_long_range || + t->type == task_type_init_grav) { + if (cell_is_active_gravity(t->ci, e)) scheduler_activate(s, t); } /* Periodic gravity stuff (Note this is not linked to a cell) ? */ else if (t->type == task_type_grav_top_level || - t->type == task_type_grav_ghost) { + t->type == task_type_grav_ghost_in || + t->type == task_type_grav_ghost_out) { scheduler_activate(s, t); } @@ -2868,12 +3403,13 @@ void engine_marktasks_mapper(void *map_data, int num_elements, t->ci->updated = 0; t->ci->g_updated = 0; t->ci->s_updated = 0; - if (cell_is_active(t->ci, e)) scheduler_activate(s, t); + if (cell_is_active_hydro(t->ci, e) || cell_is_active_gravity(t->ci, e)) + scheduler_activate(s, t); } /* Subgrid tasks */ else if (t->type == task_type_cooling || t->type == task_type_sourceterms) { - if (cell_is_active(t->ci, e)) scheduler_activate(s, t); + if (cell_is_active_hydro(t->ci, e)) scheduler_activate(s, t); } } } @@ -3077,9 +3613,15 @@ void engine_rebuild(struct engine *e, int clean_h_values) { /* Initial cleaning up session ? */ if (clean_h_values) space_sanitize(e->s); -/* If in parallel, exchange the cell structure. */ +/* If in parallel, exchange the cell structure, top-level and neighbouring + * multipoles. */ #ifdef WITH_MPI engine_exchange_cells(e); + + if (e->policy & engine_policy_self_gravity) engine_exchange_top_multipoles(e); + + if (e->policy & engine_policy_self_gravity) + engine_exchange_proxy_multipoles(e); #endif /* Re-build the tasks. */ @@ -3101,7 +3643,7 @@ void engine_rebuild(struct engine *e, int clean_h_values) { error("engine_marktasks failed after space_rebuild."); /* Print the status of the system */ - // if (e->verbose) engine_print_task_counts(e); + if (e->verbose) engine_print_task_counts(e); /* Flag that a rebuild has taken place */ e->step_props |= engine_step_prop_rebuild; @@ -3183,7 +3725,10 @@ void engine_collect_end_of_step_recurse(struct cell *c) { /* Counters for the different quantities. */ int updated = 0, g_updated = 0, s_updated = 0; - integertime_t ti_end_min = max_nr_timesteps, ti_end_max = 0, ti_beg_max = 0; + integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, + ti_hydro_beg_max = 0; + integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, + ti_gravity_beg_max = 0; /* Collect the values from the progeny. */ for (int k = 0; k < 8; k++) { @@ -3194,9 +3739,12 @@ void engine_collect_end_of_step_recurse(struct cell *c) { engine_collect_end_of_step_recurse(cp); /* And update */ - ti_end_min = min(ti_end_min, cp->ti_end_min); - ti_end_max = max(ti_end_max, cp->ti_end_max); - ti_beg_max = max(ti_beg_max, cp->ti_beg_max); + ti_hydro_end_min = min(ti_hydro_end_min, cp->ti_hydro_end_min); + ti_hydro_end_max = max(ti_hydro_end_max, cp->ti_hydro_end_max); + ti_hydro_beg_max = max(ti_hydro_beg_max, cp->ti_hydro_beg_max); + ti_gravity_end_min = min(ti_gravity_end_min, cp->ti_gravity_end_min); + ti_gravity_end_max = max(ti_gravity_end_max, cp->ti_gravity_end_max); + ti_gravity_beg_max = max(ti_gravity_beg_max, cp->ti_gravity_beg_max); updated += cp->updated; g_updated += cp->g_updated; s_updated += cp->s_updated; @@ -3209,9 +3757,12 @@ void engine_collect_end_of_step_recurse(struct cell *c) { } /* Store the collected values in the cell. */ - c->ti_end_min = ti_end_min; - c->ti_end_max = ti_end_max; - c->ti_beg_max = ti_beg_max; + c->ti_hydro_end_min = ti_hydro_end_min; + c->ti_hydro_end_max = ti_hydro_end_max; + c->ti_hydro_beg_max = ti_hydro_beg_max; + c->ti_gravity_end_min = ti_gravity_end_min; + c->ti_gravity_end_max = ti_gravity_end_max; + c->ti_gravity_beg_max = ti_gravity_beg_max; c->updated = updated; c->g_updated = g_updated; c->s_updated = s_updated; @@ -3227,7 +3778,10 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, /* Local collectible */ int updates = 0, g_updates = 0, s_updates = 0; - integertime_t ti_end_min = max_nr_timesteps, ti_end_max = 0, ti_beg_max = 0; + integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, + ti_hydro_beg_max = 0; + integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, + ti_gravity_beg_max = 0; for (int ind = 0; ind < num_elements; ind++) { struct cell *c = &s->cells_top[local_cells[ind]]; @@ -3238,9 +3792,12 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, engine_collect_end_of_step_recurse(c); /* And aggregate */ - ti_end_min = min(ti_end_min, c->ti_end_min); - ti_end_max = max(ti_end_max, c->ti_end_max); - ti_beg_max = max(ti_beg_max, c->ti_beg_max); + ti_hydro_end_min = min(ti_hydro_end_min, c->ti_hydro_end_min); + ti_hydro_end_max = max(ti_hydro_end_max, c->ti_hydro_end_max); + ti_hydro_beg_max = max(ti_hydro_beg_max, c->ti_hydro_beg_max); + ti_gravity_end_min = min(ti_gravity_end_min, c->ti_gravity_end_min); + ti_gravity_end_max = max(ti_gravity_end_max, c->ti_gravity_end_max); + ti_gravity_beg_max = max(ti_gravity_beg_max, c->ti_gravity_beg_max); updates += c->updated; g_updates += c->g_updated; s_updates += c->s_updated; @@ -3258,9 +3815,15 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, data->updates += updates; data->g_updates += g_updates; data->s_updates += s_updates; - data->ti_end_min = min(ti_end_min, data->ti_end_min); - data->ti_end_max = max(ti_end_max, data->ti_end_max); - data->ti_beg_max = max(ti_beg_max, data->ti_beg_max); + data->ti_hydro_end_min = min(ti_hydro_end_min, data->ti_hydro_end_min); + data->ti_hydro_end_max = max(ti_hydro_end_max, data->ti_hydro_end_max); + data->ti_hydro_beg_max = max(ti_hydro_beg_max, data->ti_hydro_beg_max); + data->ti_gravity_end_min = + min(ti_gravity_end_min, data->ti_gravity_end_min); + data->ti_gravity_end_max = + max(ti_gravity_end_max, data->ti_gravity_end_max); + data->ti_gravity_beg_max = + max(ti_gravity_beg_max, data->ti_gravity_beg_max); } if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space"); } @@ -3288,7 +3851,10 @@ void engine_collect_end_of_step(struct engine *e, int apply) { const struct space *s = e->s; struct end_of_step_data data; data.updates = 0, data.g_updates = 0, data.s_updates = 0; - data.ti_end_min = max_nr_timesteps, data.ti_end_max = 0, data.ti_beg_max = 0; + data.ti_hydro_end_min = max_nr_timesteps, data.ti_hydro_end_max = 0, + data.ti_hydro_beg_max = 0; + data.ti_gravity_end_min = max_nr_timesteps, data.ti_gravity_end_max = 0, + data.ti_gravity_beg_max = 0; data.e = e; /* Collect information from the local top-level cells */ @@ -3297,8 +3863,10 @@ void engine_collect_end_of_step(struct engine *e, int apply) { /* Store these in the temporary collection group. */ collectgroup1_init(&e->collect_group1, data.updates, data.g_updates, - data.s_updates, data.ti_end_min, data.ti_end_max, - data.ti_beg_max, e->forcerebuild); + data.s_updates, data.ti_hydro_end_min, + data.ti_hydro_end_max, data.ti_hydro_beg_max, + data.ti_gravity_end_min, data.ti_gravity_end_max, + data.ti_gravity_beg_max, e->forcerebuild); /* Aggregate collective data from the different nodes for this step. */ #ifdef WITH_MPI @@ -3306,43 +3874,47 @@ void engine_collect_end_of_step(struct engine *e, int apply) { #ifdef SWIFT_DEBUG_CHECKS { - /* Check the above using the original MPI calls. */ - integertime_t in_i[1], out_i[1]; - in_i[0] = 0; - out_i[0] = data.ti_end_min; - if (MPI_Allreduce(out_i, in_i, 1, MPI_LONG_LONG_INT, MPI_MIN, - MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to aggregate ti_end_min."); - if (in_i[0] != (long long)e->collect_group1.ti_end_min) - error("Failed to get same ti_end_min, is %lld, should be %lld", in_i[0], - e->collect_group1.ti_end_min); - - long long in_ll[3], out_ll[3]; - out_ll[0] = data.updates; - out_ll[1] = data.g_updates; - out_ll[2] = data.s_updates; - if (MPI_Allreduce(out_ll, in_ll, 3, MPI_LONG_LONG_INT, MPI_SUM, - MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to aggregate particle counts."); - if (in_ll[0] != (long long)e->collect_group1.updates) - error("Failed to get same updates, is %lld, should be %ld", in_ll[0], - e->collect_group1.updates); - if (in_ll[1] != (long long)e->collect_group1.g_updates) - error("Failed to get same g_updates, is %lld, should be %ld", in_ll[1], - e->collect_group1.g_updates); - if (in_ll[2] != (long long)e->collect_group1.s_updates) - error("Failed to get same s_updates, is %lld, should be %ld", in_ll[2], - e->collect_group1.s_updates); - - int buff = 0; - if (MPI_Allreduce(&e->forcerebuild, &buff, 1, MPI_INT, MPI_MAX, - MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to aggregate the rebuild flag across nodes."); - if (!!buff != !!e->collect_group1.forcerebuild) - error( - "Failed to get same rebuild flag from all nodes, is %d," - "should be %d", - buff, e->collect_group1.forcerebuild); + /* /\* Check the above using the original MPI calls. *\/ */ + /* integertime_t in_i[1], out_i[1]; */ + /* in_i[0] = 0; */ + /* out_i[0] = data.ti_end_min; */ + /* if (MPI_Allreduce(out_i, in_i, 1, MPI_LONG_LONG_INT, MPI_MIN, */ + /* MPI_COMM_WORLD) != MPI_SUCCESS) */ + /* error("Failed to aggregate ti_end_min."); */ + /* if (in_i[0] != (long long)e->collect_group1.ti_end_min) */ + /* error("Failed to get same ti_end_min, is %lld, should be %lld", + * in_i[0], */ + /* e->collect_group1.ti_end_min); */ + + /* long long in_ll[3], out_ll[3]; */ + /* out_ll[0] = data.updates; */ + /* out_ll[1] = data.g_updates; */ + /* out_ll[2] = data.s_updates; */ + /* if (MPI_Allreduce(out_ll, in_ll, 3, MPI_LONG_LONG_INT, MPI_SUM, */ + /* MPI_COMM_WORLD) != MPI_SUCCESS) */ + /* error("Failed to aggregate particle counts."); */ + /* if (in_ll[0] != (long long)e->collect_group1.updates) */ + /* error("Failed to get same updates, is %lld, should be %ld", in_ll[0], + */ + /* e->collect_group1.updates); */ + /* if (in_ll[1] != (long long)e->collect_group1.g_updates) */ + /* error("Failed to get same g_updates, is %lld, should be %ld", in_ll[1], + */ + /* e->collect_group1.g_updates); */ + /* if (in_ll[2] != (long long)e->collect_group1.s_updates) */ + /* error("Failed to get same s_updates, is %lld, should be %ld", in_ll[2], + */ + /* e->collect_group1.s_updates); */ + + /* int buff = 0; */ + /* if (MPI_Allreduce(&e->forcerebuild, &buff, 1, MPI_INT, MPI_MAX, */ + /* MPI_COMM_WORLD) != MPI_SUCCESS) */ + /* error("Failed to aggregate the rebuild flag across nodes."); */ + /* if (!!buff != !!e->collect_group1.forcerebuild) */ + /* error( */ + /* "Failed to get same rebuild flag from all nodes, is %d," */ + /* "should be %d", */ + /* buff, e->collect_group1.forcerebuild); */ } #endif #endif @@ -3429,7 +4001,8 @@ void engine_skip_force_and_kick(struct engine *e) { t->type == task_type_timestep || t->subtype == task_subtype_force || t->subtype == task_subtype_grav || t->type == task_type_grav_long_range || - t->type == task_type_grav_ghost || + t->type == task_type_grav_ghost_in || + t->type == task_type_grav_ghost_out || t->type == task_type_grav_top_level || t->type == task_type_grav_down || t->type == task_type_cooling || t->type == task_type_sourceterms) t->skip = 1; @@ -3561,15 +4134,16 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs, #ifdef SWIFT_DEBUG_CHECKS /* Check that we have the correct total mass in the top-level multipoles */ - size_t num_gpart_mpole = 0; + long long num_gpart_mpole = 0; if (e->policy & engine_policy_self_gravity) { for (int i = 0; i < e->s->nr_cells; ++i) num_gpart_mpole += e->s->cells_top[i].multipole->m_pole.num_gpart; - if (num_gpart_mpole != e->s->nr_gparts) + if (num_gpart_mpole != e->total_nr_gparts) error( - "Multipoles don't contain the total number of gpart s->nr_gpart=%zd, " - "m_poles=%zd", - e->s->nr_gparts, num_gpart_mpole); + "Top-level multipoles don't contain the total number of gpart " + "s->nr_gpart=%lld, " + "m_poles=%lld", + e->total_nr_gparts, num_gpart_mpole); } #endif @@ -3756,15 +4330,15 @@ void engine_step(struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS /* Check that we have the correct total mass in the top-level multipoles */ - size_t num_gpart_mpole = 0; + long long num_gpart_mpole = 0; if (e->policy & engine_policy_self_gravity) { for (int i = 0; i < e->s->nr_cells; ++i) num_gpart_mpole += e->s->cells_top[i].multipole->m_pole.num_gpart; - if (num_gpart_mpole != e->s->nr_gparts) + if (num_gpart_mpole != e->total_nr_gparts) error( - "Multipoles don't contain the total number of gpart mpoles=%zd " - "ngparts=%zd", - num_gpart_mpole, e->s->nr_gparts); + "Multipoles don't contain the total number of gpart mpoles=%lld " + "ngparts=%lld", + num_gpart_mpole, e->total_nr_gparts); } #endif @@ -3871,9 +4445,13 @@ void engine_unskip(struct engine *e) { threadpool_map(&e->threadpool, runner_do_unskip_mapper, e->s->local_cells_top, e->s->nr_local_cells, sizeof(int), 1, e); - /* And the top level gravity FFT one */ - if (e->s->periodic && (e->policy & engine_policy_self_gravity)) - scheduler_activate(&e->sched, e->s->grav_top_level); + /* And the top level gravity FFT one when periodicity is on.*/ + if (e->s->periodic && (e->policy & engine_policy_self_gravity)) { + + /* Only if there are other tasks (i.e. something happens on this node) */ + if (e->sched.active_count > 0) + scheduler_activate(&e->sched, e->s->grav_top_level); + } if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -3902,10 +4480,11 @@ void engine_do_drift_all_mapper(void *map_data, int num_elements, /* Drift all the g-particles */ cell_drift_gpart(c, e, 1); + } - /* Drift the multipoles */ - if (e->policy & engine_policy_self_gravity) - cell_drift_all_multipoles(c, e); + /* Drift the multipoles */ + if (e->policy & engine_policy_self_gravity) { + cell_drift_all_multipoles(c, e); } } } @@ -3959,7 +4538,7 @@ void engine_do_drift_top_multipoles_mapper(void *map_data, int num_elements, for (int ind = 0; ind < num_elements; ind++) { struct cell *c = &cells[ind]; - if (c != NULL && c->nodeID == e->nodeID) { + if (c != NULL) { /* Drift the multipole at this level only */ if (c->ti_old_multipole != e->ti_current) cell_drift_multipole(c, e); @@ -4030,11 +4609,24 @@ void engine_reconstruct_multipoles(struct engine *e) { void engine_makeproxies(struct engine *e) { #ifdef WITH_MPI - const int *cdim = e->s->cdim; + const int nodeID = e->nodeID; const struct space *s = e->s; + const int *cdim = s->cdim; + const int periodic = s->periodic; + + /* Get some info about the physics */ + const double *dim = s->dim; + const struct gravity_props *props = e->gravity_properties; + const double theta_crit2 = props->theta_crit2; + const int with_hydro = (e->policy & engine_policy_hydro); + const int with_gravity = (e->policy & engine_policy_self_gravity); + + /* Handle on the cells and proxies */ struct cell *cells = s->cells_top; struct proxy *proxies = e->proxies; - ticks tic = getticks(); + + /* Let's time this */ + const ticks tic = getticks(); /* Prepare the proxies and the proxy index. */ if (e->proxy_ind == NULL) @@ -4043,33 +4635,53 @@ void engine_makeproxies(struct engine *e) { for (int k = 0; k < e->nr_nodes; k++) e->proxy_ind[k] = -1; e->nr_proxies = 0; - /* The following loop is super-clunky, but it's necessary - to ensure that the order of the send and recv cells in - the proxies is identical for all nodes! */ + /* Compute how many cells away we need to walk */ + int delta = 1; /*hydro case */ + if (with_gravity) { + const double distance = 2.5 * cells[0].width[0] / props->theta_crit; + delta = (int)(distance / cells[0].width[0]) + 1; + } + + /* Let's be verbose about this choice */ + if (e->verbose) + message("Looking for proxies up to %d top-level cells away", delta); /* Loop over each cell in the space. */ int ind[3]; - for (ind[0] = 0; ind[0] < cdim[0]; ind[0]++) - for (ind[1] = 0; ind[1] < cdim[1]; ind[1]++) + for (ind[0] = 0; ind[0] < cdim[0]; ind[0]++) { + for (ind[1] = 0; ind[1] < cdim[1]; ind[1]++) { for (ind[2] = 0; ind[2] < cdim[2]; ind[2]++) { /* Get the cell ID. */ const int cid = cell_getid(cdim, ind[0], ind[1], ind[2]); + double CoM_i[3] = {0., 0., 0.}; + double r_max_i = 0.; + + if (with_gravity) { + + /* Get ci's multipole */ + const struct gravity_tensors *multi_i = cells[cid].multipole; + CoM_i[0] = multi_i->CoM[0]; + CoM_i[1] = multi_i->CoM[1]; + CoM_i[2] = multi_i->CoM[2]; + r_max_i = multi_i->r_max; + } + /* Loop over all its neighbours (periodic). */ - for (int i = -1; i <= 1; i++) { + for (int i = -delta; i <= delta; i++) { int ii = ind[0] + i; if (ii >= cdim[0]) ii -= cdim[0]; else if (ii < 0) ii += cdim[0]; - for (int j = -1; j <= 1; j++) { + for (int j = -delta; j <= delta; j++) { int jj = ind[1] + j; if (jj >= cdim[1]) jj -= cdim[1]; else if (jj < 0) jj += cdim[1]; - for (int k = -1; k <= 1; k++) { + for (int k = -delta; k <= delta; k++) { int kk = ind[2] + k; if (kk >= cdim[2]) kk -= cdim[2]; @@ -4079,45 +4691,127 @@ void engine_makeproxies(struct engine *e) { /* Get the cell ID. */ const int cjd = cell_getid(cdim, ii, jj, kk); + /* Early abort (same cell) */ + if (cid == cjd) continue; + + /* Early abort (both same node) */ + if (cells[cid].nodeID == nodeID && cells[cjd].nodeID == nodeID) + continue; + + /* Early abort (both foreign node) */ + if (cells[cid].nodeID != nodeID && cells[cjd].nodeID != nodeID) + continue; + + int proxy_type = 0; + + /* In the hydro case, only care about neighbours */ + if (with_hydro) { + + /* This is super-ugly but checks for direct neighbours */ + /* with periodic BC */ + if (((abs(ind[0] - ii) <= 1 || + abs(ind[0] - ii - cdim[0]) <= 1 || + abs(ind[0] - ii + cdim[0]) <= 1) && + (abs(ind[1] - jj) <= 1 || + abs(ind[1] - jj - cdim[1]) <= 1 || + abs(ind[1] - jj + cdim[1]) <= 1) && + (abs(ind[2] - kk) <= 1 || + abs(ind[2] - kk - cdim[2]) <= 1 || + abs(ind[2] - kk + cdim[2]) <= 1))) + proxy_type |= (int)proxy_cell_type_hydro; + } + + /* In the gravity case, check distances using the MAC. */ + if (with_gravity) { + + /* Get cj's multipole */ + const struct gravity_tensors *multi_j = cells[cjd].multipole; + const double CoM_j[3] = {multi_j->CoM[0], multi_j->CoM[1], + multi_j->CoM[2]}; + const double r_max_j = multi_j->r_max; + + /* Let's compute the current distance between the cell pair*/ + double dx = CoM_i[0] - CoM_j[0]; + double dy = CoM_i[1] - CoM_j[1]; + double dz = CoM_i[2] - CoM_j[2]; + + /* Apply BC */ + if (periodic) { + dx = nearest(dx, dim[0]); + dy = nearest(dy, dim[1]); + dz = nearest(dz, dim[2]); + } + const double r2 = dx * dx + dy * dy + dz * dz; + + /* Are we too close for M2L? */ + if (!gravity_M2L_accept(r_max_i, r_max_j, theta_crit2, r2)) + proxy_type |= (int)proxy_cell_type_gravity; + } + + /* Abort if not in range at all */ + if (proxy_type == proxy_cell_type_none) continue; + /* Add to proxies? */ - if (cells[cid].nodeID == e->nodeID && - cells[cjd].nodeID != e->nodeID) { + if (cells[cid].nodeID == nodeID && cells[cjd].nodeID != nodeID) { + + /* Do we already have a relationship with this node? */ int pid = e->proxy_ind[cells[cjd].nodeID]; if (pid < 0) { if (e->nr_proxies == engine_maxproxies) error("Maximum number of proxies exceeded."); + + /* Ok, start a new proxy for this pair of nodes */ proxy_init(&proxies[e->nr_proxies], e->nodeID, cells[cjd].nodeID); + + /* Store the information */ e->proxy_ind[cells[cjd].nodeID] = e->nr_proxies; pid = e->nr_proxies; e->nr_proxies += 1; } - proxy_addcell_in(&proxies[pid], &cells[cjd]); - proxy_addcell_out(&proxies[pid], &cells[cid]); + + /* Add the cell to the proxy */ + proxy_addcell_in(&proxies[pid], &cells[cjd], proxy_type); + proxy_addcell_out(&proxies[pid], &cells[cid], proxy_type); + + /* Store info about where to send the cell */ cells[cid].sendto |= (1ULL << pid); } - if (cells[cjd].nodeID == e->nodeID && - cells[cid].nodeID != e->nodeID) { + /* Same for the symmetric case? */ + if (cells[cjd].nodeID == nodeID && cells[cid].nodeID != nodeID) { + + /* Do we already have a relationship with this node? */ int pid = e->proxy_ind[cells[cid].nodeID]; if (pid < 0) { if (e->nr_proxies == engine_maxproxies) error("Maximum number of proxies exceeded."); + + /* Ok, start a new proxy for this pair of nodes */ proxy_init(&proxies[e->nr_proxies], e->nodeID, cells[cid].nodeID); + + /* Store the information */ e->proxy_ind[cells[cid].nodeID] = e->nr_proxies; pid = e->nr_proxies; e->nr_proxies += 1; } - proxy_addcell_in(&proxies[pid], &cells[cid]); - proxy_addcell_out(&proxies[pid], &cells[cjd]); + + /* Add the cell to the proxy */ + proxy_addcell_in(&proxies[pid], &cells[cid], proxy_type); + proxy_addcell_out(&proxies[pid], &cells[cjd], proxy_type); + + /* Store info about where to send the cell */ cells[cjd].sendto |= (1ULL << pid); } } } } } + } + } + /* Be clear about the time */ if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -4344,8 +5038,8 @@ void engine_unpin() { */ void engine_init(struct engine *e, struct space *s, const struct swift_params *params, int nr_nodes, int nodeID, - int nr_threads, int Ngas, int Ndm, int with_aff, int policy, - int verbose, struct repartition *reparttype, + int nr_threads, long long Ngas, long long Ndm, int with_aff, + int policy, int verbose, struct repartition *reparttype, const struct unit_system *internal_units, const struct phys_const *physical_constants, const struct hydro_props *hydro, diff --git a/src/engine.h b/src/engine.h index f80589ad3fad508f48a6fc3eda676a929b824c6e..a571f2b24d57b2720c3c77ebd7600a3830e4d2a3 100644 --- a/src/engine.h +++ b/src/engine.h @@ -147,13 +147,31 @@ struct engine { double timeBase; double timeBase_inv; - /* Minimal ti_end for the next time-step */ + /* Minimal hydro ti_end for the next time-step */ + integertime_t ti_hydro_end_min; + + /* Maximal hydro ti_end for the next time-step */ + integertime_t ti_hydro_end_max; + + /* Maximal hydro ti_beg for the next time-step */ + integertime_t ti_hydro_beg_max; + + /* Minimal gravity ti_end for the next time-step */ + integertime_t ti_gravity_end_min; + + /* Maximal gravity ti_end for the next time-step */ + integertime_t ti_gravity_end_max; + + /* Maximal gravity ti_beg for the next time-step */ + integertime_t ti_gravity_beg_max; + + /* Minimal overall ti_end for the next time-step */ integertime_t ti_end_min; - /* Maximal ti_end for the next time-step */ + /* Maximal overall ti_end for the next time-step */ integertime_t ti_end_max; - /* Maximal ti_beg for the next time-step */ + /* Maximal overall ti_beg for the next time-step */ integertime_t ti_beg_max; /* Number of particles updated in the previous step */ @@ -163,7 +181,7 @@ struct engine { int step_props; /* Total numbers of particles in the system. */ - size_t total_nr_parts, total_nr_gparts; + long long total_nr_parts, total_nr_gparts; /* The internal system of units */ const struct unit_system *internal_units; @@ -281,8 +299,8 @@ void engine_print_stats(struct engine *e); void engine_dump_snapshot(struct engine *e); void engine_init(struct engine *e, struct space *s, const struct swift_params *params, int nr_nodes, int nodeID, - int nr_threads, int Ngas, int Ndm, int with_aff, int policy, - int verbose, struct repartition *reparttype, + int nr_threads, long long Ngas, long long Ndm, int with_aff, + int policy, int verbose, struct repartition *reparttype, const struct unit_system *internal_units, const struct phys_const *physical_constants, const struct hydro_props *hydro, diff --git a/src/partition.c b/src/partition.c index 0b7e5ca76bed0993b56b3825bb17914b06fe7e9c..8b67cf4866eff332b5c5deea1451c48f520bce9a 100644 --- a/src/partition.c +++ b/src/partition.c @@ -577,9 +577,12 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins, int cid = ci - cells; /* Different weights for different tasks. */ - if (t->type == task_type_ghost || t->type == task_type_kick1 || - t->type == task_type_kick2 || t->type == task_type_timestep || - t->type == task_type_drift_part || t->type == task_type_drift_gpart) { + if (t->type == task_type_drift_part || t->type == task_type_drift_gpart || + t->type == task_type_ghost || t->type == task_type_extra_ghost || + t->type == task_type_kick1 || t->type == task_type_kick2 || + t->type == task_type_timestep || t->type == task_type_init_grav || + t->type == task_type_grav_down || + t->type == task_type_grav_long_range) { /* Particle updates add only to vertex weight. */ if (taskvweights) weights_v[cid] += w; @@ -614,41 +617,46 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins, if (cj->nodeID == nodeID) weights_v[cjd] += 0.5 * w; } - if (timebins) { - /* Add weights to edge for all cells based on the expected - * interaction time (calculated as the time to the last expected - * time) as we want to avoid having active cells on the edges, so we - * cut for that. Note that weight is added to the local and remote - * cells, as we want to keep both away from any cuts, this can - * overflow int, so take care. */ - int dti = num_time_bins - get_time_bin(ci->ti_end_min); - int dtj = num_time_bins - get_time_bin(cj->ti_end_min); - double dt = (double)(1 << dti) + (double)(1 << dtj); - - /* ci */ - int kk; - for (kk = 26 * cid; inds[kk] != cjd; kk++) - ; - weights_e[kk] += dt; - - /* cj */ - for (kk = 26 * cjd; inds[kk] != cid; kk++) - ; - weights_e[kk] += dt; - } else { - - /* Add weights from task costs to the edge. */ - - /* ci */ - int kk; - for (kk = 26 * cid; inds[kk] != cjd; kk++) - ; - weights_e[kk] += w; - - /* cj */ - for (kk = 26 * cjd; inds[kk] != cid; kk++) - ; - weights_e[kk] += w; + /* Find indices of ci/cj neighbours. Note with gravity these cells may + * not be neighbours, in that case we ignore any edge weight for that + * pair. */ + int ik = -1; + for (int k = 26 * cid; k < 26 * nr_cells; k++) { + if (inds[k] == cjd) { + ik = k; + break; + } + } + + /* cj */ + int jk = -1; + for (int k = 26 * cjd; k < 26 * nr_cells; k++) { + if (inds[k] == cid) { + jk = k; + break; + } + } + if (ik != -1 && jk != -1) { + + if (timebins) { + /* Add weights to edge for all cells based on the expected + * interaction time (calculated as the time to the last expected + * time) as we want to avoid having active cells on the edges, so + * we cut for that. Note that weight is added to the local and + * remote cells, as we want to keep both away from any cuts, this + * can overflow int, so take care. */ + int dti = num_time_bins - get_time_bin(ci->ti_hydro_end_min); + int dtj = num_time_bins - get_time_bin(cj->ti_hydro_end_min); + double dt = (double)(1 << dti) + (double)(1 << dtj); + weights_e[ik] += dt; + weights_e[jk] += dt; + + } else { + + /* Add weights from task costs to the edge. */ + weights_e[ik] += w; + weights_e[jk] += w; + } } } } diff --git a/src/proxy.c b/src/proxy.c index dd6faa3055cb17a0a3050d9e62d107d7489a4326..4c110d1a23cbe337978147567b2e6ad8a6d6ba65 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -124,26 +124,44 @@ void proxy_cells_exch2(struct proxy *p) { * * @param p The #proxy. * @param c The #cell. + * @param type Why is this cell in the proxy (hdro, gravity, ...) ? */ -void proxy_addcell_in(struct proxy *p, struct cell *c) { +void proxy_addcell_in(struct proxy *p, struct cell *c, int type) { + + if (type == proxy_cell_type_none) error("Invalid type for proxy"); /* Check if the cell is already registered with the proxy. */ for (int k = 0; k < p->nr_cells_in; k++) - if (p->cells_in[k] == c) return; + if (p->cells_in[k] == c) { + + /* Update the type */ + p->cells_in_type[k] |= type; + return; + } /* Do we need to grow the number of in cells? */ if (p->nr_cells_in == p->size_cells_in) { + p->size_cells_in *= proxy_buffgrow; - struct cell **temp; - if ((temp = malloc(sizeof(struct cell *) * p->size_cells_in)) == NULL) + + struct cell **temp_cell; + if ((temp_cell = malloc(sizeof(struct cell *) * p->size_cells_in)) == NULL) error("Failed to allocate incoming cell list."); - memcpy(temp, p->cells_in, sizeof(struct cell *) * p->nr_cells_in); + memcpy(temp_cell, p->cells_in, sizeof(struct cell *) * p->nr_cells_in); free(p->cells_in); - p->cells_in = temp; + p->cells_in = temp_cell; + + int *temp_type; + if ((temp_type = malloc(sizeof(int) * p->size_cells_in)) == NULL) + error("Failed to allocate incoming cell type list."); + memcpy(temp_type, p->cells_in_type, sizeof(int) * p->nr_cells_in); + free(p->cells_in_type); + p->cells_in_type = temp_type; } /* Add the cell. */ p->cells_in[p->nr_cells_in] = c; + p->cells_in_type[p->nr_cells_in] = type; p->nr_cells_in += 1; } @@ -152,26 +170,43 @@ void proxy_addcell_in(struct proxy *p, struct cell *c) { * * @param p The #proxy. * @param c The #cell. + * @param type Why is this cell in the proxy (hdro, gravity, ...) ? */ -void proxy_addcell_out(struct proxy *p, struct cell *c) { +void proxy_addcell_out(struct proxy *p, struct cell *c, int type) { + + if (type == proxy_cell_type_none) error("Invalid type for proxy"); /* Check if the cell is already registered with the proxy. */ for (int k = 0; k < p->nr_cells_out; k++) - if (p->cells_out[k] == c) return; + if (p->cells_out[k] == c) { + + /* Update the type */ + p->cells_out_type[k] |= type; + return; + } /* Do we need to grow the number of out cells? */ if (p->nr_cells_out == p->size_cells_out) { p->size_cells_out *= proxy_buffgrow; - struct cell **temp; - if ((temp = malloc(sizeof(struct cell *) * p->size_cells_out)) == NULL) + + struct cell **temp_cell; + if ((temp_cell = malloc(sizeof(struct cell *) * p->size_cells_out)) == NULL) error("Failed to allocate outgoing cell list."); - memcpy(temp, p->cells_out, sizeof(struct cell *) * p->nr_cells_out); + memcpy(temp_cell, p->cells_out, sizeof(struct cell *) * p->nr_cells_out); free(p->cells_out); - p->cells_out = temp; + p->cells_out = temp_cell; + + int *temp_type; + if ((temp_type = malloc(sizeof(int) * p->size_cells_out)) == NULL) + error("Failed to allocate outgoing cell type list."); + memcpy(temp_type, p->cells_out_type, sizeof(int) * p->nr_cells_out); + free(p->cells_out_type); + p->cells_out_type = temp_type; } /* Add the cell. */ p->cells_out[p->nr_cells_out] = c; + p->cells_out_type[p->nr_cells_out] = type; p->nr_cells_out += 1; } @@ -433,6 +468,9 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) { if ((p->cells_in = (struct cell **)malloc(sizeof(void *) * p->size_cells_in)) == NULL) error("Failed to allocate cells_in buffer."); + if ((p->cells_in_type = (int *)malloc(sizeof(int) * p->size_cells_in)) == + NULL) + error("Failed to allocate cells_in_type buffer."); } p->nr_cells_in = 0; if (p->cells_out == NULL) { @@ -440,6 +478,9 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) { if ((p->cells_out = (struct cell **)malloc(sizeof(void *) * p->size_cells_out)) == NULL) error("Failed to allocate cells_out buffer."); + if ((p->cells_out_type = (int *)malloc(sizeof(int) * p->size_cells_out)) == + NULL) + error("Failed to allocate cells_out_type buffer."); } p->nr_cells_out = 0; diff --git a/src/proxy.h b/src/proxy.h index a245077193878bb669b474944965badceffcee80..b45f6fcca86b0320a49b5c2b879539cbf8c73116 100644 --- a/src/proxy.h +++ b/src/proxy.h @@ -36,6 +36,15 @@ #define proxy_tag_sparts 4 #define proxy_tag_cells 5 +/** + * @brief The different reasons a cell can be in a proxy + */ +enum proxy_cell_type { + proxy_cell_type_none = 0, + proxy_cell_type_hydro = (1 << 0), + proxy_cell_type_gravity = (1 << 1), +}; + /* Data structure for the proxy. */ struct proxy { @@ -44,11 +53,13 @@ struct proxy { /* Incoming cells. */ struct cell **cells_in; + int *cells_in_type; struct pcell *pcells_in; int nr_cells_in, size_cells_in, size_pcells_in; /* Outgoing cells. */ struct cell **cells_out; + int *cells_out_type; struct pcell *pcells_out; int nr_cells_out, size_cells_out, size_pcells_out; @@ -87,8 +98,8 @@ void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N); void proxy_sparts_load(struct proxy *p, const struct spart *sparts, int N); void proxy_parts_exch1(struct proxy *p); void proxy_parts_exch2(struct proxy *p); -void proxy_addcell_in(struct proxy *p, struct cell *c); -void proxy_addcell_out(struct proxy *p, struct cell *c); +void proxy_addcell_in(struct proxy *p, struct cell *c, int type); +void proxy_addcell_out(struct proxy *p, struct cell *c, int type); void proxy_cells_exch1(struct proxy *p); void proxy_cells_exch2(struct proxy *p); diff --git a/src/runner.c b/src/runner.c index 392bb261bf156441a6baafa8e795c59405eb0082..a8ea47e8ce449e716d5f454b4af4bd618dc4fcc9 100644 --- a/src/runner.c +++ b/src/runner.c @@ -139,7 +139,7 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) { TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active(c, e)) return; + if (!cell_is_active_gravity(c, e)) return; /* Recurse? */ if (c->split) { @@ -185,7 +185,7 @@ void runner_do_cooling(struct runner *r, struct cell *c, int timer) { TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active(c, e)) return; + if (!cell_is_active_hydro(c, e)) return; /* Recurse? */ if (c->split) { @@ -304,7 +304,8 @@ void runner_check_sorts(struct cell *c, int flags) { if (flags & ~c->sorted) error("Inconsistent sort flags (downward)!"); if (c->split) for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_check_sorts(c->progeny[k], c->sorted); + if (c->progeny[k] != NULL && c->progeny[k]->count > 0) + runner_check_sorts(c->progeny[k], c->sorted); #else error("Calling debugging code without debugging flag activated."); #endif @@ -343,7 +344,7 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, /* Check that the particles have been moved to the current time */ if (flags && !cell_are_part_drifted(c, r->e)) - error("Sorting un-drifted cell"); + error("Sorting un-drifted cell c->nodeID=%d", c->nodeID); #ifdef SWIFT_DEBUG_CHECKS /* Make sure the sort flags are consistent (downward). */ @@ -376,7 +377,7 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int cleanup, float dx_max_sort = 0.0f; float dx_max_sort_old = 0.0f; for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { + if (c->progeny[k] != NULL && c->progeny[k]->count > 0) { /* Only propagate cleanup if the progeny is stale. */ runner_do_sort(r, c->progeny[k], flags, cleanup && (c->progeny[k]->dx_max_sort > @@ -550,10 +551,7 @@ void runner_do_init_grav(struct runner *r, struct cell *c, int timer) { #endif /* Anything to do here? */ - if (!cell_is_active(c, e)) return; - - /* Drift the multipole */ - cell_drift_multipole(c, e); + if (!cell_is_active_gravity(c, e)) return; /* Reset the gravity acceleration tensors */ gravity_field_tensors_init(&c->multipole->pot, e->ti_current); @@ -587,7 +585,7 @@ void runner_do_extra_ghost(struct runner *r, struct cell *c, int timer) { TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active(c, e)) return; + if (!cell_is_active_hydro(c, e)) return; /* Recurse? */ if (c->split) { @@ -640,7 +638,7 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active(c, e)) return; + if (!cell_is_active_hydro(c, e)) return; /* Recurse? */ if (c->split) { @@ -833,34 +831,62 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { } /** - * @brief Unskip any tasks associated with active cells. + * @brief Unskip any hydro tasks associated with active cells. * * @param c The cell. * @param e The engine. */ -static void runner_do_unskip(struct cell *c, struct engine *e) { +static void runner_do_unskip_hydro(struct cell *c, struct engine *e) { /* Ignore empty cells. */ - if (c->count == 0 && c->gcount == 0) return; + if (c->count == 0) return; /* Skip inactive cells. */ - if (!cell_is_active(c, e)) return; + if (!cell_is_active_hydro(c, e)) return; /* Recurse */ if (c->split) { for (int k = 0; k < 8; k++) { if (c->progeny[k] != NULL) { struct cell *cp = c->progeny[k]; - runner_do_unskip(cp, e); + runner_do_unskip_hydro(cp, e); } } } /* Unskip any active tasks. */ - const int forcerebuild = cell_unskip_tasks(c, &e->sched); + const int forcerebuild = cell_unskip_hydro_tasks(c, &e->sched); if (forcerebuild) atomic_inc(&e->forcerebuild); } +/** + * @brief Unskip any gravity tasks associated with active cells. + * + * @param c The cell. + * @param e The engine. + */ +static void runner_do_unskip_gravity(struct cell *c, struct engine *e) { + + /* Ignore empty cells. */ + if (c->gcount == 0) return; + + /* Skip inactive cells. */ + if (!cell_is_active_gravity(c, e)) return; + + /* Recurse */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *cp = c->progeny[k]; + runner_do_unskip_gravity(cp, e); + } + } + } + + /* Unskip any active tasks. */ + cell_unskip_gravity_tasks(c, &e->sched); +} + /** * @brief Mapper function to unskip active tasks. * @@ -877,7 +903,12 @@ void runner_do_unskip_mapper(void *map_data, int num_elements, for (int ind = 0; ind < num_elements; ind++) { struct cell *c = &s->cells_top[local_cells[ind]]; - if (c != NULL) runner_do_unskip(c, e); + if (c != NULL) { + if (e->policy & engine_policy_hydro) runner_do_unskip_hydro(c, e); + if (e->policy & + (engine_policy_self_gravity | engine_policy_external_gravity)) + runner_do_unskip_gravity(c, e); + } } } /** @@ -935,7 +966,7 @@ void runner_do_kick1(struct runner *r, struct cell *c, int timer) { TIMER_TIC; /* Anything to do here? */ - if (!cell_is_starting(c, e)) return; + if (!cell_is_starting_hydro(c, e) && !cell_is_starting_gravity(c, e)) return; /* Recurse? */ if (c->split) { @@ -1060,7 +1091,7 @@ void runner_do_kick2(struct runner *r, struct cell *c, int timer) { TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active(c, e)) return; + if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e)) return; /* Recurse? */ if (c->split) { @@ -1194,7 +1225,7 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active(c, e)) { + if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e)) { c->updated = 0; c->g_updated = 0; c->s_updated = 0; @@ -1202,7 +1233,10 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { } int updated = 0, g_updated = 0, s_updated = 0; - integertime_t ti_end_min = max_nr_timesteps, ti_end_max = 0, ti_beg_max = 0; + integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, + ti_hydro_beg_max = 0; + integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, + ti_gravity_beg_max = 0; /* No children? */ if (!c->split) { @@ -1231,7 +1265,7 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { /* Update particle */ p->time_bin = get_time_bin(ti_new_step); - if (p->gpart != NULL) p->gpart->time_bin = get_time_bin(ti_new_step); + if (p->gpart != NULL) p->gpart->time_bin = p->time_bin; /* Tell the particle what the new physical time step is */ float dt = get_timestep(p->time_bin, timeBase); @@ -1242,11 +1276,23 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { if (p->gpart != NULL) g_updated++; /* What is the next sync-point ? */ - ti_end_min = min(ti_current + ti_new_step, ti_end_min); - ti_end_max = max(ti_current + ti_new_step, ti_end_max); + ti_hydro_end_min = min(ti_current + ti_new_step, ti_hydro_end_min); + ti_hydro_end_max = max(ti_current + ti_new_step, ti_hydro_end_max); /* What is the next starting point for this cell ? */ - ti_beg_max = max(ti_current, ti_beg_max); + ti_hydro_beg_max = max(ti_current, ti_hydro_beg_max); + + if (p->gpart != NULL) { + + /* What is the next sync-point ? */ + ti_gravity_end_min = + min(ti_current + ti_new_step, ti_gravity_end_min); + ti_gravity_end_max = + max(ti_current + ti_new_step, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); + } } else { /* part is inactive */ @@ -1254,15 +1300,25 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { const integertime_t ti_end = get_integer_time_end(ti_current, p->time_bin); - /* What is the next sync-point ? */ - ti_end_min = min(ti_end, ti_end_min); - ti_end_max = max(ti_end, ti_end_max); - const integertime_t ti_beg = get_integer_time_begin(ti_current + 1, p->time_bin); + /* What is the next sync-point ? */ + ti_hydro_end_min = min(ti_end, ti_hydro_end_min); + ti_hydro_end_max = max(ti_end, ti_hydro_end_max); + /* What is the next starting point for this cell ? */ - ti_beg_max = max(ti_beg, ti_beg_max); + ti_hydro_beg_max = max(ti_beg, ti_hydro_beg_max); + + if (p->gpart != NULL) { + + /* What is the next sync-point ? */ + ti_gravity_end_min = min(ti_end, ti_gravity_end_min); + ti_gravity_end_max = max(ti_end, ti_gravity_end_max); + + /* What is the next starting point for this cell ? */ + ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max); + } } } @@ -1297,11 +1353,13 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { g_updated++; /* What is the next sync-point ? */ - ti_end_min = min(ti_current + ti_new_step, ti_end_min); - ti_end_max = max(ti_current + ti_new_step, ti_end_max); + ti_gravity_end_min = + min(ti_current + ti_new_step, ti_gravity_end_min); + ti_gravity_end_max = + max(ti_current + ti_new_step, ti_gravity_end_max); /* What is the next starting point for this cell ? */ - ti_beg_max = max(ti_current, ti_beg_max); + ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); } else { /* gpart is inactive */ @@ -1309,14 +1367,14 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { get_integer_time_end(ti_current, gp->time_bin); /* What is the next sync-point ? */ - ti_end_min = min(ti_end, ti_end_min); - ti_end_max = max(ti_end, ti_end_max); + ti_gravity_end_min = min(ti_end, ti_gravity_end_min); + ti_gravity_end_max = max(ti_end, ti_gravity_end_max); const integertime_t ti_beg = get_integer_time_begin(ti_current + 1, gp->time_bin); /* What is the next starting point for this cell ? */ - ti_beg_max = max(ti_beg, ti_beg_max); + ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max); } } } @@ -1350,11 +1408,11 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { g_updated++; /* What is the next sync-point ? */ - ti_end_min = min(ti_current + ti_new_step, ti_end_min); - ti_end_max = max(ti_current + ti_new_step, ti_end_max); + ti_gravity_end_min = min(ti_current + ti_new_step, ti_gravity_end_min); + ti_gravity_end_max = max(ti_current + ti_new_step, ti_gravity_end_max); /* What is the next starting point for this cell ? */ - ti_beg_max = max(ti_current, ti_beg_max); + ti_gravity_beg_max = max(ti_current, ti_gravity_beg_max); } else { /* star particle is inactive */ @@ -1362,14 +1420,14 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { get_integer_time_end(ti_current, sp->time_bin); /* What is the next sync-point ? */ - ti_end_min = min(ti_end, ti_end_min); - ti_end_max = max(ti_end, ti_end_max); + ti_gravity_end_min = min(ti_end, ti_gravity_end_min); + ti_gravity_end_max = max(ti_end, ti_gravity_end_max); const integertime_t ti_beg = get_integer_time_begin(ti_current + 1, sp->time_bin); /* What is the next starting point for this cell ? */ - ti_beg_max = max(ti_beg, ti_beg_max); + ti_gravity_beg_max = max(ti_beg, ti_gravity_beg_max); } } } else { @@ -1386,9 +1444,12 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { updated += cp->updated; g_updated += cp->g_updated; s_updated += cp->s_updated; - ti_end_min = min(cp->ti_end_min, ti_end_min); - ti_end_max = max(cp->ti_end_max, ti_end_max); - ti_beg_max = max(cp->ti_beg_max, ti_beg_max); + ti_hydro_end_min = min(cp->ti_hydro_end_min, ti_hydro_end_min); + ti_hydro_end_max = max(cp->ti_hydro_end_max, ti_hydro_end_max); + ti_hydro_beg_max = max(cp->ti_hydro_beg_max, ti_hydro_beg_max); + ti_gravity_end_min = min(cp->ti_gravity_end_min, ti_gravity_end_min); + ti_gravity_end_max = max(cp->ti_gravity_end_max, ti_gravity_end_max); + ti_gravity_beg_max = max(cp->ti_gravity_beg_max, ti_gravity_beg_max); } } @@ -1396,9 +1457,12 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { c->updated = updated; c->g_updated = g_updated; c->s_updated = s_updated; - c->ti_end_min = ti_end_min; - c->ti_end_max = ti_end_max; - c->ti_beg_max = ti_beg_max; + c->ti_hydro_end_min = ti_hydro_end_min; + c->ti_hydro_end_max = ti_hydro_end_max; + c->ti_hydro_beg_max = ti_hydro_beg_max; + c->ti_gravity_end_min = ti_gravity_end_min; + c->ti_gravity_end_max = ti_gravity_end_max; + c->ti_gravity_beg_max = ti_gravity_beg_max; if (timer) TIMER_TOC(timer_timestep); } @@ -1425,7 +1489,7 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active(c, e)) return; + if (!cell_is_active_hydro(c, e) && !cell_is_active_gravity(c, e)) return; /* Recurse? */ if (c->split) { @@ -1481,14 +1545,14 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { /* Check that this gpart has interacted with all the other * particles (via direct or multipoles) in the box */ - if (gp->num_interacted != (long long)e->s->nr_gparts) + if (gp->num_interacted != e->total_nr_gparts) error( "g-particle (id=%lld, type=%s) did not interact " "gravitationally " "with all other gparts gp->num_interacted=%lld, " - "total_gparts=%zd", + "total_gparts=%lld (local num_gparts=%zd)", gp->id_or_neg_offset, part_type_names[gp->type], - gp->num_interacted, e->s->nr_gparts); + gp->num_interacted, e->total_nr_gparts, e->s->nr_gparts); } #endif } @@ -1529,8 +1593,8 @@ void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts, TIMER_TIC; - integertime_t ti_end_min = max_nr_timesteps; - integertime_t ti_end_max = 0; + integertime_t ti_hydro_end_min = max_nr_timesteps; + integertime_t ti_hydro_end_max = 0; timebin_t time_bin_min = num_time_bins; timebin_t time_bin_max = 0; float h_max = 0.f; @@ -1554,33 +1618,35 @@ void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts, } /* Convert into a time */ - ti_end_min = get_integer_time_end(ti_current, time_bin_min); - ti_end_max = get_integer_time_end(ti_current, time_bin_max); + ti_hydro_end_min = get_integer_time_end(ti_current, time_bin_min); + ti_hydro_end_max = get_integer_time_end(ti_current, time_bin_max); } /* Otherwise, recurse and collect. */ else { for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { + if (c->progeny[k] != NULL && c->progeny[k]->count > 0) { runner_do_recv_part(r, c->progeny[k], clear_sorts, 0); - ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); - ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); + ti_hydro_end_min = + min(ti_hydro_end_min, c->progeny[k]->ti_hydro_end_min); + ti_hydro_end_max = + max(ti_hydro_end_max, c->progeny[k]->ti_hydro_end_max); h_max = max(h_max, c->progeny[k]->h_max); } } } #ifdef SWIFT_DEBUG_CHECKS - if (ti_end_min < ti_current) + if (ti_hydro_end_min < ti_current) error( "Received a cell at an incorrect time c->ti_end_min=%lld, " "e->ti_current=%lld.", - ti_end_min, ti_current); + ti_hydro_end_min, ti_current); #endif /* ... and store. */ - c->ti_end_min = ti_end_min; - c->ti_end_max = ti_end_max; + // c->ti_hydro_end_min = ti_hydro_end_min; + // c->ti_hydro_end_max = ti_hydro_end_max; c->ti_old_part = ti_current; c->h_max = h_max; @@ -1608,8 +1674,8 @@ void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { TIMER_TIC; - integertime_t ti_end_min = max_nr_timesteps; - integertime_t ti_end_max = 0; + integertime_t ti_gravity_end_min = max_nr_timesteps; + integertime_t ti_gravity_end_max = 0; timebin_t time_bin_min = num_time_bins; timebin_t time_bin_max = 0; @@ -1633,32 +1699,34 @@ void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { } /* Convert into a time */ - ti_end_min = get_integer_time_end(ti_current, time_bin_min); - ti_end_max = get_integer_time_end(ti_current, time_bin_max); + ti_gravity_end_min = get_integer_time_end(ti_current, time_bin_min); + ti_gravity_end_max = get_integer_time_end(ti_current, time_bin_max); } /* Otherwise, recurse and collect. */ else { for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { + if (c->progeny[k] != NULL && c->progeny[k]->gcount > 0) { runner_do_recv_gpart(r, c->progeny[k], 0); - ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); - ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); + ti_gravity_end_min = + min(ti_gravity_end_min, c->progeny[k]->ti_gravity_end_min); + ti_gravity_end_max = + max(ti_gravity_end_max, c->progeny[k]->ti_gravity_end_max); } } } #ifdef SWIFT_DEBUG_CHECKS - if (ti_end_min < ti_current) + if (ti_gravity_end_min < ti_current) error( "Received a cell at an incorrect time c->ti_end_min=%lld, " "e->ti_current=%lld.", - ti_end_min, ti_current); + ti_gravity_end_min, ti_current); #endif /* ... and store. */ - c->ti_end_min = ti_end_min; - c->ti_end_max = ti_end_max; + // c->ti_gravity_end_min = ti_gravity_end_min; + // c->ti_gravity_end_max = ti_gravity_end_max; c->ti_old_gpart = ti_current; if (timer) TIMER_TOC(timer_dorecv_gpart); @@ -1685,8 +1753,8 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { TIMER_TIC; - integertime_t ti_end_min = max_nr_timesteps; - integertime_t ti_end_max = 0; + integertime_t ti_gravity_end_min = max_nr_timesteps; + integertime_t ti_gravity_end_max = 0; timebin_t time_bin_min = num_time_bins; timebin_t time_bin_max = 0; @@ -1710,32 +1778,34 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { } /* Convert into a time */ - ti_end_min = get_integer_time_end(ti_current, time_bin_min); - ti_end_max = get_integer_time_end(ti_current, time_bin_max); + ti_gravity_end_min = get_integer_time_end(ti_current, time_bin_min); + ti_gravity_end_max = get_integer_time_end(ti_current, time_bin_max); } /* Otherwise, recurse and collect. */ else { for (int k = 0; k < 8; k++) { - if (c->progeny[k] != NULL) { + if (c->progeny[k] != NULL && c->progeny[k]->scount > 0) { runner_do_recv_spart(r, c->progeny[k], 0); - ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); - ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); + ti_gravity_end_min = + min(ti_gravity_end_min, c->progeny[k]->ti_gravity_end_min); + ti_gravity_end_max = + max(ti_gravity_end_max, c->progeny[k]->ti_gravity_end_max); } } } #ifdef SWIFT_DEBUG_CHECKS - if (ti_end_min < ti_current) + if (ti_gravity_end_min < ti_current) error( "Received a cell at an incorrect time c->ti_end_min=%lld, " "e->ti_current=%lld.", - ti_end_min, ti_current); + ti_gravity_end_min, ti_current); #endif /* ... and store. */ - c->ti_end_min = ti_end_min; - c->ti_end_max = ti_end_max; + c->ti_gravity_end_min = ti_gravity_end_min; + c->ti_gravity_end_max = ti_gravity_end_max; c->ti_old_gpart = ti_current; if (timer) TIMER_TOC(timer_dorecv_spart); @@ -1804,54 +1874,62 @@ void *runner_main(void *data) { /* Check that we haven't scheduled an inactive task */ #ifdef SWIFT_DEBUG_CHECKS t->ti_run = e->ti_current; -#ifndef WITH_MPI - if (t->type == task_type_grav_top_level) { - if (ci != NULL || cj != NULL) - error("Top-level gravity task associated with a cell"); - } else if (ci == NULL && cj == NULL) { - - error("Task not associated with cells!"); - } else if (cj == NULL) { /* self */ - - if (!cell_is_active(ci, e) && t->type != task_type_sort && - t->type != task_type_send && t->type != task_type_recv && - t->type != task_type_kick1 && t->type != task_type_drift_part && - t->type != task_type_drift_gpart) - error( - "Task (type='%s/%s') should have been skipped ti_current=%lld " - "c->ti_end_min=%lld", - taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current, - ci->ti_end_min); - - /* Special case for sorts */ - if (!cell_is_active(ci, e) && t->type == task_type_sort && - !(ci->do_sort || ci->do_sub_sort)) - error( - "Task (type='%s/%s') should have been skipped ti_current=%lld " - "c->ti_end_min=%lld t->flags=%d", - taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current, - ci->ti_end_min, t->flags); - - /* Special case for kick1 */ - if (!cell_is_starting(ci, e) && t->type == task_type_kick1 && - t->flags == 0) - error( - "Task (type='%s/%s') should have been skipped ti_current=%lld " - "c->ti_end_min=%lld t->flags=%d", - taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current, - ci->ti_end_min, t->flags); - - } else { /* pair */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) - - if (t->type != task_type_send && t->type != task_type_recv) - error( - "Task (type='%s/%s') should have been skipped ti_current=%lld " - "ci->ti_end_min=%lld cj->ti_end_min=%lld", - taskID_names[t->type], subtaskID_names[t->subtype], - e->ti_current, ci->ti_end_min, cj->ti_end_min); - } -#endif +/* #ifndef WITH_MPI */ +/* if (t->type == task_type_grav_top_level) { */ +/* if (ci != NULL || cj != NULL) */ +/* error("Top-level gravity task associated with a cell"); */ +/* } else if (ci == NULL && cj == NULL) { */ + +/* error("Task not associated with cells!"); */ +/* } else if (cj == NULL) { /\* self *\/ */ + +/* if (!cell_is_active_hydro(ci, e) && t->type != task_type_sort && */ +/* t->type != task_type_send && t->type != task_type_recv && */ +/* t->type != task_type_kick1 && t->type != task_type_drift_part && + */ +/* t->type != task_type_drift_gpart) */ +/* error( */ +/* "Task (type='%s/%s') should have been skipped ti_current=%lld " + */ +/* "c->ti_end_min=%lld", */ +/* taskID_names[t->type], subtaskID_names[t->subtype], + * e->ti_current, */ +/* ci->ti_end_min); */ + +/* /\* Special case for sorts *\/ */ +/* if (!cell_is_active_hydro(ci, e) && t->type == task_type_sort && */ +/* !(ci->do_sort || ci->do_sub_sort)) */ +/* error( */ +/* "Task (type='%s/%s') should have been skipped ti_current=%lld " + */ +/* "c->ti_end_min=%lld t->flags=%d", */ +/* taskID_names[t->type], subtaskID_names[t->subtype], + * e->ti_current, */ +/* ci->ti_end_min, t->flags); */ + +/* /\* Special case for kick1 *\/ */ +/* if (!cell_is_starting(ci, e) && t->type == task_type_kick1 && */ +/* t->flags == 0) */ +/* error( */ +/* "Task (type='%s/%s') should have been skipped ti_current=%lld " + */ +/* "c->ti_end_min=%lld t->flags=%d", */ +/* taskID_names[t->type], subtaskID_names[t->subtype], + * e->ti_current, */ +/* ci->ti_end_min, t->flags); */ + +/* } else { /\* pair *\/ */ +/* if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) */ + +/* if (t->type != task_type_send && t->type != task_type_recv) */ +/* error( */ +/* "Task (type='%s/%s') should have been skipped ti_current=%lld + * " */ +/* "ci->ti_end_min=%lld cj->ti_end_min=%lld", */ +/* taskID_names[t->type], subtaskID_names[t->subtype], */ +/* e->ti_current, ci->ti_end_min, cj->ti_end_min); */ +/* } */ +/* #endif */ #endif /* Different types of tasks... */ @@ -1970,7 +2048,8 @@ void *runner_main(void *data) { } else if (t->subtype == task_subtype_spart) { runner_do_recv_spart(r, ci, 1); } else if (t->subtype == task_subtype_multipole) { - ci->ti_old_multipole = e->ti_current; + cell_unpack_multipoles(ci, t->buff); + free(t->buff); } else { error("Unknown/invalid task subtype (%d).", t->subtype); } diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 6181706103d1994fd3d59f3326a8193cca6f6a10..09b6a83ec29dd4f57b8a850ee77a2e5692e1c4ec 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -136,7 +136,7 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return; const int count_i = ci->count; const int count_j = cj->count; @@ -224,7 +224,7 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return; const int count_i = ci->count; const int count_j = cj->count; @@ -314,7 +314,7 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active(c, e)) return; + if (!cell_is_active_hydro(c, e)) return; const int count = c->count; struct part *restrict parts = c->parts; @@ -392,7 +392,7 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active(c, e)) return; + if (!cell_is_active_hydro(c, e)) return; const int count = c->count; struct part *restrict parts = c->parts; @@ -802,7 +802,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const double dj_min = sort_j[0].d; const float dx_max = (ci->dx_max_sort + cj->dx_max_sort); - if (cell_is_active(ci, e)) { + if (cell_is_active_hydro(ci, e)) { /* Loop over the parts in ci. */ for (int pid = count_i - 1; @@ -882,7 +882,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, } /* loop over the parts in ci. */ } /* Cell ci is active */ - if (cell_is_active(cj, e)) { + if (cell_is_active_hydro(cj, e)) { /* Loop over the parts in cj. */ for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max; @@ -979,7 +979,7 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { const struct engine *restrict e = r->e; /* Anything to do here? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return; /* Check that cells are drifted. */ if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) @@ -1103,11 +1103,11 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, int count_active_i = 0, count_active_j = 0; struct entry *restrict sort_active_i = NULL, *restrict sort_active_j = NULL; - if (cell_is_all_active(ci, e)) { + if (cell_is_all_active_hydro(ci, e)) { /* If everybody is active don't bother copying */ sort_active_i = sort_i; count_active_i = count_i; - } else if (cell_is_active(ci, e)) { + } else if (cell_is_active_hydro(ci, e)) { if (posix_memalign((void *)&sort_active_i, SWIFT_CACHE_ALIGNMENT, sizeof(struct entry) * count_i) != 0) error("Failed to allocate active sortlists."); @@ -1121,11 +1121,11 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, } } - if (cell_is_all_active(cj, e)) { + if (cell_is_all_active_hydro(cj, e)) { /* If everybody is active don't bother copying */ sort_active_j = sort_j; count_active_j = count_j; - } else if (cell_is_active(cj, e)) { + } else if (cell_is_active_hydro(cj, e)) { if (posix_memalign((void *)&sort_active_j, SWIFT_CACHE_ALIGNMENT, sizeof(struct entry) * count_j) != 0) error("Failed to allocate active sortlists."); @@ -1442,8 +1442,10 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, } /* Loop over all cj */ /* Clean-up if necessary */ - if (cell_is_active(ci, e) && !cell_is_all_active(ci, e)) free(sort_active_i); - if (cell_is_active(cj, e) && !cell_is_all_active(cj, e)) free(sort_active_j); + if (cell_is_active_hydro(ci, e) && !cell_is_all_active_hydro(ci, e)) + free(sort_active_i); + if (cell_is_active_hydro(cj, e) && !cell_is_all_active_hydro(cj, e)) + free(sort_active_j); TIMER_TOC(TIMER_DOPAIR); } @@ -1462,7 +1464,7 @@ void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { const struct engine *restrict e = r->e; /* Anything to do here? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return; /* Check that cells are drifted. */ if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) @@ -1670,7 +1672,11 @@ void DOSELF1_BRANCH(struct runner *r, struct cell *c) { const struct engine *restrict e = r->e; /* Anything to do here? */ - if (!cell_is_active(c, e)) return; + if (!cell_is_active_hydro(c, e)) return; + + /* Did we mess up the recursion? */ + if (c->h_max_old * kernel_gamma > c->dmin) + error("Cell smaller than smoothing length"); /* Check that cells are drifted. */ if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell."); @@ -1817,7 +1823,11 @@ void DOSELF2_BRANCH(struct runner *r, struct cell *c) { const struct engine *restrict e = r->e; /* Anything to do here? */ - if (!cell_is_active(c, e)) return; + if (!cell_is_active_hydro(c, e)) return; + + /* Did we mess up the recursion? */ + if (c->h_max_old * kernel_gamma > c->dmin) + error("Cell smaller than smoothing length"); /* Check that cells are drifted. */ if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell."); @@ -1852,7 +1862,7 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, TIMER_TIC; /* Should we even bother? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return; if (ci->count == 0 || cj->count == 0) return; /* Get the type of pair if not specified explicitly. */ @@ -2061,7 +2071,7 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, } /* Otherwise, compute the pair directly. */ - else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { + else if (cell_is_active_hydro(ci, e) || cell_is_active_hydro(cj, e)) { /* Make sure both cells are drifted to the current timestep. */ if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) @@ -2094,7 +2104,7 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { TIMER_TIC; /* Should we even bother? */ - if (ci->count == 0 || !cell_is_active(ci, r->e)) return; + if (ci->count == 0 || !cell_is_active_hydro(ci, r->e)) return; /* Recurse? */ if (cell_can_recurse_in_self_task(ci)) { @@ -2142,7 +2152,7 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, TIMER_TIC; /* Should we even bother? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return; if (ci->count == 0 || cj->count == 0) return; /* Get the type of pair if not specified explicitly. */ @@ -2351,7 +2361,7 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, } /* Otherwise, compute the pair directly. */ - else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { + else if (cell_is_active_hydro(ci, e) || cell_is_active_hydro(cj, e)) { /* Make sure both cells are drifted to the current timestep. */ if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) @@ -2384,7 +2394,7 @@ void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer) { TIMER_TIC; /* Should we even bother? */ - if (ci->count == 0 || !cell_is_active(ci, r->e)) return; + if (ci->count == 0 || !cell_is_active_hydro(ci, r->e)) return; /* Recurse? */ if (cell_can_recurse_in_self_task(ci)) { @@ -2416,7 +2426,9 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, TIMER_TIC; /* Should we even bother? */ - if (!cell_is_active(ci, e) && (cj == NULL || !cell_is_active(cj, e))) return; + if (!cell_is_active_hydro(ci, e) && + (cj == NULL || !cell_is_active_hydro(cj, e))) + return; if (ci->count == 0 || (cj != NULL && cj->count == 0)) return; /* Find out in which sub-cell of ci the parts are. */ @@ -2970,7 +2982,7 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, } /* Otherwise, compute the pair directly. */ - else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { + else if (cell_is_active_hydro(ci, e) || cell_is_active_hydro(cj, e)) { /* Do any of the cells need to be drifted first? */ if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); diff --git a/src/runner_doiact_fft.c b/src/runner_doiact_fft.c index a8a882e4e765c2679b02347c96c2c7f1f49339f7..945bbb9d892ebe0768cc5cf7b466dcd0192aefed 100644 --- a/src/runner_doiact_fft.c +++ b/src/runner_doiact_fft.c @@ -57,23 +57,30 @@ __attribute__((always_inline)) INLINE static int row_major_id(int i, int j, * @param rho The density mesh. * @param N the size of the mesh along one axis. * @param fac The width of a mesh cell. + * @param dim The dimensions of the simulation box. */ __attribute__((always_inline)) INLINE static void multipole_to_mesh_CIC( - const struct gravity_tensors* m, double* rho, int N, double fac) { + const struct gravity_tensors* m, double* rho, int N, double fac, + const double dim[3]) { - int i = (int)(fac * m->CoM[0]); + /* Box wrap the multipole's position */ + const double CoM_x = box_wrap(m->CoM[0], 0., dim[0]); + const double CoM_y = box_wrap(m->CoM[1], 0., dim[1]); + const double CoM_z = box_wrap(m->CoM[2], 0., dim[2]); + + int i = (int)(fac * CoM_x); if (i >= N) i = N - 1; - const double dx = fac * m->CoM[0] - i; + const double dx = fac * CoM_x - i; const double tx = 1. - dx; - int j = (int)(fac * m->CoM[1]); + int j = (int)(fac * CoM_y); if (j >= N) j = N - 1; - const double dy = fac * m->CoM[1] - j; + const double dy = fac * CoM_y - j; const double ty = 1. - dy; - int k = (int)(fac * m->CoM[2]); + int k = (int)(fac * CoM_z); if (k >= N) k = N - 1; - const double dz = fac * m->CoM[2] - k; + const double dz = fac * CoM_z - k; const double tz = 1. - dz; #ifdef SWIFT_DEBUG_CHECKS @@ -101,23 +108,30 @@ __attribute__((always_inline)) INLINE static void multipole_to_mesh_CIC( * @param pot The potential mesh. * @param N the size of the mesh along one axis. * @param fac width of a mesh cell. + * @param dim The dimensions of the simulation box. */ __attribute__((always_inline)) INLINE static void mesh_to_multipole_CIC( - struct gravity_tensors* m, double* pot, int N, double fac) { + struct gravity_tensors* m, const double* pot, int N, double fac, + const double dim[3]) { + + /* Box wrap the multipole's position */ + const double CoM_x = box_wrap(m->CoM[0], 0., dim[0]); + const double CoM_y = box_wrap(m->CoM[1], 0., dim[1]); + const double CoM_z = box_wrap(m->CoM[2], 0., dim[2]); - int i = (int)(fac * m->CoM[0]); + int i = (int)(fac * CoM_x); if (i >= N) i = N - 1; - const double dx = fac * m->CoM[0] - i; + const double dx = fac * CoM_x - i; const double tx = 1. - dx; - int j = (int)(fac * m->CoM[1]); + int j = (int)(fac * CoM_y); if (j >= N) j = N - 1; - const double dy = fac * m->CoM[1] - j; + const double dy = fac * CoM_y - j; const double ty = 1. - dy; - int k = (int)(fac * m->CoM[2]); + int k = (int)(fac * CoM_z); if (k >= N) k = N - 1; - const double dz = fac * m->CoM[2] - k; + const double dz = fac * CoM_z - k; const double tz = 1. - dz; #ifdef SWIFT_DEBUG_CHECKS @@ -155,6 +169,7 @@ void runner_do_grav_fft(struct runner* r, int timer) { const double a_smooth = e->gravity_properties->a_smooth; const double box_size = s->dim[0]; const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; TIMER_TIC; @@ -195,7 +210,7 @@ void runner_do_grav_fft(struct runner* r, int timer) { /* Do a CIC mesh assignment of the multipoles */ bzero(rho, N * N * N * sizeof(double)); for (int i = 0; i < nr_cells; ++i) - multipole_to_mesh_CIC(&multipoles[i], rho, N, cell_fac); + multipole_to_mesh_CIC(&multipoles[i], rho, N, cell_fac, dim); /* Fourier transform to go to magic-land */ fftw_execute(forward_plan); @@ -272,7 +287,7 @@ void runner_do_grav_fft(struct runner* r, int timer) { /* Get the potential from the mesh using CIC */ for (int i = 0; i < nr_cells; ++i) - mesh_to_multipole_CIC(&multipoles[i], rho, N, cell_fac); + mesh_to_multipole_CIC(&multipoles[i], rho, N, cell_fac, dim); /* Clean-up the mess */ fftw_destroy_plan(forward_plan); diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 5d4de609eb05511b579c44b117231f541e566a2c..384e7fa8429f6675846e10646fb873e8377b070a 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -58,7 +58,7 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) { struct cell *cp = c->progeny[k]; /* Do we have a progenitor with any active g-particles ? */ - if (cp != NULL && cell_is_active(cp, e)) { + if (cp != NULL && cell_is_active_gravity(cp, e)) { #ifdef SWIFT_DEBUG_CHECKS if (cp->ti_old_multipole != e->ti_current) @@ -140,7 +140,7 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci, TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active(ci, e)) return; + if (!cell_is_active_gravity(ci, e) || ci->nodeID != engine_rank) return; /* Short-cut to the multipole */ const struct multipole *multi_j = &cj->multipole->m_pole; @@ -148,7 +148,8 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci, #ifdef SWIFT_DEBUG_CHECKS if (ci == cj) error("Interacting a cell with itself using M2L"); - if (multi_j->M_000 == 0.f) error("Multipole does not seem to have been set."); + if (multi_j->num_gpart == 0) + error("Multipole does not seem to have been set."); if (ci->multipole->pot.ti_init != e->ti_current) error("ci->grav tensor not initialised."); @@ -435,7 +436,7 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) { TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + if (!cell_is_active_gravity(ci, e) && !cell_is_active_gravity(cj, e)) return; /* Check that we are not doing something stupid */ if (ci->split || cj->split) error("Running P-P on splitable cells"); @@ -464,8 +465,8 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) { space_getsid(s, &ci, &cj, cell_shift); /* Record activity status */ - const int ci_active = cell_is_active(ci, e); - const int cj_active = cell_is_active(cj, e); + const int ci_active = cell_is_active_gravity(ci, e); + const int cj_active = cell_is_active_gravity(cj, e); /* Do we need to drift the multipoles ? */ if (cj_active && ci->ti_old_multipole != e->ti_current) @@ -638,7 +639,7 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) { /* Cell properties */ const int gcount = c->gcount; struct gpart *restrict gparts = c->gparts; - const int c_active = cell_is_active(c, e); + const int c_active = cell_is_active_gravity(c, e); const double loc[3] = {c->loc[0] + 0.5 * c->width[0], c->loc[1] + 0.5 * c->width[1], c->loc[2] + 0.5 * c->width[2]}; @@ -764,7 +765,7 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) { /* Cell properties */ const int gcount = c->gcount; struct gpart *restrict gparts = c->gparts; - const int c_active = cell_is_active(c, e); + const int c_active = cell_is_active_gravity(c, e); const double loc[3] = {c->loc[0] + 0.5 * c->width[0], c->loc[1] + 0.5 * c->width[1], c->loc[2] + 0.5 * c->width[2]}; @@ -891,7 +892,7 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) { #endif /* Anything to do here? */ - if (!cell_is_active(c, e)) return; + if (!cell_is_active_gravity(c, e)) return; /* Check that we are not doing something stupid */ if (c->split) error("Running P-P on a splitable cell"); @@ -934,6 +935,7 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj, /* Some constants */ const struct engine *e = r->e; const struct space *s = e->s; + const int nodeID = e->nodeID; const int periodic = s->periodic; const double cell_width = s->width[0]; const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; @@ -942,6 +944,11 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj, const double max_distance = props->a_smooth * props->r_cut_max * cell_width; const double max_distance2 = max_distance * max_distance; + /* Anything to do here? */ + if (!((cell_is_active_gravity(ci, e) && ci->nodeID == nodeID) || + (cell_is_active_gravity(cj, e) && cj->nodeID == nodeID))) + return; + #ifdef SWIFT_DEBUG_CHECKS const int gcount_i = ci->gcount; @@ -954,17 +961,14 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj, /* Sanity check */ if (ci == cj) error("Pair interaction between a cell and itself."); - if (cell_is_active(ci, e) && ci->ti_old_multipole != e->ti_current) + if (cell_is_active_gravity(ci, e) && ci->ti_old_multipole != e->ti_current) error("ci->multipole not drifted."); - if (cell_is_active(cj, e) && cj->ti_old_multipole != e->ti_current) + if (cell_is_active_gravity(cj, e) && cj->ti_old_multipole != e->ti_current) error("cj->multipole not drifted."); #endif TIMER_TIC; - /* Anything to do here? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - /* Recover the multipole information */ struct gravity_tensors *const multi_i = ci->multipole; struct gravity_tensors *const multi_j = cj->multipole; @@ -987,9 +991,9 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj, #ifdef SWIFT_DEBUG_CHECKS /* Need to account for the interactions we missed */ - if (cell_is_active(ci, e)) + if (cell_is_active_gravity(ci, e)) multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; - if (cell_is_active(cj, e)) + if (cell_is_active_gravity(cj, e)) multi_j->pot.num_interacted += multi_i->m_pole.num_gpart; #endif return; @@ -1090,7 +1094,7 @@ void runner_doself_grav(struct runner *r, struct cell *c, int gettimer) { TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active(c, e)) return; + if (!cell_is_active_gravity(c, e)) return; /* If the cell is split, interact each progeny with itself, and with each of its siblings. */ @@ -1144,11 +1148,14 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { TIMER_TIC; /* Recover the list of top-level cells */ - struct cell *cells = e->s->cells_top; - const int nr_cells = e->s->nr_cells; + struct cell *cells = s->cells_top; + const int nr_cells = s->nr_cells; /* Anything to do here? */ - if (!cell_is_active(ci, e)) return; + if (!cell_is_active_gravity(ci, e)) return; + + if (ci->nodeID != engine_rank) + error("Non-local cell in long-range gravity task!"); /* Check multipole has been drifted */ if (ci->ti_old_multipole != e->ti_current) @@ -1163,14 +1170,14 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { /* Loop over all the top-level cells and go for a M-M interaction if * well-separated */ - for (int i = 0; i < nr_cells; ++i) { + for (int n = 0; n < nr_cells; ++n) { /* Handle on the top-level cell and it's gravity business*/ - struct cell *cj = &cells[i]; + struct cell *cj = &cells[n]; const struct gravity_tensors *const multi_j = cj->multipole; - /* Avoid stupid cases */ - if (ci == cj || cj->gcount == 0) continue; + /* Avoid self contributions */ + if (ci == cj) continue; /* Get the distance between the CoMs at the last rebuild*/ double dx_r = CoM_rebuild_i[0] - multi_j->CoM_rebuild[0]; diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index faf782894898b5784bdeb05669170c08106dc64d..05ad1ba7c1d4f171e993b870f11acbbe8707a3ef 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -535,7 +535,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( TIMER_TIC; /* Anything to do here? */ - if (!cell_is_active(c, e)) return; + if (!cell_is_active_hydro(c, e)) return; if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell."); @@ -992,7 +992,7 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec( TIMER_TIC; - if (!cell_is_active(c, e)) return; + if (!cell_is_active_hydro(c, e)) return; if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell."); @@ -1200,8 +1200,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, const double di_max = sort_i[count_i - 1].d - rshift; const double dj_min = sort_j[0].d; const float dx_max = (ci->dx_max_sort + cj->dx_max_sort); - const int active_ci = cell_is_active(ci, e); - const int active_cj = cell_is_active(cj, e); + const int active_ci = cell_is_active_hydro(ci, e); + const int active_cj = cell_is_active_hydro(cj, e); #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ @@ -1574,8 +1574,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, const double di_max = sort_i[count_i - 1].d - rshift; const double dj_min = sort_j[0].d; const float dx_max = (ci->dx_max_sort + cj->dx_max_sort); - const int active_ci = cell_is_active(ci, e); - const int active_cj = cell_is_active(cj, e); + const int active_ci = cell_is_active_hydro(ci, e); + const int active_cj = cell_is_active_hydro(cj, e); #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ diff --git a/src/scheduler.c b/src/scheduler.c index 5af9e9c9e19248838bb98c3ee2305d7dc54324f0..5beab065ffe0c135b2cbb247c531728c5238b865 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -150,7 +150,7 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) { /* Write header */ fprintf(f, "digraph task_dep {\n"); - fprintf(f, "label=\"Task dependencies for SWIFT %s\"", git_revision()); + fprintf(f, "label=\"Task dependencies for SWIFT %s\";\n", git_revision()); fprintf(f, "\t compound=true;\n"); fprintf(f, "\t ratio=0.66;\n"); fprintf(f, "\t node[nodesep=0.15];\n"); @@ -217,10 +217,82 @@ void scheduler_write_dependencies(struct scheduler *s, int verbose) { /* Write to the ffile */ fprintf(f, "\t %s->%s;\n", ta_name, tb_name); + + /* Change colour of implicit tasks */ + if (ta->implicit) + fprintf(f, "\t %s [style = filled];\n\t %s [color = lightgrey];\n", + ta_name, ta_name); + if (tb->implicit) + fprintf(f, "\t %s [style = filled];\n\t %s [color = lightgrey];\n", + tb_name, tb_name); } } } + int density_cluster[4] = {0}; + int force_cluster[4] = {0}; + int gravity_cluster[4] = {0}; + + /* Modify the style of some tasks on the plot */ + for (int type = 0; type < task_type_count; ++type) { + + for (int subtype = 0; subtype < task_subtype_count; ++subtype) { + + const int ind = 2 * (type * task_subtype_count + subtype) * max_nber_dep; + + /* Does this task/sub-task exist? */ + if (table[ind] != -1) { + + /* Make MPI tasks a different shape */ + if (type == task_type_send || type == task_type_recv) + fprintf(f, "\t \"%s %s\" [shape = diamond];\n", taskID_names[type], + subtaskID_names[subtype]); + + for (int k = 0; k < 4; ++k) { + if (type == task_type_self + k && subtype == task_subtype_density) + density_cluster[k] = 1; + if (type == task_type_self + k && subtype == task_subtype_force) + force_cluster[k] = 1; + if (type == task_type_self + k && subtype == task_subtype_grav) + gravity_cluster[k] = 1; + } + if (type == task_type_grav_top_level) gravity_cluster[2] = 1; + if (type == task_type_grav_long_range) gravity_cluster[3] = 1; + } + } + } + + /* Make a cluster for the density tasks */ + fprintf(f, "\t subgraph cluster0{\n"); + fprintf(f, "\t\t label=\"\";\n"); + for (int k = 0; k < 4; ++k) + if (density_cluster[k]) + fprintf(f, "\t\t \"%s %s\";\n", taskID_names[task_type_self + k], + subtaskID_names[task_subtype_density]); + fprintf(f, "\t};\n"); + + /* Make a cluster for the force tasks */ + fprintf(f, "\t subgraph cluster1{\n"); + fprintf(f, "\t\t label=\"\";\n"); + for (int k = 0; k < 4; ++k) + if (force_cluster[k]) + fprintf(f, "\t\t \"%s %s\";\n", taskID_names[task_type_self + k], + subtaskID_names[task_subtype_force]); + fprintf(f, "\t};\n"); + + /* Make a cluster for the gravity tasks */ + fprintf(f, "\t subgraph cluster2{\n"); + fprintf(f, "\t\t label=\"\";\n"); + for (int k = 0; k < 2; ++k) + if (gravity_cluster[k]) + fprintf(f, "\t\t \"%s %s\";\n", taskID_names[task_type_self + k], + subtaskID_names[task_subtype_grav]); + if (gravity_cluster[2]) + fprintf(f, "\t\t %s;\n", taskID_names[task_type_grav_top_level]); + if (gravity_cluster[3]) + fprintf(f, "\t\t %s;\n", taskID_names[task_type_grav_long_range]); + fprintf(f, "\t};\n"); + /* Be clean */ fprintf(f, "}"); fclose(f); @@ -814,7 +886,8 @@ void scheduler_splittasks_mapper(void *map_data, int num_elements, } else if (t->subtype == task_subtype_grav) { scheduler_splittask_gravity(t, s); } else if (t->type == task_type_grav_top_level || - t->type == task_type_grav_ghost) { + t->type == task_type_grav_ghost_in || + t->type == task_type_grav_ghost_out) { /* For future use */ } else { error("Unexpected task sub-type"); @@ -1113,15 +1186,32 @@ void scheduler_reweight(struct scheduler *s, int verbose) { cost = wscale * intrinsics_popcount(t->flags) * t->ci->count * (sizeof(int) * 8 - intrinsics_clz(t->ci->count)); break; + case task_type_self: - cost = 1 * wscale * t->ci->count * t->ci->count; + if (t->subtype == task_subtype_grav) + cost = 1 * wscale * t->ci->gcount * t->ci->gcount; + else if (t->subtype == task_subtype_external_grav) + cost = 1 * wscale * t->ci->gcount; + else + cost = 1 * wscale * t->ci->count * t->ci->count; break; + case task_type_pair: - if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) - cost = 3 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags]; - else - cost = 2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags]; + if (t->subtype == task_subtype_grav) { + if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) + cost = 3 * wscale * t->ci->gcount * t->cj->gcount; + else + cost = 2 * wscale * t->ci->gcount * t->cj->gcount; + } else { + if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) + cost = + 3 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags]; + else + cost = + 2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags]; + } break; + case task_type_sub_pair: if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) { if (t->flags < 0) @@ -1137,11 +1227,15 @@ void scheduler_reweight(struct scheduler *s, int verbose) { 2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags]; } break; + case task_type_sub_self: cost = 1 * wscale * t->ci->count * t->ci->count; break; case task_type_ghost: - if (t->ci == t->ci->super) cost = wscale * t->ci->count; + if (t->ci == t->ci->super_hydro) cost = wscale * t->ci->count; + break; + case task_type_extra_ghost: + if (t->ci == t->ci->super_hydro) cost = wscale * t->ci->count; break; case task_type_drift_part: cost = wscale * t->ci->count; @@ -1149,14 +1243,23 @@ void scheduler_reweight(struct scheduler *s, int verbose) { case task_type_drift_gpart: cost = wscale * t->ci->gcount; break; + case task_type_init_grav: + cost = wscale * t->ci->gcount; + break; + case task_type_grav_down: + cost = wscale * t->ci->gcount; + break; + case task_type_grav_long_range: + cost = wscale * t->ci->gcount; + break; case task_type_kick1: - cost = wscale * t->ci->count; + cost = wscale * t->ci->count + wscale * t->ci->gcount; break; case task_type_kick2: - cost = wscale * t->ci->count; + cost = wscale * t->ci->count + wscale * t->ci->gcount; break; case task_type_timestep: - cost = wscale * t->ci->count; + cost = wscale * t->ci->count + wscale * t->ci->gcount; break; case task_type_send: cost = 10 * wscale * t->ci->count * t->ci->count; @@ -1259,73 +1362,82 @@ void scheduler_start(struct scheduler *s) { scheduler_rewait_mapper(s->tid_active, s->active_count, s); } -/* Check we have not missed an active task */ -#ifdef SWIFT_DEBUG_CHECKS - - const integertime_t ti_current = s->space->e->ti_current; - - if (ti_current > 0) { - - for (int k = 0; k < s->nr_tasks; k++) { - - struct task *t = &s->tasks[k]; - struct cell *ci = t->ci; - struct cell *cj = t->cj; - - if (t->type == task_type_none) continue; - - /* Don't check MPI stuff */ - if (t->type == task_type_send || t->type == task_type_recv) continue; - - /* Don't check the FFT task */ - if (t->type == task_type_grav_top_level || - t->type == task_type_grav_ghost) - continue; - - if (ci == NULL && cj == NULL) { - - error("Task not associated with cells!"); - - } else if (cj == NULL) { /* self */ - - if (ci->ti_end_min == ti_current && t->skip && - t->type != task_type_sort && t->type != task_type_drift_part && - t->type != task_type_drift_gpart) - error( - "Task (type='%s/%s') should not have been skipped " - "ti_current=%lld " - "c->ti_end_min=%lld", - taskID_names[t->type], subtaskID_names[t->subtype], ti_current, - ci->ti_end_min); - - /* Special treatment for sort tasks */ - /* if (ci->ti_end_min == ti_current && t->skip && - t->type == task_type_sort && t->flags == 0) - error( - "Task (type='%s/%s') should not have been skipped " - "ti_current=%lld " - "c->ti_end_min=%lld t->flags=%d", - taskID_names[t->type], subtaskID_names[t->subtype], ti_current, - ci->ti_end_min, t->flags); */ - - } else { /* pair */ - - if (t->skip) { - - /* Check that the pair is active if the local cell is active */ - if ((ci->ti_end_min == ti_current && ci->nodeID == engine_rank) || - (cj->ti_end_min == ti_current && cj->nodeID == engine_rank)) - error( - "Task (type='%s/%s') should not have been skipped " - "ti_current=%lld " - "ci->ti_end_min=%lld cj->ti_end_min=%lld", - taskID_names[t->type], subtaskID_names[t->subtype], ti_current, - ci->ti_end_min, cj->ti_end_min); - } - } - } - } -#endif + /* Check we have not missed an active task */ + /* #ifdef SWIFT_DEBUG_CHECKS */ + + /* const integertime_t ti_current = s->space->e->ti_current; */ + + /* if (ti_current > 0) { */ + + /* for (int k = 0; k < s->nr_tasks; k++) { */ + + /* struct task *t = &s->tasks[k]; */ + /* struct cell *ci = t->ci; */ + /* struct cell *cj = t->cj; */ + + /* if (t->type == task_type_none) continue; */ + + /* /\* Don't check MPI stuff *\/ */ + /* if (t->type == task_type_send || t->type == task_type_recv) continue; + */ + + /* /\* Don't check the FFT task *\/ */ + /* if (t->type == task_type_grav_top_level || */ + /* t->type == task_type_grav_ghost_in || */ + /* t->type == task_type_grav_ghost_out) */ + /* continue; */ + + /* if (ci == NULL && cj == NULL) { */ + + /* error("Task not associated with cells!"); */ + + /* } else if (cj == NULL) { /\* self *\/ */ + + /* if (ci->ti_hydro_end_min == ti_current && t->skip && */ + /* t->type != task_type_sort && t->type != task_type_drift_part && + */ + /* t->type != task_type_drift_gpart) */ + /* error( */ + /* "Task (type='%s/%s') should not have been skipped " */ + /* "ti_current=%lld " */ + /* "c->ti_hydro_end_min=%lld", */ + /* taskID_names[t->type], subtaskID_names[t->subtype], + * ti_current, */ + /* ci->ti_hydro_end_min); */ + + /* /\* Special treatment for sort tasks *\/ */ + /* /\* if (ci->ti_end_min == ti_current && t->skip && */ + /* t->type == task_type_sort && t->flags == 0) */ + /* error( */ + /* "Task (type='%s/%s') should not have been skipped " */ + /* "ti_current=%lld " */ + /* "c->ti_end_min=%lld t->flags=%d", */ + /* taskID_names[t->type], subtaskID_names[t->subtype], + * ti_current, */ + /* ci->ti_end_min, t->flags); *\/ */ + + /* } else { /\* pair *\/ */ + + /* if (t->skip) { */ + + /* /\* Check that the pair is active if the local cell is active *\/ + */ + /* if ((ci->ti_hydro_end_min == ti_current && ci->nodeID == + * engine_rank) || */ + /* (cj->ti_hydro_end_min == ti_current && cj->nodeID == + * engine_rank)) */ + /* error( */ + /* "Task (type='%s/%s') should not have been skipped " */ + /* "ti_current=%lld " */ + /* "ci->ti_hydro_end_min=%lld cj->ti_hydro_end_min=%lld", */ + /* taskID_names[t->type], subtaskID_names[t->subtype], + * ti_current, */ + /* ci->ti_hydro_end_min, cj->ti_hydro_end_min); */ + /* } */ + /* } */ + /* } */ + /* } */ + /* #endif */ /* Loop over the tasks and enqueue whoever is ready. */ if (s->active_count > 1000) { @@ -1381,12 +1493,21 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { switch (t->type) { case task_type_self: case task_type_sub_self: + if (t->subtype == task_subtype_grav) + qid = t->ci->super_gravity->owner; + else + qid = t->ci->super_hydro->owner; + break; case task_type_sort: case task_type_ghost: - case task_type_kick1: - case task_type_kick2: case task_type_drift_part: + qid = t->ci->super_hydro->owner; + break; case task_type_drift_gpart: + qid = t->ci->super_gravity->owner; + break; + case task_type_kick1: + case task_type_kick2: case task_type_timestep: qid = t->ci->super->owner; break; @@ -1419,8 +1540,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { err = MPI_Irecv(t->ci->sparts, t->ci->scount, spart_mpi_type, t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); } else if (t->subtype == task_subtype_multipole) { - err = MPI_Irecv(t->ci->multipole, 1, multipole_mpi_type, - t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + t->buff = malloc(sizeof(struct gravity_tensors) * t->ci->pcell_size); + err = MPI_Irecv( + t->buff, sizeof(struct gravity_tensors) * t->ci->pcell_size, + MPI_BYTE, t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); } else { error("Unknown communication sub-type"); } @@ -1473,13 +1596,11 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { err = MPI_Issend(t->ci->sparts, t->ci->scount, spart_mpi_type, t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); } else if (t->subtype == task_subtype_multipole) { - if ((t->ci->scount * sizeof(struct gravity_tensors)) > - s->mpi_message_limit) - err = MPI_Isend(t->ci->multipole, 1, multipole_mpi_type, - t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); - else - err = MPI_Issend(t->ci->multipole, 1, multipole_mpi_type, - t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + t->buff = malloc(sizeof(struct gravity_tensors) * t->ci->pcell_size); + cell_pack_multipoles(t->ci, t->buff); + err = MPI_Isend( + t->buff, t->ci->pcell_size * sizeof(struct gravity_tensors), + MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); } else { error("Unknown communication sub-type"); } diff --git a/src/space.c b/src/space.c index b9fe639b195226da3d3168581ebd26f3273d28eb..2c7b64badd317190ccc13fda4ac0e9064b78408d 100644 --- a/src/space.c +++ b/src/space.c @@ -64,6 +64,9 @@ int space_subsize_pair = space_subsize_pair_default; int space_subsize_self = space_subsize_self_default; int space_subsize_self_grav = space_subsize_self_grav_default; int space_maxsize = space_maxsize_default; +#ifdef SWIFT_DEBUG_CHECKS +int last_cell_id; +#endif /** * @brief Interval stack necessary for parallel particle sorting. @@ -225,15 +228,18 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->drift_gpart = NULL; c->cooling = NULL; c->sourceterms = NULL; - c->grav_ghost[0] = NULL; - c->grav_ghost[1] = NULL; + c->grav_ghost_in = NULL; + c->grav_ghost_out = NULL; c->grav_long_range = NULL; c->grav_down = NULL; c->super = c; + c->super_hydro = c; + c->super_gravity = c; c->parts = NULL; c->xparts = NULL; c->gparts = NULL; c->sparts = NULL; + if (s->gravity) bzero(c->multipole, sizeof(struct gravity_tensors)); for (int i = 0; i < 13; i++) if (c->sort[i] != NULL) { free(c->sort[i]); @@ -243,11 +249,13 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->recv_xv = NULL; c->recv_rho = NULL; c->recv_gradient = NULL; + c->recv_grav = NULL; c->recv_ti = NULL; c->send_xv = NULL; c->send_rho = NULL; c->send_gradient = NULL; + c->send_grav = NULL; c->send_ti = NULL; #endif } @@ -457,6 +465,8 @@ void space_regrid(struct space *s, int verbose) { c->gcount = 0; c->scount = 0; c->super = c; + c->super_hydro = c; + c->super_gravity = c; c->ti_old_part = ti_old; c->ti_old_gpart = ti_old; c->ti_old_multipole = ti_old; @@ -1990,7 +2000,10 @@ void space_split_recursive(struct space *s, struct cell *c, const int depth = c->depth; int maxdepth = 0; float h_max = 0.0f; - integertime_t ti_end_min = max_nr_timesteps, ti_end_max = 0, ti_beg_max = 0; + integertime_t ti_hydro_end_min = max_nr_timesteps, ti_hydro_end_max = 0, + ti_hydro_beg_max = 0; + integertime_t ti_gravity_end_min = max_nr_timesteps, ti_gravity_end_max = 0, + ti_gravity_beg_max = 0; struct part *parts = c->parts; struct gpart *gparts = c->gparts; struct spart *sparts = c->sparts; @@ -2079,6 +2092,11 @@ void space_split_recursive(struct space *s, struct cell *c, cp->nodeID = c->nodeID; cp->parent = c; cp->super = NULL; + cp->super_hydro = NULL; + cp->super_gravity = NULL; +#ifdef SWIFT_DEBUG_CHECKS + cp->cellID = last_cell_id++; +#endif } /* Split the cell data. */ @@ -2100,9 +2118,18 @@ void space_split_recursive(struct space *s, struct cell *c, progeny_gbuff += c->progeny[k]->gcount; progeny_sbuff += c->progeny[k]->scount; h_max = max(h_max, c->progeny[k]->h_max); - ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); - ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); - ti_beg_max = max(ti_beg_max, c->progeny[k]->ti_beg_max); + ti_hydro_end_min = + min(ti_hydro_end_min, c->progeny[k]->ti_hydro_end_min); + ti_hydro_end_max = + max(ti_hydro_end_max, c->progeny[k]->ti_hydro_end_max); + ti_hydro_beg_max = + max(ti_hydro_beg_max, c->progeny[k]->ti_hydro_beg_max); + ti_gravity_end_min = + min(ti_gravity_end_min, c->progeny[k]->ti_gravity_end_min); + ti_gravity_end_max = + max(ti_gravity_end_max, c->progeny[k]->ti_gravity_end_max); + ti_gravity_beg_max = + max(ti_gravity_beg_max, c->progeny[k]->ti_gravity_beg_max); if (c->progeny[k]->maxdepth > maxdepth) maxdepth = c->progeny[k]->maxdepth; } @@ -2221,9 +2248,13 @@ void space_split_recursive(struct space *s, struct cell *c, } /* Convert into integer times */ - ti_end_min = get_integer_time_end(e->ti_current, time_bin_min); - ti_end_max = get_integer_time_end(e->ti_current, time_bin_max); - ti_beg_max = get_integer_time_begin(e->ti_current + 1, time_bin_max); + ti_hydro_end_min = get_integer_time_end(e->ti_current, time_bin_min); + ti_hydro_end_max = get_integer_time_end(e->ti_current, time_bin_max); + ti_hydro_beg_max = get_integer_time_begin(e->ti_current + 1, time_bin_max); + ti_gravity_end_min = get_integer_time_end(e->ti_current, time_bin_min); + ti_gravity_end_max = get_integer_time_end(e->ti_current, time_bin_max); + ti_gravity_beg_max = + get_integer_time_begin(e->ti_current + 1, time_bin_max); /* Construct the multipole and the centre of mass*/ if (s->gravity) { @@ -2241,10 +2272,12 @@ void space_split_recursive(struct space *s, struct cell *c, c->multipole->r_max = sqrt(dx * dx + dy * dy + dz * dz); } else { gravity_multipole_init(&c->multipole->m_pole); - c->multipole->CoM[0] = c->loc[0] + c->width[0] / 2.; - c->multipole->CoM[1] = c->loc[1] + c->width[1] / 2.; - c->multipole->CoM[2] = c->loc[2] + c->width[2] / 2.; - c->multipole->r_max = 0.; + if (c->nodeID == engine_rank) { + c->multipole->CoM[0] = c->loc[0] + c->width[0] / 2.; + c->multipole->CoM[1] = c->loc[1] + c->width[1] / 2.; + c->multipole->CoM[2] = c->loc[2] + c->width[2] / 2.; + c->multipole->r_max = 0.; + } } c->multipole->r_max_rebuild = c->multipole->r_max; c->multipole->CoM_rebuild[0] = c->multipole->CoM[0]; @@ -2255,9 +2288,12 @@ void space_split_recursive(struct space *s, struct cell *c, /* Set the values for this cell. */ c->h_max = h_max; - c->ti_end_min = ti_end_min; - c->ti_end_max = ti_end_max; - c->ti_beg_max = ti_beg_max; + c->ti_hydro_end_min = ti_hydro_end_min; + c->ti_hydro_end_max = ti_hydro_end_max; + c->ti_hydro_beg_max = ti_hydro_beg_max; + c->ti_gravity_end_min = ti_gravity_end_min; + c->ti_gravity_end_max = ti_gravity_end_max; + c->ti_gravity_beg_max = ti_gravity_beg_max; c->maxdepth = maxdepth; /* Set ownership according to the start of the parts array. */ diff --git a/src/task.c b/src/task.c index 43da1d35680783d977ea743dd4f43c52f0f291bc..72beceea303589fd1370e26a65dbecdde4edcb35 100644 --- a/src/task.c +++ b/src/task.c @@ -48,14 +48,13 @@ /* Task type names. */ const char *taskID_names[task_type_count] = { - "none", "sort", "self", - "pair", "sub_self", "sub_pair", - "init_grav", "ghost", "extra_ghost", - "drift_part", "drift_gpart", "kick1", - "kick2", "timestep", "send", - "recv", "grav_top_level", "grav_long_range", - "grav_ghost", "grav_mm", "grav_down", - "cooling", "sourceterms"}; + "none", "sort", "self", "pair", + "sub_self", "sub_pair", "init_grav", "ghost_in", + "ghost", "ghost_out", "extra_ghost", "drift_part", + "drift_gpart", "kick1", "kick2", "timestep", + "send", "recv", "grav_top_level", "grav_long_range", + "grav_ghost_in", "grav_ghost_out", "grav_mm", "grav_down", + "cooling", "sourceterms"}; /* Sub-task type names. */ const char *subtaskID_names[task_subtype_count] = { diff --git a/src/task.h b/src/task.h index dee888c9f16dd69785a31371da15078af4e0af0c..1b42058f52085053f82a3d875b694a396156635e 100644 --- a/src/task.h +++ b/src/task.h @@ -46,7 +46,9 @@ enum task_types { task_type_sub_self, task_type_sub_pair, task_type_init_grav, + task_type_ghost_in, task_type_ghost, + task_type_ghost_out, task_type_extra_ghost, task_type_drift_part, task_type_drift_gpart, @@ -57,7 +59,8 @@ enum task_types { task_type_recv, task_type_grav_top_level, task_type_grav_long_range, - task_type_grav_ghost, + task_type_grav_ghost_in, + task_type_grav_ghost_out, task_type_grav_mm, task_type_grav_down, task_type_cooling, diff --git a/src/timeline.h b/src/timeline.h index 0f38ff3e9421c122b61caf74290c170b62b2dd92..352056cf340e7bbbce336e03e67a060f172155b1 100644 --- a/src/timeline.h +++ b/src/timeline.h @@ -32,7 +32,7 @@ typedef long long integertime_t; typedef char timebin_t; /*! The number of time bins */ -#define num_time_bins 56 +#define num_time_bins 26 /*! The maximal number of timesteps in a simulation */ #define max_nr_timesteps (1LL << (num_time_bins + 1)) diff --git a/tests/test125cells.c b/tests/test125cells.c index bf1c219fab9927b81bb087684f6ad0d747e958bc..e6bf35cd29270b97dcd771dbedb5f8c26bd8a325 100644 --- a/tests/test125cells.c +++ b/tests/test125cells.c @@ -354,8 +354,8 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, cell->loc[2] = offset[2]; cell->ti_old_part = 8; - cell->ti_end_min = 8; - cell->ti_end_max = 8; + cell->ti_hydro_end_min = 8; + cell->ti_hydro_end_max = 8; // shuffle_particles(cell->parts, cell->count); diff --git a/tests/test27cells.c b/tests/test27cells.c index f9e3ba1dd78e67a11414c88efe3c604b02b1aa16..1952103634104d89b955ed1c47b0836d894ef533 100644 --- a/tests/test27cells.c +++ b/tests/test27cells.c @@ -182,8 +182,8 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->loc[2] = offset[2]; cell->ti_old_part = 8; - cell->ti_end_min = 8; - cell->ti_end_max = 8; + cell->ti_hydro_end_min = 8; + cell->ti_hydro_end_max = 8; shuffle_particles(cell->parts, cell->count); diff --git a/tests/testActivePair.c b/tests/testActivePair.c index fd2244db971f6a5f7cfeb62b9661c33b7ee88145..e9367db302a32130df84e4381d1ab255681504de 100644 --- a/tests/testActivePair.c +++ b/tests/testActivePair.c @@ -141,8 +141,8 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->loc[2] = offset[2]; cell->ti_old_part = 8; - cell->ti_end_min = 8; - cell->ti_end_max = 10; + cell->ti_hydro_end_min = 8; + cell->ti_hydro_end_max = 10; shuffle_particles(cell->parts, cell->count); diff --git a/tests/testDump.c b/tests/testDump.c index b1406ba5f5e6652c6e2727d6b161215f4e905d62..7d193ac32f4069dced2ddb2d89db91328f3ff223 100644 --- a/tests/testDump.c +++ b/tests/testDump.c @@ -40,7 +40,8 @@ void dump_mapper(void *map_data, int num_elements, void *extra_data) { size_t offset; char *out_string = dump_get(d, 7, &offset); char out_buff[8]; - snprintf(out_buff, 8, "%06zi\n", offset / 7); + /* modulo due to bug in gcc, should be removed */ + snprintf(out_buff, 8, "%06zi\n", (offset / 7) % 1000000); memcpy(out_string, out_buff, 7); } diff --git a/tests/testInteractions.c b/tests/testInteractions.c index 864cdd6aaf8c6cdaed6c4d6462cdaed3f6ab1a5b..71d5728f6721b5817e17b74b23415561ba39c21a 100644 --- a/tests/testInteractions.c +++ b/tests/testInteractions.c @@ -673,6 +673,7 @@ int main(int argc, char *argv[]) { break; case 's': sscanf(optarg, "%lf", &spacing); + break; case 'n': sscanf(optarg, "%zu", &count); break; diff --git a/tests/testPeriodicBC.c b/tests/testPeriodicBC.c index ded767c5d657c0a05dc227d1ee8facbfa5f4de63..f3687eac6be87cd60c12404c19916c8101c42dc1 100644 --- a/tests/testPeriodicBC.c +++ b/tests/testPeriodicBC.c @@ -171,8 +171,8 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->loc[2] = offset[2]; cell->ti_old_part = 8; - cell->ti_end_min = 8; - cell->ti_end_max = 8; + cell->ti_hydro_end_min = 8; + cell->ti_hydro_end_max = 8; shuffle_particles(cell->parts, cell->count); diff --git a/tests/testSPHStep.c b/tests/testSPHStep.c index e890c7c1a834ec7ca13ed2e8a509b7ea42db28fd..08d6abaa7521de2a7d12fd9672db0d24a5a20a97 100644 --- a/tests/testSPHStep.c +++ b/tests/testSPHStep.c @@ -77,8 +77,8 @@ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) { cell->width[1] = cellSize; cell->width[2] = cellSize; - cell->ti_end_min = 1; - cell->ti_end_max = 1; + cell->ti_hydro_end_min = 1; + cell->ti_hydro_end_max = 1; cell->sorted = 0; for (int k = 0; k < 13; k++) cell->sort[k] = NULL;