diff --git a/examples/main.c b/examples/main.c index 690cbba113e0f90037b57d80bc4f3245559efa03..37d9ff3346d94f8a2588f5bdfabf7406be557d26 100644 --- a/examples/main.c +++ b/examples/main.c @@ -798,8 +798,8 @@ int main(int argc, char *argv[]) { /* Initialize the engine with the space and policies. */ if (myrank == 0) clocks_gettime(&tic); - engine_init(&e, &s, params, N_total[0], N_total[1], engine_policies, - talking, &reparttype, &us, &prog_const, &cosmo, + engine_init(&e, &s, params, N_total[0], N_total[1], N_total[2], + engine_policies, talking, &reparttype, &us, &prog_const, &cosmo, &hydro_properties, &gravity_properties, &potential, &cooling_func, &chemistry, &sourceterms); engine_config(0, &e, params, nr_nodes, myrank, nr_threads, with_aff, diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index c7dd4846add74f13f53f293a1cc5ed1b871340c8..363dfe5fac67cfae3455bce30fbac24a317eae71 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -59,11 +59,12 @@ Scheduler: # Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.) TimeIntegration: - time_begin: 0. # The starting time of the simulation (in internal units). - time_end: 1. # The end time of the simulation (in internal units). - dt_min: 1e-6 # The minimal time-step size of the simulation (in internal units). - dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). - + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 1. # The end time of the simulation (in internal units). + dt_min: 1e-6 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-2 # The maximal time-step size of the simulation (in internal units). + max_dt_RMS_factor: 0.25 # (Optional) Dimensionless factor for the maximal displacement allowed based on the RMS velocities. + # Parameters governing the snapshots Snapshots: basename: output # Common part of the name of output files diff --git a/src/atomic.h b/src/atomic.h index 36e336b721184678677e69c9ea5001460eb6efa1..6af25c718055320b7b34ed9cee2786da070e97d1 100644 --- a/src/atomic.h +++ b/src/atomic.h @@ -36,47 +36,86 @@ /** * @brief Atomic min operation on floats. + * + * This is a text-book implementation based on an atomic CAS. + * + * @param address The address to update. + * @param y The value to update the address with. */ -__attribute__((always_inline)) INLINE void atomic_min_f(volatile float* x, - float y) { - float test, new; - float old = *x; +__attribute__((always_inline)) INLINE static void atomic_min_f( + volatile float* address, float y) { + + int* int_ptr = (int*)address; + + typedef union { + float as_float; + int as_int; + } cast_type; + + cast_type assumed, old, new; + old.as_float = *address; + do { - test = old; - new = min(old, y); - if (new == old) return; - old = atomic_cas((int*)x, test, new); - } while (test != old); + assumed.as_int = old.as_int; + new.as_float = min(old.as_float, y); + old.as_int = atomic_cas(int_ptr, assumed.as_int, new.as_int); + } while (assumed.as_int != old.as_int); } /** * @brief Atomic max operation on floats. + * + * This is a text-book implementation based on an atomic CAS. + * + * @param address The address to update. + * @param y The value to update the address with. */ -__attribute__((always_inline)) INLINE void atomic_max_f(volatile float* x, - float y) { - float test, new; - float old = *x; +__attribute__((always_inline)) INLINE static void atomic_max_f( + volatile float* address, float y) { + + int* int_ptr = (int*)address; + + typedef union { + float as_float; + int as_int; + } cast_type; + + cast_type assumed, old, new; + old.as_float = *address; + do { - test = old; - new = max(old, y); - if (new == old) return; - old = atomic_cas((int*)x, test, new); - } while (test != old); + assumed.as_int = old.as_int; + new.as_float = max(old.as_float, y); + old.as_int = atomic_cas(int_ptr, assumed.as_int, new.as_int); + } while (assumed.as_int != old.as_int); } /** * @brief Atomic add operation on floats. + * + * This is a text-book implementation based on an atomic CAS. + * + * @param address The address to update. + * @param y The value to update the address with. */ -__attribute__((always_inline)) INLINE void atomic_add_f(volatile float* x, - float y) { - float test, new; - float old = *x; +__attribute__((always_inline)) INLINE static void atomic_add_f( + volatile float* address, float y) { + + int* int_ptr = (int*)address; + + typedef union { + float as_float; + int as_int; + } cast_type; + + cast_type assumed, old, new; + old.as_float = *address; + do { - test = old; - new = old + y; - if (new == old) return; - old = atomic_cas((int*)x, test, new); - } while (test != old); + assumed.as_int = old.as_int; + new.as_float = old.as_float + y; + old.as_int = atomic_cas(int_ptr, assumed.as_int, new.as_int); + } while (assumed.as_int != old.as_int); } #endif /* SWIFT_ATOMIC_H */ diff --git a/src/engine.c b/src/engine.c index bb9a478b4933106c3753a31388bdf78117366147..5dafce3ce1af480895eb9fa7d7a44e35bc18b547 100644 --- a/src/engine.c +++ b/src/engine.c @@ -5361,7 +5361,8 @@ void engine_unpin() { * @param s The #space in which this #runner will run. * @param params The parsed parameter file. * @param Ngas total number of gas particles in the simulation. - * @param Ndm total number of gravity particles in the simulation. + * @param Ngparts total number of gravity particles in the simulation. + * @param Nstars total number of star particles in the simulation. * @param policy The queuing policy to use. * @param verbose Is this #engine talkative ? * @param reparttype What type of repartition algorithm are we using ? @@ -5377,7 +5378,7 @@ void engine_unpin() { */ void engine_init(struct engine *e, struct space *s, const struct swift_params *params, long long Ngas, - long long Ndm, int policy, int verbose, + long long Ngparts, long long Nstars, int policy, int verbose, struct repartition *reparttype, const struct unit_system *internal_units, const struct phys_const *physical_constants, @@ -5396,7 +5397,8 @@ void engine_init(struct engine *e, struct space *s, e->policy = policy; e->step = 0; e->total_nr_parts = Ngas; - e->total_nr_gparts = Ndm; + e->total_nr_gparts = Ngparts; + e->total_nr_sparts = Nstars; e->proxy_ind = NULL; e->nr_proxies = 0; e->reparttype = reparttype; @@ -5427,7 +5429,7 @@ void engine_init(struct engine *e, struct space *s, e->dt_max = parser_get_param_double(params, "TimeIntegration:dt_max"); e->dt_max_RMS_displacement = FLT_MAX; e->max_RMS_displacement_factor = parser_get_opt_param_double( - params, "TimeIntegration:max_dt_RMS_factor", 1.); + params, "TimeIntegration:max_dt_RMS_factor", 0.25); e->a_first_statistics = parser_get_opt_param_double(params, "Statistics:scale_factor_first", 0.1); e->time_first_statistics = @@ -6136,7 +6138,107 @@ void engine_compute_next_statistics_time(struct engine *e) { */ void engine_recompute_displacement_constraint(struct engine *e) { - message("max_dt_RMS_displacement = %e", e->dt_max_RMS_displacement); + /* Get the cosmological information */ + const struct cosmology *cosmo = e->cosmology; + const float Om = cosmo->Omega_m; + const float Ob = cosmo->Omega_b; + const float rho_crit = cosmo->critical_density; + const float a = cosmo->a; + + /* Start by reducing the minimal mass of each particle type */ + float min_mass[swift_type_count] = {e->s->min_part_mass, + e->s->min_gpart_mass, + FLT_MAX, + FLT_MAX, + e->s->min_spart_mass, + FLT_MAX}; +#ifdef SWIFT_DEBUG_CHECKS + /* Check that the minimal mass collection worked */ + float min_part_mass_check = FLT_MAX; + for (size_t i = 0; i < e->s->nr_parts; ++i) + min_part_mass_check = + min(min_part_mass_check, hydro_get_mass(&e->s->parts[i])); + if (min_part_mass_check != min_mass[swift_type_gas]) + error("Error collecting minimal mass of gas particles."); +#endif + +#ifdef WITH_MPI + MPI_Allreduce(MPI_IN_PLACE, min_mass, swift_type_count, MPI_FLOAT, MPI_MIN, + MPI_COMM_WORLD); +#endif + + /* Do the same for the velocity norm sum */ + float vel_norm[swift_type_count] = {e->s->sum_part_vel_norm, + e->s->sum_gpart_vel_norm, + 0.f, + 0.f, + e->s->sum_spart_vel_norm, + 0.f}; +#ifdef WITH_MPI + MPI_Allreduce(MPI_IN_PLACE, vel_norm, swift_type_count, MPI_FLOAT, MPI_SUM, + MPI_COMM_WORLD); +#endif + + /* Get the counts of each particle types */ + const long long total_nr_dm_gparts = + e->total_nr_gparts - e->total_nr_parts - e->total_nr_sparts; + float count_parts[swift_type_count] = { + e->total_nr_parts, total_nr_dm_gparts, 0.f, 0.f, e->total_nr_sparts, 0.f}; + + /* Count of particles for the two species */ + const float N_dm = count_parts[1]; + const float N_b = count_parts[0] + count_parts[4]; + + /* Peculiar motion norm for the two species */ + const float vel_norm_dm = vel_norm[1]; + const float vel_norm_b = vel_norm[0] + vel_norm[4]; + + /* Mesh forces smoothing scale */ + const float a_smooth = + e->gravity_properties->a_smooth * e->s->dim[0] / e->s->cdim[0]; + + float dt_dm = FLT_MAX, dt_b = FLT_MAX; + + /* DM case */ + if (N_dm > 0.f) { + + /* Minimal mass for the DM */ + const float min_mass_dm = min_mass[1]; + + /* Inter-particle sepration for the DM */ + const float d_dm = cbrtf(min_mass_dm / ((Om - Ob) * rho_crit)); + + /* RMS peculiar motion for the DM */ + const float rms_vel_dm = vel_norm_dm / N_dm; + + /* Time-step based on maximum displacement */ + dt_dm = a * a * min(a_smooth, d_dm) / sqrtf(rms_vel_dm); + } + + /* Baryon case */ + if (N_b > 0.f) { + + /* Minimal mass for the baryons */ + const float min_mass_b = min(min_mass[0], min_mass[4]); + + /* Inter-particle sepration for the baryons */ + const float d_b = cbrtf(min_mass_b / (Ob * rho_crit)); + + /* RMS peculiar motion for the baryons */ + const float rms_vel_b = vel_norm_b / N_b; + + /* Time-step based on maximum displacement */ + dt_b = a * a * min(a_smooth, d_b) / sqrtf(rms_vel_b); + } + + /* Use the minimum */ + const float dt = min(dt_dm, dt_b); + + /* Apply the dimensionless factor */ + e->dt_max_RMS_displacement = dt * e->max_RMS_displacement_factor; + + if (e->verbose) + message("max_dt_RMS_displacement = %e", e->dt_max_RMS_displacement); } /** diff --git a/src/engine.h b/src/engine.h index a6707e0426d485a61464c440915c94d6e74a580b..db3db3158748414fb9b048feabcc2695ae0873a5 100644 --- a/src/engine.h +++ b/src/engine.h @@ -192,7 +192,7 @@ struct engine { int step_props; /* Total numbers of particles in the system. */ - long long total_nr_parts, total_nr_gparts; + long long total_nr_parts, total_nr_gparts, total_nr_sparts; /* The internal system of units */ const struct unit_system *internal_units; @@ -350,7 +350,7 @@ void engine_print_stats(struct engine *e); void engine_dump_snapshot(struct engine *e); void engine_init(struct engine *e, struct space *s, const struct swift_params *params, long long Ngas, - long long Ndm, int policy, int verbose, + long long Ngparts, long long Nstars, int policy, int verbose, struct repartition *reparttype, const struct unit_system *internal_units, const struct phys_const *physical_constants, diff --git a/src/space.c b/src/space.c index f0c5d4c8b91e329e9b593266a4cec9e678798af5..5dbbfd22913aae68fc5598c0235c31c02a532259 100644 --- a/src/space.c +++ b/src/space.c @@ -1087,8 +1087,9 @@ void space_parts_get_cell_index_mapper(void *map_data, int nr_parts, if (cell_counts == NULL) error("Failed to allocate temporary cell count buffer."); - /* Init the local minimal part mass */ + /* Init the local collectors */ float min_mass = FLT_MAX; + float sum_vel_norm = 0.f; /* Loop over the parts. */ for (int k = 0; k < nr_parts; k++) { @@ -1125,6 +1126,9 @@ void space_parts_get_cell_index_mapper(void *map_data, int nr_parts, /* Compute minimal mass */ min_mass = min(min_mass, hydro_get_mass(p)); + /* Compute sum of velocity norm */ + sum_vel_norm += p->v[0] * p->v[0] + p->v[1] * p->v[1] + p->v[2] * p->v[2]; + /* Update the position */ p->x[0] = pos_x; p->x[1] = pos_y; @@ -1136,8 +1140,9 @@ void space_parts_get_cell_index_mapper(void *map_data, int nr_parts, if (cell_counts[k]) atomic_add(&data->cell_counts[k], cell_counts[k]); free(cell_counts); - /* Write back thee minimal part mass */ + /* Write back the minimal part mass and velocity sum */ atomic_min_f(&s->min_part_mass, min_mass); + atomic_add_f(&s->sum_part_vel_norm, sum_vel_norm); } /** @@ -1170,8 +1175,9 @@ void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts, if (cell_counts == NULL) error("Failed to allocate temporary cell count buffer."); - /* Init the local minimal part mass */ + /* Init the local collectors */ float min_mass = FLT_MAX; + float sum_vel_norm = 0.f; for (int k = 0; k < nr_gparts; k++) { @@ -1205,7 +1211,12 @@ void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts, #endif /* Compute minimal mass */ - if (gp->type == swift_type_dark_matter) min_mass = min(min_mass, gp->mass); + if (gp->type == swift_type_dark_matter) { + min_mass = min(min_mass, gp->mass); + sum_vel_norm += gp->v_full[0] * gp->v_full[0] + + gp->v_full[1] * gp->v_full[1] + + gp->v_full[2] * gp->v_full[2]; + } /* Update the position */ gp->x[0] = pos_x; @@ -1218,8 +1229,9 @@ void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts, if (cell_counts[k]) atomic_add(&data->cell_counts[k], cell_counts[k]); free(cell_counts); - /* Write back thee minimal part mass */ + /* Write back the minimal part mass and velocity sum */ atomic_min_f(&s->min_gpart_mass, min_mass); + atomic_add_f(&s->sum_gpart_vel_norm, sum_vel_norm); } /** @@ -1252,8 +1264,9 @@ void space_sparts_get_cell_index_mapper(void *map_data, int nr_sparts, if (cell_counts == NULL) error("Failed to allocate temporary cell count buffer."); - /* Init the local minimal part mass */ + /* Init the local collectors */ float min_mass = FLT_MAX; + float sum_vel_norm = 0.f; for (int k = 0; k < nr_sparts; k++) { @@ -1289,6 +1302,10 @@ void space_sparts_get_cell_index_mapper(void *map_data, int nr_sparts, /* Compute minimal mass */ min_mass = min(min_mass, sp->mass); + /* Compute sum of velocity norm */ + sum_vel_norm += + sp->v[0] * sp->v[0] + sp->v[1] * sp->v[1] + sp->v[2] * sp->v[2]; + /* Update the position */ sp->x[0] = pos_x; sp->x[1] = pos_y; @@ -1300,8 +1317,9 @@ void space_sparts_get_cell_index_mapper(void *map_data, int nr_sparts, if (cell_counts[k]) atomic_add(&data->cell_counts[k], cell_counts[k]); free(cell_counts); - /* Write back thee minimal part mass */ + /* Write back the minimal part mass and velocity sum */ atomic_min_f(&s->min_spart_mass, min_mass); + atomic_add_f(&s->sum_spart_vel_norm, sum_vel_norm); } /** @@ -1320,8 +1338,9 @@ void space_parts_get_cell_index(struct space *s, int *ind, int *cell_counts, const ticks tic = getticks(); - /* Re-set the minimal mass */ + /* Re-set the counters */ s->min_part_mass = FLT_MAX; + s->sum_part_vel_norm = 0.f; /* Pack the extra information */ struct index_data data; @@ -1354,8 +1373,9 @@ void space_gparts_get_cell_index(struct space *s, int *gind, int *cell_counts, const ticks tic = getticks(); - /* Re-set the minimal mass */ + /* Re-set the counters */ s->min_gpart_mass = FLT_MAX; + s->sum_gpart_vel_norm = 0.f; /* Pack the extra information */ struct index_data data; @@ -1388,8 +1408,9 @@ void space_sparts_get_cell_index(struct space *s, int *sind, int *cell_counts, const ticks tic = getticks(); - /* Re-set the minimal mass */ + /* Re-set the counters */ s->min_spart_mass = FLT_MAX; + s->sum_spart_vel_norm = 0.f; /* Pack the extra information */ struct index_data data; @@ -2685,6 +2706,12 @@ void space_init(struct space *s, const struct swift_params *params, s->nr_sparts = Nspart; s->size_sparts = Nspart; s->sparts = sparts; + s->min_part_mass = FLT_MAX; + s->min_gpart_mass = FLT_MAX; + s->min_spart_mass = FLT_MAX; + s->sum_part_vel_norm = 0.f; + s->sum_gpart_vel_norm = 0.f; + s->sum_spart_vel_norm = 0.f; s->nr_queues = 1; /* Temporary value until engine construction */ /* Are we generating gas from the DM-only ICs? */ diff --git a/src/space.h b/src/space.h index 2f17854e38cf26c24fb463bddb25d995ec144b69..df9cb2ca461e0ba2e917c25ec3c336ba884708da 100644 --- a/src/space.h +++ b/src/space.h @@ -155,6 +155,15 @@ struct space { /*! Minimal mass of all the #spart */ float min_spart_mass; + /*! Sum of the norm of the velocity of all the #part */ + float sum_part_vel_norm; + + /*! Sum of the norm of the velocity of all the dark-matter #gpart */ + float sum_gpart_vel_norm; + + /*! Sum of the norm of the velocity of all the #spart */ + float sum_spart_vel_norm; + /*! General-purpose lock for this space. */ swift_lock_type lock;