diff --git a/src/cache.h b/src/cache.h index 3eb1e194dd4232319ac1d4a4323ca8099f044063..c939da28589c1421e0e4241dca124a8320d5c87b 100644 --- a/src/cache.h +++ b/src/cache.h @@ -349,10 +349,12 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]); z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]); h[i] = parts_i[idx].h; - m[i] = parts_i[idx].mass; vx[i] = parts_i[idx].v[0]; vy[i] = parts_i[idx].v[1]; vz[i] = parts_i[idx].v[2]; +#ifdef GADGET2_SPH + m[i] = parts_i[idx].mass; +#endif } #ifdef SWIFT_DEBUG_CHECKS @@ -431,10 +433,12 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]); zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]); hj[i] = parts_j[idx].h; - mj[i] = parts_j[idx].mass; vxj[i] = parts_j[idx].v[0]; vyj[i] = parts_j[idx].v[1]; vzj[i] = parts_j[idx].v[2]; +#ifdef GADGET2_SPH + mj[i] = parts_j[idx].mass; +#endif } #ifdef SWIFT_DEBUG_CHECKS @@ -572,15 +576,17 @@ cache_read_two_partial_cells_sorted_force( y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]); z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]); h[i] = parts_i[idx].h; - m[i] = parts_i[idx].mass; vx[i] = parts_i[idx].v[0]; vy[i] = parts_i[idx].v[1]; vz[i] = parts_i[idx].v[2]; +#ifdef GADGET2_SPH + m[i] = parts_i[idx].mass; rho[i] = parts_i[idx].rho; grad_h[i] = parts_i[idx].force.f; pOrho2[i] = parts_i[idx].force.P_over_rho2; balsara[i] = parts_i[idx].force.balsara; soundspeed[i] = parts_i[idx].force.soundspeed; +#endif } /* Pad cache with fake particles that exist outside the cell so will not @@ -635,15 +641,17 @@ cache_read_two_partial_cells_sorted_force( yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]); zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]); hj[i] = parts_j[idx].h; - mj[i] = parts_j[idx].mass; vxj[i] = parts_j[idx].v[0]; vyj[i] = parts_j[idx].v[1]; vzj[i] = parts_j[idx].v[2]; +#ifdef GADGET2_SPH + mj[i] = parts_j[idx].mass; rhoj[i] = parts_j[idx].rho; grad_hj[i] = parts_j[idx].force.f; pOrho2j[i] = parts_j[idx].force.P_over_rho2; balsaraj[i] = parts_j[idx].force.balsara; soundspeedj[i] = parts_j[idx].force.soundspeed; +#endif } /* Pad cache with fake particles that exist outside the cell so will not diff --git a/src/common_io.c b/src/common_io.c index 917f2a63755f88c300ed7ff654bfe6e8cf3c464f..47e04b868b9d610983b977bd6455c1c78d70bb41 100644 --- a/src/common_io.c +++ b/src/common_io.c @@ -40,10 +40,13 @@ #include "common_io.h" /* Local includes. */ +#include "engine.h" #include "error.h" #include "hydro.h" +#include "io_properties.h" #include "kernel_hydro.h" #include "part.h" +#include "threadpool.h" #include "units.h" #include "version.h" @@ -266,7 +269,6 @@ void io_write_attribute_f(hid_t grp, const char* name, float data) { * @param name The name of the attribute * @param data The value to write */ - void io_write_attribute_i(hid_t grp, const char* name, int data) { io_write_attribute(grp, name, INT, &data, 1); } @@ -295,16 +297,19 @@ void io_write_attribute_s(hid_t grp, const char* name, const char* str) { * @brief Reads the Unit System from an IC file. * @param h_file The (opened) HDF5 file from which to read. * @param us The unit_system to fill. + * @param mpi_rank The MPI rank we are on. * * If the 'Units' group does not exist in the ICs, cgs units will be assumed */ -void io_read_unit_system(hid_t h_file, struct unit_system* us) { +void io_read_unit_system(hid_t h_file, struct unit_system* us, int mpi_rank) { /* First check if it exists as this is *not* required. */ const htri_t exists = H5Lexists(h_file, "/Units", H5P_DEFAULT); if (exists == 0) { - message("'Units' group not found in ICs. Assuming CGS unit system."); + + if (mpi_rank == 0) + message("'Units' group not found in ICs. Assuming CGS unit system."); /* Default to CGS */ us->UnitMass_in_cgs = 1.; @@ -319,7 +324,7 @@ void io_read_unit_system(hid_t h_file, struct unit_system* us) { exists); } - message("Reading IC units from ICs."); + if (mpi_rank == 0) message("Reading IC units from ICs."); hid_t h_grp = H5Gopen(h_file, "/Units", H5P_DEFAULT); /* Ok, Read the damn thing */ @@ -401,6 +406,208 @@ void io_write_code_description(hid_t h_file) { H5Gclose(h_grpcode); } +/** + * @brief Mapper function to copy #part or #gpart fields into a buffer. + */ +void io_copy_mapper(void* restrict temp, int N, void* restrict extra_data) { + + const struct io_props props = *((const struct io_props*)extra_data); + const size_t typeSize = io_sizeof_type(props.type); + const size_t copySize = typeSize * props.dimension; + + /* How far are we with this chunk? */ + char* restrict temp_c = temp; + const ptrdiff_t delta = (temp_c - props.start_temp_c) / copySize; + + for (int k = 0; k < N; k++) { + memcpy(&temp_c[k * copySize], props.field + (delta + k) * props.partSize, + copySize); + } +} + +/** + * @brief Mapper function to copy #part into a buffer of floats using a + * conversion function. + */ +void io_convert_part_f_mapper(void* restrict temp, int N, + void* restrict extra_data) { + + const struct io_props props = *((const struct io_props*)extra_data); + const struct part* restrict parts = props.parts; + const struct engine* e = props.e; + const size_t dim = props.dimension; + + /* How far are we with this chunk? */ + float* restrict temp_f = temp; + const ptrdiff_t delta = (temp_f - props.start_temp_f) / dim; + + for (int i = 0; i < N; i++) + props.convert_part_f(e, parts + delta + i, &temp_f[i * dim]); +} + +/** + * @brief Mapper function to copy #part into a buffer of doubles using a + * conversion function. + */ +void io_convert_part_d_mapper(void* restrict temp, int N, + void* restrict extra_data) { + + const struct io_props props = *((const struct io_props*)extra_data); + const struct part* restrict parts = props.parts; + const struct engine* e = props.e; + const size_t dim = props.dimension; + + /* How far are we with this chunk? */ + double* restrict temp_d = temp; + const ptrdiff_t delta = (temp_d - props.start_temp_d) / dim; + + for (int i = 0; i < N; i++) + props.convert_part_d(e, parts + delta + i, &temp_d[i * dim]); +} + +/** + * @brief Mapper function to copy #gpart into a buffer of floats using a + * conversion function. + */ +void io_convert_gpart_f_mapper(void* restrict temp, int N, + void* restrict extra_data) { + + const struct io_props props = *((const struct io_props*)extra_data); + const struct gpart* restrict gparts = props.gparts; + const struct engine* e = props.e; + const size_t dim = props.dimension; + + /* How far are we with this chunk? */ + float* restrict temp_f = temp; + const ptrdiff_t delta = (temp_f - props.start_temp_f) / dim; + + for (int i = 0; i < N; i++) + props.convert_gpart_f(e, gparts + delta + i, &temp_f[i * dim]); +} + +/** + * @brief Mapper function to copy #gpart into a buffer of doubles using a + * conversion function. + */ +void io_convert_gpart_d_mapper(void* restrict temp, int N, + void* restrict extra_data) { + + const struct io_props props = *((const struct io_props*)extra_data); + const struct gpart* restrict gparts = props.gparts; + const struct engine* e = props.e; + const size_t dim = props.dimension; + + /* How far are we with this chunk? */ + double* restrict temp_d = temp; + const ptrdiff_t delta = (temp_d - props.start_temp_d) / dim; + + for (int i = 0; i < N; i++) + props.convert_gpart_d(e, gparts + delta + i, &temp_d[i * dim]); +} + +/** + * @brief Copy the particle data into a temporary buffer ready for i/o. + * + * @param temp The buffer to be filled. Must be allocated and aligned properly. + * @param e The #engine. + * @param props The #io_props corresponding to the particle field we are + * copying. + * @param N The number of particles to copy + * @param internal_units The system of units used internally. + * @param snapshot_units The system of units used for the snapshots. + */ +void io_copy_temp_buffer(void* temp, const struct engine* e, + struct io_props props, size_t N, + const struct unit_system* internal_units, + const struct unit_system* snapshot_units) { + + const size_t typeSize = io_sizeof_type(props.type); + const size_t copySize = typeSize * props.dimension; + const size_t num_elements = N * props.dimension; + + /* Copy particle data to temporary buffer */ + if (props.conversion == 0) { /* No conversion */ + + /* Prepare some parameters */ + char* temp_c = temp; + props.start_temp_c = temp_c; + + /* Copy the whole thing into a buffer */ + threadpool_map((struct threadpool*)&e->threadpool, io_copy_mapper, temp_c, + N, copySize, 0, (void*)&props); + + } else { /* Converting particle to data */ + + if (props.convert_part_f != NULL) { + + /* Prepare some parameters */ + float* temp_f = temp; + props.start_temp_f = temp; + props.e = e; + + /* Copy the whole thing into a buffer */ + threadpool_map((struct threadpool*)&e->threadpool, + io_convert_part_f_mapper, temp_f, N, copySize, 0, + (void*)&props); + + } else if (props.convert_part_d != NULL) { + + /* Prepare some parameters */ + double* temp_d = temp; + props.start_temp_d = temp; + props.e = e; + + /* Copy the whole thing into a buffer */ + threadpool_map((struct threadpool*)&e->threadpool, + io_convert_part_d_mapper, temp_d, N, copySize, 0, + (void*)&props); + + } else if (props.convert_gpart_f != NULL) { + + /* Prepare some parameters */ + float* temp_f = temp; + props.start_temp_f = temp; + props.e = e; + + /* Copy the whole thing into a buffer */ + threadpool_map((struct threadpool*)&e->threadpool, + io_convert_gpart_f_mapper, temp_f, N, copySize, 0, + (void*)&props); + + } else if (props.convert_gpart_d != NULL) { + + /* Prepare some parameters */ + double* temp_d = temp; + props.start_temp_d = temp; + props.e = e; + + /* Copy the whole thing into a buffer */ + threadpool_map((struct threadpool*)&e->threadpool, + io_convert_gpart_d_mapper, temp_d, N, copySize, 0, + (void*)&props); + + } else { + error("Missing conversion function"); + } + } + + /* Unit conversion if necessary */ + const double factor = + units_conversion_factor(internal_units, snapshot_units, props.units); + if (factor != 1.) { + + /* message("Converting ! factor=%e", factor); */ + + if (io_is_double_precision(props.type)) { + swift_declare_aligned_ptr(double, temp_d, temp, IO_BUFFER_ALIGNMENT); + for (size_t i = 0; i < num_elements; ++i) temp_d[i] *= factor; + } else { + swift_declare_aligned_ptr(float, temp_f, temp, IO_BUFFER_ALIGNMENT); + for (size_t i = 0; i < num_elements; ++i) temp_f[i] *= factor; + } + } +} + #endif /* HAVE_HDF5 */ /* ------------------------------------------------------------------------------------------------ diff --git a/src/common_io.h b/src/common_io.h index 577d1664db20196c8d0c48ef5b5e4e960b0e195e..2e6d4247a5e2d0f558f6bfa601fd6c57d80e05d9 100644 --- a/src/common_io.h +++ b/src/common_io.h @@ -30,6 +30,11 @@ #define FIELD_BUFFER_SIZE 200 #define PARTICLE_GROUP_BUFFER_SIZE 50 #define FILENAME_BUFFER_SIZE 150 +#define IO_BUFFER_ALIGNMENT 1024 + +/* Avoid cyclic inclusion problems */ +struct io_props; +struct engine; #if defined(HAVE_HDF5) @@ -68,10 +73,15 @@ void io_write_attribute_s(hid_t grp, const char* name, const char* str); void io_write_code_description(hid_t h_file); -void io_read_unit_system(hid_t h_file, struct unit_system* us); +void io_read_unit_system(hid_t h_file, struct unit_system* us, int mpi_rank); void io_write_unit_system(hid_t h_grp, const struct unit_system* us, const char* groupName); +void io_copy_temp_buffer(void* temp, const struct engine* e, + const struct io_props props, size_t N, + const struct unit_system* internal_units, + const struct unit_system* snapshot_units); + #endif /* defined HDF5 */ void io_collect_dm_gparts(const struct gpart* const gparts, size_t Ntot, diff --git a/src/engine.c b/src/engine.c index 76a2381312b1e0db27f389ce2441bc57a8398aac..009779111703511ee2c1617013fb21b8e3f3553c 100644 --- a/src/engine.c +++ b/src/engine.c @@ -2624,9 +2624,8 @@ void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, /* Self-interaction? */ else if (t->type == task_type_self && t->subtype == task_subtype_density) { - /* Make all density tasks depend on the drift and the sorts. */ + /* Make the self-density tasks depend on the drift only. */ scheduler_addunlock(sched, t->ci->super->drift_part, t); - scheduler_addunlock(sched, t->ci->super->sorts, t); #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop. */ @@ -5004,7 +5003,7 @@ void engine_init(struct engine *e, struct space *s, if (with_aff) engine_unpin(); #endif - if (with_aff) { + if (with_aff && nodeID == 0) { #ifdef HAVE_SETAFFINITY #ifdef WITH_MPI printf("[%04i] %s engine_init: cpu map is [ ", nodeID, diff --git a/src/gravity/Default/gravity_io.h b/src/gravity/Default/gravity_io.h index 26eed8a0a680e13130dd257d8b31e6cd00392f6d..671ccc24e95f322d71a734c0a39eface2a93bcec 100644 --- a/src/gravity/Default/gravity_io.h +++ b/src/gravity/Default/gravity_io.h @@ -21,6 +21,20 @@ #include "io_properties.h" +void convert_gpart_pos(const struct engine* e, const struct gpart* gp, + double* ret) { + + if (e->s->periodic) { + ret[0] = box_wrap(gp->x[0], 0.0, e->s->dim[0]); + ret[1] = box_wrap(gp->x[1], 0.0, e->s->dim[1]); + ret[2] = box_wrap(gp->x[2], 0.0, e->s->dim[2]); + } else { + ret[0] = gp->x[0]; + ret[1] = gp->x[1]; + ret[2] = gp->x[2]; + } +} + /** * @brief Specifies which g-particle fields to read from a dataset * @@ -52,15 +66,15 @@ void darkmatter_read_particles(struct gpart* gparts, struct io_props* list, * @param list The list of i/o properties to write. * @param num_fields The number of i/o fields to write. */ -void darkmatter_write_particles(struct gpart* gparts, struct io_props* list, - int* num_fields) { +void darkmatter_write_particles(const struct gpart* gparts, + struct io_props* list, int* num_fields) { /* Say how much we want to read */ *num_fields = 5; /* List what we want to read */ - list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, - gparts, x); + list[0] = io_make_output_field_convert_gpart( + "Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, gparts, convert_gpart_pos); list[1] = io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, gparts, v_full); list[2] = diff --git a/src/hydro/Default/hydro_io.h b/src/hydro/Default/hydro_io.h index 62e94b05ffea259aac99d4b3714e0eea7e7c955f..8c12f015bd69dc917a80c564422676a85ffcfb3b 100644 --- a/src/hydro/Default/hydro_io.h +++ b/src/hydro/Default/hydro_io.h @@ -53,6 +53,20 @@ void hydro_read_particles(struct part* parts, struct io_props* list, UNIT_CONV_DENSITY, parts, rho); } +void convert_part_pos(const struct engine* e, const struct part* p, + double* ret) { + + if (e->s->periodic) { + ret[0] = box_wrap(p->x[0], 0.0, e->s->dim[0]); + ret[1] = box_wrap(p->x[1], 0.0, e->s->dim[1]); + ret[2] = box_wrap(p->x[2], 0.0, e->s->dim[2]); + } else { + ret[0] = p->x[0]; + ret[1] = p->x[1]; + ret[2] = p->x[2]; + } +} + /** * @brief Specifies which particle fields to write to a dataset * @@ -66,8 +80,8 @@ void hydro_write_particles(struct part* parts, struct io_props* list, *num_fields = 8; /* List what we want to write */ - list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, - parts, x); + list[0] = io_make_output_field_convert_part( + "Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, parts, convert_part_pos); list[1] = io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, parts, v); list[2] = diff --git a/src/hydro/Default/hydro_part.h b/src/hydro/Default/hydro_part.h index 8b14c45614e4c09c48056c9398ed4bfe3ed90e38..5d2f76ab298cdd6199d95abe1e4435321302689e 100644 --- a/src/hydro/Default/hydro_part.h +++ b/src/hydro/Default/hydro_part.h @@ -125,6 +125,16 @@ struct part { /* Particle time-bin */ timebin_t time_bin; +#ifdef SWIFT_DEBUG_CHECKS + + /* Time of the last drift */ + integertime_t ti_drift; + + /* Time of the last kick */ + integertime_t ti_kick; + +#endif + } SWIFT_STRUCT_ALIGN; #endif /* SWIFT_DEFAULT_HYDRO_PART_H */ diff --git a/src/hydro/Gadget2/hydro_io.h b/src/hydro/Gadget2/hydro_io.h index 3e46b2351eb6a3871946dd9c69c8d108b10da0df..d89fe5c277b0273e53304d77a241e14d2c71fc5d 100644 --- a/src/hydro/Gadget2/hydro_io.h +++ b/src/hydro/Gadget2/hydro_io.h @@ -55,14 +55,28 @@ void hydro_read_particles(struct part* parts, struct io_props* list, UNIT_CONV_DENSITY, parts, rho); } -float convert_u(struct engine* e, struct part* p) { +void convert_u(const struct engine* e, const struct part* p, float* ret) { - return hydro_get_internal_energy(p); + ret[0] = hydro_get_internal_energy(p); } -float convert_P(struct engine* e, struct part* p) { +void convert_P(const struct engine* e, const struct part* p, float* ret) { - return hydro_get_pressure(p); + ret[0] = hydro_get_pressure(p); +} + +void convert_part_pos(const struct engine* e, const struct part* p, + double* ret) { + + if (e->s->periodic) { + ret[0] = box_wrap(p->x[0], 0.0, e->s->dim[0]); + ret[1] = box_wrap(p->x[1], 0.0, e->s->dim[1]); + ret[2] = box_wrap(p->x[2], 0.0, e->s->dim[2]); + } else { + ret[0] = p->x[0]; + ret[1] = p->x[1]; + ret[2] = p->x[2]; + } } /** @@ -72,14 +86,14 @@ float convert_P(struct engine* e, struct part* p) { * @param list The list of i/o properties to write. * @param num_fields The number of i/o fields to write. */ -void hydro_write_particles(struct part* parts, struct io_props* list, +void hydro_write_particles(const struct part* parts, struct io_props* list, int* num_fields) { *num_fields = 10; /* List what we want to write */ - list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, - parts, x); + list[0] = io_make_output_field_convert_part( + "Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, parts, convert_part_pos); list[1] = io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, parts, v); list[2] = @@ -96,9 +110,9 @@ void hydro_write_particles(struct part* parts, struct io_props* list, io_make_output_field("Density", FLOAT, 1, UNIT_CONV_DENSITY, parts, rho); list[8] = io_make_output_field_convert_part("InternalEnergy", FLOAT, 1, UNIT_CONV_ENERGY_PER_UNIT_MASS, - parts, rho, convert_u); + parts, convert_u); list[9] = io_make_output_field_convert_part( - "Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, parts, rho, convert_P); + "Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, parts, convert_P); } /** diff --git a/src/hydro/Gizmo/hydro_io.h b/src/hydro/Gizmo/hydro_io.h index ea0a083f37078859b236ca0799268066f6fa0385..458ea6151e4b00b9ccf85ad6eff4ccf58277ff7d 100644 --- a/src/hydro/Gizmo/hydro_io.h +++ b/src/hydro/Gizmo/hydro_io.h @@ -69,10 +69,11 @@ void hydro_read_particles(struct part* parts, struct io_props* list, * * @param e #engine. * @param p Particle. - * @return Internal energy of the particle + * @param ret (return) Internal energy of the particle */ -float convert_u(struct engine* e, struct part* p) { - return hydro_get_internal_energy(p); +void convert_u(const struct engine* e, const struct part* p, float* ret) { + + ret[0] = hydro_get_internal_energy(p); } /** @@ -80,10 +81,10 @@ float convert_u(struct engine* e, struct part* p) { * * @param e #engine. * @param p Particle. - * @return Entropic function of the particle + * @param ret (return) Entropic function of the particle */ -float convert_A(struct engine* e, struct part* p) { - return hydro_get_entropy(p); +void convert_A(const struct engine* e, const struct part* p, float* ret) { + ret[0] = hydro_get_entropy(p); } /** @@ -93,9 +94,9 @@ float convert_A(struct engine* e, struct part* p) { * @param p Particle. * @return Total energy of the particle */ -float convert_Etot(struct engine* e, struct part* p) { +void convert_Etot(const struct engine* e, const struct part* p, float* ret) { #ifdef GIZMO_TOTAL_ENERGY - return p->conserved.energy; + ret[0] = p->conserved.energy; #else float momentum2; @@ -103,10 +104,24 @@ float convert_Etot(struct engine* e, struct part* p) { p->conserved.momentum[1] * p->conserved.momentum[1] + p->conserved.momentum[2] * p->conserved.momentum[2]; - return p->conserved.energy + 0.5f * momentum2 / p->conserved.mass; + ret[0] = p->conserved.energy + 0.5f * momentum2 / p->conserved.mass; #endif } +void convert_part_pos(const struct engine* e, const struct part* p, + double* ret) { + + if (e->s->periodic) { + ret[0] = box_wrap(p->x[0], 0.0, e->s->dim[0]); + ret[1] = box_wrap(p->x[1], 0.0, e->s->dim[1]); + ret[2] = box_wrap(p->x[2], 0.0, e->s->dim[2]); + } else { + ret[0] = p->x[0]; + ret[1] = p->x[1]; + ret[2] = p->x[2]; + } +} + /** * @brief Specifies which particle fields to write to a dataset * @@ -120,8 +135,8 @@ void hydro_write_particles(struct part* parts, struct io_props* list, *num_fields = 10; /* List what we want to write */ - list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, - parts, x); + list[0] = io_make_output_field_convert_part( + "Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, parts, convert_part_pos); list[1] = io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, parts, primitives.v); list[2] = io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, parts, @@ -130,18 +145,17 @@ void hydro_write_particles(struct part* parts, struct io_props* list, parts, h); list[4] = io_make_output_field_convert_part("InternalEnergy", FLOAT, 1, UNIT_CONV_ENERGY_PER_UNIT_MASS, - parts, primitives.P, convert_u); + parts, convert_u); list[5] = io_make_output_field("ParticleIDs", ULONGLONG, 1, UNIT_CONV_NO_UNITS, parts, id); list[6] = io_make_output_field("Density", FLOAT, 1, UNIT_CONV_DENSITY, parts, primitives.rho); list[7] = io_make_output_field_convert_part( - "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY, parts, primitives.P, convert_A); + "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY, parts, convert_A); list[8] = io_make_output_field("Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, parts, primitives.P); - list[9] = - io_make_output_field_convert_part("TotEnergy", FLOAT, 1, UNIT_CONV_ENERGY, - parts, conserved.energy, convert_Etot); + list[9] = io_make_output_field_convert_part( + "TotEnergy", FLOAT, 1, UNIT_CONV_ENERGY, parts, convert_Etot); } /** diff --git a/src/hydro/Minimal/hydro.h b/src/hydro/Minimal/hydro.h index 4d8ca5b05547467c973e17983774b64736060471..905412040cbb8ac9666e555d88cad9bc56de13ab 100644 --- a/src/hydro/Minimal/hydro.h +++ b/src/hydro/Minimal/hydro.h @@ -224,7 +224,7 @@ __attribute__((always_inline)) INLINE static void hydro_end_density( /* Finish the calculation by inserting the missing h-factors */ p->rho *= h_inv_dim; p->density.rho_dh *= h_inv_dim_plus_one; - p->density.wcount *= kernel_norm; + p->density.wcount *= h_inv_dim; p->density.wcount_dh *= h_inv_dim_plus_one; } diff --git a/src/hydro/Minimal/hydro_io.h b/src/hydro/Minimal/hydro_io.h index 2ec0cb11d1e3ccaaa09d9822c75b396364912df8..771e4ac2d066ea098edb8b2f11d7afd550783347 100644 --- a/src/hydro/Minimal/hydro_io.h +++ b/src/hydro/Minimal/hydro_io.h @@ -69,14 +69,28 @@ void hydro_read_particles(struct part* parts, struct io_props* list, UNIT_CONV_DENSITY, parts, rho); } -float convert_S(struct engine* e, struct part* p) { +void convert_S(const struct engine* e, const struct part* p, float* ret) { - return hydro_get_entropy(p); + ret[0] = hydro_get_entropy(p); } -float convert_P(struct engine* e, struct part* p) { +void convert_P(const struct engine* e, const struct part* p, float* ret) { - return hydro_get_pressure(p); + ret[0] = hydro_get_pressure(p); +} + +void convert_part_pos(const struct engine* e, const struct part* p, + double* ret) { + + if (e->s->periodic) { + ret[0] = box_wrap(p->x[0], 0.0, e->s->dim[0]); + ret[1] = box_wrap(p->x[1], 0.0, e->s->dim[1]); + ret[2] = box_wrap(p->x[2], 0.0, e->s->dim[2]); + } else { + ret[0] = p->x[0]; + ret[1] = p->x[1]; + ret[2] = p->x[2]; + } } /** @@ -92,8 +106,8 @@ void hydro_write_particles(struct part* parts, struct io_props* list, *num_fields = 10; /* List what we want to write */ - list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, - parts, x); + list[0] = io_make_output_field_convert_part( + "Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, parts, convert_part_pos); list[1] = io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, parts, v); list[2] = @@ -108,11 +122,10 @@ void hydro_write_particles(struct part* parts, struct io_props* list, UNIT_CONV_ACCELERATION, parts, a_hydro); list[7] = io_make_output_field("Density", FLOAT, 1, UNIT_CONV_DENSITY, parts, rho); - list[8] = io_make_output_field_convert_part("Entropy", FLOAT, 1, - UNIT_CONV_ENTROPY_PER_UNIT_MASS, - parts, rho, convert_S); + list[8] = io_make_output_field_convert_part( + "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY_PER_UNIT_MASS, parts, convert_S); list[9] = io_make_output_field_convert_part( - "Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, parts, rho, convert_P); + "Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, parts, convert_P); } /** diff --git a/src/hydro/PressureEntropy/hydro_io.h b/src/hydro/PressureEntropy/hydro_io.h index 10243750c01d6bc64664f9834bc4cc245c786f49..d3780261ba587cfac92ec44a65b13f0a4344b3c3 100644 --- a/src/hydro/PressureEntropy/hydro_io.h +++ b/src/hydro/PressureEntropy/hydro_io.h @@ -67,14 +67,28 @@ void hydro_read_particles(struct part* parts, struct io_props* list, UNIT_CONV_DENSITY, parts, rho); } -float convert_u(struct engine* e, struct part* p) { +void convert_u(const struct engine* e, const struct part* p, float* ret) { - return hydro_get_internal_energy(p); + ret[0] = hydro_get_internal_energy(p); } -float convert_P(struct engine* e, struct part* p) { +void convert_P(const struct engine* e, const struct part* p, float* ret) { - return hydro_get_pressure(p); + ret[0] = hydro_get_pressure(p); +} + +void convert_part_pos(const struct engine* e, const struct part* p, + double* ret) { + + if (e->s->periodic) { + ret[0] = box_wrap(p->x[0], 0.0, e->s->dim[0]); + ret[1] = box_wrap(p->x[1], 0.0, e->s->dim[1]); + ret[2] = box_wrap(p->x[2], 0.0, e->s->dim[2]); + } else { + ret[0] = p->x[0]; + ret[1] = p->x[1]; + ret[2] = p->x[2]; + } } /** @@ -90,27 +104,27 @@ void hydro_write_particles(struct part* parts, struct io_props* list, *num_fields = 11; /* List what we want to write */ - list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, - parts, x); + list[0] = io_make_output_field_convert_part( + "Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, parts, convert_part_pos); list[1] = io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, parts, v); list[2] = io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, parts, mass); list[3] = io_make_output_field("SmoothingLength", FLOAT, 1, UNIT_CONV_LENGTH, parts, h); - list[4] = io_make_output_field_convert_part("InternalEnergy", FLOAT, 1, - UNIT_CONV_ENERGY_PER_UNIT_MASS, - parts, entropy, convert_u); + list[4] = io_make_output_field( + "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY_PER_UNIT_MASS, parts, entropy); list[5] = io_make_output_field("ParticleIDs", ULONGLONG, 1, UNIT_CONV_NO_UNITS, parts, id); list[6] = io_make_output_field("Acceleration", FLOAT, 3, UNIT_CONV_ACCELERATION, parts, a_hydro); list[7] = io_make_output_field("Density", FLOAT, 1, UNIT_CONV_DENSITY, parts, rho); - list[8] = io_make_output_field( - "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY_PER_UNIT_MASS, parts, entropy); + list[8] = io_make_output_field_convert_part("InternalEnergy", FLOAT, 1, + UNIT_CONV_ENERGY_PER_UNIT_MASS, + parts, convert_u); list[9] = io_make_output_field_convert_part( - "Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, parts, rho, convert_P); + "Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, parts, convert_P); list[10] = io_make_output_field("WeightedDensity", FLOAT, 1, UNIT_CONV_DENSITY, parts, rho_bar); } diff --git a/src/hydro/Shadowswift/hydro.h b/src/hydro/Shadowswift/hydro.h index abbcdcd2f7879d8063a906e44ab2fe6a3e675828..eb9938a341f1e6273f8ea03264bdda6dc69832e6 100644 --- a/src/hydro/Shadowswift/hydro.h +++ b/src/hydro/Shadowswift/hydro.h @@ -22,6 +22,7 @@ #include "approx_math.h" #include "equation_of_state.h" #include "hydro_gradients.h" +#include "hydro_properties.h" #include "hydro_space.h" #include "voronoi_algorithm.h" diff --git a/src/hydro/Shadowswift/hydro_io.h b/src/hydro/Shadowswift/hydro_io.h index de45b5d68c78f96cee3030eadef4b4410e550c22..f6f6fcc6c070b0add8e2b97678e5ef1ada636bfd 100644 --- a/src/hydro/Shadowswift/hydro_io.h +++ b/src/hydro/Shadowswift/hydro_io.h @@ -19,6 +19,7 @@ #include "adiabatic_index.h" #include "equation_of_state.h" +#include "hydro.h" #include "hydro_gradients.h" #include "hydro_slope_limiters.h" #include "io_properties.h" @@ -63,13 +64,8 @@ void hydro_read_particles(struct part* parts, struct io_props* list, * @param p Particle. * @return Internal energy of the particle */ -float convert_u(struct engine* e, struct part* p) { - if (p->primitives.rho > 0.) { - return gas_internal_energy_from_pressure(p->primitives.rho, - p->primitives.P); - } else { - return 0.; - } +void convert_u(const struct engine* e, const struct part* p, float* ret) { + ret[0] = hydro_get_internal_energy(p); } /** @@ -79,12 +75,8 @@ float convert_u(struct engine* e, struct part* p) { * @param p Particle. * @return Entropic function of the particle */ -float convert_A(struct engine* e, struct part* p) { - if (p->primitives.rho > 0.) { - return gas_entropy_from_pressure(p->primitives.rho, p->primitives.P); - } else { - return 0.; - } +void convert_A(const struct engine* e, const struct part* p, float* ret) { + ret[0] = hydro_get_entropy(p); } /** @@ -94,7 +86,7 @@ float convert_A(struct engine* e, struct part* p) { * @param p Particle. * @return Total energy of the particle */ -float convert_Etot(struct engine* e, struct part* p) { +void convert_Etot(const struct engine* e, const struct part* p, float* ret) { #ifdef SHADOWFAX_TOTAL_ENERGY return p->conserved.energy; #else @@ -105,13 +97,27 @@ float convert_Etot(struct engine* e, struct part* p) { p->conserved.momentum[1] * p->conserved.momentum[1] + p->conserved.momentum[2] * p->conserved.momentum[2]; - return p->conserved.energy + 0.5f * momentum2 / p->conserved.mass; + ret[0] = p->conserved.energy + 0.5f * momentum2 / p->conserved.mass; } else { - return 0.; + ret[0] = 0.; } #endif } +void convert_part_pos(const struct engine* e, const struct part* p, + double* ret) { + + if (e->s->periodic) { + ret[0] = box_wrap(p->x[0], 0.0, e->s->dim[0]); + ret[1] = box_wrap(p->x[1], 0.0, e->s->dim[1]); + ret[2] = box_wrap(p->x[2], 0.0, e->s->dim[2]); + } else { + ret[0] = p->x[0]; + ret[1] = p->x[1]; + ret[2] = p->x[2]; + } +} + /** * @brief Specifies which particle fields to write to a dataset * @@ -125,8 +131,8 @@ void hydro_write_particles(struct part* parts, struct io_props* list, *num_fields = 13; /* List what we want to write */ - list[0] = io_make_output_field("Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, - parts, x); + list[0] = io_make_output_field_convert_part( + "Coordinates", DOUBLE, 3, UNIT_CONV_LENGTH, parts, convert_part_pos); list[1] = io_make_output_field("Velocities", FLOAT, 3, UNIT_CONV_SPEED, parts, primitives.v); list[2] = io_make_output_field("Masses", FLOAT, 1, UNIT_CONV_MASS, parts, @@ -135,7 +141,7 @@ void hydro_write_particles(struct part* parts, struct io_props* list, parts, h); list[4] = io_make_output_field_convert_part("InternalEnergy", FLOAT, 1, UNIT_CONV_ENERGY_PER_UNIT_MASS, - parts, primitives.P, convert_u); + parts, convert_u); list[5] = io_make_output_field("ParticleIDs", ULONGLONG, 1, UNIT_CONV_NO_UNITS, parts, id); list[6] = io_make_output_field("Acceleration", FLOAT, 3, @@ -147,12 +153,11 @@ void hydro_write_particles(struct part* parts, struct io_props* list, list[9] = io_make_output_field("GradDensity", FLOAT, 3, UNIT_CONV_DENSITY, parts, primitives.gradients.rho); list[10] = io_make_output_field_convert_part( - "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY, parts, primitives.P, convert_A); + "Entropy", FLOAT, 1, UNIT_CONV_ENTROPY, parts, convert_A); list[11] = io_make_output_field("Pressure", FLOAT, 1, UNIT_CONV_PRESSURE, parts, primitives.P); - list[12] = - io_make_output_field_convert_part("TotEnergy", FLOAT, 1, UNIT_CONV_ENERGY, - parts, conserved.energy, convert_Etot); + list[12] = io_make_output_field_convert_part( + "TotEnergy", FLOAT, 1, UNIT_CONV_ENERGY, parts, convert_Etot); } /** diff --git a/src/io_properties.h b/src/io_properties.h index 9fcf1a1ac67cae6afab6870369e51d06c752fc11..2142d8d555fb52258d3443f20d14c72cf7568045 100644 --- a/src/io_properties.h +++ b/src/io_properties.h @@ -16,19 +16,31 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ - #ifndef SWIFT_IO_PROPERTIES_H #define SWIFT_IO_PROPERTIES_H /* Config parameters. */ #include "../config.h" +/* Local includes. */ +#include "inline.h" + /** * @brief The two sorts of data present in the GADGET IC files: compulsory to * start a run or optional. */ enum DATA_IMPORTANCE { COMPULSORY = 1, OPTIONAL = 0, UNUSED = -1 }; +/* Helper typedefs */ +typedef void (*conversion_func_part_float)(const struct engine*, + const struct part*, float*); +typedef void (*conversion_func_part_double)(const struct engine*, + const struct part*, double*); +typedef void (*conversion_func_gpart_float)(const struct engine*, + const struct gpart*, float*); +typedef void (*conversion_func_gpart_double)(const struct engine*, + const struct gpart*, double*); + /** * @brief The properties of a given dataset for i/o */ @@ -52,18 +64,31 @@ struct io_props { /* Pointer to the field of the first particle in the array */ char* field; + /* Pointer to the start of the temporary buffer used in i/o */ + char* start_temp_c; + float* start_temp_f; + double* start_temp_d; + + /* Pointer to the engine */ + const struct engine* e; + /* The size of the particles */ size_t partSize; /* The particle arrays */ - struct part* parts; - struct gpart* gparts; + const struct part* parts; + const struct gpart* gparts; + + /* Are we converting? */ + int conversion; /* Conversion function for part */ - float (*convert_part)(struct engine*, struct part*); + conversion_func_part_float convert_part_f; + conversion_func_part_double convert_part_d; /* Conversion function for gpart */ - float (*convert_gpart)(struct engine*, struct gpart*); + conversion_func_gpart_float convert_gpart_f; + conversion_func_gpart_double convert_gpart_d; }; /** @@ -86,11 +111,10 @@ struct io_props { * * Do not call this function directly. Use the macro defined above. */ -struct io_props io_make_input_field_(char name[FIELD_BUFFER_SIZE], - enum IO_DATA_TYPE type, int dimension, - enum DATA_IMPORTANCE importance, - enum unit_conversion_factor units, - char* field, size_t partSize) { +INLINE static struct io_props io_make_input_field_( + char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension, + enum DATA_IMPORTANCE importance, enum unit_conversion_factor units, + char* field, size_t partSize) { struct io_props r; strcpy(r.name, name); r.type = type; @@ -101,8 +125,11 @@ struct io_props io_make_input_field_(char name[FIELD_BUFFER_SIZE], r.partSize = partSize; r.parts = NULL; r.gparts = NULL; - r.convert_part = NULL; - r.convert_gpart = NULL; + r.conversion = 0; + r.convert_part_f = NULL; + r.convert_part_d = NULL; + r.convert_gpart_f = NULL; + r.convert_gpart_d = NULL; return r; } @@ -126,10 +153,9 @@ struct io_props io_make_input_field_(char name[FIELD_BUFFER_SIZE], * * Do not call this function directly. Use the macro defined above. */ -struct io_props io_make_output_field_(char name[FIELD_BUFFER_SIZE], - enum IO_DATA_TYPE type, int dimension, - enum unit_conversion_factor units, - char* field, size_t partSize) { +INLINE static struct io_props io_make_output_field_( + char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension, + enum unit_conversion_factor units, char* field, size_t partSize) { struct io_props r; strcpy(r.name, name); r.type = type; @@ -140,8 +166,11 @@ struct io_props io_make_output_field_(char name[FIELD_BUFFER_SIZE], r.partSize = partSize; r.parts = NULL; r.gparts = NULL; - r.convert_part = NULL; - r.convert_gpart = NULL; + r.conversion = 0; + r.convert_part_f = NULL; + r.convert_part_d = NULL; + r.convert_gpart_f = NULL; + r.convert_gpart_d = NULL; return r; } @@ -149,11 +178,10 @@ struct io_props io_make_output_field_(char name[FIELD_BUFFER_SIZE], /** * @brief Constructs an #io_props (with conversion) from its parameters */ -#define io_make_output_field_convert_part(name, type, dim, units, part, field, \ - convert) \ - io_make_output_field_convert_part_(name, type, dim, units, \ - (char*)(&(part[0]).field), \ - sizeof(part[0]), part, convert) +#define io_make_output_field_convert_part(name, type, dim, units, part, \ + convert) \ + io_make_output_field_convert_part_##type(name, type, dim, units, \ + sizeof(part[0]), part, convert) /** * @brief Construct an #io_props from its parameters @@ -162,17 +190,16 @@ struct io_props io_make_output_field_(char name[FIELD_BUFFER_SIZE], * @param type The type of the data * @param dimension Dataset dimension (1D, 3D, ...) * @param units The units of the dataset - * @param field Pointer to the field of the first particle * @param partSize The size in byte of the particle * @param parts The particle array * @param functionPtr The function used to convert a particle to a float * * Do not call this function directly. Use the macro defined above. */ -struct io_props io_make_output_field_convert_part_( +INLINE static struct io_props io_make_output_field_convert_part_FLOAT( char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension, - enum unit_conversion_factor units, char* field, size_t partSize, - struct part* parts, float (*functionPtr)(struct engine*, struct part*)) { + enum unit_conversion_factor units, size_t partSize, + const struct part* parts, conversion_func_part_float functionPtr) { struct io_props r; strcpy(r.name, name); @@ -180,12 +207,52 @@ struct io_props io_make_output_field_convert_part_( r.dimension = dimension; r.importance = UNUSED; r.units = units; - r.field = field; + r.field = NULL; + r.partSize = partSize; + r.parts = parts; + r.gparts = NULL; + r.conversion = 1; + r.convert_part_f = functionPtr; + r.convert_part_d = NULL; + r.convert_gpart_f = NULL; + r.convert_gpart_d = NULL; + + return r; +} + +/** + * @brief Construct an #io_props from its parameters + * + * @param name Name of the field to read + * @param type The type of the data + * @param dimension Dataset dimension (1D, 3D, ...) + * @param units The units of the dataset + * @param partSize The size in byte of the particle + * @param parts The particle array + * @param functionPtr The function used to convert a particle to a float + * + * Do not call this function directly. Use the macro defined above. + */ +INLINE static struct io_props io_make_output_field_convert_part_DOUBLE( + char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension, + enum unit_conversion_factor units, size_t partSize, + const struct part* parts, conversion_func_part_double functionPtr) { + + struct io_props r; + strcpy(r.name, name); + r.type = type; + r.dimension = dimension; + r.importance = UNUSED; + r.units = units; + r.field = NULL; r.partSize = partSize; r.parts = parts; r.gparts = NULL; - r.convert_part = functionPtr; - r.convert_gpart = NULL; + r.conversion = 1; + r.convert_part_f = NULL; + r.convert_part_d = functionPtr; + r.convert_gpart_f = NULL; + r.convert_gpart_d = NULL; return r; } @@ -193,11 +260,10 @@ struct io_props io_make_output_field_convert_part_( /** * @brief Constructs an #io_props (with conversion) from its parameters */ -#define io_make_output_field_convert_gpart(name, type, dim, units, part, \ - field, convert) \ - io_make_output_field_convert_gpart_(name, type, dim, units, \ - (char*)(&(part[0]).field), \ - sizeof(part[0]), gpart, convert) +#define io_make_output_field_convert_gpart(name, type, dim, units, gpart, \ + convert) \ + io_make_output_field_convert_gpart_##type(name, type, dim, units, \ + sizeof(gpart[0]), gpart, convert) /** * @brief Construct an #io_props from its parameters @@ -206,17 +272,16 @@ struct io_props io_make_output_field_convert_part_( * @param type The type of the data * @param dimension Dataset dimension (1D, 3D, ...) * @param units The units of the dataset - * @param field Pointer to the field of the first particle - * @param partSize The size in byte of the particle + * @param gpartSize The size in byte of the particle * @param gparts The particle array * @param functionPtr The function used to convert a g-particle to a float * * Do not call this function directly. Use the macro defined above. */ -struct io_props io_make_output_field_convert_gpart_( +INLINE static struct io_props io_make_output_field_convert_gpart_FLOAT( char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension, - enum unit_conversion_factor units, char* field, size_t partSize, - struct gpart* gparts, float (*functionPtr)(struct engine*, struct gpart*)) { + enum unit_conversion_factor units, size_t gpartSize, + const struct gpart* gparts, conversion_func_gpart_float functionPtr) { struct io_props r; strcpy(r.name, name); @@ -224,12 +289,52 @@ struct io_props io_make_output_field_convert_gpart_( r.dimension = dimension; r.importance = UNUSED; r.units = units; - r.field = field; - r.partSize = partSize; + r.field = NULL; + r.partSize = gpartSize; + r.parts = NULL; + r.gparts = gparts; + r.conversion = 1; + r.convert_part_f = NULL; + r.convert_part_d = NULL; + r.convert_gpart_f = functionPtr; + r.convert_gpart_d = NULL; + + return r; +} + +/** + * @brief Construct an #io_props from its parameters + * + * @param name Name of the field to read + * @param type The type of the data + * @param dimension Dataset dimension (1D, 3D, ...) + * @param units The units of the dataset + * @param gpartSize The size in byte of the particle + * @param gparts The particle array + * @param functionPtr The function used to convert a g-particle to a float + * + * Do not call this function directly. Use the macro defined above. + */ +INLINE static struct io_props io_make_output_field_convert_gpart_DOUBLE( + char name[FIELD_BUFFER_SIZE], enum IO_DATA_TYPE type, int dimension, + enum unit_conversion_factor units, size_t gpartSize, + const struct gpart* gparts, conversion_func_gpart_double functionPtr) { + + struct io_props r; + strcpy(r.name, name); + r.type = type; + r.dimension = dimension; + r.importance = UNUSED; + r.units = units; + r.field = NULL; + r.partSize = gpartSize; r.parts = NULL; r.gparts = gparts; - r.convert_part = NULL; - r.convert_gpart = functionPtr; + r.conversion = 1; + r.convert_part_f = NULL; + r.convert_part_d = NULL; + r.convert_gpart_f = NULL; + r.convert_gpart_d = functionPtr; return r; } diff --git a/src/parallel_io.c b/src/parallel_io.c index b31d3b6351b00af4a042e4b28bf492258b17b073..e11c43c79badcc40bb76b7eee09348571e1841cc 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -51,6 +51,9 @@ #include "units.h" #include "xmf.h" +/* Are we timing the i/o? */ +//#define IO_SPEED_MEASUREMENT + /* The current limit of ROMIO (the underlying MPI-IO layer) is 2GB */ #define HDF5_PARALLEL_IO_MAX_BYTES 2000000000LL @@ -252,7 +255,6 @@ void writeArray_chunk(struct engine* e, hid_t h_data, hid_t h_plist_id, const struct unit_system* snapshot_units) { const size_t typeSize = io_sizeof_type(props.type); - const size_t copySize = typeSize * props.dimension; const size_t num_elements = N * props.dimension; /* Can't handle writes of more than 2GB */ @@ -263,44 +265,24 @@ void writeArray_chunk(struct engine* e, hid_t h_data, hid_t h_plist_id, /* Allocate temporary buffer */ void* temp = malloc(num_elements * typeSize); - if (temp == NULL) error("Unable to allocate memory for temporary buffer"); - - /* Copy particle data to temporary buffer */ - if (props.convert_part == NULL && - props.convert_gpart == NULL) { /* No conversion */ - - char* temp_c = temp; - for (size_t i = 0; i < N; ++i) - memcpy(&temp_c[i * copySize], props.field + i * props.partSize, copySize); - - } else if (props.convert_part != NULL) { /* conversion (for parts)*/ - - float* temp_f = temp; - for (size_t i = 0; i < N; ++i) - temp_f[i] = props.convert_part(e, &props.parts[i]); + if (posix_memalign((void**)&temp, IO_BUFFER_ALIGNMENT, + num_elements * typeSize) != 0) + error("Unable to allocate temporary i/o buffer"); - } else if (props.convert_gpart != NULL) { /* conversion (for gparts)*/ +#ifdef IO_SPEED_MEASUREMENT + MPI_Barrier(MPI_COMM_WORLD); + ticks tic = getticks(); +#endif - float* temp_f = temp; - for (size_t i = 0; i < N; ++i) - temp_f[i] = props.convert_gpart(e, &props.gparts[i]); - } + /* Copy the particle data to the temporary buffer */ + io_copy_temp_buffer(temp, e, props, N, internal_units, snapshot_units); - /* Unit conversion if necessary */ - const double factor = - units_conversion_factor(internal_units, snapshot_units, props.units); - if (factor != 1.) { - - /* message("Converting ! factor=%e", factor); */ - - if (io_is_double_precision(props.type)) { - double* temp_d = temp; - for (size_t i = 0; i < num_elements; ++i) temp_d[i] *= factor; - } else { - float* temp_f = temp; - for (size_t i = 0; i < num_elements; ++i) temp_f[i] *= factor; - } - } +#ifdef IO_SPEED_MEASUREMENT + MPI_Barrier(MPI_COMM_WORLD); + if(engine_rank == 0) + message("Copying for '%s' took %.3f %s." , props.name, + clocks_from_ticks(getticks() - tic), clocks_getunit()); +#endif /* Create data space */ const hid_t h_memspace = H5Screate(H5S_SIMPLE); @@ -343,10 +325,14 @@ void writeArray_chunk(struct engine* e, hid_t h_data, hid_t h_plist_id, } /* message("Writing %lld '%s', %zd elements = %zd bytes (int=%d) at offset - * %zd", */ - /* N, props.name, N * props.dimension, N * props.dimension * typeSize, */ + * %zd", N, props.name, N * props.dimension, N * props.dimension * typeSize, */ /* (int)(N * props.dimension * typeSize), offset); */ +#ifdef IO_SPEED_MEASUREMENT + MPI_Barrier(MPI_COMM_WORLD); + tic = getticks(); +#endif + /* Write temporary buffer to HDF5 dataspace */ h_err = H5Dwrite(h_data, io_hdf5_type(props.type), h_memspace, h_filespace, h_plist_id, temp); @@ -354,6 +340,18 @@ void writeArray_chunk(struct engine* e, hid_t h_data, hid_t h_plist_id, error("Error while writing data array '%s'.", props.name); } +#ifdef IO_SPEED_MEASUREMENT + MPI_Barrier(MPI_COMM_WORLD); + ticks toc = getticks(); + float ms = clocks_from_ticks(toc - tic); + int megaBytes = N * props.dimension * typeSize / (1024 * 1024); + int total = 0; + MPI_Reduce(&megaBytes, &total, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + if (engine_rank == 0) + message("H5Dwrite for '%s' (%d MB) took %.3f %s (speed = %f MB/s).", + props.name, total, ms, clocks_getunit(), total / (ms / 1000.)); +#endif + /* Free and close everything */ free(temp); H5Sclose(h_memspace); @@ -385,6 +383,10 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile, const size_t typeSize = io_sizeof_type(props.type); +#ifdef IO_SPEED_MEASUREMENT + const ticks tic = getticks(); +#endif + /* Work out properties of the array in the file */ int rank; hsize_t shape_total[2]; @@ -497,6 +499,13 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile, H5Pclose(h_prop); H5Dclose(h_data); H5Pclose(h_plist_id); + +#ifdef IO_SPEED_MEASUREMENT + MPI_Barrier(MPI_COMM_WORLD); + if(engine_rank == 0) + message("'%s' took %.3f %s." , props.name, + clocks_from_ticks(getticks() - tic), clocks_getunit()); +#endif } /** @@ -617,7 +626,7 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, /* Read the unit system used in the ICs */ struct unit_system* ic_units = malloc(sizeof(struct unit_system)); if (ic_units == NULL) error("Unable to allocate memory for IC unit system"); - io_read_unit_system(h_file, ic_units); + io_read_unit_system(h_file, ic_units, mpi_rank); /* Tell the user if a conversion will be needed */ if (mpi_rank == 0) { @@ -735,7 +744,9 @@ void read_ic_parallel(char* fileName, const struct unit_system* internal_units, break; default: - message("Particle Type %d not yet supported. Particles ignored", ptype); + if (mpi_rank == 0) + message("Particle Type %d not yet supported. Particles ignored", + ptype); } /* Read everything */ @@ -825,16 +836,42 @@ void write_output_parallel(struct engine* e, const char* baseName, /* Prepare the XMF file for the new entry */ if (mpi_rank == 0) xmfFile = xmf_prepare_file(baseName); - /* Open HDF5 file */ + /* Prepare some file-access properties */ hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS); - H5Pset_fapl_mpio(plist_id, comm, info); + + /* Set some MPI-IO parameters */ + // MPI_Info_set(info, "IBM_largeblock_io", "true"); + MPI_Info_set(info, "romio_cb_write", "enable"); + MPI_Info_set(info, "romio_ds_write", "disable"); + + /* Activate parallel i/o */ + hid_t h_err = H5Pset_fapl_mpio(plist_id, comm, info); + if (h_err < 0) error("Error setting parallel i/o"); + + /* Align on 4k pages. */ + h_err = H5Pset_alignment(plist_id, 1024, 4096); + if (h_err < 0) error("Error setting Hdf5 alignment"); + + /* Disable meta-data cache eviction */ + H5AC_cache_config_t mdc_config; + mdc_config.version = H5AC__CURR_CACHE_CONFIG_VERSION; + h_err = H5Pget_mdc_config(plist_id, &mdc_config); + if (h_err < 0) error("Error getting the MDC config"); + + mdc_config.evictions_enabled = 0; /* false */ + mdc_config.incr_mode = H5C_incr__off; + mdc_config.decr_mode = H5C_decr__off; + mdc_config.flash_incr_mode = H5C_flash_incr__off; + h_err = H5Pset_mdc_config(plist_id, &mdc_config); + if (h_err < 0) error("Error setting the MDC config"); + + /* Open HDF5 file with the chosen parameters */ h_file = H5Fcreate(fileName, H5F_ACC_TRUNC, H5P_DEFAULT, plist_id); if (h_file < 0) { error("Error while opening file '%s'.", fileName); } - /* Compute offset in the file and total number of - * particles */ + /* Compute offset in the file and total number of particles */ size_t N[swift_type_count] = {Ngas, Ndm, 0, 0, Nstars, 0}; long long N_total[swift_type_count] = {0}; long long offset[swift_type_count] = {0}; @@ -847,8 +884,7 @@ void write_output_parallel(struct engine* e, const char* baseName, MPI_Bcast(&N_total, 6, MPI_LONG_LONG_INT, mpi_size - 1, comm); /* Now everybody konws its offset and the total number of - * particles of each - * type */ + * particles of each type */ /* Write the part of the XMF file corresponding to this * specific output */ diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index 5c29f1d6feb4c2bc542397ef2f0d1b9ae2b49a65..ce175878fa4641ab0becd7132acbe856eb3fcd20 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -26,7 +26,8 @@ /* Local headers. */ #include "active.h" -#ifdef WITH_VECTORIZATION +#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) + static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2); /** @@ -515,7 +516,7 @@ populate_max_index_no_cache_force(const struct cell *ci, const struct cell *cj, *init_pj = last_pj; } -#endif /* WITH_VECTORIZATION */ +#endif /* WITH_VECTORIZATION && GADGET2_SPH */ /** * @brief Compute the cell self-interaction (non-symmetric) using vector @@ -527,7 +528,8 @@ populate_max_index_no_cache_force(const struct cell *ci, const struct cell *cj, __attribute__((always_inline)) INLINE void runner_doself1_density_vec( struct runner *r, struct cell *restrict c) { -#ifdef WITH_VECTORIZATION +#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) + /* Get some local variables */ const struct engine *e = r->e; const timebin_t max_active_bin = e->max_active_bin; @@ -723,6 +725,11 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( } /* loop over all particles. */ TIMER_TOC(timer_doself_density); + +#else + + error("Incorrectly calling vectorized Gadget-2 functions!"); + #endif /* WITH_VECTORIZATION */ } @@ -740,10 +747,11 @@ __attribute__((always_inline)) INLINE void runner_doself_subset_density_vec( struct runner *r, struct cell *restrict c, struct part *restrict parts, int *restrict ind, int pi_count) { -#ifdef WITH_VECTORIZATION +#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) + const int count = c->count; - TIMER_TIC + TIMER_TIC; /* Get the particle cache from the runner and re-allocate * the cache if it is not big enough for the cell. */ @@ -925,6 +933,11 @@ __attribute__((always_inline)) INLINE void runner_doself_subset_density_vec( } /* loop over all particles. */ TIMER_TOC(timer_doself_subset); + +#else + + error("Incorrectly calling vectorized Gadget-2 functions!"); + #endif /* WITH_VECTORIZATION */ } @@ -938,7 +951,8 @@ __attribute__((always_inline)) INLINE void runner_doself_subset_density_vec( __attribute__((always_inline)) INLINE void runner_doself2_force_vec( struct runner *r, struct cell *restrict c) { -#ifdef WITH_VECTORIZATION +#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) + const struct engine *e = r->e; const timebin_t max_active_bin = e->max_active_bin; struct part *restrict parts = c->parts; @@ -1097,6 +1111,11 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec( } /* loop over all particles. */ TIMER_TOC(timer_doself_force); + +#else + + error("Incorrectly calling vectorized Gadget-2 functions!"); + #endif /* WITH_VECTORIZATION */ } @@ -1114,7 +1133,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const double *shift) { -#ifdef WITH_VECTORIZATION +#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) + const struct engine *restrict e = r->e; const timebin_t max_active_bin = e->max_active_bin; @@ -1442,6 +1462,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, TIMER_TOC(timer_dopair_density); +#else + + error("Incorrectly calling vectorized Gadget-2 functions!"); + #endif /* WITH_VECTORIZATION */ } @@ -1459,7 +1483,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const double *shift) { -#ifdef WITH_VECTORIZATION +#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) + const struct engine *restrict e = r->e; const timebin_t max_active_bin = e->max_active_bin; @@ -1816,5 +1841,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, TIMER_TOC(timer_dopair_density); } +#else + + error("Incorrectly calling vectorized Gadget-2 functions!"); + #endif /* WITH_VECTORIZATION */ } diff --git a/src/serial_io.c b/src/serial_io.c index eb1e0e23fb34fd8d6a21230d9e38cfe82c47df1d..f8e3927c4a09a8a94afff5287a42f2b10afac9b4 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -176,9 +176,9 @@ void readArray(hid_t grp, const struct io_props props, size_t N, * Routines writing an output file *-----------------------------------------------------------------------------*/ -void prepareArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile, - char* partTypeGroupName, const struct io_props props, - unsigned long long N_total, +void prepareArray(const struct engine* e, hid_t grp, char* fileName, + FILE* xmfFile, char* partTypeGroupName, + const struct io_props props, unsigned long long N_total, const struct unit_system* internal_units, const struct unit_system* snapshot_units) { @@ -282,14 +282,14 @@ void prepareArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile, * @todo A better version using HDF5 hyper-slabs to write the file directly from * the part array will be written once the structures have been stabilized. */ -void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile, - char* partTypeGroupName, const struct io_props props, size_t N, - long long N_total, int mpi_rank, long long offset, +void writeArray(const struct engine* e, hid_t grp, char* fileName, + FILE* xmfFile, char* partTypeGroupName, + const struct io_props props, size_t N, long long N_total, + int mpi_rank, long long offset, const struct unit_system* internal_units, const struct unit_system* snapshot_units) { const size_t typeSize = io_sizeof_type(props.type); - const size_t copySize = typeSize * props.dimension; const size_t num_elements = N * props.dimension; /* message("Writing '%s' array...", props.name); */ @@ -300,45 +300,13 @@ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile, internal_units, snapshot_units); /* Allocate temporary buffer */ - void* temp = malloc(num_elements * io_sizeof_type(props.type)); - if (temp == NULL) error("Unable to allocate memory for temporary buffer"); - - /* Copy particle data to temporary buffer */ - if (props.convert_part == NULL && - props.convert_gpart == NULL) { /* No conversion */ - - char* temp_c = temp; - for (size_t i = 0; i < N; ++i) - memcpy(&temp_c[i * copySize], props.field + i * props.partSize, copySize); - - } else if (props.convert_part != NULL) { /* conversion (for parts)*/ - - float* temp_f = temp; - for (size_t i = 0; i < N; ++i) - temp_f[i] = props.convert_part(e, &props.parts[i]); - - } else if (props.convert_gpart != NULL) { /* conversion (for gparts)*/ - - float* temp_f = temp; - for (size_t i = 0; i < N; ++i) - temp_f[i] = props.convert_gpart(e, &props.gparts[i]); - } - - /* Unit conversion if necessary */ - const double factor = - units_conversion_factor(internal_units, snapshot_units, props.units); - if (factor != 1.) { - - /* message("Converting ! factor=%e", factor); */ + void* temp = NULL; + if (posix_memalign((void**)&temp, IO_BUFFER_ALIGNMENT, + num_elements * typeSize) != 0) + error("Unable to allocate temporary i/o buffer"); - if (io_is_double_precision(props.type)) { - double* temp_d = temp; - for (size_t i = 0; i < num_elements; ++i) temp_d[i] *= factor; - } else { - float* temp_f = temp; - for (size_t i = 0; i < num_elements; ++i) temp_f[i] *= factor; - } - } + /* Copy the particle data to the temporary buffer */ + io_copy_temp_buffer(temp, e, props, N, internal_units, snapshot_units); /* Construct information for the hyper-slab */ int rank; @@ -512,7 +480,7 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, /* Read the unit system used in the ICs */ if (ic_units == NULL) error("Unable to allocate memory for IC unit system"); - io_read_unit_system(h_file, ic_units); + io_read_unit_system(h_file, ic_units, mpi_rank); if (units_are_equal(ic_units, internal_units)) { @@ -656,8 +624,9 @@ void read_ic_serial(char* fileName, const struct unit_system* internal_units, break; default: - message("Particle Type %d not yet supported. Particles ignored", - ptype); + if (mpi_rank == 0) + message("Particle Type %d not yet supported. Particles ignored", + ptype); } /* Read everything */ diff --git a/src/single_io.c b/src/single_io.c index 194563352dff5570b8703f828fac95bccbf7409f..a8199af6a84d3f1bdffd746e5a1a49e6e2518dca 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -169,57 +169,25 @@ void readArray(hid_t h_grp, const struct io_props prop, size_t N, * @todo A better version using HDF5 hyper-slabs to write the file directly from * the part array will be written once the structures have been stabilized. */ -void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile, - char* partTypeGroupName, const struct io_props props, size_t N, +void writeArray(const struct engine* e, hid_t grp, char* fileName, + FILE* xmfFile, char* partTypeGroupName, + const struct io_props props, size_t N, const struct unit_system* internal_units, const struct unit_system* snapshot_units) { const size_t typeSize = io_sizeof_type(props.type); - const size_t copySize = typeSize * props.dimension; const size_t num_elements = N * props.dimension; /* message("Writing '%s' array...", props.name); */ /* Allocate temporary buffer */ - void* temp = malloc(num_elements * io_sizeof_type(props.type)); - if (temp == NULL) error("Unable to allocate memory for temporary buffer"); - - /* Copy particle data to temporary buffer */ - if (props.convert_part == NULL && - props.convert_gpart == NULL) { /* No conversion */ - - char* temp_c = temp; - for (size_t i = 0; i < N; ++i) - memcpy(&temp_c[i * copySize], props.field + i * props.partSize, copySize); - - } else if (props.convert_part != NULL) { /* conversion (for parts)*/ - - float* temp_f = temp; - for (size_t i = 0; i < N; ++i) - temp_f[i] = props.convert_part(e, &props.parts[i]); - - } else if (props.convert_gpart != NULL) { /* conversion (for gparts)*/ - - float* temp_f = temp; - for (size_t i = 0; i < N; ++i) - temp_f[i] = props.convert_gpart(e, &props.gparts[i]); - } - - /* Unit conversion if necessary */ - const double factor = - units_conversion_factor(internal_units, snapshot_units, props.units); - if (factor != 1.) { - - /* message("Converting ! factor=%e", factor); */ + void* temp = NULL; + if (posix_memalign((void**)&temp, IO_BUFFER_ALIGNMENT, + num_elements * typeSize) != 0) + error("Unable to allocate temporary i/o buffer"); - if (io_is_double_precision(props.type)) { - double* temp_d = temp; - for (size_t i = 0; i < num_elements; ++i) temp_d[i] *= factor; - } else { - float* temp_f = temp; - for (size_t i = 0; i < num_elements; ++i) temp_f[i] *= factor; - } - } + /* Copy the particle data to the temporary buffer */ + io_copy_temp_buffer(temp, e, props, N, internal_units, snapshot_units); /* Create data space */ const hid_t h_space = H5Screate(H5S_SIMPLE); @@ -419,7 +387,7 @@ void read_ic_single(char* fileName, const struct unit_system* internal_units, /* Read the unit system used in the ICs */ struct unit_system* ic_units = malloc(sizeof(struct unit_system)); if (ic_units == NULL) error("Unable to allocate memory for IC unit system"); - io_read_unit_system(h_file, ic_units); + io_read_unit_system(h_file, ic_units, 0); /* Tell the user if a conversion will be needed */ if (units_are_equal(ic_units, internal_units)) { diff --git a/src/space.c b/src/space.c index 6051320e26c9c463cb85598ac8c106abd769d32e..5c9c6cc2f74e9803df4929ca7612dfc7b6f6f3cc 100644 --- a/src/space.c +++ b/src/space.c @@ -2933,14 +2933,15 @@ void space_init(struct space *s, const struct swift_params *params, hydro_space_init(&s->hs, s); ticks tic = getticks(); - message("first init..."); + if (verbose) message("first init..."); /* Set the particles in a state where they are ready for a run */ space_first_init_parts(s); space_first_init_xparts(s); space_first_init_gparts(s); space_first_init_sparts(s); - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); + if (verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); /* Init the space lock. */ if (lock_init(&s->lock) != 0) error("Failed to create space spin-lock."); @@ -2962,7 +2963,8 @@ void space_replicate(struct space *s, int replicate, int verbose) { if (replicate < 1) error("Invalid replicate value: %d", replicate); - message("Replicating space %d times along each axis.", replicate); + if (verbose) + message("Replicating space %d times along each axis.", replicate); const int factor = replicate * replicate * replicate;