/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk).
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* This entire file is only compiled when parallel HDF5 is available. */
#if defined(HAVE_HDF5) && defined(WITH_MPI) && defined(HAVE_PARALLEL_HDF5)

/* Some standard headers. */
#include <float.h>
#include <hdf5.h>
#include <math.h>
#include <mpi.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* This object's header. */
#include "parallel_io.h"

/* Local includes. */
#include "black_holes_io.h"
#include "chemistry_io.h"
#include "common_io.h"
#include "cooling_io.h"
#include "dimension.h"
#include "engine.h"
#include "error.h"
#include "fof_io.h"
#include "gravity_io.h"
#include "gravity_properties.h"
#include "hydro_io.h"
#include "hydro_properties.h"
#include "io_properties.h"
#include "memuse.h"
#include "output_list.h"
#include "output_options.h"
#include "part.h"
#include "part_type.h"
#include "sink_io.h"
#include "star_formation_io.h"
#include "stars_io.h"
#include "tracers_io.h"
#include "units.h"
#include "velociraptor_io.h"
#include "xmf.h"

/* The current limit of ROMIO (the underlying MPI-IO layer) is 2GB */
#define HDF5_PARALLEL_IO_MAX_BYTES 2147000000LL

/* Are we timing the i/o? */
//#define IO_SPEED_MEASUREMENT
72
/**
 * @brief Reads a chunk of data from an open HDF5 dataset
 *
 * All conversions (unit system, h-factors, sqrt(a) velocity factors) are
 * applied in-place on a temporary buffer before the data is scattered into
 * the particle array.
 *
 * @param h_data The HDF5 dataset to read from.
 * @param h_plist_id The parallel HDF5 properties (collective transfer list).
 * @param props The #io_props of the field to read.
 * @param N The number of particles to read on this rank.
 * @param offset Offset in the array where this mpi task starts reading.
 * @param internal_units The #unit_system used internally.
 * @param ic_units The #unit_system used in the snapshots.
 * @param cleanup_h Are we removing h-factors from the ICs?
 * @param cleanup_sqrt_a Are we cleaning-up the sqrt(a) factors in the Gadget
 * IC velocities?
 * @param h The value of the reduced Hubble constant to use for cleaning.
 * @param a The current value of the scale-factor.
 */
void read_array_parallel_chunk(hid_t h_data, hid_t h_plist_id,
                               const struct io_props props, size_t N,
                               long long offset,
                               const struct unit_system* internal_units,
                               const struct unit_system* ic_units,
                               int cleanup_h, int cleanup_sqrt_a, double h,
                               double a) {

  const size_t typeSize = io_sizeof_type(props.type);
  const size_t copySize = typeSize * props.dimension;
  const size_t num_elements = N * props.dimension;

  /* Can't handle reads of more than 2GB (ROMIO limitation) */
  if (N * props.dimension * typeSize > HDF5_PARALLEL_IO_MAX_BYTES)
    error("Dataset too large to be read in one pass!");

  /* Allocate temporary buffer */
  void* temp = malloc(num_elements * typeSize);
  if (temp == NULL) error("Unable to allocate memory for temporary buffer");

  /* Prepare information for hyper-slab */
  /* Note: even the 1D case is expressed as a rank-2 (N x 1) selection so that
   * the same hyperslab call can be used for both cases. */
  hsize_t shape[2], offsets[2];
  int rank;
  if (props.dimension > 1) {
    rank = 2;
    shape[0] = N;
    shape[1] = props.dimension;
    offsets[0] = offset;
    offsets[1] = 0;
  } else {
    rank = 2;
    shape[0] = N;
    shape[1] = 1;
    offsets[0] = offset;
    offsets[1] = 0;
  }

  /* Create data space in memory */
  const hid_t h_memspace = H5Screate_simple(rank, shape, NULL);

  /* Select hyper-slab in file */
  const hid_t h_filespace = H5Dget_space(h_data);
  H5Sselect_hyperslab(h_filespace, H5S_SELECT_SET, offsets, NULL, shape, NULL);

  /* Read HDF5 dataspace in temporary buffer */
  /* Dirty version that happens to work for vectors but should be improved */
  /* Using HDF5 dataspaces would be better */
  const hid_t h_err = H5Dread(h_data, io_hdf5_type(props.type), h_memspace,
                              h_filespace, h_plist_id, temp);
  if (h_err < 0) error("Error while reading data array '%s'.", props.name);

  /* Unit conversion if necessary */
  const double factor =
      units_conversion_factor(ic_units, internal_units, props.units);
  if (factor != 1.) {

    /* message("Converting ! factor=%e", factor); */

    if (io_is_double_precision(props.type)) {
      double* temp_d = (double*)temp;
      for (size_t i = 0; i < num_elements; ++i) temp_d[i] *= factor;
    } else {
      float* temp_f = (float*)temp;

#ifdef SWIFT_DEBUG_CHECKS
      float maximum = 0.;
      float minimum = FLT_MAX;
#endif

      /* Loop that converts the Units */
      for (size_t i = 0; i < num_elements; ++i) {

#ifdef SWIFT_DEBUG_CHECKS
        /* Find the absolute minimum and maximum values (zeros are skipped
         * as they carry no information about the dynamic range) */
        const float abstemp_f = fabsf(temp_f[i]);
        if (abstemp_f != 0.f) {
          maximum = max(maximum, abstemp_f);
          minimum = min(minimum, abstemp_f);
        }
#endif

        /* Convert the float units */
        temp_f[i] *= factor;
      }

#ifdef SWIFT_DEBUG_CHECKS
      /* The two possible errors: larger than float or smaller
       * than float precision. */
      if (factor * maximum > FLT_MAX) {
        error("Unit conversion results in numbers larger than floats");
      } else if (factor * minimum < FLT_MIN) {
        error("Numbers smaller than float precision");
      }
#endif
    }
  }

  /* Clean-up h if necessary (only for fields that carry an h-factor) */
  const float h_factor_exp = units_h_factor(internal_units, props.units);
  if (cleanup_h && h_factor_exp != 0.f) {

    /* message("Multiplying '%s' by h^%f=%f", props.name, h_factor_exp,
     * h_factor); */

    if (io_is_double_precision(props.type)) {
      double* temp_d = (double*)temp;
      const double h_factor = pow(h, h_factor_exp);
      for (size_t i = 0; i < num_elements; ++i) temp_d[i] *= h_factor;
    } else {
      float* temp_f = (float*)temp;
      const float h_factor = pow(h, h_factor_exp);
      for (size_t i = 0; i < num_elements; ++i) temp_f[i] *= h_factor;
    }
  }

  /* Clean-up the sqrt(a) factor if necessary.
   * Gadget ICs store velocities as v * sqrt(a); only the "Velocities" field
   * is affected, and a == 1 needs no correction. */
  if (cleanup_sqrt_a && a != 1. && (strcmp(props.name, "Velocities") == 0)) {

    if (io_is_double_precision(props.type)) {
      double* temp_d = (double*)temp;
      const double vel_factor = sqrt(a);
      for (size_t i = 0; i < num_elements; ++i) temp_d[i] *= vel_factor;
    } else {
      float* temp_f = (float*)temp;
      const float vel_factor = sqrt(a);
      for (size_t i = 0; i < num_elements; ++i) temp_f[i] *= vel_factor;
    }
  }

  /* Copy temporary buffer to particle data: scatter each particle's
   * contiguous field values into the strided particle array */
  char* temp_c = (char*)temp;
  for (size_t i = 0; i < N; ++i)
    memcpy(props.field + i * props.partSize, &temp_c[i * copySize], copySize);

  /* Free and close everything */
  free(temp);
  H5Sclose(h_filespace);
  H5Sclose(h_memspace);
}

/**
 * @brief Reads a data array from a given HDF5 group.
 *
 * Drives the chunked collective read: the data is read in pieces of at most
 * #HDF5_PARALLEL_IO_MAX_BYTES per rank, and all ranks iterate together until
 * every rank has read its share (collective HDF5 calls must be matched across
 * the communicator).
 *
 * @param grp The group from which to read.
 * @param props The #io_props of the field to read.
 * @param N The number of particles on that rank.
 * @param N_total The total number of particles.
 * @param mpi_rank The MPI rank of this node.
 * @param offset The offset in the array on disk for this rank.
 * @param internal_units The #unit_system used internally.
 * @param ic_units The #unit_system used in the ICs.
 * @param cleanup_h Are we removing h-factors from the ICs?
 * @param cleanup_sqrt_a Are we cleaning-up the sqrt(a) factors in the Gadget
 * IC velocities?
 * @param h The value of the reduced Hubble constant to use for cleaning.
 * @param a The current value of the scale-factor.
 */
void read_array_parallel(hid_t grp, struct io_props props, size_t N,
                         long long N_total, int mpi_rank, long long offset,
                         const struct unit_system* internal_units,
                         const struct unit_system* ic_units, int cleanup_h,
                         int cleanup_sqrt_a, double h, double a) {

  const size_t typeSize = io_sizeof_type(props.type);
  const size_t copySize = typeSize * props.dimension;

  /* Check whether the dataspace exists or not */
  const htri_t exist = H5Lexists(grp, props.name, 0);
  if (exist < 0) {
    error("Error while checking the existence of data set '%s'.", props.name);
  } else if (exist == 0) {
    if (props.importance == COMPULSORY) {
      error("Compulsory data set '%s' not present in the file.", props.name);
    } else {
      /* Optional field missing: zero the particle field and bail out */
      for (size_t i = 0; i < N; ++i)
        memset(props.field + i * props.partSize, 0, copySize);
      return;
    }
  }

  /* Open data space in file */
  const hid_t h_data = H5Dopen2(grp, props.name, H5P_DEFAULT);
  if (h_data < 0) error("Error while opening data space '%s'.", props.name);

/* Parallel-HDF5 1.10.2 incorrectly reads data that was compressed */
/* We detect this here and crash with an error message instead of  */
/* continuing with garbage data.                                   */
#if H5_VERSION_LE(1, 10, 2) && H5_VERSION_GE(1, 10, 2)
  /* Note: the combined LE/GE check means "exactly version 1.10.2".
   * Only rank 0 inspects the filters; error() aborts the whole run. */
  if (mpi_rank == 0) {

    /* Recover the list of filters that were applied to the data */
    const hid_t h_plist = H5Dget_create_plist(h_data);
    if (h_plist < 0)
      error("Error getting property list for data set '%s'", props.name);

    /* Recover the number of filters in the list */
    const int n_filters = H5Pget_nfilters(h_plist);

    for (int n = 0; n < n_filters; ++n) {

      unsigned int flag;
      size_t cd_nelmts = 32;
      unsigned int* cd_values = malloc(cd_nelmts * sizeof(unsigned int));
      size_t namelen = 256;
      char* name = calloc(namelen, sizeof(char));
      unsigned int filter_config;

      /* Recover the n^th filter in the list */
      const H5Z_filter_t filter =
          H5Pget_filter(h_plist, n, &flag, &cd_nelmts, cd_values, namelen, name,
                        &filter_config);
      if (filter < 0)
        error("Error retrieving %d^th (%d) filter for data set '%s'", n,
              n_filters, props.name);

      /* Now check whether the deflate filter had been applied */
      if (filter == H5Z_FILTER_DEFLATE)
        error(
            "HDF5 1.10.2 cannot correctly read data that was compressed with "
            "the 'deflate' filter.\nThe field '%s' has had this filter applied "
            "and the code would silently read garbage into the particle arrays "
            "so we'd rather stop here. You can:\n - Recompile the code with an "
            "earlier or older version of HDF5.\n - Use the 'h5repack' tool to "
            "remove the filter from the ICs (e.g. h5repack -f NONE -i in_file "
            "-o out_file).\n",
            props.name);

      free(name);
      free(cd_values);
    }

    H5Pclose(h_plist);
  }
#endif

  /* Create property list for collective dataset read. */
  const hid_t h_plist_id = H5Pcreate(H5P_DATASET_XFER);
  H5Pset_dxpl_mpio(h_plist_id, H5FD_MPIO_COLLECTIVE);

  /* Given the limitations of ROM-IO we will need to read the data in chunk of
     HDF5_PARALLEL_IO_MAX_BYTES bytes per node until all the nodes are done. */
  char redo = 1;
  while (redo) {

    /* Maximal number of elements */
    const size_t max_chunk_size =
        HDF5_PARALLEL_IO_MAX_BYTES / (props.dimension * typeSize);

    /* Read the next chunk (possibly the whole thing) */
    const size_t this_chunk = (N > max_chunk_size) ? max_chunk_size : N;

    read_array_parallel_chunk(h_data, h_plist_id, props, this_chunk, offset,
                              internal_units, ic_units, cleanup_h,
                              cleanup_sqrt_a, h, a);

    /* Compute how many items are left */
    if (N > max_chunk_size) {
      N -= max_chunk_size;
      /* Advance every pointer view of the particle arrays by one chunk */
      props.field += max_chunk_size * props.partSize; /* char* on the field */
      props.parts += max_chunk_size;                  /* part* on the part */
      props.xparts += max_chunk_size;                 /* xpart* on the xpart */
      props.gparts += max_chunk_size;                 /* gpart* on the gpart */
      props.sparts += max_chunk_size;                 /* spart* on the spart */
      props.bparts += max_chunk_size;                 /* bpart* on the bpart */
      offset += max_chunk_size;
      redo = 1;
    } else {
      N = 0;
      offset += 0;
      redo = 0;
    }

    /* Do we need to run again ? All ranks must agree since the read is
       collective, hence the MAX reduction. */
    MPI_Allreduce(MPI_IN_PLACE, &redo, 1, MPI_SIGNED_CHAR, MPI_MAX,
                  MPI_COMM_WORLD);

    if (redo && mpi_rank == 0)
      message("Need to redo one iteration for array '%s'", props.name);
  }

  /* Close everything */
  H5Pclose(h_plist_id);
  H5Dclose(h_data);
}

/**
373
 * @brief Prepares an array in the snapshot.
374
 *
375
 * @param e The #engine we are writing from.
376
377
378
379
380
381
382
 * @param grp The HDF5 grp to write to.
 * @param fileName The name of the file we are writing to.
 * @param xmfFile The (opened) XMF file we are appending to.
 * @param partTypeGroupName The name of the group we are writing to.
 * @param props The #io_props of the field to write.
 * @param N_total The total number of particles to write in this array.
 * @param snapshot_units The units used for the data in this snapshot.
383
 */
Matthieu Schaller's avatar
Matthieu Schaller committed
384
385
386
387
388
void prepare_array_parallel(
    struct engine* e, hid_t grp, const char* fileName, FILE* xmfFile,
    char* partTypeGroupName, struct io_props props, long long N_total,
    const enum lossy_compression_schemes lossy_compression,
    const struct unit_system* snapshot_units) {
389
390
391
392
393
394
395
396
397
398
399
400
401

  /* Create data space */
  const hid_t h_space = H5Screate(H5S_SIMPLE);
  if (h_space < 0)
    error("Error while creating data space for field '%s'.", props.name);

  int rank = 0;
  hsize_t shape[2];
  hsize_t chunk_shape[2];
  if (props.dimension > 1) {
    rank = 2;
    shape[0] = N_total;
    shape[1] = props.dimension;
402
    chunk_shape[0] = 1 << 20; /* Just a guess...*/
403
404
405
406
407
    chunk_shape[1] = props.dimension;
  } else {
    rank = 1;
    shape[0] = N_total;
    shape[1] = 0;
408
    chunk_shape[0] = 1 << 20; /* Just a guess...*/
409
410
411
412
    chunk_shape[1] = 0;
  }

  /* Make sure the chunks are not larger than the dataset */
Matthieu Schaller's avatar
Matthieu Schaller committed
413
  if ((long long)chunk_shape[0] > N_total) chunk_shape[0] = N_total;
414
415
416
417
418
419

  /* Change shape of data space */
  hid_t h_err = H5Sset_extent_simple(h_space, rank, shape, NULL);
  if (h_err < 0)
    error("Error while changing data space shape for field '%s'.", props.name);

Matthieu Schaller's avatar
Matthieu Schaller committed
420
421
422
423
424
425
  /* Dataset type */
  hid_t h_type = H5Tcopy(io_hdf5_type(props.type));

  /* Dataset properties */
  hid_t h_prop = H5Pcreate(H5P_DATASET_CREATE);

426
427
428
429
430
  /* Create property list for collective dataset write.    */
  const hid_t h_plist_id = H5Pcreate(H5P_DATASET_XFER);
  H5Pset_dxpl_mpio(h_plist_id, H5FD_MPIO_COLLECTIVE);

  /* Set chunk size */
Matthieu Schaller's avatar
Matthieu Schaller committed
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
  // h_err = H5Pset_chunk(h_prop, rank, chunk_shape);
  // if (h_err < 0) {
  //  error("Error while setting chunk size (%llu, %llu) for field '%s'.",
  //        chunk_shape[0], chunk_shape[1], props.name);
  //}

  /* Are we imposing some form of lossy compression filter? */
  // if (lossy_compression != compression_write_lossless)
  //  set_hdf5_lossy_compression(&h_prop, &h_type, lossy_compression,
  //  props.name);

  /* Impose check-sum to verify data corruption */
  // h_err = H5Pset_fletcher32(h_prop);
  // if (h_err < 0)
  //  error("Error while setting checksum options for field '%s'.", props.name);
446
447

  /* Create dataset */
Matthieu Schaller's avatar
Matthieu Schaller committed
448
449
  const hid_t h_data = H5Dcreate(grp, props.name, h_type, h_space, H5P_DEFAULT,
                                 h_prop, H5P_DEFAULT);
Matthieu Schaller's avatar
Matthieu Schaller committed
450
  if (h_data < 0) error("Error while creating dataspace '%s'.", props.name);
451
452

  /* Write unit conversion factors for this data set */
453
454
455
  char buffer[FIELD_BUFFER_SIZE] = {0};
  units_cgs_conversion_string(buffer, snapshot_units, props.units,
                              props.scale_factor_exponent);
456
457
458
459
460
461
462
  float baseUnitsExp[5];
  units_get_base_unit_exponents_array(baseUnitsExp, props.units);
  io_write_attribute_f(h_data, "U_M exponent", baseUnitsExp[UNIT_MASS]);
  io_write_attribute_f(h_data, "U_L exponent", baseUnitsExp[UNIT_LENGTH]);
  io_write_attribute_f(h_data, "U_t exponent", baseUnitsExp[UNIT_TIME]);
  io_write_attribute_f(h_data, "U_I exponent", baseUnitsExp[UNIT_CURRENT]);
  io_write_attribute_f(h_data, "U_T exponent", baseUnitsExp[UNIT_TEMPERATURE]);
463
464
  io_write_attribute_f(h_data, "h-scale exponent", 0.f);
  io_write_attribute_f(h_data, "a-scale exponent", props.scale_factor_exponent);
465
466
467
  io_write_attribute_s(h_data, "Expression for physical CGS units", buffer);

  /* Write the actual number this conversion factor corresponds to */
468
469
  const double factor =
      units_cgs_conversion_factor(snapshot_units, props.units);
470
471
472
473
474
475
  io_write_attribute_d(
      h_data,
      "Conversion factor to CGS (not including cosmological corrections)",
      factor);
  io_write_attribute_d(
      h_data,
476
      "Conversion factor to physical CGS (including cosmological corrections)",
477
478
479
480
481
482
483
484
485
      factor * pow(e->cosmology->a, props.scale_factor_exponent));

#ifdef SWIFT_DEBUG_CHECKS
  if (strlen(props.description) == 0)
    error("Invalid (empty) description of the field '%s'", props.name);
#endif

  /* Write the full description */
  io_write_attribute_s(h_data, "Description", props.description);
486
487

  /* Add a line to the XMF */
488
489
  if (xmfFile != NULL)
    xmf_write_line(xmfFile, fileName, partTypeGroupName, props.name, N_total,
Loic Hausammann's avatar
Loic Hausammann committed
490
                   props.dimension, props.type);
491
492

  /* Close everything */
Matthieu Schaller's avatar
Matthieu Schaller committed
493
494
  H5Tclose(h_type);
  H5Pclose(h_prop);
495
496
497
498
499
  H5Pclose(h_plist_id);
  H5Dclose(h_data);
  H5Sclose(h_space);
}

500
501
502
503
504
505
506
507
508
509
510
/**
 * @brief Writes a chunk of data in an open HDF5 dataset
 *
 * Copies the particle data into an aligned temporary buffer (applying unit
 * conversions) and writes it collectively into the rank's hyperslab of the
 * file dataset.
 *
 * @param e The #engine we are writing from.
 * @param h_data The HDF5 dataset to write to.
 * @param props The #io_props of the field to write.
 * @param N The number of particles to write.
 * @param offset Offset in the array where this mpi task starts writing.
 * @param internal_units The #unit_system used internally.
 * @param snapshot_units The #unit_system used in the snapshots.
 */
void write_array_parallel_chunk(struct engine* e, hid_t h_data,
                                const struct io_props props, size_t N,
                                long long offset,
                                const struct unit_system* internal_units,
                                const struct unit_system* snapshot_units) {

  const size_t typeSize = io_sizeof_type(props.type);
  const size_t num_elements = N * props.dimension;

  /* Can't handle writes of more than 2GB */
  if (N * props.dimension * typeSize > HDF5_PARALLEL_IO_MAX_BYTES)
    error("Dataset too large to be written in one pass!");

  /* message("Writing '%s' array...", props.name); */

  /* Allocate temporary buffer */
  void* temp = NULL;
  if (swift_memalign("writebuff", (void**)&temp, IO_BUFFER_ALIGNMENT,
                     num_elements * typeSize) != 0)
    error("Unable to allocate temporary i/o buffer");

#ifdef IO_SPEED_MEASUREMENT
  MPI_Barrier(MPI_COMM_WORLD);
  ticks tic = getticks();
#endif

  /* Copy the particle data to the temporary buffer */
  io_copy_temp_buffer(temp, e, props, N, internal_units, snapshot_units);

#ifdef IO_SPEED_MEASUREMENT
  MPI_Barrier(MPI_COMM_WORLD);
  if (engine_rank == 0)
    message("Copying for '%s' took %.3f %s.", props.name,
            clocks_from_ticks(getticks() - tic), clocks_getunit());
#endif

  /* Create data space */
  const hid_t h_memspace = H5Screate(H5S_SIMPLE);
  if (h_memspace < 0)
    error("Error while creating data space (memory) for field '%s'.",
          props.name);

  int rank;
  hsize_t shape[2];
  hsize_t offsets[2];
  if (props.dimension > 1) {
    rank = 2;
    shape[0] = N;
    shape[1] = props.dimension;
    offsets[0] = offset;
    offsets[1] = 0;
  } else {
    rank = 1;
    shape[0] = N;
    shape[1] = 0;
    offsets[0] = offset;
    offsets[1] = 0;
  }

  /* Change shape of memory data space */
  hid_t h_err = H5Sset_extent_simple(h_memspace, rank, shape, NULL);
  if (h_err < 0)
    error("Error while changing data space (memory) shape for field '%s'.",
          props.name);

  /* Select the hyper-slab corresponding to this rank.
   * A rank with nothing to write must still take part in the collective
   * call, hence the explicit empty selection. */
  hid_t h_filespace = H5Dget_space(h_data);
  if (N > 0)
    H5Sselect_hyperslab(h_filespace, H5S_SELECT_SET, offsets, NULL, shape,
                        NULL);
  else
    H5Sselect_none(h_filespace);

  /* message("Writing %lld '%s', %zd elements = %zd bytes (int=%d) at offset
   * %zd", N, props.name, N * props.dimension, N * props.dimension * typeSize,
   */
  /* 	  (int)(N * props.dimension * typeSize), offset); */

  /* Make a dataset creation property list and set MPI-I/O mode */
  hid_t h_plist_id = H5Pcreate(H5P_DATASET_XFER);
  H5Pset_dxpl_mpio(h_plist_id, H5FD_MPIO_COLLECTIVE);

#ifdef IO_SPEED_MEASUREMENT
  MPI_Barrier(MPI_COMM_WORLD);
  tic = getticks();
#endif

  /* Write temporary buffer to HDF5 dataspace */
  h_err = H5Dwrite(h_data, io_hdf5_type(props.type), h_memspace, h_filespace,
                   h_plist_id, temp);
  if (h_err < 0) error("Error while writing data array '%s'.", props.name);

#ifdef IO_SPEED_MEASUREMENT
  MPI_Barrier(MPI_COMM_WORLD);
  ticks toc = getticks();
  float ms = clocks_from_ticks(toc - tic);
  int megaBytes = N * props.dimension * typeSize / (1024 * 1024);
  int total = 0;
  MPI_Reduce(&megaBytes, &total, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
  if (engine_rank == 0)
    message("H5Dwrite for '%s' (%d MB) took %.3f %s (speed = %f MB/s).",
            props.name, total, ms, clocks_getunit(), total / (ms / 1000.));
#endif

  /* Free and close everything */
  swift_free("writebuff", temp);
  H5Pclose(h_plist_id);
  H5Sclose(h_memspace);
  H5Sclose(h_filespace);
}
621

622
623
624
625
626
/**
 * @brief Writes a data array in given HDF5 group.
 *
 * Drives the chunked collective write: data is written in pieces of at most
 * #HDF5_PARALLEL_IO_MAX_BYTES per rank, with all ranks looping together until
 * every rank is done (the underlying HDF5 calls are collective).
 *
 * @param e The #engine we are writing from.
 * @param grp The group in which to write.
 * @param fileName The name of the file in which the data is written.
 * @param partTypeGroupName The name of the group containing the particles in
 * the HDF5 file.
 * @param props The #io_props of the field to read
 * @param N The number of particles to write.
 * @param N_total Total number of particles across all cores.
 * @param mpi_rank The rank of this node.
 * @param offset Offset in the array where this mpi task starts writing.
 * @param internal_units The #unit_system used internally.
 * @param snapshot_units The #unit_system used in the snapshots.
 */
void write_array_parallel(struct engine* e, hid_t grp, char* fileName,
                          char* partTypeGroupName, struct io_props props,
                          size_t N, long long N_total, int mpi_rank,
                          long long offset,
                          const struct unit_system* internal_units,
                          const struct unit_system* snapshot_units) {

  const size_t typeSize = io_sizeof_type(props.type);

#ifdef IO_SPEED_MEASUREMENT
  const ticks tic = getticks();
#endif

  /* Open dataset (created earlier by prepare_array_parallel()) */
  const hid_t h_data = H5Dopen(grp, props.name, H5P_DEFAULT);
  if (h_data < 0) error("Error while opening dataset '%s'.", props.name);

  /* Given the limitations of ROM-IO we will need to write the data in chunk of
     HDF5_PARALLEL_IO_MAX_BYTES bytes per node until all the nodes are done. */
  char redo = 1;
  while (redo) {

    /* Maximal number of elements */
    const size_t max_chunk_size =
        HDF5_PARALLEL_IO_MAX_BYTES / (props.dimension * typeSize);

    /* Write the next chunk (possibly the whole thing) */
    const size_t this_chunk = (N > max_chunk_size) ? max_chunk_size : N;

    write_array_parallel_chunk(e, h_data, props, this_chunk, offset,
                               internal_units, snapshot_units);

    /* Compute how many items are left */
    if (N > max_chunk_size) {
      N -= max_chunk_size;
      /* Advance every pointer view of the particle arrays by one chunk */
      props.field += max_chunk_size * props.partSize; /* char* on the field */
      props.parts += max_chunk_size;                  /* part* on the part */
      props.xparts += max_chunk_size;                 /* xpart* on the xpart */
      props.gparts += max_chunk_size;                 /* gpart* on the gpart */
      props.sparts += max_chunk_size;                 /* spart* on the spart */
      props.bparts += max_chunk_size;                 /* bpart* on the bpart */
      offset += max_chunk_size;
      redo = 1;
    } else {
      N = 0;
      offset += 0;
      redo = 0;
    }

    /* Do we need to run again ? All ranks must agree since the write is
       collective, hence the MAX reduction. */
    MPI_Allreduce(MPI_IN_PLACE, &redo, 1, MPI_SIGNED_CHAR, MPI_MAX,
                  MPI_COMM_WORLD);

    if (redo && e->verbose && mpi_rank == 0)
      message("Need to redo one iteration for array '%s'", props.name);
  }

  /* Close everything */
  H5Dclose(h_data);

#ifdef IO_SPEED_MEASUREMENT
  MPI_Barrier(MPI_COMM_WORLD);
  if (engine_rank == 0)
    message("'%s' took %.3f %s.", props.name,
            clocks_from_ticks(getticks() - tic), clocks_getunit());
#endif
}

705
706
707
708
/**
 * @brief Reads an HDF5 initial condition file (GADGET-3 type) in parallel
 *
 * @param fileName The file to read.
709
 * @param internal_units The system units used internally
710
711
 * @param dim (output) The dimension of the volume read from the file.
 * @param parts (output) The array of #part read from the file.
712
 * @param gparts (output) The array of #gpart read from the file.
Loic Hausammann's avatar
Loic Hausammann committed
713
 * @param sinks (output) The array of #sink read from the file.
714
 * @param sparts (output) The array of #spart read from the file.
715
 * @param bparts (output) The array of #bpart read from the file.
716
717
 * @param Ngas (output) The number of particles read from the file.
 * @param Ngparts (output) The number of particles read from the file.
718
719
 * @param Ngparts_background (output) The number of background DM particles read
 * from the file.
Loic Hausammann's avatar
Loic Hausammann committed
720
 * @param Nsink (output) The number of particles read from the file.
721
 * @param Nstars (output) The number of particles read from the file.
722
 * @param Nblackholes (output) The number of particles read from the file.
723
 * @param flag_entropy (output) 1 if the ICs contained Entropy in the
724
 * InternalEnergy field
725
726
 * @param with_hydro Are we running with hydro ?
 * @param with_gravity Are we running with gravity ?
Loic Hausammann's avatar
Loic Hausammann committed
727
 * @param with_sink Are we running with sink ?
728
 * @param with_stars Are we running with stars ?
729
 * @param with_black_holes Are we running with black holes ?
Josh Borrow's avatar
Josh Borrow committed
730
 * @param with_cosmology Are we running with cosmology ?
731
 * @param cleanup_h Are we cleaning-up h-factors from the quantities we read?
732
733
 * @param cleanup_sqrt_a Are we cleaning-up the sqrt(a) factors in the Gadget
 * IC velocities?
734
 * @param h The value of the reduced Hubble constant to use for correction.
735
 * @param a The current value of the scale-factor.
736
737
738
739
 * @param mpi_rank The MPI rank of this node
 * @param mpi_size The number of MPI ranks
 * @param comm The MPI communicator
 * @param info The MPI information object
740
 * @param n_threads The number of threads to use for local operations.
741
 * @param dry_run If 1, don't read the particle. Only allocates the arrays.
742
743
 *
 */
744
void read_ic_parallel(char* fileName, const struct unit_system* internal_units,
745
                      double dim[3], struct part** parts, struct gpart** gparts,
Loic Hausammann's avatar
Loic Hausammann committed
746
747
748
                      struct sink** sinks, struct spart** sparts,
                      struct bpart** bparts, size_t* Ngas, size_t* Ngparts,
                      size_t* Ngparts_background, size_t* Nsinks,
Matthieu Schaller's avatar
Matthieu Schaller committed
749
                      size_t* Nstars, size_t* Nblackholes, int* flag_entropy,
Loic Hausammann's avatar
Loic Hausammann committed
750
751
752
753
                      int with_hydro, int with_gravity, int with_sink,
                      int with_stars, int with_black_holes, int with_cosmology,
                      int cleanup_h, int cleanup_sqrt_a, double h, double a,
                      int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info,
754
                      int n_threads, int dry_run, int remap_ids) {
755

756
  hid_t h_file = 0, h_grp = 0;
757
  /* GADGET has only cubic boxes (in cosmological mode) */
Matthieu Schaller's avatar
Matthieu Schaller committed
758
  double boxSize[3] = {0.0, -1.0, -1.0};
759
760
761
762
763
  long long numParticles[swift_type_count] = {0};
  long long numParticles_highWord[swift_type_count] = {0};
  size_t N[swift_type_count] = {0};
  long long N_total[swift_type_count] = {0};
  long long offset[swift_type_count] = {0};
764
  int dimension = 3; /* Assume 3D if nothing is specified */
765
  size_t Ndm = 0;
766
  size_t Ndm_background = 0;
767

768
  /* Initialise counters */
Matthieu Schaller's avatar
Matthieu Schaller committed
769
  *Ngas = 0, *Ngparts = 0, *Ngparts_background = 0, *Nstars = 0,
Loic Hausammann's avatar
Loic Hausammann committed
770
  *Nblackholes = 0, *Nsinks = 0;
771

772
773
774
775
776
  /* Open file */
  /* message("Opening file '%s' as IC.", fileName); */
  hid_t h_plist_id = H5Pcreate(H5P_FILE_ACCESS);
  H5Pset_fapl_mpio(h_plist_id, comm, info);
  h_file = H5Fopen(fileName, H5F_ACC_RDONLY, h_plist_id);
777
  if (h_file < 0) error("Error while opening file '%s'.", fileName);
778
779
780

  /* Open header to read simulation properties */
  /* message("Reading file header..."); */
781
  h_grp = H5Gopen(h_file, "/Header", H5P_DEFAULT);
782
783
  if (h_grp < 0) error("Error while opening file header\n");

784
785
786
787
  /* Check the dimensionality of the ICs (if the info exists) */
  const hid_t hid_dim = H5Aexists(h_grp, "Dimension");
  if (hid_dim < 0)
    error("Error while testing existance of 'Dimension' attribute");
788
  if (hid_dim > 0) io_read_attribute(h_grp, "Dimension", INT, &dimension);
789
790
791
792
  if (dimension != hydro_dimension)
    error("ICs dimensionality (%dD) does not match code dimensionality (%dD)",
          dimension, (int)hydro_dimension);

793
794
  /* Check whether the number of files is specified (if the info exists) */
  const hid_t hid_files = H5Aexists(h_grp, "NumFilesPerSnapshot");
795
  int num_files = 1;
796
797
798
799
800
801
802
803
804
805
806
807
  if (hid_files < 0)
    error(
        "Error while testing the existance of 'NumFilesPerSnapshot' attribute");
  if (hid_files > 0)
    io_read_attribute(h_grp, "NumFilesPerSnapshot", INT, &num_files);
  if (num_files != 1)
    error(
        "ICs are split over multiples files (%d). SWIFT cannot handle this "
        "case. The script /tools/combine_ics.py is availalbe in the repository "
        "to combine files into a valid input file.",
        num_files);

808
  /* Read the relevant information and print status */
809
  int flag_entropy_temp[6];
810
  io_read_attribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy_temp);
811
  *flag_entropy = flag_entropy_temp[0];
812
813
814
815
  io_read_attribute(h_grp, "BoxSize", DOUBLE, boxSize);
  io_read_attribute(h_grp, "NumPart_Total", LONGLONG, numParticles);
  io_read_attribute(h_grp, "NumPart_Total_HighWord", LONGLONG,
                    numParticles_highWord);
816

Josh Borrow's avatar
Josh Borrow committed
817
818
819
820
821
822
823
  /* Check that the user is not doing something silly when they e.g. restart
   * from a snapshot by asserting that the current scale-factor (from
   * parameter file) and the redshift in the header are consistent */
  if (with_cosmology) {
    io_assert_valid_header_cosmology(h_grp, a);
  }

824
  for (int ptype = 0; ptype < swift_type_count; ++ptype)
Matthieu Schaller's avatar
Matthieu Schaller committed
825
826
    N_total[ptype] =
        (numParticles[ptype]) + (numParticles_highWord[ptype] << 32);
827

828
  /* Get the box size if not cubic */
829
830
831
832
  dim[0] = boxSize[0];
  dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1];
  dim[2] = (boxSize[2] < 0) ? boxSize[0] : boxSize[2];

833
834
835
836
837
838
  /* Change box size in the 1D and 2D case */
  if (hydro_dimension == 2)
    dim[2] = min(dim[0], dim[1]);
  else if (hydro_dimension == 1)
    dim[2] = dim[1] = dim[0];

839
840
841
842
843
844
845
  /* Convert the box size if we want to clean-up h-factors */
  if (cleanup_h) {
    dim[0] /= h;
    dim[1] /= h;
    dim[2] /= h;
  }

846
  /* message("Found %lld particles in a %speriodic box of size [%f %f %f].", */
847
  /* 	  N_total[0], (periodic ? "": "non-"), dim[0], dim[1], dim[2]); */
848
849

  /* Divide the particles among the tasks. */
850
  for (int ptype = 0; ptype < swift_type_count; ++ptype) {
851
852
853
    offset[ptype] = mpi_rank * N_total[ptype] / mpi_size;
    N[ptype] = (mpi_rank + 1) * N_total[ptype] / mpi_size - offset[ptype];
  }
854
855
856
857

  /* Close header */
  H5Gclose(h_grp);

858
  /* Read the unit system used in the ICs */
Matthieu Schaller's avatar
Matthieu Schaller committed
859
860
  struct unit_system* ic_units =
      (struct unit_system*)malloc(sizeof(struct unit_system));
861
  if (ic_units == NULL) error("Unable to allocate memory for IC unit system");
862
  io_read_unit_system(h_file, ic_units, internal_units, mpi_rank);
863
864
865
866
867

  /* Tell the user if a conversion will be needed */
  if (mpi_rank == 0) {
    if (units_are_equal(ic_units, internal_units)) {

Matthieu Schaller's avatar
Matthieu Schaller committed
868
      message("IC and internal units match. No conversion needed.");
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894

    } else {

      message("Conversion needed from:");
      message("(ICs) Unit system: U_M =      %e g.", ic_units->UnitMass_in_cgs);
      message("(ICs) Unit system: U_L =      %e cm.",
              ic_units->UnitLength_in_cgs);
      message("(ICs) Unit system: U_t =      %e s.", ic_units->UnitTime_in_cgs);
      message("(ICs) Unit system: U_I =      %e A.",
              ic_units->UnitCurrent_in_cgs);
      message("(ICs) Unit system: U_T =      %e K.",
              ic_units->UnitTemperature_in_cgs);
      message("to:");
      message("(internal) Unit system: U_M = %e g.",
              internal_units->UnitMass_in_cgs);
      message("(internal) Unit system: U_L = %e cm.",
              internal_units->UnitLength_in_cgs);
      message("(internal) Unit system: U_t = %e s.",
              internal_units->UnitTime_in_cgs);
      message("(internal) Unit system: U_I = %e A.",
              internal_units->UnitCurrent_in_cgs);
      message("(internal) Unit system: U_T = %e K.",
              internal_units->UnitTemperature_in_cgs);
    }
  }

895
  /* Convert the dimensions of the box */
Matthieu Schaller's avatar
Matthieu Schaller committed
896
897
898
  for (int j = 0; j < 3; j++)
    dim[j] *=
        units_conversion_factor(ic_units, internal_units, UNIT_CONV_LENGTH);
899

900
  /* Allocate memory to store SPH particles */
901
902
  if (with_hydro) {
    *Ngas = N[0];
903
    if (swift_memalign("parts", (void**)parts, part_align,
Matthieu Schaller's avatar
Matthieu Schaller committed
904
                       (*Ngas) * sizeof(struct part)) != 0)
905
906
907
908
      error("Error while allocating memory for particles");
    bzero(*parts, *Ngas * sizeof(struct part));
  }

Loic Hausammann's avatar
Loic Hausammann committed
909
910
911
912
913
914
915
916
917
  /* Allocate memory to store black hole particles */
  if (with_sink) {
    *Nsinks = N[swift_type_sink];
    if (swift_memalign("sinks", (void**)sinks, sink_align,
                       *Nsinks * sizeof(struct sink)) != 0)
      error("Error while allocating memory for sink particles");
    bzero(*sinks, *Nsinks * sizeof(struct sink));
  }

918
  /* Allocate memory to store stars particles */
919
  if (with_stars) {
920
    *Nstars = N[swift_type_stars];
921
    if (swift_memalign("sparts", (void**)sparts, spart_align,
922
                       *Nstars * sizeof(struct spart)) != 0)
923
      error("Error while allocating memory for stars particles");
924
925
926
    bzero(*sparts, *Nstars * sizeof(struct spart));
  }

927
928
929
  /* Allocate memory to store black hole particles */
  if (with_black_holes) {
    *Nblackholes = N[swift_type_black_hole];
Loic Hausammann's avatar
Loic Hausammann committed
930
    if (swift_memalign("bparts", (void**)bparts, bpart_align,
931
932
933
934
                       *Nblackholes * sizeof(struct bpart)) != 0)
      error("Error while allocating memory for black_holes particles");
    bzero(*bparts, *Nblackholes * sizeof(struct bpart));
  }
935

936
937
  /* Allocate memory to store gravity particles */
  if (with_gravity) {
938
939
    Ndm = N[swift_type_dark_matter];
    Ndm_background = N[swift_type_dark_matter_background];
940
941
    *Ngparts = (with_hydro ? N[swift_type_gas] : 0) +
               N[swift_type_dark_matter] +
942
               N[swift_type_dark_matter_background] +
943
               (with_stars ? N[swift_type_stars] : 0) +
Loic Hausammann's avatar
Loic Hausammann committed
944
               (with_sink ? N[swift_type_sink] : 0) +
945
               (with_black_holes ? N[swift_type_black_hole] : 0);
946
    *Ngparts_background = Ndm_background;
947
    if (swift_memalign("gparts", (void**)gparts, gpart_align,
Matthieu Schaller's avatar
Matthieu Schaller committed
948
                       *Ngparts * sizeof(struct gpart)) != 0)
949
950
951
952
953
      error("Error while allocating memory for gravity particles");
    bzero(*gparts, *Ngparts * sizeof(struct gpart));
  }

  /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) /
954
955
   * (1024.*1024.)); */

956
  /* message("BoxSize = %lf", dim[0]); */
957
  /* message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, *Ngparts); */
958
959

  /* Loop over all particle types */
960
  for (int ptype = 0; ptype < swift_type_count; ptype++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
961

962
963
    /* Don't do anything if no particle of this kind */
    if (N_total[ptype] == 0) continue;
Matthieu Schaller's avatar
Matthieu Schaller committed
964

965
966
967
    /* Open the particle group in the file */
    char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE];
    snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d",
Matthieu Schaller's avatar
Matthieu Schaller committed
968
             ptype);
969
    h_grp = H5Gopen(h_file, partTypeGroupName, H5P_DEFAULT);
970
    if (h_grp < 0)
971
      error("Error while opening particle group %s.", partTypeGroupName);
Matthieu Schaller's avatar
Matthieu Schaller committed
972

973
974
    int num_fields = 0;
    struct io_props list[100];
975
    size_t Nparticles = 0;
976

977
978
    /* Read particle fields into the particle structure */
    switch (ptype) {
Matthieu Schaller's avatar
Matthieu Schaller committed
979

980
      case swift_type_gas:
Matthieu Schaller's avatar
Matthieu Schaller committed
981
982
983
        if (with_hydro) {
          Nparticles = *Ngas;
          hydro_read_particles(*parts, list, &num_fields);
984
          num_fields += chemistry_read_particles(*parts, list + num_fields);
Matthieu Schaller's avatar
Matthieu Schaller committed
985
        }
Matthieu Schaller's avatar
Matthieu Schaller committed
986
987
        break;

988
      case swift_type_dark_matter:
Matthieu Schaller's avatar
Matthieu Schaller committed
989
990
991
992
        if (with_gravity) {
          Nparticles = Ndm;
          darkmatter_read_particles(*gparts, list, &num_fields);
        }
Matthieu Schaller's avatar
Matthieu Schaller committed
993
994
        break;

995
996
997
998
999
1000
1001
      case swift_type_dark_matter_background:
        if (with_gravity) {
          Nparticles = Ndm_background;
          darkmatter_read_particles(*gparts + Ndm, list, &num_fields);
        }
        break;

Loic Hausammann's avatar
Loic Hausammann committed
1002
1003
1004
1005
1006
1007
1008
      case swift_type_sink:
        if (with_sink) {
          Nparticles = *Nsinks;
          sink_read_particles(*sinks, list, &num_fields);
        }
        break;

1009
      case swift_type_stars:
1010
        if (with_stars) {
Matthieu Schaller's avatar
Matthieu Schaller committed
1011
          Nparticles = *Nstars;
1012
          stars_read_particles(*sparts, list, &num_fields);
Loic Hausammann's avatar
Fix API    
Loic Hausammann committed
1013
1014
          num_fields +=
              star_formation_read_particles(*sparts, list + num_fields);
Matthieu Schaller's avatar
Matthieu Schaller committed
1015
1016
        }
        break;