/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk).
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

#if defined(HAVE_HDF5) && defined(WITH_MPI) && defined(HAVE_PARALLEL_HDF5)

/* Some standard headers. */
#include <hdf5.h>
#include <math.h>
#include <mpi.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* This object's header. */
#include "parallel_io.h"

/* Local includes. */
#include "common_io.h"
#include "dimension.h"
#include "engine.h"
#include "error.h"
#include "gravity_io.h"
#include "gravity_properties.h"
#include "hydro_io.h"
#include "hydro_properties.h"
#include "io_properties.h"
#include "kernel_hydro.h"
#include "part.h"
#include "stars_io.h"
#include "units.h"
#include "xmf.h"

/* Are we timing the i/o? */
//#define IO_SPEED_MEASUREMENT

/* The current limit of ROMIO (the underlying MPI-IO layer) is 2GB */
#define HDF5_PARALLEL_IO_MAX_BYTES 2000000000LL
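
/* At this limit a 3-component double-precision field (24 bytes per particle)
 * fits roughly 83 million particles per collective call; larger arrays are
 * read and written in several chunked calls below. */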
/**
 * @brief Reads a chunk of data from an open HDF5 dataset.
 *
 * @param h_data The HDF5 dataset to read from.
 * @param h_plist_id The parallel HDF5 properties.
 * @param props The #io_props of the field to read.
 * @param N The number of particles to read.
 * @param offset Offset in the array where this MPI task starts reading.
 * @param internal_units The #unit_system used internally.
 * @param ic_units The #unit_system used in the ICs.
 */
void readArray_chunk(hid_t h_data, hid_t h_plist_id,
                     const struct io_props props, size_t N, long long offset,
                     const struct unit_system* internal_units,
                     const struct unit_system* ic_units) {

  const size_t typeSize = io_sizeof_type(props.type);
  const size_t copySize = typeSize * props.dimension;
  const size_t num_elements = N * props.dimension;

  /* Can't handle reads of more than 2GB */
  if (N * props.dimension * typeSize > HDF5_PARALLEL_IO_MAX_BYTES)
    error("Dataset too large to be read in one pass!");

  /* Allocate temporary buffer */
  void* temp = malloc(num_elements * typeSize);
  if (temp == NULL) error("Unable to allocate memory for temporary buffer");

  /* Prepare information for hyper-slab */
  hsize_t shape[2], offsets[2];
  int rank;
  if (props.dimension > 1) {
    rank = 2;
    shape[0] = N;
    shape[1] = props.dimension;
    offsets[0] = offset;
    offsets[1] = 0;
  } else {
    rank = 2;
    shape[0] = N;
    shape[1] = 1;
    offsets[0] = offset;
    offsets[1] = 0;
  }
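
  /* Each rank selects the contiguous block of rows [offset, offset + N) of
   * the on-disk array; scalar fields are treated as an N x 1 matrix so the
   * same rank-2 selection covers both cases. */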

  /* Create data space in memory */
  const hid_t h_memspace = H5Screate_simple(rank, shape, NULL);

  /* Select hyper-slab in file */
  const hid_t h_filespace = H5Dget_space(h_data);
  H5Sselect_hyperslab(h_filespace, H5S_SELECT_SET, offsets, NULL, shape, NULL);

  /* Read HDF5 dataspace in temporary buffer */
  /* Dirty version that happens to work for vectors but should be improved */
  /* Using HDF5 dataspaces would be better */
  const hid_t h_err = H5Dread(h_data, io_hdf5_type(props.type), h_memspace,
                              h_filespace, h_plist_id, temp);
  if (h_err < 0) {
    error("Error while reading data array '%s'.", props.name);
  }

  /* Unit conversion if necessary */
  const double factor =
      units_conversion_factor(ic_units, internal_units, props.units);
  if (factor != 1.) {

    /* message("Converting ! factor=%e", factor); */

    if (io_is_double_precision(props.type)) {
      double* temp_d = temp;
      for (size_t i = 0; i < num_elements; ++i) temp_d[i] *= factor;
    } else {
      float* temp_f = temp;
      for (size_t i = 0; i < num_elements; ++i) temp_f[i] *= factor;
    }
  }

  /* Copy temporary buffer to particle data */
  char* temp_c = temp;
  for (size_t i = 0; i < N; ++i)
    memcpy(props.field + i * props.partSize, &temp_c[i * copySize], copySize);
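  /* The copy is strided: the dataset on disk stores this field contiguously,
   * while in memory it lives inside an array of particle structs. */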

  /* Free and close everything */
  free(temp);
  H5Sclose(h_filespace);
  H5Sclose(h_memspace);
}

/**
 * @brief Reads a data array from a given HDF5 group.
 *
 * @param grp The group from which to read.
 * @param props The #io_props of the field to read.
 * @param N The number of particles on that rank.
 * @param N_total The total number of particles.
 * @param mpi_rank The MPI rank of this node.
 * @param offset The offset in the array on disk for this rank.
 * @param internal_units The #unit_system used internally.
 * @param ic_units The #unit_system used in the ICs.
 */
void readArray(hid_t grp, struct io_props props, size_t N, long long N_total,
               int mpi_rank, long long offset,
               const struct unit_system* internal_units,
               const struct unit_system* ic_units) {

  const size_t typeSize = io_sizeof_type(props.type);
  const size_t copySize = typeSize * props.dimension;

  /* Check whether the dataspace exists or not */
  const htri_t exist = H5Lexists(grp, props.name, 0);
  if (exist < 0) {
    error("Error while checking the existence of data set '%s'.", props.name);
  } else if (exist == 0) {
    if (props.importance == COMPULSORY) {
      error("Compulsory data set '%s' not present in the file.", props.name);
    } else {
      for (size_t i = 0; i < N; ++i)
        memset(props.field + i * props.partSize, 0, copySize);
      return;
    }
  }

  /* Open data space in file */
  const hid_t h_data = H5Dopen2(grp, props.name, H5P_DEFAULT);
  if (h_data < 0) error("Error while opening data space '%s'.", props.name);

  /* Check data type */
  const hid_t h_type = H5Dget_type(h_data);
  if (h_type < 0) error("Unable to retrieve data type from the file");
  /* if (!H5Tequal(h_type, hdf5_type(type))) */
  /*   error("Non-matching types between the code and the file"); */

  /* Create property list for collective dataset read. */
  const hid_t h_plist_id = H5Pcreate(H5P_DATASET_XFER);
  H5Pset_dxpl_mpio(h_plist_id, H5FD_MPIO_COLLECTIVE);
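  /* With collective transfers every rank must take part in each H5Dread,
   * even after it has run out of data; the chunked loop below therefore
   * keeps all ranks iterating until the slowest one is done. */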

  /* Given the limitations of ROMIO we will need to read the data in chunks of
     HDF5_PARALLEL_IO_MAX_BYTES bytes per node until all the nodes are done. */
  char redo = 1;
  while (redo) {

    /* Maximal number of elements */
    const size_t max_chunk_size =
        HDF5_PARALLEL_IO_MAX_BYTES / (props.dimension * typeSize);

    /* Read the next chunk */
    const size_t this_chunk = (N > max_chunk_size) ? max_chunk_size : N;
    readArray_chunk(h_data, h_plist_id, props, this_chunk, offset,
                    internal_units, ic_units);

    /* Compute how many items are left */
    if (N > max_chunk_size) {
      N -= max_chunk_size;
      props.field += max_chunk_size * props.partSize;
      offset += max_chunk_size;
      redo = 1;
    } else {
      N = 0;
      redo = 0;
    }

    /* Do we need to run again ? */
    MPI_Allreduce(MPI_IN_PLACE, &redo, 1, MPI_SIGNED_CHAR, MPI_MAX,
                  MPI_COMM_WORLD);

    if (redo && mpi_rank == 0)
      message("Need to redo one iteration for array '%s'", props.name);
  }

  /* Close everything */
  H5Pclose(h_plist_id);
  H5Tclose(h_type);
  H5Dclose(h_data);
}

/*-----------------------------------------------------------------------------
 * Routines writing an output file
 *-----------------------------------------------------------------------------*/

/**
 * @brief Writes a chunk of data in an open HDF5 dataset.
 *
 * @param e The #engine we are writing from.
 * @param h_data The HDF5 dataset to write to.
 * @param h_plist_id The parallel HDF5 properties.
 * @param props The #io_props of the field to write.
 * @param N The number of particles to write.
 * @param offset Offset in the array where this MPI task starts writing.
 * @param internal_units The #unit_system used internally.
 * @param snapshot_units The #unit_system used in the snapshots.
 */
void writeArray_chunk(struct engine* e, hid_t h_data, hid_t h_plist_id,
                      const struct io_props props, size_t N, long long offset,
                      const struct unit_system* internal_units,
                      const struct unit_system* snapshot_units) {

  const size_t typeSize = io_sizeof_type(props.type);
  const size_t num_elements = N * props.dimension;

  /* Can't handle writes of more than 2GB */
  if (N * props.dimension * typeSize > HDF5_PARALLEL_IO_MAX_BYTES)
    error("Dataset too large to be written in one pass!");

  /* message("Writing '%s' array...", props.name); */

  /* Allocate temporary buffer */
  void* temp = NULL;
  if (posix_memalign((void**)&temp, IO_BUFFER_ALIGNMENT,
                     num_elements * typeSize) != 0)
    error("Unable to allocate temporary i/o buffer");

#ifdef IO_SPEED_MEASUREMENT
  MPI_Barrier(MPI_COMM_WORLD);
  ticks tic = getticks();
#endif

  /* Copy the particle data to the temporary buffer */
  io_copy_temp_buffer(temp, e, props, N, internal_units, snapshot_units);
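  /* Note: io_copy_temp_buffer() also applies the unit conversion between
   * internal_units and snapshot_units, which is why both unit systems are
   * passed down to it. */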

#ifdef IO_SPEED_MEASUREMENT
  MPI_Barrier(MPI_COMM_WORLD);
  if (engine_rank == 0)
    message("Copying for '%s' took %.3f %s.", props.name,
            clocks_from_ticks(getticks() - tic), clocks_getunit());
#endif

  /* Create data space */
  const hid_t h_memspace = H5Screate(H5S_SIMPLE);
  if (h_memspace < 0) {
    error("Error while creating data space (memory) for field '%s'.",
          props.name);
  }

  int rank;
  hsize_t shape[2];
  hsize_t offsets[2];
  if (props.dimension > 1) {
    rank = 2;
    shape[0] = N;
    shape[1] = props.dimension;
    offsets[0] = offset;
    offsets[1] = 0;
  } else {
    rank = 1;
    shape[0] = N;
    shape[1] = 0;
    offsets[0] = offset;
    offsets[1] = 0;
  }

  /* Change shape of memory data space */
  hid_t h_err = H5Sset_extent_simple(h_memspace, rank, shape, NULL);
  if (h_err < 0) {
    error("Error while changing data space (memory) shape for field '%s'.",
          props.name);
  }

  /* Select the hyper-slab corresponding to this rank */
  hid_t h_filespace = H5Dget_space(h_data);
  if (N > 0) {
    H5Sselect_hyperslab(h_filespace, H5S_SELECT_SET, offsets, NULL, shape,
                        NULL);
  } else {
    H5Sselect_none(h_filespace);
  }
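  /* A rank can have nothing left to write in a given pass of the chunked
   * loop in writeArray() but must still take part in the collective
   * H5Dwrite, so it selects an empty region of the file instead. */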

  /* message("Writing %lld '%s', %zd elements = %zd bytes (int=%d) at offset
   *         %zd", N, props.name, N * props.dimension,
   *         N * props.dimension * typeSize,
   *         (int)(N * props.dimension * typeSize), offset); */

#ifdef IO_SPEED_MEASUREMENT
  MPI_Barrier(MPI_COMM_WORLD);
  tic = getticks();
#endif

  /* Write temporary buffer to HDF5 dataspace */
  h_err = H5Dwrite(h_data, io_hdf5_type(props.type), h_memspace, h_filespace,
                   h_plist_id, temp);
  if (h_err < 0) {
    error("Error while writing data array '%s'.", props.name);
  }

#ifdef IO_SPEED_MEASUREMENT
  MPI_Barrier(MPI_COMM_WORLD);
  ticks toc = getticks();
  float ms = clocks_from_ticks(toc - tic);
  int megaBytes = N * props.dimension * typeSize / (1024 * 1024);
  int total = 0;
  MPI_Reduce(&megaBytes, &total, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
  if (engine_rank == 0)
    message("H5Dwrite for '%s' (%d MB) took %.3f %s (speed = %f MB/s).",
            props.name, total, ms, clocks_getunit(), total / (ms / 1000.));
#endif

  /* Free and close everything */
  free(temp);
  H5Sclose(h_memspace);
  H5Sclose(h_filespace);
}

/**
 * @brief Writes a data array in given HDF5 group.
 *
 * @param e The #engine we are writing from.
 * @param grp The group in which to write.
 * @param fileName The name of the file in which the data is written.
 * @param xmfFile The FILE used to write the XMF description.
 * @param partTypeGroupName The name of the group containing the particles in
 * the HDF5 file.
 * @param props The #io_props of the field to write.
 * @param N The number of particles to write.
 * @param N_total Total number of particles across all cores.
 * @param mpi_rank The rank of this node.
 * @param offset Offset in the array where this MPI task starts writing.
 * @param internal_units The #unit_system used internally.
 * @param snapshot_units The #unit_system used in the snapshots.
 */
void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile,
                char* partTypeGroupName, struct io_props props, size_t N,
                long long N_total, int mpi_rank, long long offset,
                const struct unit_system* internal_units,
                const struct unit_system* snapshot_units) {

  const size_t typeSize = io_sizeof_type(props.type);

#ifdef IO_SPEED_MEASUREMENT
  const ticks tic = getticks();
#endif

  /* Work out properties of the array in the file */
  int rank;
  hsize_t shape_total[2];
  hsize_t chunk_shape[2];
  if (props.dimension > 1) {
    rank = 2;
    shape_total[0] = N_total;
    shape_total[1] = props.dimension;
    chunk_shape[0] = 1 << 16; /* Just a guess...*/
    chunk_shape[1] = props.dimension;
  } else {
    rank = 1;
    shape_total[0] = N_total;
    shape_total[1] = 0;
    chunk_shape[0] = 1 << 16; /* Just a guess...*/
    chunk_shape[1] = 0;
  }

  /* Make sure the chunks are not larger than the dataset */
  if (chunk_shape[0] > (hsize_t)N_total) chunk_shape[0] = N_total;

  /* Create the space in the file */
  hid_t h_filespace = H5Screate(H5S_SIMPLE);
  if (h_filespace < 0) {
    error("Error while creating data space (file) for field '%s'.", props.name);
  }

  /* Change shape of file data space */
  hid_t h_err = H5Sset_extent_simple(h_filespace, rank, shape_total, NULL);
  if (h_err < 0) {
    error("Error while changing data space (file) shape for field '%s'.",
          props.name);
  }

  /* Dataset properties */
  const hid_t h_prop = H5Pcreate(H5P_DATASET_CREATE);

  /* Set chunk size */
  /* h_err = H5Pset_chunk(h_prop, rank, chunk_shape); */
  /* if (h_err < 0) { */
  /*   error("Error while setting chunk size (%llu, %llu) for field '%s'.", */
  /*         chunk_shape[0], chunk_shape[1], props.name); */
  /* } */

  /* Create dataset */
  const hid_t h_data = H5Dcreate(grp, props.name, io_hdf5_type(props.type),
                                 h_filespace, H5P_DEFAULT, h_prop, H5P_DEFAULT);
  if (h_data < 0) {
    error("Error while creating dataset '%s'.", props.name);
  }

  H5Sclose(h_filespace);

  /* Create property list for collective dataset write. */
  const hid_t h_plist_id = H5Pcreate(H5P_DATASET_XFER);
  H5Pset_dxpl_mpio(h_plist_id, H5FD_MPIO_COLLECTIVE);

  /* Given the limitations of ROMIO we will need to write the data in chunks of
     HDF5_PARALLEL_IO_MAX_BYTES bytes per node until all the nodes are done. */
  char redo = 1;
  while (redo) {

    /* Maximal number of elements */
    const size_t max_chunk_size =
        HDF5_PARALLEL_IO_MAX_BYTES / (props.dimension * typeSize);

    /* Write the next chunk */
    const size_t this_chunk = (N > max_chunk_size) ? max_chunk_size : N;
    writeArray_chunk(e, h_data, h_plist_id, props, this_chunk, offset,
                     internal_units, snapshot_units);

    /* Compute how many items are left */
    if (N > max_chunk_size) {
      N -= max_chunk_size;
      props.field += max_chunk_size * props.partSize;
      offset += max_chunk_size;
      redo = 1;
    } else {
      N = 0;
      redo = 0;
    }

    /* Do we need to run again ? */
    MPI_Allreduce(MPI_IN_PLACE, &redo, 1, MPI_SIGNED_CHAR, MPI_MAX,
                  MPI_COMM_WORLD);

    if (redo && e->verbose && mpi_rank == 0)
      message("Need to redo one iteration for array '%s'", props.name);
  }

  /* Write XMF description for this data set */
  if (mpi_rank == 0)
    xmf_write_line(xmfFile, fileName, partTypeGroupName, props.name, N_total,
                   props.dimension, props.type);

  /* Write unit conversion factors for this data set */
  char buffer[FIELD_BUFFER_SIZE];
  units_cgs_conversion_string(buffer, snapshot_units, props.units);
  io_write_attribute_d(
      h_data, "CGS conversion factor",
      units_cgs_conversion_factor(snapshot_units, props.units));
  io_write_attribute_f(h_data, "h-scale exponent",
                       units_h_factor(snapshot_units, props.units));
  io_write_attribute_f(h_data, "a-scale exponent",
                       units_a_factor(snapshot_units, props.units));
  io_write_attribute_s(h_data, "Conversion factor", buffer);

  /* Close everything */
  H5Pclose(h_prop);
  H5Dclose(h_data);
  H5Pclose(h_plist_id);

#ifdef IO_SPEED_MEASUREMENT
  MPI_Barrier(MPI_COMM_WORLD);
  if (engine_rank == 0)
    message("'%s' took %.3f %s.", props.name,
            clocks_from_ticks(getticks() - tic), clocks_getunit());
#endif
}

/**
 * @brief Reads an HDF5 initial condition file (GADGET-3 type) in parallel
 *
 * @param fileName The file to read.
 * @param internal_units The system units used internally.
 * @param dim (output) The dimension of the volume read from the file.
 * @param parts (output) The array of #part read from the file.
 * @param gparts (output) The array of #gpart read from the file.
 * @param sparts (output) The array of #spart read from the file.
 * @param Ngas (output) The number of gas particles read from the file.
 * @param Ngparts (output) The number of gravity particles read from the file.
 * @param Nstars (output) The number of star particles read from the file.
 * @param periodic (output) 1 if the volume is periodic, 0 if not.
 * @param flag_entropy (output) 1 if the ICs contained Entropy in the
 * InternalEnergy field.
 * @param with_hydro Are we running with hydro?
 * @param with_gravity Are we running with gravity?
 * @param with_stars Are we running with stars?
 * @param mpi_rank The MPI rank of this node.
 * @param mpi_size The number of MPI ranks.
 * @param comm The MPI communicator.
 * @param info The MPI information object.
 * @param dry_run If 1, don't read the particles. Only allocate the arrays.
 */
void read_ic_parallel(char* fileName, const struct unit_system* internal_units,
                      double dim[3], struct part** parts, struct gpart** gparts,
                      struct spart** sparts, size_t* Ngas, size_t* Ngparts,
                      size_t* Nstars, int* periodic, int* flag_entropy,
                      int with_hydro, int with_gravity, int with_stars,
                      int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info,
                      int dry_run) {

  hid_t h_file = 0, h_grp = 0;
  /* GADGET has only cubic boxes (in cosmological mode) */
  double boxSize[3] = {0.0, -1.0, -1.0};
  long long numParticles[swift_type_count] = {0};
  long long numParticles_highWord[swift_type_count] = {0};
  size_t N[swift_type_count] = {0};
  long long N_total[swift_type_count] = {0};
  long long offset[swift_type_count] = {0};
  int dimension = 3; /* Assume 3D if nothing is specified */
  size_t Ndm = 0;

  /* Open file */
  /* message("Opening file '%s' as IC.", fileName); */
  hid_t h_plist_id = H5Pcreate(H5P_FILE_ACCESS);
  H5Pset_fapl_mpio(h_plist_id, comm, info);
  h_file = H5Fopen(fileName, H5F_ACC_RDONLY, h_plist_id);
  if (h_file < 0) {
    error("Error while opening file '%s'.", fileName);
  }

  /* Open header to read simulation properties */
  /* message("Reading runtime parameters..."); */
  h_grp = H5Gopen(h_file, "/RuntimePars", H5P_DEFAULT);
  if (h_grp < 0) error("Error while opening runtime parameters\n");

  /* Read the relevant information */
  io_read_attribute(h_grp, "PeriodicBoundariesOn", INT, periodic);

  /* Close runtime parameters */
  H5Gclose(h_grp);

  /* Open header to read simulation properties */
  /* message("Reading file header..."); */
  h_grp = H5Gopen(h_file, "/Header", H5P_DEFAULT);
  if (h_grp < 0) error("Error while opening file header\n");

  /* Check the dimensionality of the ICs (if the info exists) */
  const hid_t hid_dim = H5Aexists(h_grp, "Dimension");
  if (hid_dim < 0)
    error("Error while testing existence of 'Dimension' attribute");
  if (hid_dim > 0) io_read_attribute(h_grp, "Dimension", INT, &dimension);
  if (dimension != hydro_dimension)
    error("ICs dimensionality (%dD) does not match code dimensionality (%dD)",
          dimension, (int)hydro_dimension);

  /* Read the relevant information and print status */
  int flag_entropy_temp[6];
  io_read_attribute(h_grp, "Flag_Entropy_ICs", INT, flag_entropy_temp);
  *flag_entropy = flag_entropy_temp[0];
  io_read_attribute(h_grp, "BoxSize", DOUBLE, boxSize);
  io_read_attribute(h_grp, "NumPart_Total", LONGLONG, numParticles);
  io_read_attribute(h_grp, "NumPart_Total_HighWord", LONGLONG,
                    numParticles_highWord);

  for (int ptype = 0; ptype < swift_type_count; ++ptype)
    N_total[ptype] =
        (numParticles[ptype]) + (numParticles_highWord[ptype] << 32);
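  /* The GADGET header splits 64-bit totals into two 32-bit attributes
   * (NumPart_Total and NumPart_Total_HighWord); they are recombined here. */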

  /* Get the box size if not cubic */
  dim[0] = boxSize[0];
  dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1];
  dim[2] = (boxSize[2] < 0) ? boxSize[0] : boxSize[2];

  /* Change box size in the 1D and 2D case */
  if (hydro_dimension == 2)
    dim[2] = min(dim[0], dim[1]);
  else if (hydro_dimension == 1)
    dim[2] = dim[1] = dim[0];

  /* message("Found %lld particles in a %speriodic box of size [%f %f %f].", */
  /*         N_total[0], (periodic ? "": "non-"), dim[0], */
  /*         dim[1], dim[2]); */

  /* Divide the particles among the tasks. */
  for (int ptype = 0; ptype < swift_type_count; ++ptype) {
    offset[ptype] = mpi_rank * N_total[ptype] / mpi_size;
    N[ptype] = (mpi_rank + 1) * N_total[ptype] / mpi_size - offset[ptype];
  }
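  /* The integer arithmetic pushes any remainder onto the later ranks: e.g.
   * 10 particles on 3 ranks gives offsets {0, 3, 6} and counts {3, 3, 4}. */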

  /* Close header */
  H5Gclose(h_grp);

  /* Read the unit system used in the ICs */
  struct unit_system* ic_units = malloc(sizeof(struct unit_system));
  if (ic_units == NULL) error("Unable to allocate memory for IC unit system");
  io_read_unit_system(h_file, ic_units, mpi_rank);

  /* Tell the user if a conversion will be needed */
  if (mpi_rank == 0) {
    if (units_are_equal(ic_units, internal_units)) {

      message("IC and internal units match. No conversion needed.");

    } else {

      message("Conversion needed from:");
      message("(ICs) Unit system: U_M =      %e g.", ic_units->UnitMass_in_cgs);
      message("(ICs) Unit system: U_L =      %e cm.",
              ic_units->UnitLength_in_cgs);
      message("(ICs) Unit system: U_t =      %e s.", ic_units->UnitTime_in_cgs);
      message("(ICs) Unit system: U_I =      %e A.",
              ic_units->UnitCurrent_in_cgs);
      message("(ICs) Unit system: U_T =      %e K.",
              ic_units->UnitTemperature_in_cgs);
      message("to:");
      message("(internal) Unit system: U_M = %e g.",
              internal_units->UnitMass_in_cgs);
      message("(internal) Unit system: U_L = %e cm.",
              internal_units->UnitLength_in_cgs);
      message("(internal) Unit system: U_t = %e s.",
              internal_units->UnitTime_in_cgs);
      message("(internal) Unit system: U_I = %e A.",
              internal_units->UnitCurrent_in_cgs);
      message("(internal) Unit system: U_T = %e K.",
              internal_units->UnitTemperature_in_cgs);
    }
  }

  /* Convert the dimensions of the box */
  for (int j = 0; j < 3; j++)
    dim[j] *=
        units_conversion_factor(ic_units, internal_units, UNIT_CONV_LENGTH);

  /* Allocate memory to store SPH particles */
  if (with_hydro) {
    *Ngas = N[swift_type_gas];
    if (posix_memalign((void*)parts, part_align,
                       (*Ngas) * sizeof(struct part)) != 0)
      error("Error while allocating memory for particles");
    bzero(*parts, *Ngas * sizeof(struct part));
  }

  /* Allocate memory to store star particles */
  if (with_stars) {
    *Nstars = N[swift_type_star];
    if (posix_memalign((void*)sparts, spart_align,
                       *Nstars * sizeof(struct spart)) != 0)
      error("Error while allocating memory for star particles");
    bzero(*sparts, *Nstars * sizeof(struct spart));
  }

  /* Allocate memory to store gravity particles */
  if (with_gravity) {
    Ndm = N[swift_type_dark_matter];
    *Ngparts = (with_hydro ? N[swift_type_gas] : 0) +
               N[swift_type_dark_matter] +
               (with_stars ? N[swift_type_star] : 0);
    if (posix_memalign((void*)gparts, gpart_align,
                       *Ngparts * sizeof(struct gpart)) != 0)
      error("Error while allocating memory for gravity particles");
    bzero(*gparts, *Ngparts * sizeof(struct gpart));
  }

  /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) /
698
699
   * (1024.*1024.)); */

700
  /* message("BoxSize = %lf", dim[0]); */
701
  /* message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, *Ngparts); */
702
703

  /* Loop over all particle types */
  for (int ptype = 0; ptype < swift_type_count; ptype++) {

    /* Don't do anything if no particle of this kind */
    if (N_total[ptype] == 0) continue;

    /* Open the particle group in the file */
    char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE];
    snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d",
             ptype);
    h_grp = H5Gopen(h_file, partTypeGroupName, H5P_DEFAULT);
    if (h_grp < 0) {
      error("Error while opening particle group %s.", partTypeGroupName);
    }

    int num_fields = 0;
    struct io_props list[100];
    size_t Nparticles = 0;

    /* Read particle fields into the particle structure */
    switch (ptype) {

      case swift_type_gas:
        if (with_hydro) {
          Nparticles = *Ngas;
          hydro_read_particles(*parts, list, &num_fields);
        }
        break;

      case swift_type_dark_matter:
        if (with_gravity) {
          Nparticles = Ndm;
          darkmatter_read_particles(*gparts, list, &num_fields);
        }
        break;

      case swift_type_star:
        if (with_stars) {
          Nparticles = *Nstars;
          star_read_particles(*sparts, list, &num_fields);
        }
        break;

      default:
        if (mpi_rank == 0)
          message("Particle Type %d not yet supported. Particles ignored",
                  ptype);
    }

    /* Read everything */
    if (!dry_run)
      for (int i = 0; i < num_fields; ++i)
        readArray(h_grp, list[i], Nparticles, N_total[ptype], mpi_rank,
                  offset[ptype], internal_units, ic_units);

    /* Close particle group */
    H5Gclose(h_grp);
  }

  /* Prepare the DM particles */
  if (!dry_run && with_gravity) io_prepare_dm_gparts(*gparts, Ndm);

  /* Duplicate the hydro particles into gparts */
  if (!dry_run && with_gravity && with_hydro)
    io_duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm);
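  /* The gpart array is laid out as [DM | gas | stars]: the hydro duplicates
   * start at index Ndm and the star duplicates at Ndm + *Ngas, matching the
   * offsets passed to these calls. */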

  /* Duplicate the star particles into gparts */
  if (!dry_run && with_gravity && with_stars)
    io_duplicate_star_gparts(*sparts, *gparts, *Nstars, Ndm + *Ngas);

  /* message("Done Reading particles..."); */
774

775
776
777
  /* Clean up */
  free(ic_units);

  /* Close property handler */
  H5Pclose(h_plist_id);

  /* Close file */
  H5Fclose(h_file);
}

/**
 * @brief Writes an HDF5 output file (GADGET-3 type) with its XMF descriptor.
 *
 * @param e The engine containing all the system.
 * @param baseName The common part of the snapshot file name.
 * @param internal_units The #unit_system used internally.
 * @param snapshot_units The #unit_system used in the snapshots.
 * @param mpi_rank The MPI rank of this node.
 * @param mpi_size The number of MPI ranks.
 * @param comm The MPI communicator.
 * @param info The MPI information object.
 *
 * Creates an HDF5 output file and writes the particles
 * contained in the engine. If such a file already exists, it is
 * erased and replaced by the new one.
 * The companion XMF file is also updated accordingly.
 *
 * Calls #error() if an error occurs.
 *
 */
void write_output_parallel(struct engine* e, const char* baseName,
                           const struct unit_system* internal_units,
                           const struct unit_system* snapshot_units,
                           int mpi_rank, int mpi_size, MPI_Comm comm,
                           MPI_Info info) {

  hid_t h_file = 0, h_grp = 0;
  const size_t Ngas = e->s->nr_parts;
  const size_t Nstars = e->s->nr_sparts;
  const size_t Ntot = e->s->nr_gparts;
  int periodic = e->s->periodic;
  int numFiles = 1;
  struct part* parts = e->s->parts;
  struct gpart* gparts = e->s->gparts;
  struct gpart* dmparts = NULL;
  struct spart* sparts = e->s->sparts;
  static int outputCount = 0;
  FILE* xmfFile = 0;

  /* Number of unassociated gparts */
  const size_t Ndm = Ntot > 0 ? Ntot - (Ngas + Nstars) : 0;

  /* File name */
  char fileName[FILENAME_BUFFER_SIZE];
  snprintf(fileName, FILENAME_BUFFER_SIZE, "%s_%04i.hdf5", baseName,
           outputCount);

  /* First time, we need to create the XMF file */
  if (outputCount == 0 && mpi_rank == 0) xmf_create_file(baseName);

  /* Prepare the XMF file for the new entry */
  if (mpi_rank == 0) xmfFile = xmf_prepare_file(baseName);

  /* Prepare some file-access properties */
  hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);

  /* Set some MPI-IO parameters */
  // MPI_Info_set(info, "IBM_largeblock_io", "true");
  MPI_Info_set(info, "romio_cb_write", "enable");
  MPI_Info_set(info, "romio_ds_write", "disable");
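  /* These ROMIO-specific hints turn on two-phase collective buffering for
   * writes and turn off data sieving, which generally favours large aligned
   * collective writes (a tuning assumption, not a hard requirement). */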

  /* Activate parallel i/o */
  hid_t h_err = H5Pset_fapl_mpio(plist_id, comm, info);
  if (h_err < 0) error("Error setting parallel i/o");

  /* Align objects of 1KB and larger on 4K pages. */
  h_err = H5Pset_alignment(plist_id, 1024, 4096);
  if (h_err < 0) error("Error setting HDF5 alignment");

  /* Disable meta-data cache eviction */
  H5AC_cache_config_t mdc_config;
  mdc_config.version = H5AC__CURR_CACHE_CONFIG_VERSION;
  h_err = H5Pget_mdc_config(plist_id, &mdc_config);
  if (h_err < 0) error("Error getting the MDC config");

  mdc_config.evictions_enabled = 0; /* false */
  mdc_config.incr_mode = H5C_incr__off;
  mdc_config.decr_mode = H5C_decr__off;
  mdc_config.flash_incr_mode = H5C_flash_incr__off;
  h_err = H5Pset_mdc_config(plist_id, &mdc_config);
  if (h_err < 0) error("Error setting the MDC config");

  /* Open HDF5 file with the chosen parameters */
  h_file = H5Fcreate(fileName, H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
  if (h_file < 0) {
    error("Error while opening file '%s'.", fileName);
  }

  /* Compute offset in the file and total number of particles */
  size_t N[swift_type_count] = {Ngas, Ndm, 0, 0, Nstars, 0};
  long long N_total[swift_type_count] = {0};
  long long offset[swift_type_count] = {0};
  MPI_Exscan(&N, &offset, swift_type_count, MPI_LONG_LONG_INT, MPI_SUM, comm);
  for (int ptype = 0; ptype < swift_type_count; ++ptype)
    N_total[ptype] = offset[ptype] + N[ptype];
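  /* MPI_Exscan gives each rank the sum of N over all lower ranks, i.e. its
   * starting offset in the file (rank 0 keeps its zero initialisation).
   * Passing the size_t array N as MPI_LONG_LONG_INT assumes a 64-bit
   * size_t. */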

  /* The last rank now has the correct N_total. Let's
   * broadcast from there */
  MPI_Bcast(&N_total, swift_type_count, MPI_LONG_LONG_INT, mpi_size - 1, comm);

  /* Now everybody knows its offset and the total number of
   * particles of each type */

  /* Write the part of the XMF file corresponding to this
   * specific output */
  if (mpi_rank == 0) xmf_write_outputheader(xmfFile, fileName, e->time);

  /* Open header to write simulation properties */
  /* message("Writing runtime parameters..."); */
  h_grp =
      H5Gcreate(h_file, "/RuntimePars", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  if (h_grp < 0) error("Error while creating runtime parameters group\n");

  /* Write the relevant information */
  io_write_attribute(h_grp, "PeriodicBoundariesOn", INT, &periodic, 1);

  /* Close runtime parameters */
  H5Gclose(h_grp);

  /* Open header to write simulation properties */
  /* message("Writing file header..."); */
  h_grp = H5Gcreate(h_file, "/Header", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  if (h_grp < 0) error("Error while creating file header\n");

  /* Print the relevant information and print status */
  io_write_attribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3);
  double dblTime = e->time;
  io_write_attribute(h_grp, "Time", DOUBLE, &dblTime, 1);
  int dimension = (int)hydro_dimension;
  io_write_attribute(h_grp, "Dimension", INT, &dimension, 1);

  /* GADGET-2 legacy values */
  /* Number of particles of each type */
  unsigned int numParticles[swift_type_count] = {0};
  unsigned int numParticlesHighWord[swift_type_count] = {0};
  for (int ptype = 0; ptype < swift_type_count; ++ptype) {
    numParticles[ptype] = (unsigned int)N_total[ptype];
    numParticlesHighWord[ptype] = (unsigned int)(N_total[ptype] >> 32);
  }
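  /* Mirror of the read path: the 64-bit totals are split back into the two
   * 32-bit words that the GADGET header format expects. */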
  io_write_attribute(h_grp, "NumPart_ThisFile", LONGLONG, N_total,
                     swift_type_count);
  io_write_attribute(h_grp, "NumPart_Total", UINT, numParticles,
                     swift_type_count);
  io_write_attribute(h_grp, "NumPart_Total_HighWord", UINT,
                     numParticlesHighWord, swift_type_count);
  double MassTable[6] = {0., 0., 0., 0., 0., 0.};
  io_write_attribute(h_grp, "MassTable", DOUBLE, MassTable, swift_type_count);
  unsigned int flagEntropy[swift_type_count] = {0};
  flagEntropy[0] = writeEntropyFlag();