/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
/* Config parameters. */
#include "../config.h"
/* Some standard headers. */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <pthread.h>
#include <math.h>
#include <float.h>
#include <limits.h>
#include <fenv.h>

/* Conditional headers. */
#ifdef HAVE_LIBZ
#include <zlib.h>
#endif

/* MPI headers. */
#ifdef WITH_MPI
#include <mpi.h>
#endif

/* Local headers. */
#include "swift.h"

/* Ticks per second on this machine. */
#ifndef CPU_TPS
#define CPU_TPS 2.40e9
#endif

/* Engine policy flags. */
#ifndef ENGINE_POLICY
#define ENGINE_POLICY engine_policy_none
#endif

/**
 * @brief Main routine that loads a few particles and generates some output.
 *
 */
int main(int argc, char *argv[]) {

69
70
  int c, icount, j, k, N, periodic = 1;
  long long N_total = -1;
71
  int nr_threads = 1, nr_queues = -1;
72
  int dump_tasks = 0;
73
74
75
  int data[2];
  double dim[3] = {1.0, 1.0, 1.0}, shift[3] = {0.0, 0.0, 0.0};
  double h_max = -1.0, scaling = 1.0;
76
  double time_end = DBL_MAX;
77
78
79
80
81
  struct part *parts = NULL;
  struct space s;
  struct engine e;
  struct UnitSystem us;
  char ICfileName[200] = "";
82
  char dumpfile[30];
83
  float dt_max = 0.0f, dt_min = 0.0f;
84
85
  ticks tic;
  int nr_nodes = 1, myrank = 0, grid[3] = {1, 1, 1};
86
  FILE *file_thread;
87
  int with_outputs = 1;
88

89
90
  /* Choke on FP-exceptions. */
  //feenableexcept( FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW );
91

92
#ifdef WITH_MPI
93
94
  /* Start by initializing MPI. */
  int res, prov;
95
  if ((res = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &prov)) != MPI_SUCCESS)
96
97
    error("Call to MPI_Init failed with error %i.", res);
  if (prov != MPI_THREAD_MULTIPLE)
98
99
100
    error("MPI does not provide the level of threading required "
          "(MPI_THREAD_MULTIPLE).");
  if ((res = MPI_Comm_size(MPI_COMM_WORLD, &nr_nodes)) != MPI_SUCCESS)
101
102
103
104
105
106
107
108
109
110
111
112
113
    error("MPI_Comm_size failed with error %i.", res);
  if ((res = MPI_Comm_rank(MPI_COMM_WORLD, &myrank)) != MPI_SUCCESS)
    error("Call to MPI_Comm_rank failed with error %i.", res);
  if ((res = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN)) !=
      MPI_SUCCESS)
    error("Call to MPI_Comm_set_errhandler failed with error %i.", res);
  if (myrank == 0) message("MPI is up and running with %i node(s).", nr_nodes);
  fflush(stdout);

  /* Set a default grid so that grid[0]*grid[1]*grid[2] == nr_nodes. */
  factor(nr_nodes, &grid[0], &grid[1]);
  factor(nr_nodes / grid[1], &grid[0], &grid[2]);
  factor(grid[0] * grid[1], &grid[1], &grid[0]);
114
#endif
115

116
117
118
  /* Greeting message */
  if (myrank == 0) greetings();

119
#ifndef WITH_MPI
Angus Lepper's avatar
Angus Lepper committed
120
121
122
123
124
125
126
127
128
129
130
131
132
#if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
  /* Ensure the NUMA node on which we initialise (first touch) everything
   * doesn't change before engine_init allocates NUMA-local workers. Otherwise,
   * we may be scheduled elsewhere between the two times.
   */
  cpu_set_t affinity;
  CPU_ZERO(&affinity);
  CPU_SET(sched_getcpu(), &affinity);
  if (sched_setaffinity(0, sizeof(cpu_set_t), &affinity) != 0) {
    message("failed to set entry thread's affinity");
  } else {
    message("set entry thread's affinity");
  }
133
#endif
Angus Lepper's avatar
Angus Lepper committed
134
135
#endif

136
137
138
139
  /* Init the space. */
  bzero(&s, sizeof(struct space));

  /* Parse the options */
140
  while ((c = getopt(argc, argv, "a:c:d:e:f:g:m:q:s:t:w:y:z:")) != -1)
141
142
143
144
145
146
147
148
    switch (c) {
      case 'a':
        if (sscanf(optarg, "%lf", &scaling) != 1)
          error("Error parsing cutoff scaling.");
        if (myrank == 0) message("scaling cutoff by %.3f.", scaling);
        fflush(stdout);
        break;
      case 'c':
149
150
        if (sscanf(optarg, "%lf", &time_end) != 1) error("Error parsing final time.");
        if (myrank == 0) message("time_end set to %.3e.", time_end);
151
152
153
        fflush(stdout);
        break;
      case 'd':
154
155
156
157
158
159
        if (sscanf(optarg, "%f", &dt_min) != 1)
          error("Error parsing minimal timestep.");
        if (myrank == 0) message("dt_min set to %e.", dt_max);
        fflush(stdout);
        break;
      case 'e':
160
        if (sscanf(optarg, "%f", &dt_max) != 1)
161
162
          error("Error parsing maximal timestep.");
        if (myrank == 0) message("dt_max set to %e.", dt_max);
163
164
165
166
167
168
169
170
171
172
173
174
175
176
        fflush(stdout);
        break;
      case 'f':
        if (!strcpy(ICfileName, optarg)) error("Error parsing IC file name.");
        break;
      case 'g':
        if (sscanf(optarg, "%i %i %i", &grid[0], &grid[1], &grid[2]) != 3)
          error("Error parsing grid.");
        break;
      case 'm':
        if (sscanf(optarg, "%lf", &h_max) != 1) error("Error parsing h_max.");
        if (myrank == 0) message("maximum h set to %e.", h_max);
        fflush(stdout);
        break;
177
178
      case 'o':
        with_outputs = 0;
179
        break;
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
      case 'q':
        if (sscanf(optarg, "%d", &nr_queues) != 1)
          error("Error parsing number of queues.");
        break;
      case 's':
        if (sscanf(optarg, "%lf %lf %lf", &shift[0], &shift[1], &shift[2]) != 3)
          error("Error parsing shift.");
        if (myrank == 0)
          message("will shift parts by [ %.3f %.3f %.3f ].", shift[0], shift[1],
                  shift[2]);
        break;
      case 't':
        if (sscanf(optarg, "%d", &nr_threads) != 1)
          error("Error parsing number of threads.");
        break;
      case 'w':
        if (sscanf(optarg, "%d", &space_subsize) != 1)
          error("Error parsing sub size.");
        if (myrank == 0) message("sub size set to %i.", space_subsize);
        break;
200
      case 'y':
201
202
        if(sscanf(optarg, "%d", &dump_tasks) != 1)
          error("Error parsing dump_tasks (-y)");
203
        break;
204
205
206
207
208
209
210
211
212
213
214
      case 'z':
        if (sscanf(optarg, "%d", &space_splitsize) != 1)
          error("Error parsing split size.");
        if (myrank == 0) message("split size set to %i.", space_splitsize);
        break;
      case '?':
        error("Unknown option.");
        break;
    }

#if defined(WITH_MPI)
215
216
217
218
219
220
221
222
223
224
  if (myrank == 0) {
    message("Running with %i thread(s) per node.", nr_threads);
    message("grid set to [ %i %i %i ].", grid[0], grid[1], grid[2]);

    if (nr_nodes == 1) {
      message("WARNING: you are running with one MPI rank.");
      message("WARNING: you should use the non-MPI version of this program." );
    }
    fflush(stdout);
  }
225
#else
226
  if (myrank == 0) message("Running with %i thread(s).", nr_threads);
227
228
#endif

229
230
231
  /* How large are the parts? */
  if (myrank == 0) {
    message("sizeof(struct part) is %li bytes.", (long int)sizeof(struct part));
232
    message("sizeof(struct xpart) is %li bytes.", (long int)sizeof(struct xpart));
233
234
235
    message("sizeof(struct gpart) is %li bytes.",
            (long int)sizeof(struct gpart));
  }
236

237
  /* Initialize unit system */
238
239
240
241
242
243
244
245
246
247
248
249
250
251
  initUnitSystem(&us);
  if (myrank == 0) {
    message("Unit system: U_M = %e g.", us.UnitMass_in_cgs);
    message("Unit system: U_L = %e cm.", us.UnitLength_in_cgs);
    message("Unit system: U_t = %e s.", us.UnitTime_in_cgs);
    message("Unit system: U_I = %e A.", us.UnitCurrent_in_cgs);
    message("Unit system: U_T = %e K.", us.UnitTemperature_in_cgs);
    message("Density units: %e a^%f h^%f.",
            conversionFactor(&us, UNIT_CONV_DENSITY),
            aFactor(&us, UNIT_CONV_DENSITY), hFactor(&us, UNIT_CONV_DENSITY));
    message("Entropy units: %e a^%f h^%f.",
            conversionFactor(&us, UNIT_CONV_ENTROPY),
            aFactor(&us, UNIT_CONV_ENTROPY), hFactor(&us, UNIT_CONV_ENTROPY));
  }
252

253
254
255
256
  /* Check we have sensible time step bounds */
  if ( dt_min > dt_max )
    error("Minimal time step size must be large than maximal time step size ");
  
257
258
259
  /* Check whether an IC file has been provided */
  if (strcmp(ICfileName, "") == 0)
    error("An IC file name must be provided via the option -f");
260

261
262
263
264
265
266
  /* Read particles and space information from (GADGET) IC */
  tic = getticks();
#if defined(WITH_MPI)
#if defined(HAVE_PARALLEL_HDF5)
  read_ic_parallel(ICfileName, dim, &parts, &N, &periodic, myrank, nr_nodes,
                   MPI_COMM_WORLD, MPI_INFO_NULL);
267
#else
268
269
  read_ic_serial(ICfileName, dim, &parts, &N, &periodic, myrank, nr_nodes,
                 MPI_COMM_WORLD, MPI_INFO_NULL);
270
271
#endif
#else
272
  read_ic_single(ICfileName, dim, &parts, &N, &periodic);
273
#endif
274

275
276
277
278
279
  if (myrank == 0)
    message("reading particle properties took %.3f ms.",
            ((double)(getticks() - tic)) / CPU_TPS * 1000);
  fflush(stdout);

280
281
#if defined(WITH_MPI)
  long long N_long = N;
282
  MPI_Reduce(&N_long, &N_total, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
283
284
285
286
287
#else
  N_total = N;
#endif
  if (myrank == 0) message("Read %lld particles from the ICs", N_total);

288
289
290
291
292
293
294
295
296
297
298
  /* Apply h scaling */
  if (scaling != 1.0)
    for (k = 0; k < N; k++) parts[k].h *= scaling;

  /* Apply shift */
  if (shift[0] != 0 || shift[1] != 0 || shift[2] != 0)
    for (k = 0; k < N; k++) {
      parts[k].x[0] += shift[0];
      parts[k].x[1] += shift[1];
      parts[k].x[2] += shift[2];
    }
299

300
301
302
303
304
  /* Set default number of queues. */
  if (nr_queues < 0) nr_queues = nr_threads;

  /* Initialize the space with this data. */
  tic = getticks();
305
  space_init(&s, dim, parts, N, periodic, h_max, myrank == 0);
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
  if (myrank == 0)
    message("space_init took %.3f ms.",
            ((double)(getticks() - tic)) / CPU_TPS * 1000);
  fflush(stdout);

  /* Say a few nice things about the space we just created. */
  if (myrank == 0) {
    message("space dimensions are [ %.3f %.3f %.3f ].", s.dim[0], s.dim[1],
            s.dim[2]);
    message("space %s periodic.", s.periodic ? "is" : "isn't");
    message("highest-level cell dimensions are [ %i %i %i ].", s.cdim[0],
            s.cdim[1], s.cdim[2]);
    message("%i parts in %i cells.", s.nr_parts, s.tot_cells);
    message("maximum depth is %d.", s.maxdepth);
    // message( "cutoffs in [ %g %g ]." , s.h_min , s.h_max ); fflush(stdout);
  }

323
  /* Verify that each particle is in it's proper cell. */
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
  if (myrank == 0) {
    icount = 0;
    space_map_cells_pre(&s, 0, &map_cellcheck, &icount);
    message("map_cellcheck picked up %i parts.", icount);
  }

  if (myrank == 0) {
    data[0] = s.maxdepth;
    data[1] = 0;
    space_map_cells_pre(&s, 0, &map_maxdepth, data);
    message("nr of cells at depth %i is %i.", data[0], data[1]);
  }

  /* Dump the particle positions. */
  // space_map_parts( &s , &map_dump , shift );

  /* Initialize the engine with this space. */
  tic = getticks();
342
  if (myrank == 0) message("nr_nodes is %i.", nr_nodes);
343
  engine_init(&e, &s, dt_max, nr_threads, nr_queues, nr_nodes, myrank,
344
              ENGINE_POLICY | engine_policy_steal, 0, time_end, dt_min, dt_max);
345
346
347
348
  if (myrank == 0)
    message("engine_init took %.3f ms.",
            ((double)(getticks() - tic)) / CPU_TPS * 1000);
  fflush(stdout);
349
350

#ifdef WITH_MPI
351
352
353
  /* Split the space. */
  engine_split(&e, grid);
  engine_redistribute(&e);
354
#endif
355

356
357
358
359
  if (with_outputs) {
    /* Write the state of the system as it is before starting time integration.
     */
    tic = getticks();
360
361
#if defined(WITH_MPI)
#if defined(HAVE_PARALLEL_HDF5)
362
363
    write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD,
                          MPI_INFO_NULL);
364
#else
365
366
    write_output_serial(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD,
                        MPI_INFO_NULL);
367
368
#endif
#else
369
    write_output_single(&e, &us);
370
#endif
Peter W. Draper's avatar
Peter W. Draper committed
371
372
373
    if (myrank == 0)
      message("writing particle properties took %.3f ms.",
              ((double)(getticks() - tic)) / CPU_TPS * 1000);
374
375
    fflush(stdout);
  }
376
377
378
379
380
381

/* Init the runner history. */
#ifdef HIST
  for (k = 0; k < runner_hist_N; k++) runner_hist_bins[k] = 0;
#endif

382
  if (myrank == 0) {
383
384
385
386
    message(
	    "Running on %lld particles until t=%.3e with %i threads and %i "
	    "queues (dt_min=%.3e, dt_max=%.3e)...",
	    N_total, time_end, e.nr_threads, e.sched.nr_queues, e.dt_min, e.dt_max);
387
388
    fflush(stdout);
  }
389

390
391
392
  /* Initialise the particles */
  engine_init_particles(&e);
  
393
394
395
  /* Legend */
  if (myrank == 0)
    printf("# Step  Time  time-step  CPU Wall-clock time [ms]\n");
396
  
397
  /* Let loose a runner on the space. */
398
  for (j = 0; e.time < time_end; j++) {
399
400
401
402
403
404
405
406
407
408
409
410
411
412

/* Repartition the space amongst the nodes? */
#if defined(WITH_MPI) && defined(HAVE_METIS)
    if (j % 100 == 2) e.forcerepart = 1;
#endif

    timers_reset(timers_mask_all);
#ifdef COUNTER
    for (k = 0; k < runner_counter_count; k++) runner_counter[k] = 0;
#endif

    /* Take a step. */
    engine_step(&e);

413
    if (with_outputs && j % 100 == 0) {
414
415
416
417
418

#if defined(WITH_MPI)
#if defined(HAVE_PARALLEL_HDF5)
      write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD,
                            MPI_INFO_NULL);
419
#else
420
421
      write_output_serial(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD,
                          MPI_INFO_NULL);
422
#endif
423
#else
424
      write_output_single(&e, &us);
425
#endif
426
    }
427

428
429
430
431
  /* Dump the task data using the given frequency. */
    if (dump_tasks && (dump_tasks == 1 || j % dump_tasks == 1)) {
#ifdef WITH_MPI

432
      /* Make sure output file is empty, only on one rank. */
433
      sprintf(dumpfile, "thread_info_MPI-step%d.dat", j);
434
435
436
437
438
      if (myrank == 0) {
          file_thread = fopen(dumpfile, "w");
          fclose(file_thread);
      }
      MPI_Barrier(MPI_COMM_WORLD);
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490

      for (int i = 0; i < nr_nodes; i++) {

        /* Rank 0 decides the index of writing node, this happens one-by-one. */
        int kk = i;
        MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD);

        if (i == myrank) {

          /* Open file and position at end. */
          file_thread = fopen(dumpfile, "a");

          fprintf(file_thread, " %03i 0 0 0 0 %lli 0 0 0 0\n", myrank,
                  e.tic_step);
          int count = 0;
          for (int l = 0; l < e.sched.nr_tasks; l++)
            if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit) {
              fprintf(
                  file_thread, " %03i %i %i %i %i %lli %lli %i %i %i\n", myrank,
                  e.sched.tasks[l].rid, e.sched.tasks[l].type,
                  e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL),
                  e.sched.tasks[l].tic, e.sched.tasks[l].toc,
                  (e.sched.tasks[l].ci != NULL) ? e.sched.tasks[l].ci->count : 0,
                  (e.sched.tasks[l].cj != NULL) ? e.sched.tasks[l].cj->count : 0,
                  e.sched.tasks[l].flags);
              fflush(stdout);
              count++;
            }
          message("rank %d counted %d tasks", myrank, count);

          fclose(file_thread);
        }

        /* And we wait for all to synchronize. */
        MPI_Barrier(MPI_COMM_WORLD);
      }

#else
      sprintf(dumpfile, "thread_info-step%d.dat", j);
      file_thread = fopen(dumpfile, "w");
      for (int l = 0; l < e.sched.nr_tasks; l++)
        if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit)
          fprintf(file_thread, " %i %i %i %i %lli %lli %i %i\n",
                  e.sched.tasks[l].rid, e.sched.tasks[l].type,
                  e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL),
                  e.sched.tasks[l].tic, e.sched.tasks[l].toc,
                  (e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->count,
                  (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->count);
      fclose(file_thread);
#endif
    }

491
    /* Dump a line of aggregate output. */
492
493
494
495
496
497
498
499
500
501
502
    /*     if (myrank == 0) { */
    /*       printf("%i %e %.16e %.16e %.16e %.3e %.3e %i %.3e %.3e", j, e.time,
     */
    /*              e.ekin + e.epot, e.ekin, e.epot, e.dt, e.dt_step,
     * e.count_step, */
    /*              e.dt_min, e.dt_max); */
    /*       for (k = 0; k < timer_count; k++) */
    /*         printf(" %.3f", ((double)timers[k]) / CPU_TPS * 1000); */
    /*       printf("\n"); */
    /*       fflush(stdout); */
    /*     } */
503
504
505
506
507
508
509
510

    
    /* if (myrank == 0) { */
    /*   printf("%i %e", j, e.time); */
    /*   printf(" %.3f", ((double)timers[timer_count - 1]) / CPU_TPS * 1000); */
    /*   printf("\n"); */
    /*   fflush(stdout); */
    /* } */
511
512
513
  }

/* Print the values of the runner histogram. */
514
#ifdef HIST
515
516
517
518
519
520
521
  printf("main: runner histogram data:\n");
  for (k = 0; k < runner_hist_N; k++)
    printf(" %e %e %e\n",
           runner_hist_a + k * (runner_hist_b - runner_hist_a) / runner_hist_N,
           runner_hist_a +
               (k + 1) * (runner_hist_b - runner_hist_a) / runner_hist_N,
           (double)runner_hist_bins[k]);
522
#endif
Pedro Gonnet's avatar
Pedro Gonnet committed
523

524

525
  if (with_outputs) {
526
527
528
/* Write final output. */
#if defined(WITH_MPI)
#if defined(HAVE_PARALLEL_HDF5)
529
530
    write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD,
                          MPI_INFO_NULL);
531
#else
532
533
    write_output_serial(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD,
                        MPI_INFO_NULL);
534
#endif
535
#else
536
    write_output_single(&e, &us);
537
#endif
538
  }
539

540
#ifdef WITH_MPI
541
542
  if (MPI_Finalize() != MPI_SUCCESS)
    error("call to MPI_Finalize failed with error %i.", res);
543
#endif
544
545

  /* Say goodbye. */
Matthieu Schaller's avatar
Matthieu Schaller committed
546
  if (myrank == 0) message("done. Bye.");
547
548
549

  /* All is calm, all is bright. */
  return 0;
550
}