test27cells.c 16.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

Matthieu Schaller's avatar
Matthieu Schaller committed
20
21
22
23
/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
24
#include <fenv.h>
25
#include <stdio.h>
26
27
28
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
Matthieu Schaller's avatar
Matthieu Schaller committed
29
30

/* Local headers. */
31
32
#include "swift.h"

33
34
#define ACC_THRESHOLD 1e-5

35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
/* Select the self-interaction function under test. DOSELF1 expands to the
 * function that main() calls and DOSELF1_NAME is the same name as a string,
 * used when reporting which function was run. The vectorised variants are only
 * chosen when WITH_VECTORIZATION is defined together with the matching
 * DOSELF1_VEC / DOSELF1_VEC_2 switch.
 * NOTE(review): defining both DOSELF1_VEC and DOSELF1_VEC_2 defines DOSELF1
 * twice with different bodies. */
#if defined(WITH_VECTORIZATION) && defined(DOSELF1_VEC)
#define DOSELF1 runner_doself1_density_vec
#define DOSELF1_NAME "runner_doself1_density_vec"
#endif

#if defined(WITH_VECTORIZATION) && defined(DOSELF1_VEC_2)
#define DOSELF1 runner_doself1_density_vec_2
#define DOSELF1_NAME "runner_doself1_density_vec_2"
#endif

/* Nothing selected above: fall back to the non-vectorised function. */
#ifndef DOSELF1
#define DOSELF1 runner_doself1_density
#define DOSELF1_NAME "runner_doself1_density"
#endif

50
51
52
53
54
55
56
/**
 * @brief Velocity fields that make_cell() can give to the particles.
 *
 * The numeric values are the ones accepted by the -v command-line option.
 */
enum velocity_types {
  velocity_zero = 0,      /* v = (0, 0, 0) */
  velocity_random = 1,    /* components from random_uniform(-0.05, 0.05) */
  velocity_divergent = 2, /* v = x - 1.5 * size on every axis */
  velocity_rotating = 3   /* v = (y, -x, 0) */
};

Matthieu Schaller's avatar
Matthieu Schaller committed
57
58
59
60
61
62
63
/**
 * @brief Constructs a cell and all of its particle in a valid state prior to
 * a DOPAIR or DOSELF calcuation.
 *
 * @param n The cube root of the number of particles.
 * @param offset The position of the cell offset from (0,0,0).
 * @param size The cell size.
James Willis's avatar
James Willis committed
64
65
 * @param h The smoothing length of the particles in units of the inter-particle
 *separation.
Matthieu Schaller's avatar
Matthieu Schaller committed
66
67
 * @param density The density of the fluid.
 * @param partId The running counter of IDs.
James Willis's avatar
James Willis committed
68
69
 * @param pert The perturbation to apply to the particles in the cell in units
 *of the inter-particle separation.
Matthieu Schaller's avatar
Matthieu Schaller committed
70
 * @param vel The type of velocity field (0, random, divergent, rotating)
71
 */
72
struct cell *make_cell(size_t n, double *offset, double size, double h,
73
74
                       double density, long long *partId, double pert,
                       enum velocity_types vel) {
75
76
  const size_t count = n * n * n;
  const double volume = size * size * size;
77
78
79
  struct cell *cell = malloc(sizeof(struct cell));
  bzero(cell, sizeof(struct cell));

80
81
  if (posix_memalign((void **)&cell->parts, part_align,
                     count * sizeof(struct part)) != 0) {
82
83
84
85
    error("couldn't allocate particles, no. of particles: %d", (int)count);
  }
  bzero(cell->parts, count * sizeof(struct part));

86
87
88
89
90
91
92
93
94
95
96
97
98
99
  /* Construct the parts */
  struct part *part = cell->parts;
  for (size_t x = 0; x < n; ++x) {
    for (size_t y = 0; y < n; ++y) {
      for (size_t z = 0; z < n; ++z) {
        part->x[0] =
            offset[0] +
            size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
        part->x[1] =
            offset[1] +
            size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
        part->x[2] =
            offset[2] +
            size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
        switch (vel) {
          case velocity_zero:
            part->v[0] = 0.f;
            part->v[1] = 0.f;
            part->v[2] = 0.f;
            break;
          case velocity_random:
            part->v[0] = random_uniform(-0.05, 0.05);
            part->v[1] = random_uniform(-0.05, 0.05);
            part->v[2] = random_uniform(-0.05, 0.05);
            break;
          case velocity_divergent:
            part->v[0] = part->x[0] - 1.5 * size;
            part->v[1] = part->x[1] - 1.5 * size;
            part->v[2] = part->x[2] - 1.5 * size;
            break;
          case velocity_rotating:
            part->v[0] = part->x[1];
            part->v[1] = -part->x[0];
            part->v[2] = 0.f;
            break;
        }
122
        part->h = size * h / (float)n;
123
        part->id = ++(*partId);
124

125
#ifdef GIZMO_SPH
126
        part->conserved.mass = density * volume / count;
127
#else
128
        part->mass = density * volume / count;
129
#endif
130
131
132
133
134
135

#if defined(HOPKINS_PE_SPH)
        part->entropy = 1.f;
        part->entropy_one_over_gamma = 1.f;
#endif

136
137
138
139
140
141
142
        part->ti_begin = 0;
        part->ti_end = 1;
        ++part;
      }
    }
  }

143
  /* Cell properties */
144
145
146
147
  cell->split = 0;
  cell->h_max = h;
  cell->count = count;
  cell->dx_max = 0.;
148
149
150
  cell->width[0] = size;
  cell->width[1] = size;
  cell->width[2] = size;
151
152
153
154
155
156
157
  cell->loc[0] = offset[0];
  cell->loc[1] = offset[1];
  cell->loc[2] = offset[2];

  cell->ti_end_min = 1;
  cell->ti_end_max = 1;

James Willis's avatar
James Willis committed
158
  shuffle_particles(cell->parts, cell->count);
159

160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
  cell->sorted = 0;
  cell->sort = NULL;
  cell->sortsize = 0;

  return cell;
}

/**
 * @brief Frees the particle array, the sort array and then the cell itself.
 *
 * @param ci The cell to release.
 */
void clean_up(struct cell *ci) {
  void *const owned[] = {ci->parts, ci->sort};
  const size_t n_owned = sizeof(owned) / sizeof(owned[0]);

  for (size_t k = 0; k < n_owned; ++k) free(owned[k]);
  free(ci);
}

/**
 * @brief Initializes all particles field to be ready for a density calculation
 */
void zero_particle_fields(struct cell *c) {
177
  for (int pid = 0; pid < c->count; pid++) {
178
179
180
181
    hydro_init_part(&c->parts[pid]);
  }
}

182
183
184
185
/**
 * @brief Ends the loop by adding the appropriate coefficients
 */
void end_calculation(struct cell *c) {
186
  for (int pid = 0; pid < c->count; pid++) {
187
    hydro_end_density(&c->parts[pid]);
188
189
190
  }
}

191
192
193
/**
 * @brief Dump all the particles to a file
 */
194
195
void dump_particle_fields(char *fileName, struct cell *main_cell,
                          struct cell **cells) {
196
197
  FILE *file = fopen(fileName, "w");

198
  /* Write header */
199
  fprintf(file,
200
201
          "# %4s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s "
          "%13s %13s %13s\n",
202
203
          "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "rho", "rho_dh",
          "wcount", "wcount_dh", "div_v", "curl_vx", "curl_vy", "curl_vz");
204

205
  fprintf(file, "# Main cell --------------------------------------------\n");
206

207
  /* Write main cell */
208
  for (int pid = 0; pid < main_cell->count; pid++) {
209
    fprintf(file,
210
211
            "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e "
            "%13e %13e %13e\n",
212
213
            main_cell->parts[pid].id, main_cell->parts[pid].x[0],
            main_cell->parts[pid].x[1], main_cell->parts[pid].x[2],
214
            main_cell->parts[pid].v[0], main_cell->parts[pid].v[1],
215
216
            main_cell->parts[pid].v[2],
            hydro_get_density(&main_cell->parts[pid]),
217
218
219
#if defined(GIZMO_SPH)
            0.f,
#else
220
            main_cell->parts[pid].density.rho_dh,
221
222
#endif
            main_cell->parts[pid].density.wcount,
223
            main_cell->parts[pid].density.wcount_dh,
224
#if defined(GADGET2_SPH) || defined(DEFAULT_SPH) || defined(HOPKINS_PE_SPH)
Matthieu Schaller's avatar
Matthieu Schaller committed
225
226
227
228
            main_cell->parts[pid].density.div_v,
            main_cell->parts[pid].density.rot_v[0],
            main_cell->parts[pid].density.rot_v[1],
            main_cell->parts[pid].density.rot_v[2]
229
230
231
232
#else
            0., 0., 0., 0.
#endif
            );
233
234
  }

235
236
237
238
239
240
  /* Write all other cells */
  for (int i = 0; i < 3; ++i) {
    for (int j = 0; j < 3; ++j) {
      for (int k = 0; k < 3; ++k) {
        struct cell *cj = cells[i * 9 + j * 3 + k];
        if (cj == main_cell) continue;
241

242
243
244
        fprintf(file,
                "# Offset: [%2d %2d %2d] -----------------------------------\n",
                i - 1, j - 1, k - 1);
245

246
        for (int pjd = 0; pjd < cj->count; pjd++) {
247
248
          fprintf(
              file,
249
250
              "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e "
              "%13e %13e %13e\n",
251
              cj->parts[pjd].id, cj->parts[pjd].x[0], cj->parts[pjd].x[1],
252
              cj->parts[pjd].x[2], cj->parts[pjd].v[0], cj->parts[pjd].v[1],
253
              cj->parts[pjd].v[2], hydro_get_density(&cj->parts[pjd]),
254
255
256
#if defined(GIZMO_SPH)
              0.f,
#else
257
              main_cell->parts[pjd].density.rho_dh,
258
#endif
259
              cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh,
260
#if defined(GADGET2_SPH) || defined(DEFAULT_SPH) || defined(HOPKINS_PE_SPH)
Matthieu Schaller's avatar
Matthieu Schaller committed
261
262
              cj->parts[pjd].density.div_v, cj->parts[pjd].density.rot_v[0],
              cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2]
263
264
265
266
#else
              0., 0., 0., 0.
#endif
              );
267
268
        }
      }
269
    }
270
271
272
273
  }
  fclose(file);
}

274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
/**
 * @brief Compares two particle arrays element by element.
 *
 * Each pair of particles with the same index is handed to compare_particles()
 * and the values it returns are added up.
 *
 * @param serial_parts Particle array that has been interacted serially
 * @param vec_parts Particle array that has been interacted using vectors
 * @param count No. of particles in each array
 * @param threshold Level of accuracy, forwarded to compare_particles()
 *
 * @return Sum of the compare_particles() results; 0 when count is not positive
 */
int check_results(struct part *serial_parts, struct part *vec_parts, int count,
                  double threshold) {
  int mismatches = 0;
  int idx = 0;

  while (idx < count) {
    mismatches +=
        compare_particles(serial_parts[idx], vec_parts[idx], threshold);
    ++idx;
  }

  return mismatches;
}

294
295
/* Forward declarations of the interaction functions called from main(),
 * either directly or through the DOSELF1 macro. */
void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj);
void runner_doself1_density(struct runner *r, struct cell *ci);
void runner_doself1_density_vec(struct runner *r, struct cell *ci);
void runner_doself1_density_vec_2(struct runner *r, struct cell *ci);
299

300
/* And go... */
301
int main(int argc, char *argv[]) {
302
303

  engine_pin();
304
  size_t runs = 0, particles = 0;
305
  double h = 1.23485, size = 1., rho = 1.;
306
  double perturbation = 0.;
307
  double threshold = ACC_THRESHOLD;
308
309
  char outputFileNameExtension[200] = "";
  char outputFileName[200] = "";
310
  enum velocity_types vel = velocity_zero;
Matthieu Schaller's avatar
Matthieu Schaller committed
311

312
313
314
315
  /* Initialize CPU frequency, this also starts time. */
  unsigned long long cpufreq = 0;
  clocks_set_cpufreq(cpufreq);

Matthieu Schaller's avatar
Matthieu Schaller committed
316
317
  /* Choke on FP-exceptions */
  feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
Matthieu Schaller's avatar
Matthieu Schaller committed
318

319
  /* Get some randomness going */
320
321
  srand(0);

322
  char c;
323
  while ((c = getopt(argc, argv, "m:s:h:n:r:t:d:f:v:a:")) != -1) {
324
325
326
327
    switch (c) {
      case 'h':
        sscanf(optarg, "%lf", &h);
        break;
328
329
330
      case 's':
        sscanf(optarg, "%lf", &size);
        break;
331
      case 'n':
332
333
334
335
336
337
338
339
        sscanf(optarg, "%zu", &particles);
        break;
      case 'r':
        sscanf(optarg, "%zu", &runs);
        break;
      case 'd':
        sscanf(optarg, "%lf", &perturbation);
        break;
340
341
342
      case 'm':
        sscanf(optarg, "%lf", &rho);
        break;
343
344
345
      case 'f':
        strcpy(outputFileNameExtension, optarg);
        break;
346
      case 'v':
Matthieu Schaller's avatar
Matthieu Schaller committed
347
        sscanf(optarg, "%d", (int *)&vel);
348
        break;
349
350
351
      case 'a':
        sscanf(optarg, "%lf", &threshold);
        break;
352
353
354
355
356
357
      case '?':
        error("Unknown option.");
        break;
    }
  }

358
  if (h < 0 || particles == 0 || runs == 0) {
359
    printf(
360
361
362
363
        "\nUsage: %s -n PARTICLES_PER_AXIS -r NUMBER_OF_RUNS [OPTIONS...]\n"
        "\nGenerates 27 cells, filled with particles on a Cartesian grid."
        "\nThese are then interacted using runner_dopair1_density() and "
        "runner_doself1_density()."
364
        "\n\nOptions:"
365
        "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>"
366
367
        "\n-m rho             - Physical density in the cell"
        "\n-s size            - Physical size of the cell"
368
        "\n-d pert            - Perturbation to apply to the particles [0,1["
369
370
        "\n-v type (0,1,2,3)  - Velocity field: (zero, random, divergent, "
        "rotating)"
371
        "\n-f fileName        - Part of the file name used to save the dumps\n",
372
373
374
375
        argv[0]);
    exit(1);
  }

376
  /* Help users... */
377
378
  message("Function called: %s", DOSELF1_NAME);
  message("Vector size: %d", VEC_SIZE);
379
  message("Adiabatic index: ga = %f", hydro_gamma);
380
  message("Hydro implementation: %s", SPH_IMPLEMENTATION);
381
382
  message("Smoothing length: h = %f", h * size);
  message("Kernel:               %s", kernel_name);
383
  message("Neighbour target: N = %f", pow_dimension(h) * kernel_norm);
384
385
386
  message("Density target: rho = %f", rho);
  message("div_v target:   div = %f", vel == 2 ? 3.f : 0.f);
  message("curl_v target: curl = [0., 0., %f]", vel == 3 ? -2.f : 0.f);
387

388
  printf("\n");
389

390
391
  /* Build the infrastructure */
  struct space space;
392
393
  space.periodic = 0;

394
  struct engine engine;
395
396
397
  engine.s = &space;
  engine.time = 0.1f;
  engine.ti_current = 1;
398
399

  struct runner runner;
400
401
  runner.e = &engine;

402
403
404
405
406
407
408
409
  /* Construct some cells */
  struct cell *cells[27];
  struct cell *main_cell;
  static long long partId = 0;
  for (int i = 0; i < 3; ++i) {
    for (int j = 0; j < 3; ++j) {
      for (int k = 0; k < 3; ++k) {
        double offset[3] = {i * size, j * size, k * size};
410
411
        cells[i * 9 + j * 3 + k] = make_cell(particles, offset, size, h, rho,
                                             &partId, perturbation, vel);
412

Matthieu Schaller's avatar
Matthieu Schaller committed
413
        runner_do_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0);
414
415
416
417
      }
    }
  }

418
  /* Store the main cell for future use */
419
420
  main_cell = cells[13];

421
422
423
  ticks timings[27];
  for (int i = 0; i < 27; i++) timings[i] = 0;

424
  ticks time = 0;
425
426
  for (size_t i = 0; i < runs; ++i) {
    /* Zero the fields */
427
    for (int j = 0; j < 27; ++j) zero_particle_fields(cells[j]);
428

429
    const ticks tic = getticks();
430

431
#if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION))
Matthieu Schaller's avatar
Matthieu Schaller committed
432

433
    /* Run all the pairs */
434
435
436
437
    for (int j = 0; j < 27; ++j) {
      if (cells[j] != main_cell) {
        const ticks sub_tic = getticks();

438
        runner_dopair1_density(&runner, main_cell, cells[j]);
439

440
441
442
443
444
        const ticks sub_toc = getticks();
        timings[j] += sub_toc - sub_tic;
      }
    }

445
    /* And now the self-interaction */
446
#ifdef WITH_VECTORIZATION
James Willis's avatar
James Willis committed
447
448
449
450
    runner.par_cache.count = 0;
    cache_init(&runner.par_cache,512);
#endif

451
452
453
454
455
456
457
458
    const ticks self_tic = getticks();

    DOSELF1(&runner, main_cell);

    const ticks self_toc = getticks();

    timings[13] += self_toc - self_tic;

459
#endif
Matthieu Schaller's avatar
Matthieu Schaller committed
460

461
    const ticks toc = getticks();
462
463
    time += toc - tic;

464
465
466
    /* Let's get physical ! */
    end_calculation(main_cell);

467
468
    /* Dump if necessary */
    if (i % 50 == 0) {
469
470
471
      sprintf(outputFileName, "swift_dopair_27_%s.dat",
              outputFileNameExtension);
      dump_particle_fields(outputFileName, main_cell, cells);
472
473
474
    }
  }

475
476
477
478
479
  /* Store the vectorised particle results. */ 
  struct part vec_parts[main_cell->count];
  for(int i=0; i<main_cell->count; i++)
    vec_parts[i] = main_cell->parts[i];

480
  /* Output timing */
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
  ticks corner_time = timings[0] + timings[2] + timings[6] + timings[8] +
                      timings[18] + timings[20] + timings[24] + timings[26];

  ticks edge_time = timings[1] + timings[3] + timings[5] + timings[7] +
                    timings[9] + timings[11] + timings[15] + timings[17] +
                    timings[19] + timings[21] + timings[23] + timings[25];

  ticks face_time = timings[4] + timings[10] + timings[12] + timings[14] +
                    timings[16] + timings[22];

  message("Corner calculations took       : %15lli ticks.", corner_time / runs);
  message("Edge calculations took         : %15lli ticks.", edge_time / runs);
  message("Face calculations took         : %15lli ticks.", face_time / runs);
  message("Self calculations took         : %15lli ticks.", timings[13] / runs);
  message("SWIFT calculation took         : %15lli ticks.", time / runs);
496
497
498
499

  /* Now perform a brute-force version for accuracy tests */

  /* Zero the fields */
500
501
502
503
  for (int i = 0; i < 27; ++i) zero_particle_fields(cells[i]);

  const ticks tic = getticks();

504
#if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION))
505

506
507
508
  /* Run all the brute-force pairs */
  for (int j = 0; j < 27; ++j)
    if (cells[j] != main_cell) pairs_all_density(&runner, main_cell, cells[j]);
509

510
511
  /* And now the self-interaction */
  self_all_density(&runner, main_cell);
512

513
#endif
Matthieu Schaller's avatar
Matthieu Schaller committed
514

515
  const ticks toc = getticks();
516

517
518
  /* Let's get physical ! */
  end_calculation(main_cell);
519
520
521

  /* Dump */
  sprintf(outputFileName, "brute_force_27_%s.dat", outputFileNameExtension);
522
  dump_particle_fields(outputFileName, main_cell, cells);
523

524
  /* Check serial results against the vectorised results. */
James Willis's avatar
James Willis committed
525
526
  if (check_results(main_cell->parts, vec_parts, main_cell->count, threshold))
    message("Differences found...");
527
  
528
  /* Output timing */
Matthieu Schaller's avatar
Matthieu Schaller committed
529
  message("Brute force calculation took : %15lli ticks.", toc - tic);
530
531

  /* Clean things to make the sanitizer happy ... */
532
  for (int i = 0; i < 27; ++i) clean_up(cells[i]);
533
534
535

  return 0;
}