benchmarkInteractions.c 14.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

#include <fenv.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "swift.h"

#define array_align sizeof(float) * VEC_SIZE
#define ACC_THRESHOLD 1e-5

#ifdef NONSYM_DENSITY
#define IACT runner_iact_nonsym_density
#define IACT_VEC runner_iact_nonsym_2_vec_density
#define IACT_NAME "test_nonsym_density"
#endif

#ifdef SYM_DENSITY
#define IACT runner_iact_density
#define IACT_VEC runner_iact_vec_density
#define IACT_NAME "test_sym_density"
#endif

#ifdef NONSYM_FORCE
#define IACT runner_iact_nonsym_force
#define IACT_VEC runner_iact_nonsym_vec_force
#define IACT_NAME "test_nonsym_force"
#endif

#ifdef SYM_FORCE
#define IACT runner_iact_force
#define IACT_VEC runner_iact_vec_force
#define IACT_NAME "test_sym_force"
#endif

#ifndef IACT
#define IACT runner_iact_nonsym_density
#define IACT_VEC runner_iact_nonsym_2_vec_density
#define IACT_NAME "test_nonsym_density"
#endif

/**
 * @brief Constructs an array of particles in a valid state prior to
 * a IACT_NONSYM and IACT_NONSYM_VEC call.
 *
 * @param count No. of particles to create
 * @param offset The position of the particle offset from (0,0,0).
 * @param spacing Particle spacing.
 * @param h The smoothing length of the particles in units of the inter-particle
 *separation.
 * @param partId The running counter of IDs.
 */
Matthieu Schaller's avatar
Matthieu Schaller committed
71
72
struct part *make_particles(size_t count, double *offset, double spacing,
                            double h, long long *partId) {
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120

  struct part *particles;
  if (posix_memalign((void **)&particles, part_align,
                     count * sizeof(struct part)) != 0) {
    error("couldn't allocate particles, no. of particles: %d", (int)count);
  }
  bzero(particles, count * sizeof(struct part));

  /* Construct the particles */
  struct part *p;

  /* Set test particle at centre of unit sphere. */
  p = &particles[0];

  /* Place the test particle at the centre of a unit sphere. */
  p->x[0] = 0.0f;
  p->x[1] = 0.0f;
  p->x[2] = 0.0f;

  p->h = h;
  p->id = ++(*partId);
  p->mass = 1.0f;

  /* Place rest of particles around the test particle
   * with random position within a unit sphere. */
  for (size_t i = 1; i < count; ++i) {
    p = &particles[i];

    /* Randomise positions within a unit sphere. */
    p->x[0] = random_uniform(-1.0, 1.0);
    p->x[1] = random_uniform(-1.0, 1.0);
    p->x[2] = random_uniform(-1.0, 1.0);

    /* Randomise velocities. */
    p->v[0] = random_uniform(-0.05, 0.05);
    p->v[1] = random_uniform(-0.05, 0.05);
    p->v[2] = random_uniform(-0.05, 0.05);

    p->h = h;
    p->id = ++(*partId);
    p->mass = 1.0f;
  }
  return particles;
}

/**
 * @brief Populates particle properties needed for the force calculation.
 */
121
void prepare_force(struct part *parts, size_t count) {
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213

  struct part *p;
  for (size_t i = 0; i < count; ++i) {
    p = &parts[i];
    p->rho = i + 1;
    p->force.balsara = random_uniform(0.0, 1.0);
    p->force.P_over_rho2 = i + 1;
    p->force.soundspeed = random_uniform(2.0, 3.0);
    p->force.v_sig = 0.0f;
    p->force.h_dt = 0.0f;
  }
}

/**
 * @brief Dumps all particle information to a file
 */
void dump_indv_particle_fields(char *fileName, struct part *p) {

  FILE *file = fopen(fileName, "a");

  fprintf(file,
          "%6llu %10f %10f %10f %10f %10f %10f %10e %10e %10e %13e %13e %13e "
          "%13e %13e %13e %13e "
          "%13e %13e %13e\n",
          p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2],
          p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->rho,
          p->density.rho_dh, p->density.wcount, p->density.wcount_dh,
          p->force.h_dt, p->force.v_sig,
#if defined(MINIMAL_SPH)
          0., 0., 0., 0.
#else
          p->density.div_v, p->density.rot_v[0], p->density.rot_v[1],
          p->density.rot_v[2]
#endif
          );
  fclose(file);
}

/**
 * @brief Creates a header for the output file
 */
void write_header(char *fileName) {

  FILE *file = fopen(fileName, "w");
  /* Write header */
  fprintf(file,
          "# %4s %10s %10s %10s %10s %10s %10s %10s %10s %10s %13s %13s %13s "
          "%13s %13s %13s %13s"
          "%13s %13s %13s %13s\n",
          "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "a_x", "a_y",
          "a_z", "rho", "rho_dh", "wcount", "wcount_dh", "dh/dt", "v_sig",
          "div_v", "curl_vx", "curl_vy", "curl_vz", "dS/dt");
  fprintf(file, "\n# PARTICLES BEFORE INTERACTION:\n");
  fclose(file);
}

/**
 * @brief Compares the vectorised result against
 * the serial result of the interaction.
 *
 * @param serial_test_part Particle that has been updated serially
 * @param serial_parts Particle array that has been interacted serially
 * @param vec_test_part Particle that has been updated using vectors
 * @param vec_parts Particle array to be interacted using vectors
 * @param count No. of particles that have been interacted
 *
 * @return Non-zero value if difference found, 0 otherwise
 */
int check_results(struct part serial_test_part, struct part *serial_parts,
                  struct part vec_test_part, struct part *vec_parts,
                  int count) {
  int result = 0;
  result += compare_particles(serial_test_part, vec_test_part, ACC_THRESHOLD);

  for (int i = 0; i < count; i++)
    result += compare_particles(serial_parts[i], vec_parts[i], ACC_THRESHOLD);

  return result;
}

/*
 * @brief Calls the serial and vectorised version of the non-symmetrical density
 * interaction.
 *
 * @param test_part Particle that will be updated
 * @param parts Particle array to be interacted
 * @param count No. of particles to be interacted
 * @param serial_inter_func Serial interaction function to be called
 * @param vec_inter_func Vectorised interaction function to be called
 * @param runs No. of times to call interactions
 *
 */
214
void test_interactions(struct part test_part, struct part *parts, size_t count,
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
                       char *filePrefix, int runs) {

  ticks serial_time = 0;
#ifdef WITH_VECTORIZATION
  ticks vec_time = 0;
#endif

  FILE *file;
  char serial_filename[200] = "";
  char vec_filename[200] = "";

  strcpy(serial_filename, filePrefix);
  strcpy(vec_filename, filePrefix);
  sprintf(serial_filename + strlen(serial_filename), "_serial.dat");
  sprintf(vec_filename + strlen(vec_filename), "_vec.dat");

  write_header(serial_filename);
  write_header(vec_filename);

  struct part pi_serial, pi_vec;
235
  struct part pj_serial[count], pj_vec[count];
236
237
238
239

  float r2[count] __attribute__((aligned(array_align)));
  float dx[3 * count] __attribute__((aligned(array_align)));

240
  struct part *piq[count], *pjq[count];
241
  for (size_t k = 0; k < count; k++) {
242
243
    piq[k] = NULL; pjq[k] = NULL;
  }
244
245

#ifdef WITH_VECTORIZATION
246
247
248
249
250
251
252
253
254
255
256
257
258
  float r2q[count] __attribute__((aligned(array_align)));
  float hiq[count] __attribute__((aligned(array_align)));
  float dxq[count] __attribute__((aligned(array_align)));

  float dyq[count] __attribute__((aligned(array_align)));
  float dzq[count] __attribute__((aligned(array_align)));
  float mjq[count] __attribute__((aligned(array_align)));
  float vixq[count] __attribute__((aligned(array_align)));
  float viyq[count] __attribute__((aligned(array_align)));
  float vizq[count] __attribute__((aligned(array_align)));
  float vjxq[count] __attribute__((aligned(array_align)));
  float vjyq[count] __attribute__((aligned(array_align)));
  float vjzq[count] __attribute__((aligned(array_align)));
259
#endif
260
261
262
263
264

  /* Call serial interaction a set number of times. */
  for (int k = 0; k < runs; k++) {
    /* Reset particle to initial setup */
    pi_serial = test_part;
265
    for (size_t i = 0; i < count; i++) pj_serial[i] = parts[i];
266
267
268
269
270
271
272
273
274
275

    /* Only dump data on first run. */
    if (k == 0) {
      /* Dump state of particles before serial interaction. */
      dump_indv_particle_fields(serial_filename, &pi_serial);
      for (size_t i = 0; i < count; i++)
        dump_indv_particle_fields(serial_filename, &pj_serial[i]);
    }

    /* Perform serial interaction */
276
    for (size_t i = 0; i < count; i++) {
277
278
279
280
281
282
283
284
285
286
      /* Compute the pairwise distance. */
      r2[i] = 0.0f;
      for (int k = 0; k < 3; k++) {
        int ind = (3 * i) + k;
        dx[ind] = pi_serial.x[k] - pj_serial[i].x[k];
        r2[i] += dx[ind] * dx[ind];
      }
    }

    const ticks tic = getticks();
Matthieu Schaller's avatar
Matthieu Schaller committed
287
/* Perform serial interaction */
288
#ifdef __ICC
289
#pragma novector
290
#endif
291
    for (size_t i = 0; i < count; i++) {
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
      IACT(r2[i], &(dx[3 * i]), pi_serial.h, pj_serial[i].h, &pi_serial,
           &pj_serial[i]);
    }
    serial_time += getticks() - tic;
  }

  file = fopen(serial_filename, "a");
  fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n");
  fclose(file);

  /* Dump result of serial interaction. */
  dump_indv_particle_fields(serial_filename, &pi_serial);
  for (size_t i = 0; i < count; i++)
    dump_indv_particle_fields(serial_filename, &pj_serial[i]);

  /* Call vector interaction a set number of times. */
  for (int k = 0; k < runs; k++) {
    /* Reset particle to initial setup */
    pi_vec = test_part;
311
    for (size_t i = 0; i < count; i++) pj_vec[i] = parts[i];
312
313

    /* Setup arrays for vector interaction. */
314
    for (size_t i = 0; i < count; i++) {
315
316
317
318
319
320
321
322
      /* Compute the pairwise distance. */
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
        dx[k] = pi_vec.x[k] - pj_vec[i].x[k];
        r2 += dx[k] * dx[k];
      }

323
#ifdef WITH_VECTORIZATION
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
      r2q[i] = r2;
      dxq[i] = dx[0];
      hiq[i] = pi_vec.h;
      piq[i] = &pi_vec;
      pjq[i] = &pj_vec[i];

      dyq[i] = dx[1];
      dzq[i] = dx[2];
      mjq[i] = pj_vec[i].mass;
      vixq[i] = pi_vec.v[0];
      viyq[i] = pi_vec.v[1];
      vizq[i] = pi_vec.v[2];
      vjxq[i] = pj_vec[i].v[0];
      vjyq[i] = pj_vec[i].v[1];
      vjzq[i] = pj_vec[i].v[2];
339
#endif
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
    }

    /* Only dump data on first run. */
    if (k == 0) {
      /* Dump state of particles before vector interaction. */
      dump_indv_particle_fields(vec_filename, piq[0]);
      for (size_t i = 0; i < count; i++)
        dump_indv_particle_fields(vec_filename, pjq[i]);
    }

/* Perform vector interaction. */
#ifdef WITH_VECTORIZATION
    vector hi_vec, hi_inv_vec, vix_vec, viy_vec, viz_vec, mask, mask2;
    vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum,
        curlvySum, curlvzSum;

    rhoSum.v = vec_set1(0.f);
    rho_dhSum.v = vec_set1(0.f);
    wcountSum.v = vec_set1(0.f);
    wcount_dhSum.v = vec_set1(0.f);
    div_vSum.v = vec_set1(0.f);
    curlvxSum.v = vec_set1(0.f);
    curlvySum.v = vec_set1(0.f);
    curlvzSum.v = vec_set1(0.f);

    hi_vec.v = vec_load(&hiq[0]);
    vix_vec.v = vec_load(&vixq[0]);
    viy_vec.v = vec_load(&viyq[0]);
    viz_vec.v = vec_load(&vizq[0]);

    hi_inv_vec = vec_reciprocal(hi_vec);
    mask.m = vec_setint1(0xFFFFFFFF);
    mask2.m = vec_setint1(0xFFFFFFFF);

#ifdef HAVE_AVX512_F
    KNL_MASK_16 knl_mask, knl_mask2;
    knl_mask = 0xFFFF;
    knl_mask2 = 0xFFFF;
#endif

    const ticks vec_tic = getticks();

382
    for (size_t i = 0; i < count; i += 2 * VEC_SIZE) {
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436

      IACT_VEC(&(r2q[i]), &(dxq[i]), &(dyq[i]), &(dzq[i]), (hi_inv_vec),
               (vix_vec), (viy_vec), (viz_vec), &(vjxq[i]), &(vjyq[i]),
               &(vjzq[i]), &(mjq[i]), &rhoSum, &rho_dhSum, &wcountSum,
               &wcount_dhSum, &div_vSum, &curlvxSum, &curlvySum, &curlvzSum,
               mask, mask2,
#ifdef HAVE_AVX512_F
               knl_mask, knl_mask2);
#else
               0, 0);
#endif
    }

    VEC_HADD(rhoSum, piq[0]->rho);
    VEC_HADD(rho_dhSum, piq[0]->density.rho_dh);
    VEC_HADD(wcountSum, piq[0]->density.wcount);
    VEC_HADD(wcount_dhSum, piq[0]->density.wcount_dh);
    VEC_HADD(div_vSum, piq[0]->density.div_v);
    VEC_HADD(curlvxSum, piq[0]->density.rot_v[0]);
    VEC_HADD(curlvySum, piq[0]->density.rot_v[1]);
    VEC_HADD(curlvzSum, piq[0]->density.rot_v[2]);

    vec_time += getticks() - vec_tic;
#endif
  }

  file = fopen(vec_filename, "a");
  fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n");
  fclose(file);

  /* Dump result of serial interaction. */
  dump_indv_particle_fields(vec_filename, piq[0]);
  for (size_t i = 0; i < count; i++)
    dump_indv_particle_fields(vec_filename, pjq[i]);

#ifdef WITH_VECTORIZATION
  /* Check serial results against the vectorised results. */
  if (check_results(pi_serial, pj_serial, pi_vec, pj_vec, count))
    message("Differences found...");
#endif

  message("The serial interactions took     : %15lli ticks.",
          serial_time / runs);
#ifdef WITH_VECTORIZATION
  message("The vectorised interactions took : %15lli ticks.", vec_time / runs);
  message("Speed up: %15fx.", (double)(serial_time) / vec_time);
#endif
}

/* And go... */
int main(int argc, char *argv[]) {
  size_t runs = 10000;
  double h = 1.0, spacing = 0.5;
  double offset[3] = {0.0, 0.0, 0.0};
437
  size_t count = 256;
438
439
440
441
442
443
444
445
446
447
448
449
450

  /* Get some randomness going */
  srand(0);

  char c;
  while ((c = getopt(argc, argv, "h:s:n:r:")) != -1) {
    switch (c) {
      case 'h':
        sscanf(optarg, "%lf", &h);
        break;
      case 's':
        sscanf(optarg, "%lf", &spacing);
      case 'n':
451
        sscanf(optarg, "%zu", &count);
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
        break;
      case 'r':
        sscanf(optarg, "%zu", &runs);
        break;
      case '?':
        error("Unknown option.");
        break;
    }
  }

  if (h < 0 || spacing < 0) {
    printf(
        "\nUsage: %s [OPTIONS...]\n"
        "\nGenerates a particle array with equal particle separation."
        "\nThese are then interacted using runner_iact_density and "
        "runner_iact_vec_density."
        "\n\nOptions:"
        "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>"
        "\n-s SPACING=0.5     - Spacing between particles"
        "\n-n NUMBER=9        - No. of particles",
        argv[0]);
    exit(1);
  }

  /* Correct count so that VEC_SIZE of particles interact with the test
   * particle. */
  count = count - (count % VEC_SIZE) + 1;

  /* Build the infrastructure */
  static long long partId = 0;
  struct part test_particle;
  struct part *particles = make_particles(count, offset, spacing, h, &partId);

#if defined(NONSYM_FORCE) || defined(SYM_FORCE)
  prepare_force(particles, count);
#endif

  test_particle = particles[0];
  /* Call the non-sym density test. */
  message("Testing %s interaction...", IACT_NAME);
  test_interactions(test_particle, &particles[1], count - 1, IACT_NAME, runs);

  return 0;
}