testInteractions.c 25.1 KB
Newer Older
James Willis's avatar
James Willis committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
19
#include "../config.h"
James Willis's avatar
James Willis committed
20

21
/* Some standard headers. */
James Willis's avatar
James Willis committed
22
23
24
25
26
#include <fenv.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
27
28

/* Local includes */
James Willis's avatar
James Willis committed
29
30
#include "swift.h"

31
32
33
/* Other schemes need to be added here if they are not vectorized, otherwise
 * this test will simply not compile. */

Matthieu Schaller's avatar
Matthieu Schaller committed
34
#if defined(GADGET2_SPH) && defined(WITH_VECTORIZATION)
35

36
37
38
#define array_align sizeof(float) * VEC_SIZE
#define ACC_THRESHOLD 1e-5

39
40
41
42
43
44
#ifndef IACT
#define IACT runner_iact_nonsym_density
#define IACT_VEC runner_iact_nonsym_1_vec_density
#define IACT_NAME "test_nonsym_density"
#define NUM_VEC_PROC_INT 1
#endif
James Willis's avatar
James Willis committed
45
46
47
48
49
50
51
52
53
54
55
56

/**
 * @brief Constructs an array of particles in a valid state prior to
 * a IACT_NONSYM and IACT_NONSYM_VEC call.
 *
 * @param count No. of particles to create
 * @param offset The position of the particle offset from (0,0,0).
 * @param spacing Particle spacing.
 * @param h The smoothing length of the particles in units of the inter-particle
 *separation.
 * @param partId The running counter of IDs.
 */
James Willis's avatar
James Willis committed
57
58
struct part *make_particles(size_t count, double *offset, double spacing,
                            double h, long long *partId) {
Matthieu Schaller's avatar
Matthieu Schaller committed
59

James Willis's avatar
James Willis committed
60
61
62
63
64
  struct part *particles;
  if (posix_memalign((void **)&particles, part_align,
                     count * sizeof(struct part)) != 0) {
    error("couldn't allocate particles, no. of particles: %d", (int)count);
  }
James Willis's avatar
James Willis committed
65
  bzero(particles, count * sizeof(struct part));
Matthieu Schaller's avatar
Matthieu Schaller committed
66

James Willis's avatar
James Willis committed
67
68
  /* Construct the particles */
  struct part *p;
69
70
71
72
73
74
75
76
77
78
79

  /* Set test particle at centre of unit sphere. */
  p = &particles[0];

  /* Place the test particle at the centre of a unit sphere. */
  p->x[0] = 0.0f;
  p->x[1] = 0.0f;
  p->x[2] = 0.0f;

  p->h = h;
  p->id = ++(*partId);
80

81
#if !defined(GIZMO_MFV_SPH) && !defined(SHADOWFAX_SPH)
82
  p->mass = 1.0f;
83
#endif
84
85
86
87

  /* Place rest of particles around the test particle
   * with random position within a unit sphere. */
  for (size_t i = 1; i < count; ++i) {
James Willis's avatar
James Willis committed
88
    p = &particles[i];
89
90
91
92
93

    /* Randomise positions within a unit sphere. */
    p->x[0] = random_uniform(-1.0, 1.0);
    p->x[1] = random_uniform(-1.0, 1.0);
    p->x[2] = random_uniform(-1.0, 1.0);
James Willis's avatar
James Willis committed
94
95
96
97
98

    /* Randomise velocities. */
    p->v[0] = random_uniform(-0.05, 0.05);
    p->v[1] = random_uniform(-0.05, 0.05);
    p->v[2] = random_uniform(-0.05, 0.05);
Matthieu Schaller's avatar
Matthieu Schaller committed
99

James Willis's avatar
James Willis committed
100
101
    p->h = h;
    p->id = ++(*partId);
102
#if !defined(GIZMO_SPH) && !defined(SHADOWFAX_SPH)
James Willis's avatar
James Willis committed
103
    p->mass = 1.0f;
104
#endif
James Willis's avatar
James Willis committed
105
106
107
108
109
110
111
  }
  return particles;
}

/**
 * @brief Populates particle properties needed for the force calculation.
 */
112
void prepare_force(struct part *parts, size_t count) {
Matthieu Schaller's avatar
Matthieu Schaller committed
113

Josh Borrow's avatar
Josh Borrow committed
114
115
116
#if !defined(GIZMO_MFV_SPH) && !defined(SHADOWFAX_SPH) &&            \
    !defined(MINIMAL_SPH) && !defined(PLANETARY_SPH) &&              \
    !defined(HOPKINS_PU_SPH) && !defined(HOPKINS_PU_SPH_MONAGHAN) && \
Josh Borrow's avatar
Josh Borrow committed
117
    !defined(ANARCHY_PU_SPH) && !defined(SPHENIX_SPH) && !defined(DEFAULT_SPH)
James Willis's avatar
James Willis committed
118
  struct part *p;
119
  for (size_t i = 0; i < count; ++i) {
James Willis's avatar
James Willis committed
120
121
    p = &parts[i];
    p->rho = i + 1;
122
    p->force.balsara = random_uniform(0.0, 1.0);
James Willis's avatar
James Willis committed
123
    p->force.P_over_rho2 = i + 1;
124
125
126
    p->force.soundspeed = random_uniform(2.0, 3.0);
    p->force.v_sig = 0.0f;
    p->force.h_dt = 0.0f;
James Willis's avatar
James Willis committed
127
  }
128
#endif
James Willis's avatar
James Willis committed
129
130
131
132
133
134
135
136
}

/**
 * @brief Dumps all particle information to a file
 */
void dump_indv_particle_fields(char *fileName, struct part *p) {

  FILE *file = fopen(fileName, "a");
Matthieu Schaller's avatar
Matthieu Schaller committed
137
138

  fprintf(file,
James Willis's avatar
James Willis committed
139
140
141
142
          "%6llu %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f "
          "%8.5f "
          "%8.5f %8.5f %13e %13e %13e %13e %13e %8.5f %8.5f\n",
          p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->h,
143
          hydro_get_comoving_density(p),
144
145
#if defined(MINIMAL_SPH) || defined(PLANETARY_SPH) || \
    defined(SHADOWFAX_SPH) || defined(DEFAULT_SPH)
James Willis's avatar
James Willis committed
146
          0.f,
James Willis's avatar
James Willis committed
147
#else
James Willis's avatar
James Willis committed
148
          p->density.div_v,
149
#endif
150
151
          hydro_get_drifted_comoving_entropy(p),
          hydro_get_drifted_comoving_internal_energy(p),
152
153
          hydro_get_comoving_pressure(p), hydro_get_comoving_soundspeed(p),
          p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->force.h_dt,
James Willis's avatar
James Willis committed
154
#if defined(GADGET2_SPH)
James Willis's avatar
James Willis committed
155
          p->force.v_sig, p->entropy_dt, 0.f
James Willis's avatar
James Willis committed
156
#elif defined(DEFAULT_SPH)
James Willis's avatar
James Willis committed
157
          p->force.v_sig, 0.f, p->force.u_dt
158
159
#elif defined(MINIMAL_SPH) || defined(HOPKINS_PU_SPH) ||           \
    defined(HOPKINS_PU_SPH_MONAGHAN) || defined(ANARCHY_PU_SPH) || \
Josh Borrow's avatar
Josh Borrow committed
160
    defined(SPHENIX_SPH) || defined(DEFAULT_SPH)
James Willis's avatar
James Willis committed
161
          p->force.v_sig, 0.f, p->u_dt
162
#else
James Willis's avatar
James Willis committed
163
          0.f, 0.f, 0.f
James Willis's avatar
James Willis committed
164
#endif
165
  );
166

James Willis's avatar
James Willis committed
167
168
169
170
171
172
173
174
175
  fclose(file);
}

/**
 * @brief Creates a header for the output file
 */
void write_header(char *fileName) {

  FILE *file = fopen(fileName, "w");
Matthieu Schaller's avatar
Matthieu Schaller committed
176
177
  /* Write header */
  fprintf(file,
James Willis's avatar
James Willis committed
178
179
180
181
182
          "# %4s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %13s %13s "
          "%13s %13s %13s %8s %8s\n",
          "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "h", "rho",
          "div_v", "S", "u", "P", "c", "a_x", "a_y", "a_z", "h_dt", "v_sig",
          "dS/dt", "du/dt");
James Willis's avatar
James Willis committed
183

Matthieu Schaller's avatar
Matthieu Schaller committed
184
  fclose(file);
James Willis's avatar
James Willis committed
185
186
}

187
/**
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
 * @brief Compares the vectorised result against
 * the serial result of the interaction.
 *
 * @param serial_test_part Particle that has been updated serially
 * @param serial_parts Particle array that has been interacted serially
 * @param vec_test_part Particle that has been updated using vectors
 * @param vec_parts Particle array to be interacted using vectors
 * @param count No. of particles that have been interacted
 *
 * @return Non-zero value if difference found, 0 otherwise
 */
int check_results(struct part serial_test_part, struct part *serial_parts,
                  struct part vec_test_part, struct part *vec_parts,
                  int count) {
  int result = 0;
203
  result += compare_particles(&serial_test_part, &vec_test_part, ACC_THRESHOLD);
204
205

  for (int i = 0; i < count; i++)
206
    result += compare_particles(&serial_parts[i], &vec_parts[i], ACC_THRESHOLD);
207
208
209
210
211

  return result;
}

/*
212
213
 * @brief Calls the serial and vectorised version of the non-symmetrical density
 * interaction.
James Willis's avatar
James Willis committed
214
 *
215
 * @param test_part Particle that will be updated
James Willis's avatar
James Willis committed
216
217
 * @param parts Particle array to be interacted
 * @param count No. of particles to be interacted
218
219
220
 * @param serial_inter_func Serial interaction function to be called
 * @param vec_inter_func Vectorised interaction function to be called
 * @param runs No. of times to call interactions
221
 * @param num_vec_proc No. of vectors to use to process interaction
Matthieu Schaller's avatar
Matthieu Schaller committed
222
 *
James Willis's avatar
James Willis committed
223
 */
224
void test_interactions(struct part test_part, struct part *parts, size_t count,
225
                       char *filePrefix, int runs, int num_vec_proc) {
Matthieu Schaller's avatar
Matthieu Schaller committed
226

227
228
  ticks serial_time = 0;
  ticks vec_time = 0;
James Willis's avatar
James Willis committed
229

230
231
232
  const float a = 1.f;
  const float H = 0.f;

James Willis's avatar
James Willis committed
233
234
235
  char serial_filename[200] = "";
  char vec_filename[200] = "";

Matthieu Schaller's avatar
Matthieu Schaller committed
236
237
  strcpy(serial_filename, filePrefix);
  strcpy(vec_filename, filePrefix);
James Willis's avatar
James Willis committed
238
  sprintf(serial_filename + strlen(serial_filename), "_serial.dat");
James Willis's avatar
James Willis committed
239
  sprintf(vec_filename + strlen(vec_filename), "_%d_vec.dat", num_vec_proc);
James Willis's avatar
James Willis committed
240
241

  write_header(serial_filename);
Matthieu Schaller's avatar
Matthieu Schaller committed
242
  write_header(vec_filename);
James Willis's avatar
James Willis committed
243

244
245
246
  struct part pi_serial, pi_vec;
  struct part pj_serial[count], pj_vec[count];

247
248
249
250
251
252
253
254
255
  float r2[count] __attribute__((aligned(array_align)));
  float dx[3 * count] __attribute__((aligned(array_align)));

  struct part *piq[count], *pjq[count];
  for (size_t k = 0; k < count; k++) {
    piq[k] = NULL;
    pjq[k] = NULL;
  }

256
257
  float r2q[count] __attribute__((aligned(array_align)));
  float hiq[count] __attribute__((aligned(array_align)));
258
259
260
261
262
263
264
265
266
267
268
  float dxq[count] __attribute__((aligned(array_align)));

  float dyq[count] __attribute__((aligned(array_align)));
  float dzq[count] __attribute__((aligned(array_align)));
  float mjq[count] __attribute__((aligned(array_align)));
  float vixq[count] __attribute__((aligned(array_align)));
  float viyq[count] __attribute__((aligned(array_align)));
  float vizq[count] __attribute__((aligned(array_align)));
  float vjxq[count] __attribute__((aligned(array_align)));
  float vjyq[count] __attribute__((aligned(array_align)));
  float vjzq[count] __attribute__((aligned(array_align)));
269
270

  /* Call serial interaction a set number of times. */
271
  for (int r = 0; r < runs; r++) {
272
273
    /* Reset particle to initial setup */
    pi_serial = test_part;
274
    for (size_t i = 0; i < count; i++) pj_serial[i] = parts[i];
275
276

    /* Perform serial interaction */
277
    for (size_t i = 0; i < count; i++) {
278
      /* Compute the pairwise distance. */
279
280
281
282
283
      r2[i] = 0.0f;
      for (int k = 0; k < 3; k++) {
        int ind = (3 * i) + k;
        dx[ind] = pi_serial.x[k] - pj_serial[i].x[k];
        r2[i] += dx[ind] * dx[ind];
284
      }
285
    }
286

287
288
289
290
291
292
293
    const ticks tic = getticks();
/* Perform serial interaction */
#ifdef __ICC
#pragma novector
#endif
    for (size_t i = 0; i < count; i++) {
      IACT(r2[i], &(dx[3 * i]), pi_serial.h, pj_serial[i].h, &pi_serial,
294
           &pj_serial[i], a, H);
295
    }
296
    serial_time += getticks() - tic;
Matthieu Schaller's avatar
Matthieu Schaller committed
297
  }
James Willis's avatar
James Willis committed
298
299

  /* Dump result of serial interaction. */
300
301
302
303
304
  dump_indv_particle_fields(serial_filename, &pi_serial);
  for (size_t i = 0; i < count; i++)
    dump_indv_particle_fields(serial_filename, &pj_serial[i]);

  /* Call vector interaction a set number of times. */
305
  for (int r = 0; r < runs; r++) {
306
307
    /* Reset particle to initial setup */
    pi_vec = test_part;
308
    for (size_t i = 0; i < count; i++) pj_vec[i] = parts[i];
309
310

    /* Setup arrays for vector interaction. */
311
    for (size_t i = 0; i < count; i++) {
312
      /* Compute the pairwise distance. */
313
314
      float my_r2 = 0.0f;
      float my_dx[3];
315
      for (int k = 0; k < 3; k++) {
316
317
        my_dx[k] = pi_vec.x[k] - pj_vec[i].x[k];
        my_r2 += my_dx[k] * my_dx[k];
318
319
      }

320
321
322
323
324
      r2q[i] = my_r2;
      dxq[i] = my_dx[0];
      dyq[i] = my_dx[1];
      dzq[i] = my_dx[2];

325
326
327
      hiq[i] = pi_vec.h;
      piq[i] = &pi_vec;
      pjq[i] = &pj_vec[i];
James Willis's avatar
James Willis committed
328

329
330
331
332
333
334
335
      mjq[i] = pj_vec[i].mass;
      vixq[i] = pi_vec.v[0];
      viyq[i] = pi_vec.v[1];
      vizq[i] = pi_vec.v[2];
      vjxq[i] = pj_vec[i].v[0];
      vjyq[i] = pj_vec[i].v[1];
      vjzq[i] = pj_vec[i].v[2];
336
    }
Matthieu Schaller's avatar
Matthieu Schaller committed
337

James Willis's avatar
James Willis committed
338
    /* Perform vector interaction. */
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
    vector hi_vec, hi_inv_vec, vix_vec, viy_vec, viz_vec;
    vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum,
        curlvySum, curlvzSum;
    mask_t mask, mask2;

    rhoSum.v = vec_set1(0.f);
    rho_dhSum.v = vec_set1(0.f);
    wcountSum.v = vec_set1(0.f);
    wcount_dhSum.v = vec_set1(0.f);
    div_vSum.v = vec_set1(0.f);
    curlvxSum.v = vec_set1(0.f);
    curlvySum.v = vec_set1(0.f);
    curlvzSum.v = vec_set1(0.f);

    hi_vec.v = vec_load(&hiq[0]);
    vix_vec.v = vec_load(&vixq[0]);
    viy_vec.v = vec_load(&viyq[0]);
    viz_vec.v = vec_load(&vizq[0]);

    hi_inv_vec = vec_reciprocal(hi_vec);
James Willis's avatar
James Willis committed
359
360
    vec_init_mask_true(mask);
    vec_init_mask_true(mask2);
361

362
363
    const ticks vec_tic = getticks();

364
365
366
367
368
369
370
371
372
373
374
    for (size_t i = 0; i < count; i += num_vec_proc * VEC_SIZE) {

      /* Interleave two vectors for interaction. */
      if (num_vec_proc == 2) {
        runner_iact_nonsym_2_vec_density(
            &(r2q[i]), &(dxq[i]), &(dyq[i]), &(dzq[i]), (hi_inv_vec), (vix_vec),
            (viy_vec), (viz_vec), &(vjxq[i]), &(vjyq[i]), &(vjzq[i]), &(mjq[i]),
            &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum, &div_vSum,
            &curlvxSum, &curlvySum, &curlvzSum, mask, mask2, 0);
      } else { /* Only use one vector for interaction. */

375
376
377
378
379
        vector my_r2, my_dx, my_dy, my_dz;
        my_r2.v = vec_load(&(r2q[i]));
        my_dx.v = vec_load(&(dxq[i]));
        my_dy.v = vec_load(&(dyq[i]));
        my_dz.v = vec_load(&(dzq[i]));
380
381

        runner_iact_nonsym_1_vec_density(
382
383
384
385
            &my_r2, &my_dx, &my_dy, &my_dz, (hi_inv_vec), (vix_vec), (viy_vec),
            (viz_vec), &(vjxq[i]), &(vjyq[i]), &(vjzq[i]), &(mjq[i]), &rhoSum,
            &rho_dhSum, &wcountSum, &wcount_dhSum, &div_vSum, &curlvxSum,
            &curlvySum, &curlvzSum, mask);
386
      }
387
388
    }

389
390
391
392
393
394
395
396
397
    VEC_HADD(rhoSum, piq[0]->rho);
    VEC_HADD(rho_dhSum, piq[0]->density.rho_dh);
    VEC_HADD(wcountSum, piq[0]->density.wcount);
    VEC_HADD(wcount_dhSum, piq[0]->density.wcount_dh);
    VEC_HADD(div_vSum, piq[0]->density.div_v);
    VEC_HADD(curlvxSum, piq[0]->density.rot_v[0]);
    VEC_HADD(curlvySum, piq[0]->density.rot_v[1]);
    VEC_HADD(curlvzSum, piq[0]->density.rot_v[2]);

398
399
    vec_time += getticks() - vec_tic;
  }
Matthieu Schaller's avatar
Matthieu Schaller committed
400

401
  /* Dump result of serial interaction. */
Matthieu Schaller's avatar
Matthieu Schaller committed
402
  dump_indv_particle_fields(vec_filename, piq[0]);
403
  for (size_t i = 0; i < count; i++)
Matthieu Schaller's avatar
Matthieu Schaller committed
404
    dump_indv_particle_fields(vec_filename, pjq[i]);
James Willis's avatar
James Willis committed
405

406
407
408
409
410
411
412
  /* Check serial results against the vectorised results. */
  if (check_results(pi_serial, pj_serial, pi_vec, pj_vec, count))
    message("Differences found...");

  message("The serial interactions took     : %15lli ticks.",
          serial_time / runs);
  message("The vectorised interactions took : %15lli ticks.", vec_time / runs);
413
  message("Speed up: %15fx.", (double)(serial_time) / vec_time);
James Willis's avatar
James Willis committed
414
415
}

James Willis's avatar
James Willis committed
416
/*
James Willis's avatar
James Willis committed
417
 * @brief Calls the serial and vectorised version of the non-symmetrical force
James Willis's avatar
James Willis committed
418
419
420
421
422
423
424
425
426
427
 * interaction.
 *
 * @param test_part Particle that will be updated
 * @param parts Particle array to be interacted
 * @param count No. of particles to be interacted
 * @param serial_inter_func Serial interaction function to be called
 * @param vec_inter_func Vectorised interaction function to be called
 * @param runs No. of times to call interactions
 *
 */
James Willis's avatar
James Willis committed
428
429
430
void test_force_interactions(struct part test_part, struct part *parts,
                             size_t count, char *filePrefix, int runs,
                             int num_vec_proc) {
James Willis's avatar
James Willis committed
431
432
433
434
435
436
437
438

  ticks serial_time = 0;
  ticks vec_time = 0;

  FILE *file;
  char serial_filename[200] = "";
  char vec_filename[200] = "";

439
440
441
  const float a = 1.f;
  const float H = 0.f;

James Willis's avatar
James Willis committed
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
  strcpy(serial_filename, filePrefix);
  strcpy(vec_filename, filePrefix);
  sprintf(serial_filename + strlen(serial_filename), "_serial.dat");
  sprintf(vec_filename + strlen(vec_filename), "_%d_vec.dat", num_vec_proc);

  write_header(serial_filename);
  write_header(vec_filename);

  struct part pi_serial, pi_vec;
  struct part pj_serial[count], pj_vec[count];

  float r2[count] __attribute__((aligned(array_align)));
  float dx[3 * count] __attribute__((aligned(array_align)));

  struct part *piq[count], *pjq[count];
  for (size_t k = 0; k < count; k++) {
    piq[k] = NULL;
    pjq[k] = NULL;
  }

  float r2q[count] __attribute__((aligned(array_align)));
  float dxq[count] __attribute__((aligned(array_align)));
  float dyq[count] __attribute__((aligned(array_align)));
  float dzq[count] __attribute__((aligned(array_align)));
James Willis's avatar
James Willis committed
466

James Willis's avatar
James Willis committed
467
468
469
470
471
472
473
474
475
  float hiq[count] __attribute__((aligned(array_align)));
  float vixq[count] __attribute__((aligned(array_align)));
  float viyq[count] __attribute__((aligned(array_align)));
  float vizq[count] __attribute__((aligned(array_align)));
  float rhoiq[count] __attribute__((aligned(array_align)));
  float grad_hiq[count] __attribute__((aligned(array_align)));
  float pOrhoi2q[count] __attribute__((aligned(array_align)));
  float balsaraiq[count] __attribute__((aligned(array_align)));
  float ciq[count] __attribute__((aligned(array_align)));
James Willis's avatar
James Willis committed
476

James Willis's avatar
James Willis committed
477
478
479
480
481
482
483
484
485
486
487
488
  float hj_invq[count] __attribute__((aligned(array_align)));
  float mjq[count] __attribute__((aligned(array_align)));
  float vjxq[count] __attribute__((aligned(array_align)));
  float vjyq[count] __attribute__((aligned(array_align)));
  float vjzq[count] __attribute__((aligned(array_align)));
  float rhojq[count] __attribute__((aligned(array_align)));
  float grad_hjq[count] __attribute__((aligned(array_align)));
  float pOrhoj2q[count] __attribute__((aligned(array_align)));
  float balsarajq[count] __attribute__((aligned(array_align)));
  float cjq[count] __attribute__((aligned(array_align)));

  /* Call serial interaction a set number of times. */
489
  for (int r = 0; r < runs; r++) {
James Willis's avatar
James Willis committed
490
491
492
493
494
    /* Reset particle to initial setup */
    pi_serial = test_part;
    for (size_t i = 0; i < count; i++) pj_serial[i] = parts[i];

    /* Only dump data on first run. */
495
    if (r == 0) {
James Willis's avatar
James Willis committed
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
      /* Dump state of particles before serial interaction. */
      dump_indv_particle_fields(serial_filename, &pi_serial);
      for (size_t i = 0; i < count; i++)
        dump_indv_particle_fields(serial_filename, &pj_serial[i]);
    }

    /* Perform serial interaction */
    for (size_t i = 0; i < count; i++) {
      /* Compute the pairwise distance. */
      r2[i] = 0.0f;
      for (int k = 0; k < 3; k++) {
        int ind = (3 * i) + k;
        dx[ind] = pi_serial.x[k] - pj_serial[i].x[k];
        r2[i] += dx[ind] * dx[ind];
      }
    }

    const ticks tic = getticks();
/* Perform serial interaction */
#ifdef __ICC
#pragma novector
#endif
    for (size_t i = 0; i < count; i++) {
James Willis's avatar
James Willis committed
519
      runner_iact_nonsym_force(r2[i], &(dx[3 * i]), pi_serial.h, pj_serial[i].h,
520
                               &pi_serial, &pj_serial[i], a, H);
James Willis's avatar
James Willis committed
521
522
523
524
525
526
527
528
529
530
531
532
533
534
    }
    serial_time += getticks() - tic;
  }

  file = fopen(serial_filename, "a");
  fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n");
  fclose(file);

  /* Dump result of serial interaction. */
  dump_indv_particle_fields(serial_filename, &pi_serial);
  for (size_t i = 0; i < count; i++)
    dump_indv_particle_fields(serial_filename, &pj_serial[i]);

  /* Call vector interaction a set number of times. */
535
  for (int r = 0; r < runs; r++) {
James Willis's avatar
James Willis committed
536
537
538
539
540
541
542
    /* Reset particle to initial setup */
    pi_vec = test_part;
    for (size_t i = 0; i < count; i++) pj_vec[i] = parts[i];

    /* Setup arrays for vector interaction. */
    for (size_t i = 0; i < count; i++) {
      /* Compute the pairwise distance. */
543
544
      float my_r2 = 0.0f;
      float my_dx[3];
James Willis's avatar
James Willis committed
545
      for (int k = 0; k < 3; k++) {
546
547
        my_dx[k] = pi_vec.x[k] - pj_vec[i].x[k];
        my_r2 += my_dx[k] * my_dx[k];
James Willis's avatar
James Willis committed
548
549
550
551
552
      }

      piq[i] = &pi_vec;
      pjq[i] = &pj_vec[i];

553
554
555
556
      r2q[i] = my_r2;
      dxq[i] = my_dx[0];
      dyq[i] = my_dx[1];
      dzq[i] = my_dx[2];
James Willis's avatar
James Willis committed
557

James Willis's avatar
James Willis committed
558
559
560
561
562
563
      hiq[i] = pi_vec.h;
      vixq[i] = pi_vec.v[0];
      viyq[i] = pi_vec.v[1];
      vizq[i] = pi_vec.v[2];
      rhoiq[i] = pi_vec.rho;
      grad_hiq[i] = pi_vec.force.f;
Josh Borrow's avatar
Josh Borrow committed
564
#if !defined(HOPKINS_PU_SPH) && !defined(HOPKINS_PU_SPH_MONAGHAN) && \
Josh Borrow's avatar
Josh Borrow committed
565
    !defined(ANARCHY_PU_SPH) && !defined(SPHENIX_SPH)
James Willis's avatar
James Willis committed
566
      pOrhoi2q[i] = pi_vec.force.P_over_rho2;
567
#endif
James Willis's avatar
James Willis committed
568
569
      balsaraiq[i] = pi_vec.force.balsara;
      ciq[i] = pi_vec.force.soundspeed;
James Willis's avatar
James Willis committed
570

James Willis's avatar
James Willis committed
571
572
573
574
575
576
577
      hj_invq[i] = 1.f / pj_vec[i].h;
      mjq[i] = pj_vec[i].mass;
      vjxq[i] = pj_vec[i].v[0];
      vjyq[i] = pj_vec[i].v[1];
      vjzq[i] = pj_vec[i].v[2];
      rhojq[i] = pj_vec[i].rho;
      grad_hjq[i] = pj_vec[i].force.f;
Josh Borrow's avatar
Josh Borrow committed
578
#if !defined(HOPKINS_PU_SPH) && !defined(HOPKINS_PU_SPH_MONAGHAN) && \
Josh Borrow's avatar
Josh Borrow committed
579
    !defined(ANARCHY_PU_SPH) && !defined(SPHENIX_SPH)
James Willis's avatar
James Willis committed
580
      pOrhoj2q[i] = pj_vec[i].force.P_over_rho2;
581
#endif
James Willis's avatar
James Willis committed
582
583
584
585
586
      balsarajq[i] = pj_vec[i].force.balsara;
      cjq[i] = pj_vec[i].force.soundspeed;
    }

    /* Only dump data on first run. */
587
    if (r == 0) {
James Willis's avatar
James Willis committed
588
589
590
591
592
593
      /* Dump state of particles before vector interaction. */
      dump_indv_particle_fields(vec_filename, piq[0]);
      for (size_t i = 0; i < count; i++)
        dump_indv_particle_fields(vec_filename, pjq[i]);
    }

James Willis's avatar
James Willis committed
594
595
596
597
598
    /* Perform vector interaction. */
    vector hi_vec, hi_inv_vec, vix_vec, viy_vec, viz_vec, rhoi_vec, grad_hi_vec,
        pOrhoi2_vec, balsara_i_vec, ci_vec;
    vector a_hydro_xSum, a_hydro_ySum, a_hydro_zSum, h_dtSum, v_sigSum,
        entropy_dtSum;
James Willis's avatar
James Willis committed
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617

    a_hydro_xSum.v = vec_setzero();
    a_hydro_ySum.v = vec_setzero();
    a_hydro_zSum.v = vec_setzero();
    h_dtSum.v = vec_setzero();
    v_sigSum.v = vec_setzero();
    entropy_dtSum.v = vec_setzero();

    hi_vec.v = vec_load(&hiq[0]);
    vix_vec.v = vec_load(&vixq[0]);
    viy_vec.v = vec_load(&viyq[0]);
    viz_vec.v = vec_load(&vizq[0]);
    rhoi_vec.v = vec_load(&rhoiq[0]);
    grad_hi_vec.v = vec_load(&grad_hiq[0]);
    pOrhoi2_vec.v = vec_load(&pOrhoi2q[0]);
    balsara_i_vec.v = vec_load(&balsaraiq[0]);
    ci_vec.v = vec_load(&ciq[0]);

    hi_inv_vec = vec_reciprocal(hi_vec);
James Willis's avatar
James Willis committed
618

James Willis's avatar
James Willis committed
619
620
621
    mask_t mask, mask2;
    vec_init_mask_true(mask);
    vec_init_mask_true(mask2);
James Willis's avatar
James Willis committed
622

James Willis's avatar
James Willis committed
623
624
625
626
627
    const ticks vec_tic = getticks();

    for (size_t i = 0; i < count; i += num_vec_proc * VEC_SIZE) {

      if (num_vec_proc == 2) {
James Willis's avatar
James Willis committed
628
629
630
631
632
        runner_iact_nonsym_2_vec_force(
            &(r2q[i]), &(dxq[i]), &(dyq[i]), &(dzq[i]), (vix_vec), (viy_vec),
            (viz_vec), rhoi_vec, grad_hi_vec, pOrhoi2_vec, balsara_i_vec,
            ci_vec, &(vjxq[i]), &(vjyq[i]), &(vjzq[i]), &(rhojq[i]),
            &(grad_hjq[i]), &(pOrhoj2q[i]), &(balsarajq[i]), &(cjq[i]),
633
634
635
            &(mjq[i]), hi_inv_vec, &(hj_invq[i]), a, H, &a_hydro_xSum,
            &a_hydro_ySum, &a_hydro_zSum, &h_dtSum, &v_sigSum, &entropy_dtSum,
            mask, mask2, 0);
James Willis's avatar
James Willis committed
636
637
      } else { /* Only use one vector for interaction. */

638
639
640
641
642
        vector my_r2, my_dx, my_dy, my_dz, hj, hj_inv;
        my_r2.v = vec_load(&(r2q[i]));
        my_dx.v = vec_load(&(dxq[i]));
        my_dy.v = vec_load(&(dyq[i]));
        my_dz.v = vec_load(&(dzq[i]));
643
644
        hj.v = vec_load(&hj_invq[i]);
        hj_inv = vec_reciprocal(hj);
James Willis's avatar
James Willis committed
645

James Willis's avatar
James Willis committed
646
        runner_iact_nonsym_1_vec_force(
647
            &my_r2, &my_dx, &my_dy, &my_dz, vix_vec, viy_vec, viz_vec, rhoi_vec,
James Willis's avatar
James Willis committed
648
649
            grad_hi_vec, pOrhoi2_vec, balsara_i_vec, ci_vec, &(vjxq[i]),
            &(vjyq[i]), &(vjzq[i]), &(rhojq[i]), &(grad_hjq[i]), &(pOrhoj2q[i]),
650
            &(balsarajq[i]), &(cjq[i]), &(mjq[i]), hi_inv_vec, hj_inv, a, H,
James Willis's avatar
James Willis committed
651
652
            &a_hydro_xSum, &a_hydro_ySum, &a_hydro_zSum, &h_dtSum, &v_sigSum,
            &entropy_dtSum, mask);
James Willis's avatar
James Willis committed
653
654
655
656
657
658
659
660
      }
    }

    VEC_HADD(a_hydro_xSum, piq[0]->a_hydro[0]);
    VEC_HADD(a_hydro_ySum, piq[0]->a_hydro[1]);
    VEC_HADD(a_hydro_zSum, piq[0]->a_hydro[2]);
    VEC_HADD(h_dtSum, piq[0]->force.h_dt);
    VEC_HMAX(v_sigSum, piq[0]->force.v_sig);
Josh Borrow's avatar
Josh Borrow committed
661
#if !defined(HOPKINS_PU_SPH) && !defined(HOPKINS_PU_SPH_MONAGHAN) && \
Josh Borrow's avatar
Josh Borrow committed
662
    !defined(ANARCHY_PU_SPH) && !defined(SPHENIX_SPH)
James Willis's avatar
James Willis committed
663
    VEC_HADD(entropy_dtSum, piq[0]->entropy_dt);
664
#endif
James Willis's avatar
James Willis committed
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687

    vec_time += getticks() - vec_tic;
  }

  file = fopen(vec_filename, "a");
  fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n");
  fclose(file);

  /* Dump result of serial interaction. */
  dump_indv_particle_fields(vec_filename, piq[0]);
  for (size_t i = 0; i < count; i++)
    dump_indv_particle_fields(vec_filename, pjq[i]);

  /* Check serial results against the vectorised results. */
  if (check_results(pi_serial, pj_serial, pi_vec, pj_vec, count))
    message("Differences found...");

  message("The serial interactions took     : %15lli ticks.",
          serial_time / runs);
  message("The vectorised interactions took : %15lli ticks.", vec_time / runs);
  message("Speed up: %15fx.", (double)(serial_time) / vec_time);
}

James Willis's avatar
James Willis committed
688
689
/* And go... */
int main(int argc, char *argv[]) {
690
691
  size_t runs = 10000;
  double h = 1.0, spacing = 0.5;
Matthieu Schaller's avatar
Matthieu Schaller committed
692
  double offset[3] = {0.0, 0.0, 0.0};
693
  size_t count = 256;
James Willis's avatar
James Willis committed
694
695
696
697
698

  /* Get some randomness going */
  srand(0);

  char c;
699
  while ((c = getopt(argc, argv, "h:s:n:r:")) != -1) {
James Willis's avatar
James Willis committed
700
701
702
703
704
705
    switch (c) {
      case 'h':
        sscanf(optarg, "%lf", &h);
        break;
      case 's':
        sscanf(optarg, "%lf", &spacing);
706
        break;
707
      case 'n':
708
        sscanf(optarg, "%zu", &count);
709
710
711
        break;
      case 'r':
        sscanf(optarg, "%zu", &runs);
James Willis's avatar
James Willis committed
712
713
714
715
716
717
718
719
720
721
722
        break;
      case '?':
        error("Unknown option.");
        break;
    }
  }

  if (h < 0 || spacing < 0) {
    printf(
        "\nUsage: %s [OPTIONS...]\n"
        "\nGenerates a particle array with equal particle separation."
Matthieu Schaller's avatar
Matthieu Schaller committed
723
724
        "\nThese are then interacted using runner_iact_density and "
        "runner_iact_vec_density."
James Willis's avatar
James Willis committed
725
726
        "\n\nOptions:"
        "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>"
727
728
        "\n-s SPACING=0.5     - Spacing between particles"
        "\n-n NUMBER=9        - No. of particles",
James Willis's avatar
James Willis committed
729
730
731
732
        argv[0]);
    exit(1);
  }

733
734
735
736
  /* Correct count so that VEC_SIZE of particles interact with the test
   * particle. */
  count = count - (count % VEC_SIZE) + 1;

James Willis's avatar
James Willis committed
737
738
  /* Build the infrastructure */
  static long long partId = 0;
739
740
741
742
  struct part test_particle;
  struct part *particles = make_particles(count, offset, spacing, h, &partId);

  test_particle = particles[0];
James Willis's avatar
James Willis committed
743
  /* Call the non-sym density test. */
744
745
746
747
748
  message("Testing %s interaction...", IACT_NAME);
  test_interactions(test_particle, &particles[1], count - 1, IACT_NAME, runs,
                    1);
  test_interactions(test_particle, &particles[1], count - 1, IACT_NAME, runs,
                    2);
James Willis's avatar
James Willis committed
749

James Willis's avatar
James Willis committed
750
751
  prepare_force(particles, count);

James Willis's avatar
James Willis committed
752
753
754
755
756
  test_force_interactions(test_particle, &particles[1], count - 1,
                          "test_nonsym_force", runs, 1);
  test_force_interactions(test_particle, &particles[1], count - 1,
                          "test_nonsym_force", runs, 2);

James Willis's avatar
James Willis committed
757
758
  return 0;
}
759
760
761

#else

762
int main(int argc, char *argv[]) { return 1; }
763
764

#endif