testInteractions.c 24 KB
Newer Older
James Willis's avatar
James Willis committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

#include <fenv.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "swift.h"

27
28
#ifdef WITH_VECTORIZATION

29
30
31
#define array_align sizeof(float) * VEC_SIZE
#define ACC_THRESHOLD 1e-5

32
33
34
35
36
37
#ifndef IACT
#define IACT runner_iact_nonsym_density
#define IACT_VEC runner_iact_nonsym_1_vec_density
#define IACT_NAME "test_nonsym_density"
#define NUM_VEC_PROC_INT 1
#endif
James Willis's avatar
James Willis committed
38
39
40
41
42
43
44
45
46
47
48
49

/**
 * @brief Constructs an array of particles in a valid state prior to
 * a IACT_NONSYM and IACT_NONSYM_VEC call.
 *
 * @param count No. of particles to create
 * @param offset The position of the particle offset from (0,0,0).
 * @param spacing Particle spacing.
 * @param h The smoothing length of the particles in units of the inter-particle
 *separation.
 * @param partId The running counter of IDs.
 */
James Willis's avatar
James Willis committed
50
51
struct part *make_particles(size_t count, double *offset, double spacing,
                            double h, long long *partId) {
Matthieu Schaller's avatar
Matthieu Schaller committed
52

James Willis's avatar
James Willis committed
53
54
55
56
57
  struct part *particles;
  if (posix_memalign((void **)&particles, part_align,
                     count * sizeof(struct part)) != 0) {
    error("couldn't allocate particles, no. of particles: %d", (int)count);
  }
James Willis's avatar
James Willis committed
58
  bzero(particles, count * sizeof(struct part));
Matthieu Schaller's avatar
Matthieu Schaller committed
59

James Willis's avatar
James Willis committed
60
61
  /* Construct the particles */
  struct part *p;
62
63
64
65
66
67
68
69
70
71
72

  /* Set test particle at centre of unit sphere. */
  p = &particles[0];

  /* Place the test particle at the centre of a unit sphere. */
  p->x[0] = 0.0f;
  p->x[1] = 0.0f;
  p->x[2] = 0.0f;

  p->h = h;
  p->id = ++(*partId);
73
74

#if !defined(GIZMO_SPH) && !defined(SHADOWFAX_SPH)
75
  p->mass = 1.0f;
76
#endif
77
78
79
80

  /* Place rest of particles around the test particle
   * with random position within a unit sphere. */
  for (size_t i = 1; i < count; ++i) {
James Willis's avatar
James Willis committed
81
    p = &particles[i];
82
83
84
85
86

    /* Randomise positions within a unit sphere. */
    p->x[0] = random_uniform(-1.0, 1.0);
    p->x[1] = random_uniform(-1.0, 1.0);
    p->x[2] = random_uniform(-1.0, 1.0);
James Willis's avatar
James Willis committed
87
88
89
90
91

    /* Randomise velocities. */
    p->v[0] = random_uniform(-0.05, 0.05);
    p->v[1] = random_uniform(-0.05, 0.05);
    p->v[2] = random_uniform(-0.05, 0.05);
Matthieu Schaller's avatar
Matthieu Schaller committed
92

James Willis's avatar
James Willis committed
93
94
    p->h = h;
    p->id = ++(*partId);
95
#if !defined(GIZMO_SPH) && !defined(SHADOWFAX_SPH)
James Willis's avatar
James Willis committed
96
    p->mass = 1.0f;
97
#endif
James Willis's avatar
James Willis committed
98
99
100
101
102
103
104
  }
  return particles;
}

/**
 * @brief Populates particle properties needed for the force calculation.
 */
105
void prepare_force(struct part *parts, size_t count) {
Matthieu Schaller's avatar
Matthieu Schaller committed
106

107
#if !defined(GIZMO_SPH) && !defined(SHADOWFAX_SPH) && !defined(MINIMAL_SPH)
James Willis's avatar
James Willis committed
108
  struct part *p;
109
  for (size_t i = 0; i < count; ++i) {
James Willis's avatar
James Willis committed
110
111
    p = &parts[i];
    p->rho = i + 1;
112
    p->force.balsara = random_uniform(0.0, 1.0);
James Willis's avatar
James Willis committed
113
    p->force.P_over_rho2 = i + 1;
114
115
116
    p->force.soundspeed = random_uniform(2.0, 3.0);
    p->force.v_sig = 0.0f;
    p->force.h_dt = 0.0f;
James Willis's avatar
James Willis committed
117
  }
118
#endif
James Willis's avatar
James Willis committed
119
120
121
122
123
124
125
126
}

/**
 * @brief Dumps all particle information to a file
 */
void dump_indv_particle_fields(char *fileName, struct part *p) {

  FILE *file = fopen(fileName, "a");
Matthieu Schaller's avatar
Matthieu Schaller committed
127
128

  fprintf(file,
James Willis's avatar
James Willis committed
129
130
131
132
          "%6llu %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f "
          "%8.5f "
          "%8.5f %8.5f %13e %13e %13e %13e %13e %8.5f %8.5f\n",
          p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->h,
133
          hydro_get_comoving_density(p),
James Willis's avatar
James Willis committed
134
#if defined(MINIMAL_SPH) || defined(SHADOWFAX_SPH)
James Willis's avatar
James Willis committed
135
          0.f,
James Willis's avatar
James Willis committed
136
#else
James Willis's avatar
James Willis committed
137
          p->density.div_v,
138
#endif
139
          hydro_get_comoving_entropy(p), hydro_get_comoving_internal_energy(p),
140
141
          hydro_get_comoving_pressure(p), hydro_get_comoving_soundspeed(p),
          p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->force.h_dt,
James Willis's avatar
James Willis committed
142
#if defined(GADGET2_SPH)
James Willis's avatar
James Willis committed
143
          p->force.v_sig, p->entropy_dt, 0.f
James Willis's avatar
James Willis committed
144
#elif defined(DEFAULT_SPH)
James Willis's avatar
James Willis committed
145
          p->force.v_sig, 0.f, p->force.u_dt
James Willis's avatar
James Willis committed
146
#elif defined(MINIMAL_SPH)
James Willis's avatar
James Willis committed
147
          p->force.v_sig, 0.f, p->u_dt
148
#else
James Willis's avatar
James Willis committed
149
          0.f, 0.f, 0.f
James Willis's avatar
James Willis committed
150
#endif
James Willis's avatar
James Willis committed
151
          );
152

James Willis's avatar
James Willis committed
153
154
155
156
157
158
159
160
161
  fclose(file);
}

/**
 * @brief Creates a header for the output file
 */
void write_header(char *fileName) {

  FILE *file = fopen(fileName, "w");
Matthieu Schaller's avatar
Matthieu Schaller committed
162
163
  /* Write header */
  fprintf(file,
James Willis's avatar
James Willis committed
164
165
166
167
168
          "# %4s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %13s %13s "
          "%13s %13s %13s %8s %8s\n",
          "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "h", "rho",
          "div_v", "S", "u", "P", "c", "a_x", "a_y", "a_z", "h_dt", "v_sig",
          "dS/dt", "du/dt");
James Willis's avatar
James Willis committed
169

Matthieu Schaller's avatar
Matthieu Schaller committed
170
  fclose(file);
James Willis's avatar
James Willis committed
171
172
}

173
/**
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
 * @brief Compares the vectorised result against
 * the serial result of the interaction.
 *
 * @param serial_test_part Particle that has been updated serially
 * @param serial_parts Particle array that has been interacted serially
 * @param vec_test_part Particle that has been updated using vectors
 * @param vec_parts Particle array to be interacted using vectors
 * @param count No. of particles that have been interacted
 *
 * @return Non-zero value if difference found, 0 otherwise
 */
int check_results(struct part serial_test_part, struct part *serial_parts,
                  struct part vec_test_part, struct part *vec_parts,
                  int count) {
  int result = 0;
  result += compare_particles(serial_test_part, vec_test_part, ACC_THRESHOLD);

  for (int i = 0; i < count; i++)
    result += compare_particles(serial_parts[i], vec_parts[i], ACC_THRESHOLD);

  return result;
}

/*
198
199
 * @brief Calls the serial and vectorised version of the non-symmetrical density
 * interaction.
James Willis's avatar
James Willis committed
200
 *
201
 * @param test_part Particle that will be updated
James Willis's avatar
James Willis committed
202
203
 * @param parts Particle array to be interacted
 * @param count No. of particles to be interacted
204
205
206
 * @param serial_inter_func Serial interaction function to be called
 * @param vec_inter_func Vectorised interaction function to be called
 * @param runs No. of times to call interactions
207
 * @param num_vec_proc No. of vectors to use to process interaction
Matthieu Schaller's avatar
Matthieu Schaller committed
208
 *
James Willis's avatar
James Willis committed
209
 */
210
void test_interactions(struct part test_part, struct part *parts, size_t count,
211
                       char *filePrefix, int runs, int num_vec_proc) {
Matthieu Schaller's avatar
Matthieu Schaller committed
212

213
214
  ticks serial_time = 0;
  ticks vec_time = 0;
James Willis's avatar
James Willis committed
215

216
217
218
  const float a = 1.f;
  const float H = 0.f;

James Willis's avatar
James Willis committed
219
220
221
  char serial_filename[200] = "";
  char vec_filename[200] = "";

Matthieu Schaller's avatar
Matthieu Schaller committed
222
223
  strcpy(serial_filename, filePrefix);
  strcpy(vec_filename, filePrefix);
James Willis's avatar
James Willis committed
224
  sprintf(serial_filename + strlen(serial_filename), "_serial.dat");
James Willis's avatar
James Willis committed
225
  sprintf(vec_filename + strlen(vec_filename), "_%d_vec.dat", num_vec_proc);
James Willis's avatar
James Willis committed
226
227

  write_header(serial_filename);
Matthieu Schaller's avatar
Matthieu Schaller committed
228
  write_header(vec_filename);
James Willis's avatar
James Willis committed
229

230
231
232
  struct part pi_serial, pi_vec;
  struct part pj_serial[count], pj_vec[count];

233
234
235
236
237
238
239
240
241
  float r2[count] __attribute__((aligned(array_align)));
  float dx[3 * count] __attribute__((aligned(array_align)));

  struct part *piq[count], *pjq[count];
  for (size_t k = 0; k < count; k++) {
    piq[k] = NULL;
    pjq[k] = NULL;
  }

242
243
  float r2q[count] __attribute__((aligned(array_align)));
  float hiq[count] __attribute__((aligned(array_align)));
244
245
246
247
248
249
250
251
252
253
254
  float dxq[count] __attribute__((aligned(array_align)));

  float dyq[count] __attribute__((aligned(array_align)));
  float dzq[count] __attribute__((aligned(array_align)));
  float mjq[count] __attribute__((aligned(array_align)));
  float vixq[count] __attribute__((aligned(array_align)));
  float viyq[count] __attribute__((aligned(array_align)));
  float vizq[count] __attribute__((aligned(array_align)));
  float vjxq[count] __attribute__((aligned(array_align)));
  float vjyq[count] __attribute__((aligned(array_align)));
  float vjzq[count] __attribute__((aligned(array_align)));
255
256

  /* Call serial interaction a set number of times. */
257
  for (int r = 0; r < runs; r++) {
258
259
    /* Reset particle to initial setup */
    pi_serial = test_part;
260
    for (size_t i = 0; i < count; i++) pj_serial[i] = parts[i];
261
262

    /* Perform serial interaction */
263
    for (size_t i = 0; i < count; i++) {
264
      /* Compute the pairwise distance. */
265
266
267
268
269
      r2[i] = 0.0f;
      for (int k = 0; k < 3; k++) {
        int ind = (3 * i) + k;
        dx[ind] = pi_serial.x[k] - pj_serial[i].x[k];
        r2[i] += dx[ind] * dx[ind];
270
      }
271
    }
272

273
274
275
276
277
278
279
    const ticks tic = getticks();
/* Perform serial interaction */
#ifdef __ICC
#pragma novector
#endif
    for (size_t i = 0; i < count; i++) {
      IACT(r2[i], &(dx[3 * i]), pi_serial.h, pj_serial[i].h, &pi_serial,
280
           &pj_serial[i], a, H);
281
    }
282
    serial_time += getticks() - tic;
Matthieu Schaller's avatar
Matthieu Schaller committed
283
  }
James Willis's avatar
James Willis committed
284
285

  /* Dump result of serial interaction. */
286
287
288
289
290
  dump_indv_particle_fields(serial_filename, &pi_serial);
  for (size_t i = 0; i < count; i++)
    dump_indv_particle_fields(serial_filename, &pj_serial[i]);

  /* Call vector interaction a set number of times. */
291
  for (int r = 0; r < runs; r++) {
292
293
    /* Reset particle to initial setup */
    pi_vec = test_part;
294
    for (size_t i = 0; i < count; i++) pj_vec[i] = parts[i];
295
296

    /* Setup arrays for vector interaction. */
297
    for (size_t i = 0; i < count; i++) {
298
      /* Compute the pairwise distance. */
299
300
      float my_r2 = 0.0f;
      float my_dx[3];
301
      for (int k = 0; k < 3; k++) {
302
303
        my_dx[k] = pi_vec.x[k] - pj_vec[i].x[k];
        my_r2 += my_dx[k] * my_dx[k];
304
305
      }

306
307
308
309
310
      r2q[i] = my_r2;
      dxq[i] = my_dx[0];
      dyq[i] = my_dx[1];
      dzq[i] = my_dx[2];

311
312
313
      hiq[i] = pi_vec.h;
      piq[i] = &pi_vec;
      pjq[i] = &pj_vec[i];
James Willis's avatar
James Willis committed
314

315
316
317
318
319
320
321
      mjq[i] = pj_vec[i].mass;
      vixq[i] = pi_vec.v[0];
      viyq[i] = pi_vec.v[1];
      vizq[i] = pi_vec.v[2];
      vjxq[i] = pj_vec[i].v[0];
      vjyq[i] = pj_vec[i].v[1];
      vjzq[i] = pj_vec[i].v[2];
322
    }
Matthieu Schaller's avatar
Matthieu Schaller committed
323

James Willis's avatar
James Willis committed
324
    /* Perform vector interaction. */
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
    vector hi_vec, hi_inv_vec, vix_vec, viy_vec, viz_vec;
    vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum,
        curlvySum, curlvzSum;
    mask_t mask, mask2;

    rhoSum.v = vec_set1(0.f);
    rho_dhSum.v = vec_set1(0.f);
    wcountSum.v = vec_set1(0.f);
    wcount_dhSum.v = vec_set1(0.f);
    div_vSum.v = vec_set1(0.f);
    curlvxSum.v = vec_set1(0.f);
    curlvySum.v = vec_set1(0.f);
    curlvzSum.v = vec_set1(0.f);

    hi_vec.v = vec_load(&hiq[0]);
    vix_vec.v = vec_load(&vixq[0]);
    viy_vec.v = vec_load(&viyq[0]);
    viz_vec.v = vec_load(&vizq[0]);

    hi_inv_vec = vec_reciprocal(hi_vec);
James Willis's avatar
James Willis committed
345
346
    vec_init_mask_true(mask);
    vec_init_mask_true(mask2);
347

348
349
    const ticks vec_tic = getticks();

350
351
352
353
354
355
356
357
358
359
360
    for (size_t i = 0; i < count; i += num_vec_proc * VEC_SIZE) {

      /* Interleave two vectors for interaction. */
      if (num_vec_proc == 2) {
        runner_iact_nonsym_2_vec_density(
            &(r2q[i]), &(dxq[i]), &(dyq[i]), &(dzq[i]), (hi_inv_vec), (vix_vec),
            (viy_vec), (viz_vec), &(vjxq[i]), &(vjyq[i]), &(vjzq[i]), &(mjq[i]),
            &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum, &div_vSum,
            &curlvxSum, &curlvySum, &curlvzSum, mask, mask2, 0);
      } else { /* Only use one vector for interaction. */

361
362
363
364
365
        vector my_r2, my_dx, my_dy, my_dz;
        my_r2.v = vec_load(&(r2q[i]));
        my_dx.v = vec_load(&(dxq[i]));
        my_dy.v = vec_load(&(dyq[i]));
        my_dz.v = vec_load(&(dzq[i]));
366
367

        runner_iact_nonsym_1_vec_density(
368
369
370
371
            &my_r2, &my_dx, &my_dy, &my_dz, (hi_inv_vec), (vix_vec), (viy_vec),
            (viz_vec), &(vjxq[i]), &(vjyq[i]), &(vjzq[i]), &(mjq[i]), &rhoSum,
            &rho_dhSum, &wcountSum, &wcount_dhSum, &div_vSum, &curlvxSum,
            &curlvySum, &curlvzSum, mask);
372
      }
373
374
    }

375
376
377
378
379
380
381
382
383
    VEC_HADD(rhoSum, piq[0]->rho);
    VEC_HADD(rho_dhSum, piq[0]->density.rho_dh);
    VEC_HADD(wcountSum, piq[0]->density.wcount);
    VEC_HADD(wcount_dhSum, piq[0]->density.wcount_dh);
    VEC_HADD(div_vSum, piq[0]->density.div_v);
    VEC_HADD(curlvxSum, piq[0]->density.rot_v[0]);
    VEC_HADD(curlvySum, piq[0]->density.rot_v[1]);
    VEC_HADD(curlvzSum, piq[0]->density.rot_v[2]);

384
385
    vec_time += getticks() - vec_tic;
  }
Matthieu Schaller's avatar
Matthieu Schaller committed
386

387
  /* Dump result of serial interaction. */
Matthieu Schaller's avatar
Matthieu Schaller committed
388
  dump_indv_particle_fields(vec_filename, piq[0]);
389
  for (size_t i = 0; i < count; i++)
Matthieu Schaller's avatar
Matthieu Schaller committed
390
    dump_indv_particle_fields(vec_filename, pjq[i]);
James Willis's avatar
James Willis committed
391

392
393
394
395
396
397
398
  /* Check serial results against the vectorised results. */
  if (check_results(pi_serial, pj_serial, pi_vec, pj_vec, count))
    message("Differences found...");

  message("The serial interactions took     : %15lli ticks.",
          serial_time / runs);
  message("The vectorised interactions took : %15lli ticks.", vec_time / runs);
399
  message("Speed up: %15fx.", (double)(serial_time) / vec_time);
James Willis's avatar
James Willis committed
400
401
}

James Willis's avatar
James Willis committed
402
/*
James Willis's avatar
James Willis committed
403
 * @brief Calls the serial and vectorised version of the non-symmetrical force
James Willis's avatar
James Willis committed
404
405
406
407
408
409
410
411
412
413
 * interaction.
 *
 * @param test_part Particle that will be updated
 * @param parts Particle array to be interacted
 * @param count No. of particles to be interacted
 * @param serial_inter_func Serial interaction function to be called
 * @param vec_inter_func Vectorised interaction function to be called
 * @param runs No. of times to call interactions
 *
 */
James Willis's avatar
James Willis committed
414
415
416
void test_force_interactions(struct part test_part, struct part *parts,
                             size_t count, char *filePrefix, int runs,
                             int num_vec_proc) {
James Willis's avatar
James Willis committed
417
418
419
420
421
422
423
424

  ticks serial_time = 0;
  ticks vec_time = 0;

  FILE *file;
  char serial_filename[200] = "";
  char vec_filename[200] = "";

425
426
427
  const float a = 1.f;
  const float H = 0.f;

James Willis's avatar
James Willis committed
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
  strcpy(serial_filename, filePrefix);
  strcpy(vec_filename, filePrefix);
  sprintf(serial_filename + strlen(serial_filename), "_serial.dat");
  sprintf(vec_filename + strlen(vec_filename), "_%d_vec.dat", num_vec_proc);

  write_header(serial_filename);
  write_header(vec_filename);

  struct part pi_serial, pi_vec;
  struct part pj_serial[count], pj_vec[count];

  float r2[count] __attribute__((aligned(array_align)));
  float dx[3 * count] __attribute__((aligned(array_align)));

  struct part *piq[count], *pjq[count];
  for (size_t k = 0; k < count; k++) {
    piq[k] = NULL;
    pjq[k] = NULL;
  }

  float r2q[count] __attribute__((aligned(array_align)));
  float dxq[count] __attribute__((aligned(array_align)));
  float dyq[count] __attribute__((aligned(array_align)));
  float dzq[count] __attribute__((aligned(array_align)));
James Willis's avatar
James Willis committed
452

James Willis's avatar
James Willis committed
453
454
455
456
457
458
459
460
461
  float hiq[count] __attribute__((aligned(array_align)));
  float vixq[count] __attribute__((aligned(array_align)));
  float viyq[count] __attribute__((aligned(array_align)));
  float vizq[count] __attribute__((aligned(array_align)));
  float rhoiq[count] __attribute__((aligned(array_align)));
  float grad_hiq[count] __attribute__((aligned(array_align)));
  float pOrhoi2q[count] __attribute__((aligned(array_align)));
  float balsaraiq[count] __attribute__((aligned(array_align)));
  float ciq[count] __attribute__((aligned(array_align)));
James Willis's avatar
James Willis committed
462

James Willis's avatar
James Willis committed
463
464
465
466
467
468
469
470
471
472
473
474
  float hj_invq[count] __attribute__((aligned(array_align)));
  float mjq[count] __attribute__((aligned(array_align)));
  float vjxq[count] __attribute__((aligned(array_align)));
  float vjyq[count] __attribute__((aligned(array_align)));
  float vjzq[count] __attribute__((aligned(array_align)));
  float rhojq[count] __attribute__((aligned(array_align)));
  float grad_hjq[count] __attribute__((aligned(array_align)));
  float pOrhoj2q[count] __attribute__((aligned(array_align)));
  float balsarajq[count] __attribute__((aligned(array_align)));
  float cjq[count] __attribute__((aligned(array_align)));

  /* Call serial interaction a set number of times. */
475
  for (int r = 0; r < runs; r++) {
James Willis's avatar
James Willis committed
476
477
478
479
480
    /* Reset particle to initial setup */
    pi_serial = test_part;
    for (size_t i = 0; i < count; i++) pj_serial[i] = parts[i];

    /* Only dump data on first run. */
481
    if (r == 0) {
James Willis's avatar
James Willis committed
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
      /* Dump state of particles before serial interaction. */
      dump_indv_particle_fields(serial_filename, &pi_serial);
      for (size_t i = 0; i < count; i++)
        dump_indv_particle_fields(serial_filename, &pj_serial[i]);
    }

    /* Perform serial interaction */
    for (size_t i = 0; i < count; i++) {
      /* Compute the pairwise distance. */
      r2[i] = 0.0f;
      for (int k = 0; k < 3; k++) {
        int ind = (3 * i) + k;
        dx[ind] = pi_serial.x[k] - pj_serial[i].x[k];
        r2[i] += dx[ind] * dx[ind];
      }
    }

    const ticks tic = getticks();
/* Perform serial interaction */
#ifdef __ICC
#pragma novector
#endif
    for (size_t i = 0; i < count; i++) {
James Willis's avatar
James Willis committed
505
      runner_iact_nonsym_force(r2[i], &(dx[3 * i]), pi_serial.h, pj_serial[i].h,
506
                               &pi_serial, &pj_serial[i], a, H);
James Willis's avatar
James Willis committed
507
508
509
510
511
512
513
514
515
516
517
518
519
520
    }
    serial_time += getticks() - tic;
  }

  file = fopen(serial_filename, "a");
  fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n");
  fclose(file);

  /* Dump result of serial interaction. */
  dump_indv_particle_fields(serial_filename, &pi_serial);
  for (size_t i = 0; i < count; i++)
    dump_indv_particle_fields(serial_filename, &pj_serial[i]);

  /* Call vector interaction a set number of times. */
521
  for (int r = 0; r < runs; r++) {
James Willis's avatar
James Willis committed
522
523
524
525
526
527
528
    /* Reset particle to initial setup */
    pi_vec = test_part;
    for (size_t i = 0; i < count; i++) pj_vec[i] = parts[i];

    /* Setup arrays for vector interaction. */
    for (size_t i = 0; i < count; i++) {
      /* Compute the pairwise distance. */
529
530
      float my_r2 = 0.0f;
      float my_dx[3];
James Willis's avatar
James Willis committed
531
      for (int k = 0; k < 3; k++) {
532
533
        my_dx[k] = pi_vec.x[k] - pj_vec[i].x[k];
        my_r2 += my_dx[k] * my_dx[k];
James Willis's avatar
James Willis committed
534
535
536
537
538
      }

      piq[i] = &pi_vec;
      pjq[i] = &pj_vec[i];

539
540
541
542
      r2q[i] = my_r2;
      dxq[i] = my_dx[0];
      dyq[i] = my_dx[1];
      dzq[i] = my_dx[2];
James Willis's avatar
James Willis committed
543

James Willis's avatar
James Willis committed
544
545
546
547
548
549
550
551
552
      hiq[i] = pi_vec.h;
      vixq[i] = pi_vec.v[0];
      viyq[i] = pi_vec.v[1];
      vizq[i] = pi_vec.v[2];
      rhoiq[i] = pi_vec.rho;
      grad_hiq[i] = pi_vec.force.f;
      pOrhoi2q[i] = pi_vec.force.P_over_rho2;
      balsaraiq[i] = pi_vec.force.balsara;
      ciq[i] = pi_vec.force.soundspeed;
James Willis's avatar
James Willis committed
553

James Willis's avatar
James Willis committed
554
555
556
557
558
559
560
561
562
563
564
565
566
      hj_invq[i] = 1.f / pj_vec[i].h;
      mjq[i] = pj_vec[i].mass;
      vjxq[i] = pj_vec[i].v[0];
      vjyq[i] = pj_vec[i].v[1];
      vjzq[i] = pj_vec[i].v[2];
      rhojq[i] = pj_vec[i].rho;
      grad_hjq[i] = pj_vec[i].force.f;
      pOrhoj2q[i] = pj_vec[i].force.P_over_rho2;
      balsarajq[i] = pj_vec[i].force.balsara;
      cjq[i] = pj_vec[i].force.soundspeed;
    }

    /* Only dump data on first run. */
567
    if (r == 0) {
James Willis's avatar
James Willis committed
568
569
570
571
572
573
      /* Dump state of particles before vector interaction. */
      dump_indv_particle_fields(vec_filename, piq[0]);
      for (size_t i = 0; i < count; i++)
        dump_indv_particle_fields(vec_filename, pjq[i]);
    }

James Willis's avatar
James Willis committed
574
575
576
577
578
    /* Perform vector interaction. */
    vector hi_vec, hi_inv_vec, vix_vec, viy_vec, viz_vec, rhoi_vec, grad_hi_vec,
        pOrhoi2_vec, balsara_i_vec, ci_vec;
    vector a_hydro_xSum, a_hydro_ySum, a_hydro_zSum, h_dtSum, v_sigSum,
        entropy_dtSum;
James Willis's avatar
James Willis committed
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597

    a_hydro_xSum.v = vec_setzero();
    a_hydro_ySum.v = vec_setzero();
    a_hydro_zSum.v = vec_setzero();
    h_dtSum.v = vec_setzero();
    v_sigSum.v = vec_setzero();
    entropy_dtSum.v = vec_setzero();

    hi_vec.v = vec_load(&hiq[0]);
    vix_vec.v = vec_load(&vixq[0]);
    viy_vec.v = vec_load(&viyq[0]);
    viz_vec.v = vec_load(&vizq[0]);
    rhoi_vec.v = vec_load(&rhoiq[0]);
    grad_hi_vec.v = vec_load(&grad_hiq[0]);
    pOrhoi2_vec.v = vec_load(&pOrhoi2q[0]);
    balsara_i_vec.v = vec_load(&balsaraiq[0]);
    ci_vec.v = vec_load(&ciq[0]);

    hi_inv_vec = vec_reciprocal(hi_vec);
James Willis's avatar
James Willis committed
598

James Willis's avatar
James Willis committed
599
600
601
    mask_t mask, mask2;
    vec_init_mask_true(mask);
    vec_init_mask_true(mask2);
James Willis's avatar
James Willis committed
602

James Willis's avatar
James Willis committed
603
604
605
606
607
    const ticks vec_tic = getticks();

    for (size_t i = 0; i < count; i += num_vec_proc * VEC_SIZE) {

      if (num_vec_proc == 2) {
James Willis's avatar
James Willis committed
608
609
610
611
612
613
614
        runner_iact_nonsym_2_vec_force(
            &(r2q[i]), &(dxq[i]), &(dyq[i]), &(dzq[i]), (vix_vec), (viy_vec),
            (viz_vec), rhoi_vec, grad_hi_vec, pOrhoi2_vec, balsara_i_vec,
            ci_vec, &(vjxq[i]), &(vjyq[i]), &(vjzq[i]), &(rhojq[i]),
            &(grad_hjq[i]), &(pOrhoj2q[i]), &(balsarajq[i]), &(cjq[i]),
            &(mjq[i]), hi_inv_vec, &(hj_invq[i]), &a_hydro_xSum, &a_hydro_ySum,
            &a_hydro_zSum, &h_dtSum, &v_sigSum, &entropy_dtSum, mask, mask2, 0);
James Willis's avatar
James Willis committed
615
616
      } else { /* Only use one vector for interaction. */

617
618
619
620
621
        vector my_r2, my_dx, my_dy, my_dz, hj, hj_inv;
        my_r2.v = vec_load(&(r2q[i]));
        my_dx.v = vec_load(&(dxq[i]));
        my_dy.v = vec_load(&(dyq[i]));
        my_dz.v = vec_load(&(dzq[i]));
622
623
        hj.v = vec_load(&hj_invq[i]);
        hj_inv = vec_reciprocal(hj);
James Willis's avatar
James Willis committed
624

James Willis's avatar
James Willis committed
625
        runner_iact_nonsym_1_vec_force(
626
            &my_r2, &my_dx, &my_dy, &my_dz, vix_vec, viy_vec, viz_vec, rhoi_vec,
James Willis's avatar
James Willis committed
627
628
            grad_hi_vec, pOrhoi2_vec, balsara_i_vec, ci_vec, &(vjxq[i]),
            &(vjyq[i]), &(vjzq[i]), &(rhojq[i]), &(grad_hjq[i]), &(pOrhoj2q[i]),
629
            &(balsarajq[i]), &(cjq[i]), &(mjq[i]), hi_inv_vec, hj_inv,
James Willis's avatar
James Willis committed
630
631
            &a_hydro_xSum, &a_hydro_ySum, &a_hydro_zSum, &h_dtSum, &v_sigSum,
            &entropy_dtSum, mask);
James Willis's avatar
James Willis committed
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
      }
    }

    VEC_HADD(a_hydro_xSum, piq[0]->a_hydro[0]);
    VEC_HADD(a_hydro_ySum, piq[0]->a_hydro[1]);
    VEC_HADD(a_hydro_zSum, piq[0]->a_hydro[2]);
    VEC_HADD(h_dtSum, piq[0]->force.h_dt);
    VEC_HMAX(v_sigSum, piq[0]->force.v_sig);
    VEC_HADD(entropy_dtSum, piq[0]->entropy_dt);

    vec_time += getticks() - vec_tic;
  }

  file = fopen(vec_filename, "a");
  fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n");
  fclose(file);

  /* Dump result of serial interaction. */
  dump_indv_particle_fields(vec_filename, piq[0]);
  for (size_t i = 0; i < count; i++)
    dump_indv_particle_fields(vec_filename, pjq[i]);

  /* Check serial results against the vectorised results. */
  if (check_results(pi_serial, pj_serial, pi_vec, pj_vec, count))
    message("Differences found...");

  message("The serial interactions took     : %15lli ticks.",
          serial_time / runs);
  message("The vectorised interactions took : %15lli ticks.", vec_time / runs);
  message("Speed up: %15fx.", (double)(serial_time) / vec_time);
}

James Willis's avatar
James Willis committed
664
665
/* And go... */
int main(int argc, char *argv[]) {
666
667
  size_t runs = 10000;
  double h = 1.0, spacing = 0.5;
Matthieu Schaller's avatar
Matthieu Schaller committed
668
  double offset[3] = {0.0, 0.0, 0.0};
669
  size_t count = 256;
James Willis's avatar
James Willis committed
670
671
672
673
674

  /* Get some randomness going */
  srand(0);

  char c;
675
  while ((c = getopt(argc, argv, "h:s:n:r:")) != -1) {
James Willis's avatar
James Willis committed
676
677
678
679
680
681
    switch (c) {
      case 'h':
        sscanf(optarg, "%lf", &h);
        break;
      case 's':
        sscanf(optarg, "%lf", &spacing);
682
        break;
683
      case 'n':
684
        sscanf(optarg, "%zu", &count);
685
686
687
        break;
      case 'r':
        sscanf(optarg, "%zu", &runs);
James Willis's avatar
James Willis committed
688
689
690
691
692
693
694
695
696
697
698
        break;
      case '?':
        error("Unknown option.");
        break;
    }
  }

  if (h < 0 || spacing < 0) {
    printf(
        "\nUsage: %s [OPTIONS...]\n"
        "\nGenerates a particle array with equal particle separation."
Matthieu Schaller's avatar
Matthieu Schaller committed
699
700
        "\nThese are then interacted using runner_iact_density and "
        "runner_iact_vec_density."
James Willis's avatar
James Willis committed
701
702
        "\n\nOptions:"
        "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>"
703
704
        "\n-s SPACING=0.5     - Spacing between particles"
        "\n-n NUMBER=9        - No. of particles",
James Willis's avatar
James Willis committed
705
706
707
708
        argv[0]);
    exit(1);
  }

709
710
711
712
  /* Correct count so that VEC_SIZE of particles interact with the test
   * particle. */
  count = count - (count % VEC_SIZE) + 1;

James Willis's avatar
James Willis committed
713
714
  /* Build the infrastructure */
  static long long partId = 0;
715
716
717
718
  struct part test_particle;
  struct part *particles = make_particles(count, offset, spacing, h, &partId);

  test_particle = particles[0];
James Willis's avatar
James Willis committed
719
  /* Call the non-sym density test. */
720
721
722
723
724
  message("Testing %s interaction...", IACT_NAME);
  test_interactions(test_particle, &particles[1], count - 1, IACT_NAME, runs,
                    1);
  test_interactions(test_particle, &particles[1], count - 1, IACT_NAME, runs,
                    2);
James Willis's avatar
James Willis committed
725

James Willis's avatar
James Willis committed
726
727
  prepare_force(particles, count);

James Willis's avatar
James Willis committed
728
729
730
731
732
  test_force_interactions(test_particle, &particles[1], count - 1,
                          "test_nonsym_force", runs, 1);
  test_force_interactions(test_particle, &particles[1], count - 1,
                          "test_nonsym_force", runs, 2);

James Willis's avatar
James Willis committed
733
734
  return 0;
}
735
736
737

#else

James Willis's avatar
James Willis committed
738
int main() { return 1; }
739
740

#endif