testInteractions.c 25.1 KB
Newer Older
James Willis's avatar
James Willis committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
19
#include "../config.h"
James Willis's avatar
James Willis committed
20

21
/* Some standard headers. */
James Willis's avatar
James Willis committed
22 23 24 25 26
#include <fenv.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
27 28

/* Local includes */
James Willis's avatar
James Willis committed
29 30
#include "swift.h"

31 32 33
/* Other schemes need to be added here if they are not vectorized, otherwise
 * this test will simply not compile. */

Matthieu Schaller's avatar
Matthieu Schaller committed
34
#if defined(GADGET2_SPH) && defined(WITH_VECTORIZATION)
35

36 37 38
#define array_align sizeof(float) * VEC_SIZE
#define ACC_THRESHOLD 1e-5

39 40 41 42 43 44
#ifndef IACT
#define IACT runner_iact_nonsym_density
#define IACT_VEC runner_iact_nonsym_1_vec_density
#define IACT_NAME "test_nonsym_density"
#define NUM_VEC_PROC_INT 1
#endif
James Willis's avatar
James Willis committed
45 46 47 48 49 50 51 52 53 54 55 56

/**
 * @brief Constructs an array of particles in a valid state prior to
 * a IACT_NONSYM and IACT_NONSYM_VEC call.
 *
 * @param count No. of particles to create
 * @param offset The position of the particle offset from (0,0,0).
 * @param spacing Particle spacing.
 * @param h The smoothing length of the particles in units of the inter-particle
 *separation.
 * @param partId The running counter of IDs.
 */
James Willis's avatar
James Willis committed
57 58
struct part *make_particles(size_t count, double *offset, double spacing,
                            double h, long long *partId) {
Matthieu Schaller's avatar
Matthieu Schaller committed
59

James Willis's avatar
James Willis committed
60 61 62 63 64
  struct part *particles;
  if (posix_memalign((void **)&particles, part_align,
                     count * sizeof(struct part)) != 0) {
    error("couldn't allocate particles, no. of particles: %d", (int)count);
  }
James Willis's avatar
James Willis committed
65
  bzero(particles, count * sizeof(struct part));
Matthieu Schaller's avatar
Matthieu Schaller committed
66

James Willis's avatar
James Willis committed
67 68
  /* Construct the particles */
  struct part *p;
69 70 71 72 73 74 75 76 77 78 79

  /* Set test particle at centre of unit sphere. */
  p = &particles[0];

  /* Place the test particle at the centre of a unit sphere. */
  p->x[0] = 0.0f;
  p->x[1] = 0.0f;
  p->x[2] = 0.0f;

  p->h = h;
  p->id = ++(*partId);
80

81
#if !defined(GIZMO_MFV_SPH) && !defined(SHADOWFAX_SPH)
82
  p->mass = 1.0f;
83
#endif
84 85 86 87

  /* Place rest of particles around the test particle
   * with random position within a unit sphere. */
  for (size_t i = 1; i < count; ++i) {
James Willis's avatar
James Willis committed
88
    p = &particles[i];
89 90 91 92 93

    /* Randomise positions within a unit sphere. */
    p->x[0] = random_uniform(-1.0, 1.0);
    p->x[1] = random_uniform(-1.0, 1.0);
    p->x[2] = random_uniform(-1.0, 1.0);
James Willis's avatar
James Willis committed
94 95 96 97 98

    /* Randomise velocities. */
    p->v[0] = random_uniform(-0.05, 0.05);
    p->v[1] = random_uniform(-0.05, 0.05);
    p->v[2] = random_uniform(-0.05, 0.05);
Matthieu Schaller's avatar
Matthieu Schaller committed
99

James Willis's avatar
James Willis committed
100 101
    p->h = h;
    p->id = ++(*partId);
102
#if !defined(GIZMO_SPH) && !defined(SHADOWFAX_SPH)
James Willis's avatar
James Willis committed
103
    p->mass = 1.0f;
104
#endif
James Willis's avatar
James Willis committed
105 106 107 108 109 110 111
  }
  return particles;
}

/**
 * @brief Populates particle properties needed for the force calculation.
 */
112
void prepare_force(struct part *parts, size_t count) {
Matthieu Schaller's avatar
Matthieu Schaller committed
113

Josh Borrow's avatar
Josh Borrow committed
114 115 116
#if !defined(GIZMO_MFV_SPH) && !defined(SHADOWFAX_SPH) &&            \
    !defined(MINIMAL_SPH) && !defined(PLANETARY_SPH) &&              \
    !defined(HOPKINS_PU_SPH) && !defined(HOPKINS_PU_SPH_MONAGHAN) && \
Josh Borrow's avatar
Josh Borrow committed
117
    !defined(ANARCHY_PU_SPH) && !defined(SPHENIX_SPH) && !defined(PHANTOM_SPH)
James Willis's avatar
James Willis committed
118
  struct part *p;
119
  for (size_t i = 0; i < count; ++i) {
James Willis's avatar
James Willis committed
120 121
    p = &parts[i];
    p->rho = i + 1;
122
    p->force.balsara = random_uniform(0.0, 1.0);
James Willis's avatar
James Willis committed
123
    p->force.P_over_rho2 = i + 1;
124 125 126
    p->force.soundspeed = random_uniform(2.0, 3.0);
    p->force.v_sig = 0.0f;
    p->force.h_dt = 0.0f;
James Willis's avatar
James Willis committed
127
  }
128
#endif
James Willis's avatar
James Willis committed
129 130 131 132 133 134 135 136
}

/**
 * @brief Dumps all particle information to a file
 */
void dump_indv_particle_fields(char *fileName, struct part *p) {

  FILE *file = fopen(fileName, "a");
Matthieu Schaller's avatar
Matthieu Schaller committed
137 138

  fprintf(file,
James Willis's avatar
James Willis committed
139 140 141 142
          "%6llu %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f "
          "%8.5f "
          "%8.5f %8.5f %13e %13e %13e %13e %13e %8.5f %8.5f\n",
          p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->h,
143
          hydro_get_comoving_density(p),
144
#if defined(MINIMAL_SPH) || defined(PLANETARY_SPH) || \
Josh Borrow's avatar
Josh Borrow committed
145
    defined(SHADOWFAX_SPH) || defined(PHANTOM_SPH)
James Willis's avatar
James Willis committed
146
          0.f,
James Willis's avatar
James Willis committed
147
#else
James Willis's avatar
James Willis committed
148
          p->density.div_v,
149
#endif
150 151
          hydro_get_drifted_comoving_entropy(p),
          hydro_get_drifted_comoving_internal_energy(p),
152 153
          hydro_get_comoving_pressure(p), hydro_get_comoving_soundspeed(p),
          p->a_hydro[0], p->a_hydro[1], p->a_hydro[2], p->force.h_dt,
James Willis's avatar
James Willis committed
154
#if defined(GADGET2_SPH)
James Willis's avatar
James Willis committed
155
          p->force.v_sig, p->entropy_dt, 0.f
Josh Borrow's avatar
Josh Borrow committed
156
#elif defined(PHANTOM_SPH)
James Willis's avatar
James Willis committed
157
          p->force.v_sig, 0.f, p->force.u_dt
158 159
#elif defined(MINIMAL_SPH) || defined(HOPKINS_PU_SPH) ||           \
    defined(HOPKINS_PU_SPH_MONAGHAN) || defined(ANARCHY_PU_SPH) || \
Josh Borrow's avatar
Josh Borrow committed
160
    defined(SPHENIX_SPH) || defined(PHANTOM_SPH)
James Willis's avatar
James Willis committed
161
          p->force.v_sig, 0.f, p->u_dt
162
#else
James Willis's avatar
James Willis committed
163
          0.f, 0.f, 0.f
James Willis's avatar
James Willis committed
164
#endif
165
  );
166

James Willis's avatar
James Willis committed
167 168 169 170 171 172 173 174 175
  fclose(file);
}

/**
 * @brief Creates a header for the output file
 */
void write_header(char *fileName) {

  FILE *file = fopen(fileName, "w");
Matthieu Schaller's avatar
Matthieu Schaller committed
176 177
  /* Write header */
  fprintf(file,
James Willis's avatar
James Willis committed
178 179 180 181 182
          "# %4s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %13s %13s "
          "%13s %13s %13s %8s %8s\n",
          "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "h", "rho",
          "div_v", "S", "u", "P", "c", "a_x", "a_y", "a_z", "h_dt", "v_sig",
          "dS/dt", "du/dt");
James Willis's avatar
James Willis committed
183

Matthieu Schaller's avatar
Matthieu Schaller committed
184
  fclose(file);
James Willis's avatar
James Willis committed
185 186
}

187
/**
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
 * @brief Compares the vectorised result against
 * the serial result of the interaction.
 *
 * @param serial_test_part Particle that has been updated serially
 * @param serial_parts Particle array that has been interacted serially
 * @param vec_test_part Particle that has been updated using vectors
 * @param vec_parts Particle array to be interacted using vectors
 * @param count No. of particles that have been interacted
 *
 * @return Non-zero value if difference found, 0 otherwise
 */
int check_results(struct part serial_test_part, struct part *serial_parts,
                  struct part vec_test_part, struct part *vec_parts,
                  int count) {
  int result = 0;
203
  result += compare_particles(&serial_test_part, &vec_test_part, ACC_THRESHOLD);
204 205

  for (int i = 0; i < count; i++)
206
    result += compare_particles(&serial_parts[i], &vec_parts[i], ACC_THRESHOLD);
207 208 209 210 211

  return result;
}

/*
212 213
 * @brief Calls the serial and vectorised version of the non-symmetrical density
 * interaction.
James Willis's avatar
James Willis committed
214
 *
215
 * @param test_part Particle that will be updated
James Willis's avatar
James Willis committed
216 217
 * @param parts Particle array to be interacted
 * @param count No. of particles to be interacted
218 219 220
 * @param serial_inter_func Serial interaction function to be called
 * @param vec_inter_func Vectorised interaction function to be called
 * @param runs No. of times to call interactions
221
 * @param num_vec_proc No. of vectors to use to process interaction
Matthieu Schaller's avatar
Matthieu Schaller committed
222
 *
James Willis's avatar
James Willis committed
223
 */
224
void test_interactions(struct part test_part, struct part *parts, size_t count,
225
                       char *filePrefix, int runs, int num_vec_proc) {
Matthieu Schaller's avatar
Matthieu Schaller committed
226

227 228
  ticks serial_time = 0;
  ticks vec_time = 0;
James Willis's avatar
James Willis committed
229

230 231 232
  const float a = 1.f;
  const float H = 0.f;

James Willis's avatar
James Willis committed
233 234 235
  char serial_filename[200] = "";
  char vec_filename[200] = "";

Matthieu Schaller's avatar
Matthieu Schaller committed
236 237
  strcpy(serial_filename, filePrefix);
  strcpy(vec_filename, filePrefix);
James Willis's avatar
James Willis committed
238
  sprintf(serial_filename + strlen(serial_filename), "_serial.dat");
James Willis's avatar
James Willis committed
239
  sprintf(vec_filename + strlen(vec_filename), "_%d_vec.dat", num_vec_proc);
James Willis's avatar
James Willis committed
240 241

  write_header(serial_filename);
Matthieu Schaller's avatar
Matthieu Schaller committed
242
  write_header(vec_filename);
James Willis's avatar
James Willis committed
243

244 245 246
  struct part pi_serial, pi_vec;
  struct part pj_serial[count], pj_vec[count];

247 248 249 250 251 252 253 254 255
  float r2[count] __attribute__((aligned(array_align)));
  float dx[3 * count] __attribute__((aligned(array_align)));

  struct part *piq[count], *pjq[count];
  for (size_t k = 0; k < count; k++) {
    piq[k] = NULL;
    pjq[k] = NULL;
  }

256 257
  float r2q[count] __attribute__((aligned(array_align)));
  float hiq[count] __attribute__((aligned(array_align)));
258 259 260 261 262 263 264 265 266 267 268
  float dxq[count] __attribute__((aligned(array_align)));

  float dyq[count] __attribute__((aligned(array_align)));
  float dzq[count] __attribute__((aligned(array_align)));
  float mjq[count] __attribute__((aligned(array_align)));
  float vixq[count] __attribute__((aligned(array_align)));
  float viyq[count] __attribute__((aligned(array_align)));
  float vizq[count] __attribute__((aligned(array_align)));
  float vjxq[count] __attribute__((aligned(array_align)));
  float vjyq[count] __attribute__((aligned(array_align)));
  float vjzq[count] __attribute__((aligned(array_align)));
269 270

  /* Call serial interaction a set number of times. */
271
  for (int r = 0; r < runs; r++) {
272 273
    /* Reset particle to initial setup */
    pi_serial = test_part;
274
    for (size_t i = 0; i < count; i++) pj_serial[i] = parts[i];
275 276

    /* Perform serial interaction */
277
    for (size_t i = 0; i < count; i++) {
278
      /* Compute the pairwise distance. */
279 280 281 282 283
      r2[i] = 0.0f;
      for (int k = 0; k < 3; k++) {
        int ind = (3 * i) + k;
        dx[ind] = pi_serial.x[k] - pj_serial[i].x[k];
        r2[i] += dx[ind] * dx[ind];
284
      }
285
    }
286

287 288 289 290 291 292 293
    const ticks tic = getticks();
/* Perform serial interaction */
#ifdef __ICC
#pragma novector
#endif
    for (size_t i = 0; i < count; i++) {
      IACT(r2[i], &(dx[3 * i]), pi_serial.h, pj_serial[i].h, &pi_serial,
294
           &pj_serial[i], a, H);
295
    }
296
    serial_time += getticks() - tic;
Matthieu Schaller's avatar
Matthieu Schaller committed
297
  }
James Willis's avatar
James Willis committed
298 299

  /* Dump result of serial interaction. */
300 301 302 303 304
  dump_indv_particle_fields(serial_filename, &pi_serial);
  for (size_t i = 0; i < count; i++)
    dump_indv_particle_fields(serial_filename, &pj_serial[i]);

  /* Call vector interaction a set number of times. */
305
  for (int r = 0; r < runs; r++) {
306 307
    /* Reset particle to initial setup */
    pi_vec = test_part;
308
    for (size_t i = 0; i < count; i++) pj_vec[i] = parts[i];
309 310

    /* Setup arrays for vector interaction. */
311
    for (size_t i = 0; i < count; i++) {
312
      /* Compute the pairwise distance. */
313 314
      float my_r2 = 0.0f;
      float my_dx[3];
315
      for (int k = 0; k < 3; k++) {
316 317
        my_dx[k] = pi_vec.x[k] - pj_vec[i].x[k];
        my_r2 += my_dx[k] * my_dx[k];
318 319
      }

320 321 322 323 324
      r2q[i] = my_r2;
      dxq[i] = my_dx[0];
      dyq[i] = my_dx[1];
      dzq[i] = my_dx[2];

325 326 327
      hiq[i] = pi_vec.h;
      piq[i] = &pi_vec;
      pjq[i] = &pj_vec[i];
James Willis's avatar
James Willis committed
328

329 330 331 332 333 334 335
      mjq[i] = pj_vec[i].mass;
      vixq[i] = pi_vec.v[0];
      viyq[i] = pi_vec.v[1];
      vizq[i] = pi_vec.v[2];
      vjxq[i] = pj_vec[i].v[0];
      vjyq[i] = pj_vec[i].v[1];
      vjzq[i] = pj_vec[i].v[2];
336
    }
Matthieu Schaller's avatar
Matthieu Schaller committed
337

James Willis's avatar
James Willis committed
338
    /* Perform vector interaction. */
339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
    vector hi_vec, hi_inv_vec, vix_vec, viy_vec, viz_vec;
    vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum,
        curlvySum, curlvzSum;
    mask_t mask, mask2;

    rhoSum.v = vec_set1(0.f);
    rho_dhSum.v = vec_set1(0.f);
    wcountSum.v = vec_set1(0.f);
    wcount_dhSum.v = vec_set1(0.f);
    div_vSum.v = vec_set1(0.f);
    curlvxSum.v = vec_set1(0.f);
    curlvySum.v = vec_set1(0.f);
    curlvzSum.v = vec_set1(0.f);

    hi_vec.v = vec_load(&hiq[0]);
    vix_vec.v = vec_load(&vixq[0]);
    viy_vec.v = vec_load(&viyq[0]);
    viz_vec.v = vec_load(&vizq[0]);

    hi_inv_vec = vec_reciprocal(hi_vec);
James Willis's avatar
James Willis committed
359 360
    vec_init_mask_true(mask);
    vec_init_mask_true(mask2);
361

362 363
    const ticks vec_tic = getticks();

364 365 366 367 368 369 370 371 372 373 374
    for (size_t i = 0; i < count; i += num_vec_proc * VEC_SIZE) {

      /* Interleave two vectors for interaction. */
      if (num_vec_proc == 2) {
        runner_iact_nonsym_2_vec_density(
            &(r2q[i]), &(dxq[i]), &(dyq[i]), &(dzq[i]), (hi_inv_vec), (vix_vec),
            (viy_vec), (viz_vec), &(vjxq[i]), &(vjyq[i]), &(vjzq[i]), &(mjq[i]),
            &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum, &div_vSum,
            &curlvxSum, &curlvySum, &curlvzSum, mask, mask2, 0);
      } else { /* Only use one vector for interaction. */

375 376 377 378 379
        vector my_r2, my_dx, my_dy, my_dz;
        my_r2.v = vec_load(&(r2q[i]));
        my_dx.v = vec_load(&(dxq[i]));
        my_dy.v = vec_load(&(dyq[i]));
        my_dz.v = vec_load(&(dzq[i]));
380 381

        runner_iact_nonsym_1_vec_density(
382 383 384 385
            &my_r2, &my_dx, &my_dy, &my_dz, (hi_inv_vec), (vix_vec), (viy_vec),
            (viz_vec), &(vjxq[i]), &(vjyq[i]), &(vjzq[i]), &(mjq[i]), &rhoSum,
            &rho_dhSum, &wcountSum, &wcount_dhSum, &div_vSum, &curlvxSum,
            &curlvySum, &curlvzSum, mask);
386
      }
387 388
    }

389 390 391 392 393 394 395 396 397
    VEC_HADD(rhoSum, piq[0]->rho);
    VEC_HADD(rho_dhSum, piq[0]->density.rho_dh);
    VEC_HADD(wcountSum, piq[0]->density.wcount);
    VEC_HADD(wcount_dhSum, piq[0]->density.wcount_dh);
    VEC_HADD(div_vSum, piq[0]->density.div_v);
    VEC_HADD(curlvxSum, piq[0]->density.rot_v[0]);
    VEC_HADD(curlvySum, piq[0]->density.rot_v[1]);
    VEC_HADD(curlvzSum, piq[0]->density.rot_v[2]);

398 399
    vec_time += getticks() - vec_tic;
  }
Matthieu Schaller's avatar
Matthieu Schaller committed
400

401
  /* Dump result of serial interaction. */
Matthieu Schaller's avatar
Matthieu Schaller committed
402
  dump_indv_particle_fields(vec_filename, piq[0]);
403
  for (size_t i = 0; i < count; i++)
Matthieu Schaller's avatar
Matthieu Schaller committed
404
    dump_indv_particle_fields(vec_filename, pjq[i]);
James Willis's avatar
James Willis committed
405

406 407 408 409 410 411 412
  /* Check serial results against the vectorised results. */
  if (check_results(pi_serial, pj_serial, pi_vec, pj_vec, count))
    message("Differences found...");

  message("The serial interactions took     : %15lli ticks.",
          serial_time / runs);
  message("The vectorised interactions took : %15lli ticks.", vec_time / runs);
413
  message("Speed up: %15fx.", (double)(serial_time) / vec_time);
James Willis's avatar
James Willis committed
414 415
}

James Willis's avatar
James Willis committed
416
/*
James Willis's avatar
James Willis committed
417
 * @brief Calls the serial and vectorised version of the non-symmetrical force
James Willis's avatar
James Willis committed
418 419 420 421 422 423 424 425 426 427
 * interaction.
 *
 * @param test_part Particle that will be updated
 * @param parts Particle array to be interacted
 * @param count No. of particles to be interacted
 * @param serial_inter_func Serial interaction function to be called
 * @param vec_inter_func Vectorised interaction function to be called
 * @param runs No. of times to call interactions
 *
 */
James Willis's avatar
James Willis committed
428 429 430
void test_force_interactions(struct part test_part, struct part *parts,
                             size_t count, char *filePrefix, int runs,
                             int num_vec_proc) {
James Willis's avatar
James Willis committed
431 432 433 434 435 436 437 438

  ticks serial_time = 0;
  ticks vec_time = 0;

  FILE *file;
  char serial_filename[200] = "";
  char vec_filename[200] = "";

439 440 441
  const float a = 1.f;
  const float H = 0.f;

James Willis's avatar
James Willis committed
442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465
  strcpy(serial_filename, filePrefix);
  strcpy(vec_filename, filePrefix);
  sprintf(serial_filename + strlen(serial_filename), "_serial.dat");
  sprintf(vec_filename + strlen(vec_filename), "_%d_vec.dat", num_vec_proc);

  write_header(serial_filename);
  write_header(vec_filename);

  struct part pi_serial, pi_vec;
  struct part pj_serial[count], pj_vec[count];

  float r2[count] __attribute__((aligned(array_align)));
  float dx[3 * count] __attribute__((aligned(array_align)));

  struct part *piq[count], *pjq[count];
  for (size_t k = 0; k < count; k++) {
    piq[k] = NULL;
    pjq[k] = NULL;
  }

  float r2q[count] __attribute__((aligned(array_align)));
  float dxq[count] __attribute__((aligned(array_align)));
  float dyq[count] __attribute__((aligned(array_align)));
  float dzq[count] __attribute__((aligned(array_align)));
James Willis's avatar
James Willis committed
466

James Willis's avatar
James Willis committed
467 468 469 470 471 472 473 474 475
  float hiq[count] __attribute__((aligned(array_align)));
  float vixq[count] __attribute__((aligned(array_align)));
  float viyq[count] __attribute__((aligned(array_align)));
  float vizq[count] __attribute__((aligned(array_align)));
  float rhoiq[count] __attribute__((aligned(array_align)));
  float grad_hiq[count] __attribute__((aligned(array_align)));
  float pOrhoi2q[count] __attribute__((aligned(array_align)));
  float balsaraiq[count] __attribute__((aligned(array_align)));
  float ciq[count] __attribute__((aligned(array_align)));
James Willis's avatar
James Willis committed
476

James Willis's avatar
James Willis committed
477 478 479 480 481 482 483 484 485 486 487 488
  float hj_invq[count] __attribute__((aligned(array_align)));
  float mjq[count] __attribute__((aligned(array_align)));
  float vjxq[count] __attribute__((aligned(array_align)));
  float vjyq[count] __attribute__((aligned(array_align)));
  float vjzq[count] __attribute__((aligned(array_align)));
  float rhojq[count] __attribute__((aligned(array_align)));
  float grad_hjq[count] __attribute__((aligned(array_align)));
  float pOrhoj2q[count] __attribute__((aligned(array_align)));
  float balsarajq[count] __attribute__((aligned(array_align)));
  float cjq[count] __attribute__((aligned(array_align)));

  /* Call serial interaction a set number of times. */
489
  for (int r = 0; r < runs; r++) {
James Willis's avatar
James Willis committed
490 491 492 493 494
    /* Reset particle to initial setup */
    pi_serial = test_part;
    for (size_t i = 0; i < count; i++) pj_serial[i] = parts[i];

    /* Only dump data on first run. */
495
    if (r == 0) {
James Willis's avatar
James Willis committed
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518
      /* Dump state of particles before serial interaction. */
      dump_indv_particle_fields(serial_filename, &pi_serial);
      for (size_t i = 0; i < count; i++)
        dump_indv_particle_fields(serial_filename, &pj_serial[i]);
    }

    /* Perform serial interaction */
    for (size_t i = 0; i < count; i++) {
      /* Compute the pairwise distance. */
      r2[i] = 0.0f;
      for (int k = 0; k < 3; k++) {
        int ind = (3 * i) + k;
        dx[ind] = pi_serial.x[k] - pj_serial[i].x[k];
        r2[i] += dx[ind] * dx[ind];
      }
    }

    const ticks tic = getticks();
/* Perform serial interaction */
#ifdef __ICC
#pragma novector
#endif
    for (size_t i = 0; i < count; i++) {
James Willis's avatar
James Willis committed
519
      runner_iact_nonsym_force(r2[i], &(dx[3 * i]), pi_serial.h, pj_serial[i].h,
520
                               &pi_serial, &pj_serial[i], a, H);
James Willis's avatar
James Willis committed
521 522 523 524 525 526 527 528 529 530 531 532 533 534
    }
    serial_time += getticks() - tic;
  }

  file = fopen(serial_filename, "a");
  fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n");
  fclose(file);

  /* Dump result of serial interaction. */
  dump_indv_particle_fields(serial_filename, &pi_serial);
  for (size_t i = 0; i < count; i++)
    dump_indv_particle_fields(serial_filename, &pj_serial[i]);

  /* Call vector interaction a set number of times. */
535
  for (int r = 0; r < runs; r++) {
James Willis's avatar
James Willis committed
536 537 538 539 540 541 542
    /* Reset particle to initial setup */
    pi_vec = test_part;
    for (size_t i = 0; i < count; i++) pj_vec[i] = parts[i];

    /* Setup arrays for vector interaction. */
    for (size_t i = 0; i < count; i++) {
      /* Compute the pairwise distance. */
543 544
      float my_r2 = 0.0f;
      float my_dx[3];
James Willis's avatar
James Willis committed
545
      for (int k = 0; k < 3; k++) {
546 547
        my_dx[k] = pi_vec.x[k] - pj_vec[i].x[k];
        my_r2 += my_dx[k] * my_dx[k];
James Willis's avatar
James Willis committed
548 549 550 551 552
      }

      piq[i] = &pi_vec;
      pjq[i] = &pj_vec[i];

553 554 555 556
      r2q[i] = my_r2;
      dxq[i] = my_dx[0];
      dyq[i] = my_dx[1];
      dzq[i] = my_dx[2];
James Willis's avatar
James Willis committed
557

James Willis's avatar
James Willis committed
558 559 560 561 562 563
      hiq[i] = pi_vec.h;
      vixq[i] = pi_vec.v[0];
      viyq[i] = pi_vec.v[1];
      vizq[i] = pi_vec.v[2];
      rhoiq[i] = pi_vec.rho;
      grad_hiq[i] = pi_vec.force.f;
Josh Borrow's avatar
Josh Borrow committed
564
#if !defined(HOPKINS_PU_SPH) && !defined(HOPKINS_PU_SPH_MONAGHAN) && \
Josh Borrow's avatar
Josh Borrow committed
565
    !defined(ANARCHY_PU_SPH) && !defined(SPHENIX_SPH)
James Willis's avatar
James Willis committed
566
      pOrhoi2q[i] = pi_vec.force.P_over_rho2;
567
#endif
James Willis's avatar
James Willis committed
568 569
      balsaraiq[i] = pi_vec.force.balsara;
      ciq[i] = pi_vec.force.soundspeed;
James Willis's avatar
James Willis committed
570

James Willis's avatar
James Willis committed
571 572 573 574 575 576 577
      hj_invq[i] = 1.f / pj_vec[i].h;
      mjq[i] = pj_vec[i].mass;
      vjxq[i] = pj_vec[i].v[0];
      vjyq[i] = pj_vec[i].v[1];
      vjzq[i] = pj_vec[i].v[2];
      rhojq[i] = pj_vec[i].rho;
      grad_hjq[i] = pj_vec[i].force.f;
Josh Borrow's avatar
Josh Borrow committed
578
#if !defined(HOPKINS_PU_SPH) && !defined(HOPKINS_PU_SPH_MONAGHAN) && \
Josh Borrow's avatar
Josh Borrow committed
579
    !defined(ANARCHY_PU_SPH) && !defined(SPHENIX_SPH)
James Willis's avatar
James Willis committed
580
      pOrhoj2q[i] = pj_vec[i].force.P_over_rho2;
581
#endif
James Willis's avatar
James Willis committed
582 583 584 585 586
      balsarajq[i] = pj_vec[i].force.balsara;
      cjq[i] = pj_vec[i].force.soundspeed;
    }

    /* Only dump data on first run. */
587
    if (r == 0) {
James Willis's avatar
James Willis committed
588 589 590 591 592 593
      /* Dump state of particles before vector interaction. */
      dump_indv_particle_fields(vec_filename, piq[0]);
      for (size_t i = 0; i < count; i++)
        dump_indv_particle_fields(vec_filename, pjq[i]);
    }

James Willis's avatar
James Willis committed
594 595 596 597 598
    /* Perform vector interaction. */
    vector hi_vec, hi_inv_vec, vix_vec, viy_vec, viz_vec, rhoi_vec, grad_hi_vec,
        pOrhoi2_vec, balsara_i_vec, ci_vec;
    vector a_hydro_xSum, a_hydro_ySum, a_hydro_zSum, h_dtSum, v_sigSum,
        entropy_dtSum;
James Willis's avatar
James Willis committed
599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617

    a_hydro_xSum.v = vec_setzero();
    a_hydro_ySum.v = vec_setzero();
    a_hydro_zSum.v = vec_setzero();
    h_dtSum.v = vec_setzero();
    v_sigSum.v = vec_setzero();
    entropy_dtSum.v = vec_setzero();

    hi_vec.v = vec_load(&hiq[0]);
    vix_vec.v = vec_load(&vixq[0]);
    viy_vec.v = vec_load(&viyq[0]);
    viz_vec.v = vec_load(&vizq[0]);
    rhoi_vec.v = vec_load(&rhoiq[0]);
    grad_hi_vec.v = vec_load(&grad_hiq[0]);
    pOrhoi2_vec.v = vec_load(&pOrhoi2q[0]);
    balsara_i_vec.v = vec_load(&balsaraiq[0]);
    ci_vec.v = vec_load(&ciq[0]);

    hi_inv_vec = vec_reciprocal(hi_vec);
James Willis's avatar
James Willis committed
618

James Willis's avatar
James Willis committed
619 620 621
    mask_t mask, mask2;
    vec_init_mask_true(mask);
    vec_init_mask_true(mask2);
James Willis's avatar
James Willis committed
622

James Willis's avatar
James Willis committed
623 624 625 626 627
    const ticks vec_tic = getticks();

    for (size_t i = 0; i < count; i += num_vec_proc * VEC_SIZE) {

      if (num_vec_proc == 2) {
James Willis's avatar
James Willis committed
628 629 630 631 632
        runner_iact_nonsym_2_vec_force(
            &(r2q[i]), &(dxq[i]), &(dyq[i]), &(dzq[i]), (vix_vec), (viy_vec),
            (viz_vec), rhoi_vec, grad_hi_vec, pOrhoi2_vec, balsara_i_vec,
            ci_vec, &(vjxq[i]), &(vjyq[i]), &(vjzq[i]), &(rhojq[i]),
            &(grad_hjq[i]), &(pOrhoj2q[i]), &(balsarajq[i]), &(cjq[i]),
633 634 635
            &(mjq[i]), hi_inv_vec, &(hj_invq[i]), a, H, &a_hydro_xSum,
            &a_hydro_ySum, &a_hydro_zSum, &h_dtSum, &v_sigSum, &entropy_dtSum,
            mask, mask2, 0);
James Willis's avatar
James Willis committed
636 637
      } else { /* Only use one vector for interaction. */

638 639 640 641 642
        vector my_r2, my_dx, my_dy, my_dz, hj, hj_inv;
        my_r2.v = vec_load(&(r2q[i]));
        my_dx.v = vec_load(&(dxq[i]));
        my_dy.v = vec_load(&(dyq[i]));
        my_dz.v = vec_load(&(dzq[i]));
643 644
        hj.v = vec_load(&hj_invq[i]);
        hj_inv = vec_reciprocal(hj);
James Willis's avatar
James Willis committed
645

James Willis's avatar
James Willis committed
646
        runner_iact_nonsym_1_vec_force(
647
            &my_r2, &my_dx, &my_dy, &my_dz, vix_vec, viy_vec, viz_vec, rhoi_vec,
James Willis's avatar
James Willis committed
648 649
            grad_hi_vec, pOrhoi2_vec, balsara_i_vec, ci_vec, &(vjxq[i]),
            &(vjyq[i]), &(vjzq[i]), &(rhojq[i]), &(grad_hjq[i]), &(pOrhoj2q[i]),
650
            &(balsarajq[i]), &(cjq[i]), &(mjq[i]), hi_inv_vec, hj_inv, a, H,
James Willis's avatar
James Willis committed
651 652
            &a_hydro_xSum, &a_hydro_ySum, &a_hydro_zSum, &h_dtSum, &v_sigSum,
            &entropy_dtSum, mask);
James Willis's avatar
James Willis committed
653 654 655 656 657 658 659 660
      }
    }

    VEC_HADD(a_hydro_xSum, piq[0]->a_hydro[0]);
    VEC_HADD(a_hydro_ySum, piq[0]->a_hydro[1]);
    VEC_HADD(a_hydro_zSum, piq[0]->a_hydro[2]);
    VEC_HADD(h_dtSum, piq[0]->force.h_dt);
    VEC_HMAX(v_sigSum, piq[0]->force.v_sig);
Josh Borrow's avatar
Josh Borrow committed
661
#if !defined(HOPKINS_PU_SPH) && !defined(HOPKINS_PU_SPH_MONAGHAN) && \
Josh Borrow's avatar
Josh Borrow committed
662
    !defined(ANARCHY_PU_SPH) && !defined(SPHENIX_SPH)
James Willis's avatar
James Willis committed
663
    VEC_HADD(entropy_dtSum, piq[0]->entropy_dt);
664
#endif
James Willis's avatar
James Willis committed
665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687

    vec_time += getticks() - vec_tic;
  }

  file = fopen(vec_filename, "a");
  fprintf(file, "\n# PARTICLES AFTER INTERACTION:\n");
  fclose(file);

  /* Dump result of serial interaction. */
  dump_indv_particle_fields(vec_filename, piq[0]);
  for (size_t i = 0; i < count; i++)
    dump_indv_particle_fields(vec_filename, pjq[i]);

  /* Check serial results against the vectorised results. */
  if (check_results(pi_serial, pj_serial, pi_vec, pj_vec, count))
    message("Differences found...");

  message("The serial interactions took     : %15lli ticks.",
          serial_time / runs);
  message("The vectorised interactions took : %15lli ticks.", vec_time / runs);
  message("Speed up: %15fx.", (double)(serial_time) / vec_time);
}

James Willis's avatar
James Willis committed
688 689
/* And go... */
int main(int argc, char *argv[]) {
690 691
  size_t runs = 10000;
  double h = 1.0, spacing = 0.5;
Matthieu Schaller's avatar
Matthieu Schaller committed
692
  double offset[3] = {0.0, 0.0, 0.0};
693
  size_t count = 256;
James Willis's avatar
James Willis committed
694 695 696 697 698

  /* Get some randomness going */
  srand(0);

  char c;
699
  while ((c = getopt(argc, argv, "h:s:n:r:")) != -1) {
James Willis's avatar
James Willis committed
700 701 702 703 704 705
    switch (c) {
      case 'h':
        sscanf(optarg, "%lf", &h);
        break;
      case 's':
        sscanf(optarg, "%lf", &spacing);
706
        break;
707
      case 'n':
708
        sscanf(optarg, "%zu", &count);
709 710 711
        break;
      case 'r':
        sscanf(optarg, "%zu", &runs);
James Willis's avatar
James Willis committed
712 713 714 715 716 717 718 719 720 721 722
        break;
      case '?':
        error("Unknown option.");
        break;
    }
  }

  if (h < 0 || spacing < 0) {
    printf(
        "\nUsage: %s [OPTIONS...]\n"
        "\nGenerates a particle array with equal particle separation."
Matthieu Schaller's avatar
Matthieu Schaller committed
723 724
        "\nThese are then interacted using runner_iact_density and "
        "runner_iact_vec_density."
James Willis's avatar
James Willis committed
725 726
        "\n\nOptions:"
        "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>"
727 728
        "\n-s SPACING=0.5     - Spacing between particles"
        "\n-n NUMBER=9        - No. of particles",
James Willis's avatar
James Willis committed
729 730 731 732
        argv[0]);
    exit(1);
  }

733 734 735 736
  /* Correct count so that VEC_SIZE of particles interact with the test
   * particle. */
  count = count - (count % VEC_SIZE) + 1;

James Willis's avatar
James Willis committed
737 738
  /* Build the infrastructure */
  static long long partId = 0;
739 740 741 742
  struct part test_particle;
  struct part *particles = make_particles(count, offset, spacing, h, &partId);

  test_particle = particles[0];
James Willis's avatar
James Willis committed
743
  /* Call the non-sym density test. */
744 745 746 747 748
  message("Testing %s interaction...", IACT_NAME);
  test_interactions(test_particle, &particles[1], count - 1, IACT_NAME, runs,
                    1);
  test_interactions(test_particle, &particles[1], count - 1, IACT_NAME, runs,
                    2);
James Willis's avatar
James Willis committed
749

James Willis's avatar
James Willis committed
750 751
  prepare_force(particles, count);

James Willis's avatar
James Willis committed
752 753 754 755 756
  test_force_interactions(test_particle, &particles[1], count - 1,
                          "test_nonsym_force", runs, 1);
  test_force_interactions(test_particle, &particles[1], count - 1,
                          "test_nonsym_force", runs, 2);

James Willis's avatar
James Willis committed
757 758
  return 0;
}
759 760 761

#else

762
int main(int argc, char *argv[]) { return 1; }
763 764

#endif