hydro_iact.h 33.1 KB
Newer Older
1
2
/*******************************************************************************
 * This file is part of SWIFT.
3
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
4
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
5
 *
6
7
8
9
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
10
 *
11
12
13
14
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
15
 *
16
17
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
 *
19
 ******************************************************************************/
20
21
#ifndef SWIFT_GADGET2_HYDRO_IACT_H
#define SWIFT_GADGET2_HYDRO_IACT_H
22
23

/**
24
 * @file Gadget2/hydro_iact.h
25
26
 * @brief SPH interaction functions following the Gadget-2 version of SPH.
 *
27
 * The interactions computed here are the ones presented in the Gadget-2 paper
28
29
 * Springel, V., MNRAS, Volume 364, Issue 4, pp. 1105-1134.
 * We use the same numerical coefficients as the Gadget-2 code. When used with
30
31
32
 * the Spline-3 kernel, the results should be equivalent to the ones obtained
 * with Gadget-2 up to the rounding errors and interactions missed by the
 * Gadget-2 tree-code neighbours search.
33
34
 */

35
36
#include "minmax.h"

37
38
39
/**
 * @brief Density loop
 */
40
41
42
__attribute__((always_inline)) INLINE static void runner_iact_density(
    float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {

43
44
  float wi, wi_dx;
  float wj, wj_dx;
45
  float dv[3], curlvr[3];
46

47
  /* Get the masses. */
48
  const float mi = pi->mass;
49
50
51
52
53
54
55
56
57
58
59
60
61
  const float mj = pj->mass;

  /* Get r and r inverse. */
  const float r = sqrtf(r2);
  const float r_inv = 1.0f / r;

  /* Compute the kernel function for pi */
  const float hi_inv = 1.f / hi;
  const float ui = r * hi_inv;
  kernel_deval(ui, &wi, &wi_dx);

  /* Compute contribution to the density */
  pi->rho += mj * wi;
62
  pi->density.rho_dh -= mj * (hydro_dimension * wi + ui * wi_dx);
63

64
65
66
67
68
69
70
71
72
73
74
  /* Compute contribution to the number of neighbours */
  pi->density.wcount += wi;
  pi->density.wcount_dh -= ui * wi_dx;

  /* Compute the kernel function for pj */
  const float hj_inv = 1.f / hj;
  const float uj = r * hj_inv;
  kernel_deval(uj, &wj, &wj_dx);

  /* Compute contribution to the density */
  pj->rho += mi * wj;
75
  pj->density.rho_dh -= mi * (hydro_dimension * wj + uj * wj_dx);
76

77
78
79
  /* Compute contribution to the number of neighbours */
  pj->density.wcount += wj;
  pj->density.wcount_dh -= uj * wj_dx;
80

81
82
  const float faci = mj * wi_dx * r_inv;
  const float facj = mi * wj_dx * r_inv;
83

84
85
86
87
  /* Compute dv dot r */
  dv[0] = pi->v[0] - pj->v[0];
  dv[1] = pi->v[1] - pj->v[1];
  dv[2] = pi->v[2] - pj->v[2];
88
89
  const float dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2];

90
91
  pi->density.div_v -= faci * dvdr;
  pj->density.div_v -= facj * dvdr;
92
93
94
95
96
97

  /* Compute dv cross r */
  curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1];
  curlvr[1] = dv[2] * dx[0] - dv[0] * dx[2];
  curlvr[2] = dv[0] * dx[1] - dv[1] * dx[0];

98
99
100
  pi->density.rot_v[0] += faci * curlvr[0];
  pi->density.rot_v[1] += faci * curlvr[1];
  pi->density.rot_v[2] += faci * curlvr[2];
101

102
103
104
  pj->density.rot_v[0] += facj * curlvr[0];
  pj->density.rot_v[1] += facj * curlvr[1];
  pj->density.rot_v[2] += facj * curlvr[2];
105
106
}

107
108
109
110
111
112
/**
 * @brief Density loop (Vectorized version)
 */
__attribute__((always_inline)) INLINE static void runner_iact_vec_density(
    float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
    struct part **pj) {
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137

#ifdef WITH_VECTORIZATION

  vector r, ri, r2, xi, xj, hi, hj, hi_inv, hj_inv, wi, wj, wi_dx, wj_dx;
  vector rhoi, rhoj, rhoi_dh, rhoj_dh, wcounti, wcountj, wcounti_dh, wcountj_dh;
  vector mi, mj;
  vector dx[3], dv[3];
  vector vi[3], vj[3];
  vector dvdr, div_vi, div_vj;
  vector curlvr[3], curl_vi[3], curl_vj[3];
  int k, j;

#if VEC_SIZE == 8
  /* Get the masses. */
  mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass,
                 pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass);
  mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
                 pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
  /* Get each velocity component. */
  for (k = 0; k < 3; k++) {
    vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
                      pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
    vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
                      pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
  }
Matthieu Schaller's avatar
Matthieu Schaller committed
138
139
  /* Get each component of particle separation.
   * (Dx={dx1,dy1,dz1,dx2,dy2,dz2,...,dxn,dyn,dzn})*/
140
141
142
143
144
145
146
147
148
149
150
151
  for (k = 0; k < 3; k++)
    dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
                      Dx[15 + k], Dx[18 + k], Dx[21 + k]);
#elif VEC_SIZE == 4
  mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass);
  mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
  for (k = 0; k < 3; k++) {
    vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
    vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
  }
  for (k = 0; k < 3; k++)
    dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
152
153
#else
  error("Unknown vector size.")
154
155
156
157
158
#endif

  /* Get the radius and inverse radius. */
  r2.v = vec_load(R2);
  ri.v = vec_rsqrt(r2.v);
Matthieu Schaller's avatar
Matthieu Schaller committed
159
160
  /*vec_rsqrt does not have the level of accuracy we need, so an extra term is
   * added below.*/
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
  ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
  r.v = r2.v * ri.v;

  hi.v = vec_load(Hi);
  hi_inv.v = vec_rcp(hi.v);
  hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f));
  xi.v = r.v * hi_inv.v;

  hj.v = vec_load(Hj);
  hj_inv.v = vec_rcp(hj.v);
  hj_inv.v = hj_inv.v - hj_inv.v * (hj_inv.v * hj.v - vec_set1(1.0f));
  xj.v = r.v * hj_inv.v;

  /* Compute the kernel function. */
  kernel_deval_vec(&xi, &wi, &wi_dx);
  kernel_deval_vec(&xj, &wj, &wj_dx);

  /* Compute dv. */
  dv[0].v = vi[0].v - vj[0].v;
  dv[1].v = vi[1].v - vj[1].v;
  dv[2].v = vi[2].v - vj[2].v;

  /* Compute dv dot r */
  dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v);
  dvdr.v = dvdr.v * ri.v;

  /* Compute dv cross r */
  curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v;
  curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v;
  curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v;
  for (k = 0; k < 3; k++) curlvr[k].v *= ri.v;

  /* Compute density of pi. */
  rhoi.v = mj.v * wi.v;
195
  rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + xi.v * wi_dx.v);
196
197
198
199
200
201
202
  wcounti.v = wi.v;
  wcounti_dh.v = xi.v * wi_dx.v;
  div_vi.v = mj.v * dvdr.v * wi_dx.v;
  for (k = 0; k < 3; k++) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v;

  /* Compute density of pj. */
  rhoj.v = mi.v * wj.v;
203
  rhoj_dh.v = mi.v * (vec_set1(hydro_dimension) * wj.v + xj.v * wj_dx.v);
204
205
206
207
208
209
210
211
  wcountj.v = wj.v;
  wcountj_dh.v = xj.v * wj_dx.v;
  div_vj.v = mi.v * dvdr.v * wj_dx.v;
  for (k = 0; k < 3; k++) curl_vj[k].v = mi.v * curlvr[k].v * wj_dx.v;

  /* Update particles. */
  for (k = 0; k < VEC_SIZE; k++) {
    pi[k]->rho += rhoi.f[k];
212
    pi[k]->density.rho_dh -= rhoi_dh.f[k];
213
214
    pi[k]->density.wcount += wcounti.f[k];
    pi[k]->density.wcount_dh -= wcounti_dh.f[k];
215
    pi[k]->density.div_v -= div_vi.f[k];
216
217
    for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += curl_vi[j].f[k];
    pj[k]->rho += rhoj.f[k];
218
    pj[k]->density.rho_dh -= rhoj_dh.f[k];
219
220
    pj[k]->density.wcount += wcountj.f[k];
    pj[k]->density.wcount_dh -= wcountj_dh.f[k];
221
    pj[k]->density.div_v -= div_vj.f[k];
222
223
224
225
226
    for (j = 0; j < 3; j++) pj[k]->density.rot_v[j] += curl_vj[j].f[k];
  }

#else

Matthieu Schaller's avatar
Matthieu Schaller committed
227
228
229
  error(
      "The Gadget2 serial version of runner_iact_density was called when the "
      "vectorised version should have been used.")
230
231

#endif
232
233
}

234
235
236
/**
 * @brief Density loop (non-symmetric version)
 */
237
238
239
240
241
242
243
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
    float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {

  float wi, wi_dx;
  float dv[3], curlvr[3];

  /* Get the masses. */
244
  const float mj = pj->mass;
245
246

  /* Get r and r inverse. */
247
248
  const float r = sqrtf(r2);
  const float ri = 1.0f / r;
249

250
  /* Compute the kernel function */
251
252
253
  const float hi_inv = 1.0f / hi;
  const float ui = r * hi_inv;
  kernel_deval(ui, &wi, &wi_dx);
254
255
256

  /* Compute contribution to the density */
  pi->rho += mj * wi;
257
  pi->density.rho_dh -= mj * (hydro_dimension * wi + ui * wi_dx);
258
259
260

  /* Compute contribution to the number of neighbours */
  pi->density.wcount += wi;
261
  pi->density.wcount_dh -= ui * wi_dx;
262

263
  const float fac = mj * wi_dx * ri;
264

265
266
267
268
269
  /* Compute dv dot r */
  dv[0] = pi->v[0] - pj->v[0];
  dv[1] = pi->v[1] - pj->v[1];
  dv[2] = pi->v[2] - pj->v[2];
  const float dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2];
270
  pi->density.div_v -= fac * dvdr;
271

272
273
274
275
276
  /* Compute dv cross r */
  curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1];
  curlvr[1] = dv[2] * dx[0] - dv[0] * dx[2];
  curlvr[2] = dv[0] * dx[1] - dv[1] * dx[0];

277
278
279
  pi->density.rot_v[0] += fac * curlvr[0];
  pi->density.rot_v[1] += fac * curlvr[1];
  pi->density.rot_v[2] += fac * curlvr[2];
280
281
}

282
283
284
285
286
287
/**
 * @brief Density loop (non-symmetric vectorized version)
 */
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
                               struct part **pi, struct part **pj) {
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310

#ifdef WITH_VECTORIZATION

  vector r, ri, r2, xi, hi, hi_inv, wi, wi_dx;
  vector rhoi, rhoi_dh, wcounti, wcounti_dh, div_vi;
  vector mj;
  vector dx[3], dv[3];
  vector vi[3], vj[3];
  vector dvdr;
  vector curlvr[3], curl_vi[3];
  int k, j;

#if VEC_SIZE == 8
  /* Get the masses. */
  mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
                 pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
  /* Get each velocity component. */
  for (k = 0; k < 3; k++) {
    vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
                      pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
    vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
                      pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
  }
Matthieu Schaller's avatar
Matthieu Schaller committed
311
312
  /* Get each component of particle separation.
   * (Dx={dx1,dy1,dz1,dx2,dy2,dz2,...,dxn,dyn,dzn})*/
313
314
315
316
317
318
319
320
321
322
323
  for (k = 0; k < 3; k++)
    dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
                      Dx[15 + k], Dx[18 + k], Dx[21 + k]);
#elif VEC_SIZE == 4
  mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
  for (k = 0; k < 3; k++) {
    vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
    vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
  }
  for (k = 0; k < 3; k++)
    dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
324
325
#else
  error("Unknown vector size.")
326
327
328
329
330
#endif

  /* Get the radius and inverse radius. */
  r2.v = vec_load(R2);
  ri.v = vec_rsqrt(r2.v);
Matthieu Schaller's avatar
Matthieu Schaller committed
331
332
  /*vec_rsqrt does not have the level of accuracy we need, so an extra term is
   * added below.*/
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
  ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
  r.v = r2.v * ri.v;

  hi.v = vec_load(Hi);
  hi_inv.v = vec_rcp(hi.v);
  hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f));
  xi.v = r.v * hi_inv.v;

  kernel_deval_vec(&xi, &wi, &wi_dx);

  /* Compute dv. */
  dv[0].v = vi[0].v - vj[0].v;
  dv[1].v = vi[1].v - vj[1].v;
  dv[2].v = vi[2].v - vj[2].v;

  /* Compute dv dot r */
  dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v);
  dvdr.v = dvdr.v * ri.v;

  /* Compute dv cross r */
  curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v;
  curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v;
  curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v;
  for (k = 0; k < 3; k++) curlvr[k].v *= ri.v;

  /* Compute density of pi. */
  rhoi.v = mj.v * wi.v;
360
  rhoi_dh.v = mj.v * (vec_set1(hydro_dimension) * wi.v + xi.v * wi_dx.v);
361
362
363
364
365
366
367
368
  wcounti.v = wi.v;
  wcounti_dh.v = xi.v * wi_dx.v;
  div_vi.v = mj.v * dvdr.v * wi_dx.v;
  for (k = 0; k < 3; k++) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v;

  /* Update particles. */
  for (k = 0; k < VEC_SIZE; k++) {
    pi[k]->rho += rhoi.f[k];
369
    pi[k]->density.rho_dh -= rhoi_dh.f[k];
370
371
    pi[k]->density.wcount += wcounti.f[k];
    pi[k]->density.wcount_dh -= wcounti_dh.f[k];
372
    pi[k]->density.div_v -= div_vi.f[k];
373
374
375
376
377
    for (j = 0; j < 3; j++) pi[k]->density.rot_v[j] += curl_vi[j].f[k];
  }

#else

Matthieu Schaller's avatar
Matthieu Schaller committed
378
379
380
  error(
      "The Gadget2 serial version of runner_iact_nonsym_density was called "
      "when the vectorised version should have been used.")
381
382

#endif
383
384
}

385
386
387
/**
 * @brief Force loop
 */
388
389
390
__attribute__((always_inline)) INLINE static void runner_iact_force(
    float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {

391
392
393
  float wi, wj, wi_dx, wj_dx;

  const float fac_mu = 1.f; /* Will change with cosmological integration */
394

395
396
397
398
  const float r = sqrtf(r2);
  const float r_inv = 1.0f / r;

  /* Get some values in local variables. */
399
  const float mi = pi->mass;
400
401
402
403
404
405
  const float mj = pj->mass;
  const float rhoi = pi->rho;
  const float rhoj = pj->rho;

  /* Get the kernel for hi. */
  const float hi_inv = 1.0f / hi;
406
  const float hid_inv = pow_dimension_plus_one(hi_inv); /* 1/h^(d+1) */
407
408
  const float ui = r * hi_inv;
  kernel_deval(ui, &wi, &wi_dx);
409
  const float wi_dr = hid_inv * wi_dx;
410
411
412

  /* Get the kernel for hj. */
  const float hj_inv = 1.0f / hj;
413
  const float hjd_inv = pow_dimension_plus_one(hj_inv); /* 1/h^(d+1) */
414
415
  const float xj = r * hj_inv;
  kernel_deval(xj, &wj, &wj_dx);
416
  const float wj_dr = hjd_inv * wj_dx;
417

418
419
420
421
422
  /* Compute h-gradient terms */
  const float f_i = pi->force.f;
  const float f_j = pj->force.f;

  /* Compute pressure terms */
423
424
  const float P_over_rho2_i = pi->force.P_over_rho2;
  const float P_over_rho2_j = pj->force.P_over_rho2;
425
426

  /* Compute sound speeds */
427
428
  const float ci = pi->force.soundspeed;
  const float cj = pj->force.soundspeed;
429

430
  /* Compute dv dot r. */
431
432
433
  const float dvdr = (pi->v[0] - pj->v[0]) * dx[0] +
                     (pi->v[1] - pj->v[1]) * dx[1] +
                     (pi->v[2] - pj->v[2]) * dx[2];
434

435
  /* Balsara term */
436
437
  const float balsara_i = pi->force.balsara;
  const float balsara_j = pj->force.balsara;
Matthieu Schaller's avatar
Matthieu Schaller committed
438

439
  /* Are the particles moving towards each others ? */
440
  const float omega_ij = (dvdr < 0.f) ? dvdr : 0.f;
441
442
443
444
445
446
447
448
449
  const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */

  /* Signal velocity */
  const float v_sig = ci + cj - 3.f * mu_ij;

  /* Now construct the full viscosity term */
  const float rho_ij = 0.5f * (rhoi + rhoj);
  const float visc = -0.25f * const_viscosity_alpha * v_sig * mu_ij *
                     (balsara_i + balsara_j) / rho_ij;
450
451

  /* Now, convolve with the kernel */
452
  const float visc_term = 0.5f * visc * (wi_dr + wj_dr) * r_inv;
453
  const float sph_term =
454
      (f_i * P_over_rho2_i * wi_dr + f_j * P_over_rho2_j * wj_dr) * r_inv;
455
456
457
458
459

  /* Eventually got the acceleration */
  const float acc = visc_term + sph_term;

  /* Use the force Luke ! */
460
461
462
  pi->a_hydro[0] -= mj * acc * dx[0];
  pi->a_hydro[1] -= mj * acc * dx[1];
  pi->a_hydro[2] -= mj * acc * dx[2];
463

464
465
466
  pj->a_hydro[0] += mi * acc * dx[0];
  pj->a_hydro[1] += mi * acc * dx[1];
  pj->a_hydro[2] += mi * acc * dx[2];
467

468
  /* Get the time derivative for h. */
469
470
  pi->force.h_dt -= mj * dvdr * r_inv / rhoj * wi_dr;
  pj->force.h_dt -= mi * dvdr * r_inv / rhoi * wj_dr;
471

472
  /* Update the signal velocity. */
473
474
  pi->force.v_sig = (pi->force.v_sig > v_sig) ? pi->force.v_sig : v_sig;
  pj->force.v_sig = (pj->force.v_sig > v_sig) ? pj->force.v_sig : v_sig;
475

476
  /* Change in entropy */
477
478
  pi->entropy_dt += mj * visc_term * dvdr;
  pj->entropy_dt += mi * visc_term * dvdr;
479
}
480

481
482
483
484
485
486
/**
 * @brief Force loop (Vectorized version)
 */
__attribute__((always_inline)) INLINE static void runner_iact_vec_force(
    float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
    struct part **pj) {
487
488
489
490
491
492

#ifdef WITH_VECTORIZATION

  vector r, r2, ri;
  vector xi, xj;
  vector hi, hj, hi_inv, hj_inv;
493
  vector hid_inv, hjd_inv;
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
  vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
  vector piPOrho, pjPOrho, pirho, pjrho;
  vector mi, mj;
  vector f;
  vector dx[3];
  vector vi[3], vj[3];
  vector pia[3], pja[3];
  vector pih_dt, pjh_dt;
  vector ci, cj, v_sig;
  vector omega_ij, mu_ij, fac_mu, balsara;
  vector rho_ij, visc, visc_term, sph_term, acc, entropy_dt;
  int j, k;

  fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */

Matthieu Schaller's avatar
Matthieu Schaller committed
509
/* Load stuff. */
510
511
512
513
514
#if VEC_SIZE == 8
  mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass,
                 pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass);
  mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
                 pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
Matthieu Schaller's avatar
Matthieu Schaller committed
515
516
517
518
519
520
521
522
  piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
                      pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
                      pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
                      pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
  pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
                      pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
                      pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
                      pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
523
524
525
526
  pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho,
                    pi[5]->rho, pi[6]->rho, pi[7]->rho);
  pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho,
                    pj[5]->rho, pj[6]->rho, pj[7]->rho);
Matthieu Schaller's avatar
Matthieu Schaller committed
527
528
529
530
531
532
533
534
  ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
                 pi[2]->force.soundspeed, pi[3]->force.soundspeed,
                 pi[4]->force.soundspeed, pi[5]->force.soundspeed,
                 pi[6]->force.soundspeed, pi[7]->force.soundspeed);
  cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
                 pj[2]->force.soundspeed, pj[3]->force.soundspeed,
                 pj[4]->force.soundspeed, pj[5]->force.soundspeed,
                 pj[6]->force.soundspeed, pj[7]->force.soundspeed);
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
  for (k = 0; k < 3; k++) {
    vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
                      pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
    vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
                      pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
  }
  for (k = 0; k < 3; k++)
    dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
                      Dx[15 + k], Dx[18 + k], Dx[21 + k]);
  balsara.v =
      vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara,
              pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara,
              pi[6]->force.balsara, pi[7]->force.balsara) +
      vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara,
              pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara,
              pj[6]->force.balsara, pj[7]->force.balsara);
#elif VEC_SIZE == 4
552
  mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass);
553
  mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
554
  piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
Matthieu Schaller's avatar
Matthieu Schaller committed
555
                      pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2);
556
  pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
Matthieu Schaller's avatar
Matthieu Schaller committed
557
                      pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2);
558
559
  pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho);
  pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho);
Matthieu Schaller's avatar
Matthieu Schaller committed
560
561
562
563
  ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
                 pi[2]->force.soundspeed, pi[3]->force.soundspeed);
  cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
                 pj[2]->force.soundspeed, pj[3]->force.soundspeed);
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
  for (k = 0; k < 3; k++) {
    vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
    vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
  }
  for (k = 0; k < 3; k++)
    dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
  balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara,
                      pi[2]->force.balsara, pi[3]->force.balsara) +
              vec_set(pj[0]->force.balsara, pj[1]->force.balsara,
                      pj[2]->force.balsara, pj[3]->force.balsara);
#else
  error("Unknown vector size.")
#endif

  /* Get the radius and inverse radius. */
  r2.v = vec_load(R2);
  ri.v = vec_rsqrt(r2.v);
  ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
  r.v = r2.v * ri.v;

  /* Get the kernel for hi. */
  hi.v = vec_load(Hi);
  hi_inv.v = vec_rcp(hi.v);
  hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f));
588
  hid_inv = pow_dimension_plus_one_vec(hi_inv); /* 1/h^(d+1) */
589
590
  xi.v = r.v * hi_inv.v;
  kernel_deval_vec(&xi, &wi, &wi_dx);
591
  wi_dr.v = hid_inv.v * wi_dx.v;
592
593
594
595
596

  /* Get the kernel for hj. */
  hj.v = vec_load(Hj);
  hj_inv.v = vec_rcp(hj.v);
  hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f));
597
  hjd_inv = pow_dimension_plus_one_vec(hj_inv); /* 1/h^(d+1) */
598
599
  xj.v = r.v * hj_inv.v;
  kernel_deval_vec(&xj, &wj, &wj_dx);
600
  wj_dr.v = hjd_inv.v * wj_dx.v;
601
602
603
604

  /* Compute dv dot r. */
  dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) +
           ((vi[2].v - vj[2].v) * dx[2].v);
Matthieu Schaller's avatar
Matthieu Schaller committed
605
  // dvdr.v = dvdr.v * ri.v;
606
607
608
609
610

  /* Compute the relative velocity. (This is 0 if the particles move away from
   * each other and negative otherwise) */
  omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f));
  mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */
Matthieu Schaller's avatar
Matthieu Schaller committed
611

612
613
  /* Compute signal velocity */
  v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v;
Matthieu Schaller's avatar
Matthieu Schaller committed
614

615
616
  /* Now construct the full viscosity term */
  rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v);
Matthieu Schaller's avatar
Matthieu Schaller committed
617
618
  visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v *
           mu_ij.v * balsara.v / rho_ij.v;
619
620
621
622
623
624
625

  /* Now, convolve with the kernel */
  visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v;
  sph_term.v = (piPOrho.v * wi_dr.v + pjPOrho.v * wj_dr.v) * ri.v;

  /* Eventually get the acceleration */
  acc.v = visc_term.v + sph_term.v;
Matthieu Schaller's avatar
Matthieu Schaller committed
626

627
628
629
630
631
632
633
634
635
636
637
638
  /* Use the force, Luke! */
  for (k = 0; k < 3; k++) {
    f.v = dx[k].v * acc.v;
    pia[k].v = mj.v * f.v;
    pja[k].v = mi.v * f.v;
  }

  /* Get the time derivative for h. */
  pih_dt.v = mj.v * dvdr.v * ri.v / pjrho.v * wi_dr.v;
  pjh_dt.v = mi.v * dvdr.v * ri.v / pirho.v * wj_dr.v;

  /* Change in entropy */
639
  entropy_dt.v = visc_term.v * dvdr.v;
Matthieu Schaller's avatar
Matthieu Schaller committed
640

641
642
643
644
645
646
  /* Store the forces back on the particles. */
  for (k = 0; k < VEC_SIZE; k++) {
    for (j = 0; j < 3; j++) {
      pi[k]->a_hydro[j] -= pia[j].f[k];
      pj[k]->a_hydro[j] += pja[j].f[k];
    }
647
648
    pi[k]->force.h_dt -= pih_dt.f[k];
    pj[k]->force.h_dt -= pjh_dt.f[k];
649
650
    pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
    pj[k]->force.v_sig = max(pj[k]->force.v_sig, v_sig.f[k]);
651
    pi[k]->entropy_dt += entropy_dt.f[k] * mj.f[k];
652
    pj[k]->entropy_dt += entropy_dt.f[k] * mi.f[k];
653
654
  }

Matthieu Schaller's avatar
Matthieu Schaller committed
655
#else
656

Matthieu Schaller's avatar
Matthieu Schaller committed
657
658
659
  error(
      "The Gadget2 serial version of runner_iact_nonsym_force was called when "
      "the vectorised version should have been used.")
660
661

#endif
662
663
}

664
665
666
/**
 * @brief Force loop (non-symmetric version)
 */
667
668
669
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
    float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {

670
671
672
  float wi, wj, wi_dx, wj_dx;

  const float fac_mu = 1.f; /* Will change with cosmological integration */
673

674
675
676
677
  const float r = sqrtf(r2);
  const float r_inv = 1.0f / r;

  /* Get some values in local variables. */
678
  // const float mi = pi->mass;
679
680
681
682
683
684
  const float mj = pj->mass;
  const float rhoi = pi->rho;
  const float rhoj = pj->rho;

  /* Get the kernel for hi. */
  const float hi_inv = 1.0f / hi;
685
  const float hid_inv = pow_dimension_plus_one(hi_inv); /* 1/h^(d+1) */
686
687
  const float ui = r * hi_inv;
  kernel_deval(ui, &wi, &wi_dx);
688
  const float wi_dr = hid_inv * wi_dx;
689
690
691

  /* Get the kernel for hj. */
  const float hj_inv = 1.0f / hj;
692
  const float hjd_inv = pow_dimension_plus_one(hj_inv); /* 1/h^(d+1) */
693
694
  const float xj = r * hj_inv;
  kernel_deval(xj, &wj, &wj_dx);
695
  const float wj_dr = hjd_inv * wj_dx;
696

697
698
699
700
701
  /* Compute h-gradient terms */
  const float f_i = pi->force.f;
  const float f_j = pj->force.f;

  /* Compute pressure terms */
702
703
  const float P_over_rho2_i = pi->force.P_over_rho2;
  const float P_over_rho2_j = pj->force.P_over_rho2;
704
705

  /* Compute sound speeds */
706
707
  const float ci = pi->force.soundspeed;
  const float cj = pj->force.soundspeed;
708

709
  /* Compute dv dot r. */
710
711
712
  const float dvdr = (pi->v[0] - pj->v[0]) * dx[0] +
                     (pi->v[1] - pj->v[1]) * dx[1] +
                     (pi->v[2] - pj->v[2]) * dx[2];
713

714
  /* Balsara term */
715
716
  const float balsara_i = pi->force.balsara;
  const float balsara_j = pj->force.balsara;
717
718

  /* Are the particles moving towards each others ? */
719
  const float omega_ij = (dvdr < 0.f) ? dvdr : 0.f;
720
721
722
723
724
725
726
727
728
  const float mu_ij = fac_mu * r_inv * omega_ij; /* This is 0 or negative */

  /* Signal velocity */
  const float v_sig = ci + cj - 3.f * mu_ij;

  /* Now construct the full viscosity term */
  const float rho_ij = 0.5f * (rhoi + rhoj);
  const float visc = -0.25f * const_viscosity_alpha * v_sig * mu_ij *
                     (balsara_i + balsara_j) / rho_ij;
729
730

  /* Now, convolve with the kernel */
731
  const float visc_term = 0.5f * visc * (wi_dr + wj_dr) * r_inv;
732
  const float sph_term =
733
      (f_i * P_over_rho2_i * wi_dr + f_j * P_over_rho2_j * wj_dr) * r_inv;
734
735
736

  /* Eventually got the acceleration */
  const float acc = visc_term + sph_term;
737

738
  /* Use the force Luke ! */
739
740
741
  pi->a_hydro[0] -= mj * acc * dx[0];
  pi->a_hydro[1] -= mj * acc * dx[1];
  pi->a_hydro[2] -= mj * acc * dx[2];
742

743
  /* Get the time derivative for h. */
744
  pi->force.h_dt -= mj * dvdr * r_inv / rhoj * wi_dr;
745

746
  /* Update the signal velocity. */
747
  pi->force.v_sig = (pi->force.v_sig > v_sig) ? pi->force.v_sig : v_sig;
748

749
  /* Change in entropy */
750
  pi->entropy_dt += mj * visc_term * dvdr;
751
}
752

753
754
755
756
757
758
/**
 * @brief Force loop (Vectorized non-symmetric version)
 */
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
    float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
    struct part **pj) {
759

Matthieu Schaller's avatar
Matthieu Schaller committed
760
#ifdef WITH_VECTORIZATION
761
762
763
764

  vector r, r2, ri;
  vector xi, xj;
  vector hi, hj, hi_inv, hj_inv;
765
  vector hid_inv, hjd_inv;
766
767
768
769
770
771
772
773
  vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr;
  vector piPOrho, pjPOrho, pirho, pjrho;
  vector mj;
  vector f;
  vector dx[3];
  vector vi[3], vj[3];
  vector pia[3];
  vector pih_dt;
774
775
  vector ci, cj, v_sig;
  vector omega_ij, mu_ij, fac_mu, balsara;
776
777
778
779
780
  vector rho_ij, visc, visc_term, sph_term, acc, entropy_dt;
  int j, k;

  fac_mu.v = vec_set1(1.f); /* Will change with cosmological integration */

Matthieu Schaller's avatar
Matthieu Schaller committed
781
/* Load stuff. */
782
783
784
#if VEC_SIZE == 8
  mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
                 pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
Matthieu Schaller's avatar
Matthieu Schaller committed
785
786
787
788
789
790
791
792
  piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
                      pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2,
                      pi[4]->force.P_over_rho2, pi[5]->force.P_over_rho2,
                      pi[6]->force.P_over_rho2, pi[7]->force.P_over_rho2);
  pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
                      pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2,
                      pj[4]->force.P_over_rho2, pj[5]->force.P_over_rho2,
                      pj[6]->force.P_over_rho2, pj[7]->force.P_over_rho2);
793
794
795
796
  pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho,
                    pi[5]->rho, pi[6]->rho, pi[7]->rho);
  pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho,
                    pj[5]->rho, pj[6]->rho, pj[7]->rho);
Matthieu Schaller's avatar
Matthieu Schaller committed
797
798
799
800
801
802
803
804
  ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
                 pi[2]->force.soundspeed, pi[3]->force.soundspeed,
                 pi[4]->force.soundspeed, pi[5]->force.soundspeed,
                 pi[6]->force.soundspeed, pi[7]->force.soundspeed);
  cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
                 pj[2]->force.soundspeed, pj[3]->force.soundspeed,
                 pj[4]->force.soundspeed, pj[5]->force.soundspeed,
                 pj[6]->force.soundspeed, pj[7]->force.soundspeed);
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
  for (k = 0; k < 3; k++) {
    vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k],
                      pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]);
    vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k],
                      pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]);
  }
  for (k = 0; k < 3; k++)
    dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
                      Dx[15 + k], Dx[18 + k], Dx[21 + k]);
  balsara.v =
      vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara,
              pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara,
              pi[6]->force.balsara, pi[7]->force.balsara) +
      vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara,
              pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara,
              pj[6]->force.balsara, pj[7]->force.balsara);
#elif VEC_SIZE == 4
  mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
823
  piPOrho.v = vec_set(pi[0]->force.P_over_rho2, pi[1]->force.P_over_rho2,
Matthieu Schaller's avatar
Matthieu Schaller committed
824
                      pi[2]->force.P_over_rho2, pi[3]->force.P_over_rho2);
825
  pjPOrho.v = vec_set(pj[0]->force.P_over_rho2, pj[1]->force.P_over_rho2,
Matthieu Schaller's avatar
Matthieu Schaller committed
826
                      pj[2]->force.P_over_rho2, pj[3]->force.P_over_rho2);
827
828
  pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho);
  pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho);
Matthieu Schaller's avatar
Matthieu Schaller committed
829
830
831
832
  ci.v = vec_set(pi[0]->force.soundspeed, pi[1]->force.soundspeed,
                 pi[2]->force.soundspeed, pi[3]->force.soundspeed);
  cj.v = vec_set(pj[0]->force.soundspeed, pj[1]->force.soundspeed,
                 pj[2]->force.soundspeed, pj[3]->force.soundspeed);
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
  for (k = 0; k < 3; k++) {
    vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]);
    vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]);
  }
  for (k = 0; k < 3; k++)
    dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
  balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara,
                      pi[2]->force.balsara, pi[3]->force.balsara) +
              vec_set(pj[0]->force.balsara, pj[1]->force.balsara,
                      pj[2]->force.balsara, pj[3]->force.balsara);
#else
  error("Unknown vector size.")
#endif

  /* Get the radius and inverse radius. */
  r2.v = vec_load(R2);
  ri.v = vec_rsqrt(r2.v);
  ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f));
  r.v = r2.v * ri.v;

  /* Get the kernel for hi. */
  hi.v = vec_load(Hi);
  hi_inv.v = vec_rcp(hi.v);
  hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f));
857
  hid_inv = pow_dimension_plus_one_vec(hi_inv);
858
859
  xi.v = r.v * hi_inv.v;
  kernel_deval_vec(&xi, &wi, &wi_dx);
860
  wi_dr.v = hid_inv.v * wi_dx.v;
861
862
863
864
865

  /* Get the kernel for hj. */
  hj.v = vec_load(Hj);
  hj_inv.v = vec_rcp(hj.v);
  hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f));
866
  hjd_inv = pow_dimension_plus_one_vec(hj_inv);
867
868
  xj.v = r.v * hj_inv.v;
  kernel_deval_vec(&xj, &wj, &wj_dx);
869
  wj_dr.v = hjd_inv.v * wj_dx.v;
870
871
872
873

  /* Compute dv dot r. */
  dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) +
           ((vi[2].v - vj[2].v) * dx[2].v);
Matthieu Schaller's avatar
Matthieu Schaller committed
874
  // dvdr.v = dvdr.v * ri.v;
875
876
877
878
879

  /* Compute the relative velocity. (This is 0 if the particles move away from
   * each other and negative otherwise) */
  omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f));
  mu_ij.v = fac_mu.v * ri.v * omega_ij.v; /* This is 0 or negative */
Matthieu Schaller's avatar
Matthieu Schaller committed
880

881
882
  /* Compute signal velocity */
  v_sig.v = ci.v + cj.v - vec_set1(3.0f) * mu_ij.v;
Matthieu Schaller's avatar
Matthieu Schaller committed
883

884
885
  /* Now construct the full viscosity term */
  rho_ij.v = vec_set1(0.5f) * (pirho.v + pjrho.v);
Matthieu Schaller's avatar
Matthieu Schaller committed
886
887
  visc.v = vec_set1(-0.25f) * vec_set1(const_viscosity_alpha) * v_sig.v *
           mu_ij.v * balsara.v / rho_ij.v;
888
889
890
891
892
893
894

  /* Now, convolve with the kernel */
  visc_term.v = vec_set1(0.5f) * visc.v * (wi_dr.v + wj_dr.v) * ri.v;
  sph_term.v = (piPOrho.v * wi_dr.v + pjPOrho.v * wj_dr.v) * ri.v;

  /* Eventually get the acceleration */
  acc.v = visc_term.v + sph_term.v;
Matthieu Schaller's avatar
Matthieu Schaller committed
895

896
897
898
899
900
901
902
  /* Use the force, Luke! */
  for (k = 0; k < 3; k++) {
    f.v = dx[k].v * acc.v;
    pia[k].v = mj.v * f.v;
  }

  /* Get the time derivative for h. */
903
  pih_dt.v = mj.v * dvdr.v * ri.v / pjrho.v * wi_dr.v;
904
905

  /* Change in entropy */
906
  entropy_dt.v = mj.v * visc_term.v * dvdr.v;
Matthieu Schaller's avatar
Matthieu Schaller committed
907

908
909
910
  /* Store the forces back on the particles. */
  for (k = 0; k < VEC_SIZE; k++) {
    for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k];
911
    pi[k]->force.h_dt -= pih_dt.f[k];
912
    pi[k]->force.v_sig = max(pi[k]->force.v_sig, v_sig.f[k]);
913
    pi[k]->entropy_dt += entropy_dt.f[k];
914
915
  }

Matthieu Schaller's avatar
Matthieu Schaller committed
916
#else
917

Matthieu Schaller's avatar
Matthieu Schaller committed
918
919
920
  error(
      "The Gadget2 serial version of runner_iact_nonsym_force was called when "
      "the vectorised version should have been used.")
921
922

#endif
923
924
}

925
#endif /* SWIFT_GADGET2_HYDRO_IACT_H */