/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2016 James Willis (jame.s.willis@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
#ifndef SWIFT_CACHE_H
#define SWIFT_CACHE_H

/* Config parameters. */
#include "../config.h"

/* Local headers */
#include "align.h"
#include "cell.h"
#include "error.h"
#include "part.h"
#include "sort_part.h"
#include "vector.h"

/* Number of vectors processed together in the vectorised interaction loops. */
#define NUM_VEC_PROC 2

/* Number of float slots in each array of struct c2_cache. The whole
 * replacement list is parenthesised so the macro expands as a single value
 * inside larger expressions (e.g. 2 * C2_CACHE_SIZE). */
#define C2_CACHE_SIZE \
  ((NUM_VEC_PROC * VEC_SIZE * 6) + (NUM_VEC_PROC * VEC_SIZE))

#ifdef WITH_VECTORIZATION
/* Cache struct to hold a local copy of a cells' particle
38
 * properties required for density/force calculations.*/
James Willis's avatar
James Willis committed
39
struct cache {
40
41

  /* Particle x position. */
42
  float *restrict x SWIFT_CACHE_ALIGN;
James Willis's avatar
James Willis committed
43

44
  /* Particle y position. */
45
  float *restrict y SWIFT_CACHE_ALIGN;
46
47

  /* Particle z position. */
48
  float *restrict z SWIFT_CACHE_ALIGN;
49
50

  /* Particle smoothing length. */
51
  float *restrict h SWIFT_CACHE_ALIGN;
52
53

  /* Particle mass. */
54
  float *restrict m SWIFT_CACHE_ALIGN;
55
56

  /* Particle x velocity. */
57
  float *restrict vx SWIFT_CACHE_ALIGN;
58
59

  /* Particle y velocity. */
60
  float *restrict vy SWIFT_CACHE_ALIGN;
61
62

  /* Particle z velocity. */
63
  float *restrict vz SWIFT_CACHE_ALIGN;
James Willis's avatar
James Willis committed
64

65
  /* Maximum index into neighbouring cell for particles that are in range. */
66
  int *restrict max_index SWIFT_CACHE_ALIGN;
67

James Willis's avatar
James Willis committed
68
  /* Particle density. */
69
  float *restrict rho SWIFT_CACHE_ALIGN;
James Willis's avatar
James Willis committed
70

James Willis's avatar
James Willis committed
71
  /* Particle smoothing length gradient. */
72
  float *restrict grad_h SWIFT_CACHE_ALIGN;
James Willis's avatar
James Willis committed
73

James Willis's avatar
James Willis committed
74
  /* Pressure over density squared. */
75
  float *restrict pOrho2 SWIFT_CACHE_ALIGN;
James Willis's avatar
James Willis committed
76

James Willis's avatar
James Willis committed
77
  /* Balsara switch. */
78
  float *restrict balsara SWIFT_CACHE_ALIGN;
James Willis's avatar
James Willis committed
79

James Willis's avatar
James Willis committed
80
  /* Particle sound speed. */
81
  float *restrict soundspeed SWIFT_CACHE_ALIGN;
82
83
84
85
86

  /* Cache size. */
  int count;
};

James Willis's avatar
James Willis committed
87
88
/* Secondary cache struct to hold a list of interactions between two
 * particles.*/
89
90
91
struct c2_cache {

  /* Separation between two particles squared. */
92
  float r2q[C2_CACHE_SIZE] SWIFT_CACHE_ALIGN;
93
94

  /* x separation between two particles. */
95
  float dxq[C2_CACHE_SIZE] SWIFT_CACHE_ALIGN;
96
97

  /* y separation between two particles. */
98
  float dyq[C2_CACHE_SIZE] SWIFT_CACHE_ALIGN;
99
100

  /* z separation between two particles. */
101
  float dzq[C2_CACHE_SIZE] SWIFT_CACHE_ALIGN;
102
103

  /* Mass of particle pj. */
104
  float mq[C2_CACHE_SIZE] SWIFT_CACHE_ALIGN;
105
106

  /* x velocity of particle pj. */
107
  float vxq[C2_CACHE_SIZE] SWIFT_CACHE_ALIGN;
James Willis's avatar
James Willis committed
108

109
  /* y velocity of particle pj. */
110
  float vyq[C2_CACHE_SIZE] SWIFT_CACHE_ALIGN;
111
112

  /* z velocity of particle pj. */
113
  float vzq[C2_CACHE_SIZE] SWIFT_CACHE_ALIGN;
114
115
};

116
117
118
119
120
121
/**
 * @brief Allocate memory and initialise cache.
 *
 * @param c The cache.
 * @param count Number of particles to allocate space for.
 */
James Willis's avatar
James Willis committed
122
123
__attribute__((always_inline)) INLINE void cache_init(struct cache *c,
                                                      size_t count) {
124

James Willis's avatar
James Willis committed
125
126
  /* Align cache on correct byte boundary and pad cache size to be a multiple of
   * the vector size
127
   * and include 2 vector lengths for remainder operations. */
128
  size_t pad = 2 * VEC_SIZE, rem = count % VEC_SIZE;
129
  if (rem > 0) pad += VEC_SIZE - rem;
130
131
  size_t sizeBytes = (count + pad) * sizeof(float);
  size_t sizeIntBytes = (count + pad) * sizeof(int);
132
133
  int error = 0;

134
135
136
137
138
139
140
141
142
143
  /* Free memory if cache has already been allocated. */
  if (c->count > 0) {
    free(c->x);
    free(c->y);
    free(c->z);
    free(c->m);
    free(c->vx);
    free(c->vy);
    free(c->vz);
    free(c->h);
144
    free(c->max_index);
145
146
147
148
149
    free(c->rho);
    free(c->grad_h);
    free(c->pOrho2);
    free(c->balsara);
    free(c->soundspeed);
150
151
  }

152
153
154
155
156
157
158
159
  error += posix_memalign((void **)&c->x, SWIFT_CACHE_ALIGNMENT, sizeBytes);
  error += posix_memalign((void **)&c->y, SWIFT_CACHE_ALIGNMENT, sizeBytes);
  error += posix_memalign((void **)&c->z, SWIFT_CACHE_ALIGNMENT, sizeBytes);
  error += posix_memalign((void **)&c->m, SWIFT_CACHE_ALIGNMENT, sizeBytes);
  error += posix_memalign((void **)&c->vx, SWIFT_CACHE_ALIGNMENT, sizeBytes);
  error += posix_memalign((void **)&c->vy, SWIFT_CACHE_ALIGNMENT, sizeBytes);
  error += posix_memalign((void **)&c->vz, SWIFT_CACHE_ALIGNMENT, sizeBytes);
  error += posix_memalign((void **)&c->h, SWIFT_CACHE_ALIGNMENT, sizeBytes);
James Willis's avatar
James Willis committed
160
161
  error += posix_memalign((void **)&c->max_index, SWIFT_CACHE_ALIGNMENT,
                          sizeIntBytes);
162
  error += posix_memalign((void **)&c->rho, SWIFT_CACHE_ALIGNMENT, sizeBytes);
James Willis's avatar
James Willis committed
163
164
165
166
167
168
169
170
  error +=
      posix_memalign((void **)&c->grad_h, SWIFT_CACHE_ALIGNMENT, sizeBytes);
  error +=
      posix_memalign((void **)&c->pOrho2, SWIFT_CACHE_ALIGNMENT, sizeBytes);
  error +=
      posix_memalign((void **)&c->balsara, SWIFT_CACHE_ALIGNMENT, sizeBytes);
  error +=
      posix_memalign((void **)&c->soundspeed, SWIFT_CACHE_ALIGNMENT, sizeBytes);
James Willis's avatar
James Willis committed
171

James Willis's avatar
James Willis committed
172
173
  if (error != 0)
    error("Couldn't allocate cache, no. of particles: %d", (int)count);
174
175
176
177
178
179
180
181
182
  c->count = count;
}

/**
 * @brief Populate cache by reading in the particles in unsorted order.
 *
 * @param ci The #cell.
 * @param ci_cache The cache.
 */
James Willis's avatar
James Willis committed
183
__attribute__((always_inline)) INLINE void cache_read_particles(
James Willis's avatar
James Willis committed
184
185
    const struct cell *restrict const ci,
    struct cache *restrict const ci_cache) {
186

187
188
#if defined(GADGET2_SPH)

James Willis's avatar
James Willis committed
189
190
  /* Let the compiler know that the data is aligned and create pointers to the
   * arrays inside the cache. */
James Willis's avatar
James Willis committed
191
192
193
194
195
196
197
198
  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, h, ci_cache->h, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, m, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vx, ci_cache->vx, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vy, ci_cache->vy, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
James Willis's avatar
James Willis committed
199
200
201
202
203
204
205
206
207

  const struct part *restrict parts = ci->parts;
  double loc[3];
  loc[0] = ci->loc[0];
  loc[1] = ci->loc[1];
  loc[2] = ci->loc[2];

  /* Shift the particles positions to a local frame so single precision can be
   * used instead of double precision. */
James Willis's avatar
James Willis committed
208
  for (int i = 0; i < ci->count; i++) {
209
210
211
212
213
214
215
216
217
    x[i] = (float)(parts[i].x[0] - loc[0]);
    y[i] = (float)(parts[i].x[1] - loc[1]);
    z[i] = (float)(parts[i].x[2] - loc[2]);
    h[i] = parts[i].h;

    m[i] = parts[i].mass;
    vx[i] = parts[i].v[0];
    vy[i] = parts[i].v[1];
    vz[i] = parts[i].v[2];
James Willis's avatar
James Willis committed
218
  }
219
220

#endif
221
222
}

223
/**
James Willis's avatar
James Willis committed
224
225
 * @brief Populate cache for force interactions by reading in the particles in
 * unsorted order.
226
 *
227
228
 * @param ci The #cell.
 * @param ci_cache The cache.
229
 */
230
231
232
__attribute__((always_inline)) INLINE void cache_read_force_particles(
    const struct cell *restrict const ci,
    struct cache *restrict const ci_cache) {
233

234
#if defined(GADGET2_SPH)
235

236
237
238
239
240
241
242
243
244
245
246
  /* Let the compiler know that the data is aligned and create pointers to the
   * arrays inside the cache. */
  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, h, ci_cache->h, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, m, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vx, ci_cache->vx, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vy, ci_cache->vy, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, rho, ci_cache->rho, SWIFT_CACHE_ALIGNMENT);
James Willis's avatar
James Willis committed
247
248
249
250
251
252
253
254
  swift_declare_aligned_ptr(float, grad_h, ci_cache->grad_h,
                            SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, pOrho2, ci_cache->pOrho2,
                            SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, balsara, ci_cache->balsara,
                            SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, soundspeed, ci_cache->soundspeed,
                            SWIFT_CACHE_ALIGNMENT);
255
256
257
258
259
260
261
262
263

  const struct part *restrict parts = ci->parts;
  double loc[3];
  loc[0] = ci->loc[0];
  loc[1] = ci->loc[1];
  loc[2] = ci->loc[2];

  /* Shift the particles positions to a local frame so single precision can be
   * used instead of double precision. */
264
  for (int i = 0; i < ci->count; i++) {
265
266
267
268
269
270
271
272
273
    x[i] = (float)(parts[i].x[0] - loc[0]);
    y[i] = (float)(parts[i].x[1] - loc[1]);
    z[i] = (float)(parts[i].x[2] - loc[2]);
    h[i] = parts[i].h;

    m[i] = parts[i].mass;
    vx[i] = parts[i].v[0];
    vy[i] = parts[i].v[1];
    vz[i] = parts[i].v[2];
James Willis's avatar
James Willis committed
274

275
276
277
278
279
    rho[i] = parts[i].rho;
    grad_h[i] = parts[i].force.f;
    pOrho2[i] = parts[i].force.P_over_rho2;
    balsara[i] = parts[i].force.balsara;
    soundspeed[i] = parts[i].force.soundspeed;
280
  }
James Willis's avatar
James Willis committed
281

282
#endif
283
284
}

285
/**
James Willis's avatar
James Willis committed
286
287
288
 * @brief Populate caches by only reading particles that are within range of
 * each other within the adjoining cell.Also read the particles into the cache
 * in sorted order.
289
290
291
292
293
294
295
296
297
298
 *
 * @param ci The i #cell.
 * @param cj The j #cell.
 * @param ci_cache The #cache for cell ci.
 * @param cj_cache The #cache for cell cj.
 * @param sort_i The array of sorted particle indices for cell ci.
 * @param sort_j The array of sorted particle indices for cell ci.
 * @param shift The amount to shift the particle positions to account for BCs
 * @param first_pi The first particle in cell ci that is in range.
 * @param last_pj The last particle in cell cj that is in range.
James Willis's avatar
James Willis committed
299
300
 * @param num_vec_proc Number of vectors that will be used to process the
 * interaction.
301
 */
302
__attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
303
    const struct cell *restrict const ci, const struct cell *restrict const cj,
James Willis's avatar
James Willis committed
304
305
306
307
    struct cache *restrict const ci_cache,
    struct cache *restrict const cj_cache, const struct entry *restrict sort_i,
    const struct entry *restrict sort_j, const double *restrict const shift,
    int *first_pi, int *last_pj, const int num_vec_proc) {
308

309
  int idx;
310
  /* Pad number of particles read to the vector size. */
311
  int rem = (ci->count - *first_pi) % (num_vec_proc * VEC_SIZE);
312
313
  if (rem != 0) {
    int pad = (num_vec_proc * VEC_SIZE) - rem;
James Willis's avatar
James Willis committed
314

315
    if (*first_pi - pad >= 0) *first_pi -= pad;
316
317
  }

318
  rem = *last_pj % (num_vec_proc * VEC_SIZE);
319
320
321
  if (rem != 0) {
    int pad = (num_vec_proc * VEC_SIZE) - rem;

322
    if (*last_pj + pad < cj->count) *last_pj += pad;
323
324
  }

325
326
  int first_pi_align = *first_pi;
  int last_pj_align = *last_pj;
327
328
329
  const struct part *restrict parts_i = ci->parts;
  const struct part *restrict parts_j = cj->parts;
  double loc[3];
330
331
332
  loc[0] = cj->loc[0];
  loc[1] = cj->loc[1];
  loc[2] = cj->loc[2];
333

334
335
  /* Shift ci particles for boundary conditions and location of cell.*/
  double total_ci_shift[3];
James Willis's avatar
James Willis committed
336
337
338
  total_ci_shift[0] = loc[0] + shift[0];
  total_ci_shift[1] = loc[1] + shift[1];
  total_ci_shift[2] = loc[2] + shift[2];
339

James Willis's avatar
James Willis committed
340
341
  /* Let the compiler know that the data is aligned and create pointers to the
   * arrays inside the cache. */
James Willis's avatar
James Willis committed
342
343
344
345
346
347
348
349
  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, h, ci_cache->h, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, m, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vx, ci_cache->vx, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vy, ci_cache->vy, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
350
351

  int ci_cache_count = ci->count - first_pi_align;
James Willis's avatar
James Willis committed
352
353
  /* Shift the particles positions to a local frame (ci frame) so single
   * precision
354
   * can be
James Willis's avatar
James Willis committed
355
356
   * used instead of double precision. Also shift the cell ci, particles
   * positions
357
358
359
   * due to BCs but leave cell cj. */
  for (int i = 0; i < ci_cache_count; i++) {
    idx = sort_i[i + first_pi_align].i;
360
361
362
    x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
    y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
    z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
363
364
365
366
367
368
    h[i] = parts_i[idx].h;

    m[i] = parts_i[idx].mass;
    vx[i] = parts_i[idx].v[0];
    vy[i] = parts_i[idx].v[1];
    vz[i] = parts_i[idx].v[2];
369
  }
370

371
#ifdef SWIFT_DEBUG_CHECKS
James Willis's avatar
James Willis committed
372
373
374
  const float shift_threshold_x = 4. * ci->width[0] * (1. + 2.*space_maxreldx);
  const float shift_threshold_y = 4. * ci->width[1] * (1. + 2.*space_maxreldx);
  const float shift_threshold_z = 4. * ci->width[2] * (1. + 2.*space_maxreldx);
375

376
377
  /* Make sure that particle positions have been shifted correctly. */
  for (int i = 0; i < ci_cache_count; i++) {
378
    if (x[i] > shift_threshold_x || x[i] < -shift_threshold_x)
James Willis's avatar
James Willis committed
379
      error(
380
381
          "Error: ci->loc[%lf,%lf,%lf],cj->loc[%lf,%lf,%lf] Particle %d x pos is not within "
          "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + 2*space_maxreldx)]. x=%f, ci->width[0]=%f", ci->loc[0], ci->loc[1], ci->loc[2],
James Willis's avatar
James Willis committed
382
          loc[0], loc[1], loc[2], i, x[i], ci->width[0]);
383
    if (y[i] > shift_threshold_y || y[i] < -shift_threshold_y)
James Willis's avatar
James Willis committed
384
      error(
385
386
          "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d y pos is not within "
          "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + 2*space_maxreldx)]. y=%f, ci->width[1]=%f", ci->loc[0], ci->loc[1], ci->loc[2],
James Willis's avatar
James Willis committed
387
          loc[0], loc[1], loc[2], i, y[i], ci->width[1]);
388
    if (z[i] > shift_threshold_z || z[i] < -shift_threshold_z)
James Willis's avatar
James Willis committed
389
      error(
390
391
          "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d z pos is not within "
          "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + 2*space_maxreldx)]. z=%f, ci->width[2]=%f", ci->loc[0], ci->loc[1], ci->loc[2],
James Willis's avatar
James Willis committed
392
          loc[0], loc[1], loc[2], i, z[i], ci->width[2]);
393
394
  }
#endif
James Willis's avatar
James Willis committed
395

Matthieu Schaller's avatar
Matthieu Schaller committed
396
397
  /* Pad cache with fake particles that exist outside the cell so will not
   * interact.*/
398
  float fake_pix = 2.0f * parts_i[sort_i[ci->count - 1].i].x[0];
James Willis's avatar
James Willis committed
399
  for (int i = ci->count - first_pi_align;
400
       i < ci->count - first_pi_align + VEC_SIZE; i++) {
401
402
403
404
405
406
407
408
409
    x[i] = fake_pix;
    y[i] = 1.f;
    z[i] = 1.f;
    h[i] = 1.f;

    m[i] = 1.f;
    vx[i] = 1.f;
    vy[i] = 1.f;
    vz[i] = 1.f;
410
  }
James Willis's avatar
James Willis committed
411
412
413

  /* Let the compiler know that the data is aligned and create pointers to the
   * arrays inside the cache. */
James Willis's avatar
James Willis committed
414
415
416
417
418
419
420
421
  swift_declare_aligned_ptr(float, xj, cj_cache->x, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, yj, cj_cache->y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, zj, cj_cache->z, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, hj, cj_cache->h, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, mj, cj_cache->m, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vxj, cj_cache->vx, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vyj, cj_cache->vy, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vzj, cj_cache->vz, SWIFT_CACHE_ALIGNMENT);
James Willis's avatar
James Willis committed
422

423
  for (int i = 0; i <= last_pj_align; i++) {
424
    idx = sort_j[i].i;
425
426
427
428
429
430
431
432
433
    xj[i] = (float)(parts_j[idx].x[0] - loc[0]);
    yj[i] = (float)(parts_j[idx].x[1] - loc[1]);
    zj[i] = (float)(parts_j[idx].x[2] - loc[2]);
    hj[i] = parts_j[idx].h;

    mj[i] = parts_j[idx].mass;
    vxj[i] = parts_j[idx].v[0];
    vyj[i] = parts_j[idx].v[1];
    vzj[i] = parts_j[idx].v[2];
434
  }
435

436
437
438
#ifdef SWIFT_DEBUG_CHECKS
  /* Make sure that particle positions have been shifted correctly. */
  for (int i = 0; i <= last_pj_align; i++) {
439
    if (xj[i] > shift_threshold_x || xj[i] < -shift_threshold_x)
James Willis's avatar
James Willis committed
440
      error(
441
442
          "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d xj pos is not within "
          "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + 2*space_maxreldx)]. xj=%f, ci->width[0]=%f", ci->loc[0], ci->loc[1], ci->loc[2],
James Willis's avatar
James Willis committed
443
          loc[0], loc[1], loc[2], i, xj[i], ci->width[0]);
444
    if (yj[i] > shift_threshold_y || yj[i] < -shift_threshold_y)
James Willis's avatar
James Willis committed
445
      error(
446
447
          "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d yj pos is not within "
          "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + 2*space_maxreldx)]. yj=%f, ci->width[1]=%f", ci->loc[0], ci->loc[1], ci->loc[2],
James Willis's avatar
James Willis committed
448
          loc[0], loc[1], loc[2], i, yj[i], ci->width[1]);
449
    if (zj[i] > shift_threshold_z || zj[i] < -shift_threshold_z)
James Willis's avatar
James Willis committed
450
      error(
451
452
          "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d zj pos is not within "
          "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + 2*space_maxreldx)]. zj=%f, ci->width[2]=%f", ci->loc[0], ci->loc[1], ci->loc[2],
James Willis's avatar
James Willis committed
453
          loc[0], loc[1], loc[2], i, zj[i], ci->width[2]);
454
455
456
  }
#endif

Matthieu Schaller's avatar
Matthieu Schaller committed
457
458
  /* Pad cache with fake particles that exist outside the cell so will not
   * interact.*/
459
460
  float fake_pjx = 2.0f * cj->parts[sort_j[cj->count - 1].i].x[0];
  for (int i = last_pj_align + 1; i < last_pj_align + 1 + VEC_SIZE; i++) {
461
462
463
464
465
466
467
468
469
    xj[i] = fake_pjx;
    yj[i] = 1.f;
    zj[i] = 1.f;
    hj[i] = 1.f;

    mj[i] = 1.f;
    vxj[i] = 1.f;
    vyj[i] = 1.f;
    vzj[i] = 1.f;
470
  }
471
}
James Willis's avatar
James Willis committed
472

473
/* @brief Clean the memory allocated by a #cache object.
474
475
476
477
478
479
480
481
482
483
484
485
486
 *
 * @param c The #cache to clean.
 */
static INLINE void cache_clean(struct cache *c) {
  if (c->count > 0) {
    free(c->x);
    free(c->y);
    free(c->z);
    free(c->m);
    free(c->vx);
    free(c->vy);
    free(c->vz);
    free(c->h);
487
    free(c->max_index);
488
489
490
  }
}

#endif /* WITH_VECTORIZATION */

#endif /* SWIFT_CACHE_H */