gravity_cache.h 18.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
#ifndef SWIFT_GRAVITY_CACHE_H
#define SWIFT_GRAVITY_CACHE_H

/* Config parameters. */
#include "../config.h"

/* Local headers */
26
#include "accumulate.h"
27
28
29
#include "align.h"
#include "error.h"
#include "gravity.h"
30
#include "multipole_accept.h"
31
32
#include "vector.h"

33
34
35
36
37
/**
 * @brief A SoA object for the #gpart of a cell.
 *
 * This is used to help vectorize the leaf-leaf gravity interactions.
 */
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
struct gravity_cache {

  /*! #gpart x position. */
  float *restrict x SWIFT_CACHE_ALIGN;

  /*! #gpart y position. */
  float *restrict y SWIFT_CACHE_ALIGN;

  /*! #gpart z position. */
  float *restrict z SWIFT_CACHE_ALIGN;

  /*! #gpart softening length. */
  float *restrict epsilon SWIFT_CACHE_ALIGN;

  /*! #gpart mass. */
  float *restrict m SWIFT_CACHE_ALIGN;

  /*! #gpart x acceleration. */
  float *restrict a_x SWIFT_CACHE_ALIGN;

  /*! #gpart y acceleration. */
  float *restrict a_y SWIFT_CACHE_ALIGN;

  /*! #gpart z acceleration. */
  float *restrict a_z SWIFT_CACHE_ALIGN;

64
65
66
  /*! #gpart potential. */
  float *restrict pot SWIFT_CACHE_ALIGN;

67
68
69
70
71
72
  /*! Is this #gpart active ? */
  int *restrict active SWIFT_CACHE_ALIGN;

  /*! Can this #gpart use a M2P interaction ? */
  int *restrict use_mpole SWIFT_CACHE_ALIGN;

73
74
75
76
77
78
  /*! Cache size */
  int count;
};

/**
 * @brief Frees the memory allocated in a #gravity_cache
79
80
 *
 * @param c The #gravity_cache to free.
81
82
83
 */
static INLINE void gravity_cache_clean(struct gravity_cache *c) {

84
  if (c->count > 0) {
85
86
87
88
89
90
91
92
93
94
95
    swift_free("gravity_cache", c->x);
    swift_free("gravity_cache", c->y);
    swift_free("gravity_cache", c->z);
    swift_free("gravity_cache", c->epsilon);
    swift_free("gravity_cache", c->m);
    swift_free("gravity_cache", c->a_x);
    swift_free("gravity_cache", c->a_y);
    swift_free("gravity_cache", c->a_z);
    swift_free("gravity_cache", c->pot);
    swift_free("gravity_cache", c->active);
    swift_free("gravity_cache", c->use_mpole);
96
97
98
99
100
  }
  c->count = 0;
}

/**
101
102
 * @brief Allocates memory for the #gpart caches used in the leaf-leaf
 * interactions.
103
104
105
106
 *
 * The cache is padded for the vector size and aligned properly
 *
 * @param c The #gravity_cache to allocate.
107
108
 * @param count The number of #gpart to allocated for (space_splitsize is a good
 * choice).
109
 */
110
111
static INLINE void gravity_cache_init(struct gravity_cache *c,
                                      const int count) {
112
113
114

  /* Size of the gravity cache */
  const int padded_count = count - (count % VEC_SIZE) + VEC_SIZE;
115
116
  const size_t sizeBytesF = padded_count * sizeof(float);
  const size_t sizeBytesI = padded_count * sizeof(int);
117

118
119
120
  /* Delete old stuff if any */
  gravity_cache_clean(c);

121
  int e = 0;
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
  e += swift_memalign("gravity_cache", (void **)&c->x, SWIFT_CACHE_ALIGNMENT,
                      sizeBytesF);
  e += swift_memalign("gravity_cache", (void **)&c->y, SWIFT_CACHE_ALIGNMENT,
                      sizeBytesF);
  e += swift_memalign("gravity_cache", (void **)&c->z, SWIFT_CACHE_ALIGNMENT,
                      sizeBytesF);
  e += swift_memalign("gravity_cache", (void **)&c->epsilon,
                      SWIFT_CACHE_ALIGNMENT, sizeBytesF);
  e += swift_memalign("gravity_cache", (void **)&c->m, SWIFT_CACHE_ALIGNMENT,
                      sizeBytesF);
  e += swift_memalign("gravity_cache", (void **)&c->a_x, SWIFT_CACHE_ALIGNMENT,
                      sizeBytesF);
  e += swift_memalign("gravity_cache", (void **)&c->a_y, SWIFT_CACHE_ALIGNMENT,
                      sizeBytesF);
  e += swift_memalign("gravity_cache", (void **)&c->a_z, SWIFT_CACHE_ALIGNMENT,
                      sizeBytesF);
  e += swift_memalign("gravity_cache", (void **)&c->pot, SWIFT_CACHE_ALIGNMENT,
                      sizeBytesF);
  e += swift_memalign("gravity_cache", (void **)&c->active,
                      SWIFT_CACHE_ALIGNMENT, sizeBytesI);
  e += swift_memalign("gravity_cache", (void **)&c->use_mpole,
                      SWIFT_CACHE_ALIGNMENT, sizeBytesI);
144
145

  if (e != 0) error("Couldn't allocate gravity cache, size: %d", padded_count);
146
147
148
149

  c->count = padded_count;
}

150
/**
Matthieu Schaller's avatar
Matthieu Schaller committed
151
 * @brief Zero all the output fields (acceleration and potential) of a
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
 * #gravity_cache.
 *
 * @param c The #gravity_cache to zero.
 * @param gcount_padded The padded size of the cache arrays.
 */
__attribute__((always_inline)) INLINE static void gravity_cache_zero_output(
    struct gravity_cache *c, const int gcount_padded) {

#ifdef SWIFT_DEBUG_CHECKS
  if (gcount_padded % VEC_SIZE != 0)
    error("Padded gcount size not a multiple of the vector length");
#endif

  /* Make the compiler understand we are in happy vectorization land */
  swift_declare_aligned_ptr(float, a_x, c->a_x, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, a_y, c->a_y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, a_z, c->a_z, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, pot, c->pot, SWIFT_CACHE_ALIGNMENT);
  swift_assume_size(gcount_padded, VEC_SIZE);

  /* Zero everything */
  bzero(a_x, gcount_padded * sizeof(float));
  bzero(a_y, gcount_padded * sizeof(float));
  bzero(a_z, gcount_padded * sizeof(float));
  bzero(pot, gcount_padded * sizeof(float));
}

179
180
181
/**
 * @brief Fills a #gravity_cache structure with some #gpart and shift them.
 *
182
183
184
185
 * Also checks whether the #gpart can use a M2P interaction instead of the
 * more expensive P2P.
 *
 * @param max_active_bin The largest active bin in the current time-step.
186
 * @param allow_mpole Are we allowing the use of multipoles?
187
 * @param periodic Are we using periodic BCs ?
188
 * @param dim The size of the simulation volume along each dimension.
189
190
191
192
193
194
 * @param c The #gravity_cache to fill.
 * @param gparts The #gpart array to read from.
 * @param gcount The number of particles to read.
 * @param gcount_padded The number of particle to read padded to the next
 * multiple of the vector length.
 * @param shift A shift to apply to all the particles.
195
196
 * @param CoM The position of the multipole.
 * @param r_max2 The square of the multipole radius.
197
 * @param cell The cell we play with (to get reasonable padding positions).
198
 * @param grav_props The global gravity properties.
199
 */
200
__attribute__((always_inline)) INLINE static void gravity_cache_populate(
201
202
203
204
205
    const timebin_t max_active_bin, const int allow_mpole, const int periodic,
    const float dim[3], struct gravity_cache *c,
    const struct gpart *restrict gparts, const int gcount,
    const int gcount_padded, const double shift[3], const float CoM[3],
    const float r_max2, const struct cell *cell,
206
    const struct gravity_props *grav_props) {
207

208
209
  const float theta_crit2 = grav_props->theta_crit2;

210
211
212
213
214
215
216
217
#ifdef SWIFT_DEBUG_CHECKS
  if (gcount_padded < gcount) error("Invalid padded cache size. Too small.");
  if (gcount_padded % VEC_SIZE != 0)
    error("Padded gravity cache size invalid. Not a multiple of SIMD length.");
  if (c->count < gcount_padded)
    error("Size of the gravity cache is not large enough.");
#endif

218
  /* Make the compiler understand we are in happy vectorization land */
219
  swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
220
221
  swift_declare_aligned_ptr(float, y, c->y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT);
222
223
  swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT);
224
225
226
  swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(int, use_mpole, c->use_mpole,
                            SWIFT_CACHE_ALIGNMENT);
227
228
229
  swift_assume_size(gcount_padded, VEC_SIZE);

  /* Fill the input caches */
230
  for (int i = 0; i < gcount; ++i) {
231

232
233
234
    x[i] = (float)(gparts[i].x[0] - shift[0]);
    y[i] = (float)(gparts[i].x[1] - shift[1]);
    z[i] = (float)(gparts[i].x[2] - shift[2]);
235
    epsilon[i] = gravity_get_softening(&gparts[i], grav_props);
236
237
238
239
240
241
242
243
244

    /* Make a dummy particle out of the inhibted ones */
    if (gparts[i].time_bin == time_bin_inhibited) {
      m[i] = 0.f;
      active[i] = 0;
    } else {
      m[i] = gparts[i].mass;
      active[i] = (int)(gparts[i].time_bin <= max_active_bin);
    }
245

246
    /* Distance to the CoM of the other cell. */
247
248
249
250
    float dx = x[i] - CoM[0];
    float dy = y[i] - CoM[1];
    float dz = z[i] - CoM[2];

251
    /* Apply periodic BC */
252
    if (periodic) {
253
254
255
256
      dx = nearestf(dx, dim[0]);
      dy = nearestf(dy, dim[1]);
      dz = nearestf(dz, dim[2]);
    }
257
258
259
    const float r2 = dx * dx + dy * dy + dz * dz;

    /* Check whether we can use the multipole instead of P-P */
260
    use_mpole[i] =
261
        allow_mpole && gravity_M2P_accept(r_max2, theta_crit2, r2, epsilon[i]);
262
263
  }

264
265
266
267
#ifdef SWIFT_DEBUG_CHECKS
  if (gcount_padded < gcount) error("Padded counter smaller than counter");
#endif

268
269
  /* Particles used for padding should get impossible positions
   * that have a reasonable magnitude. We use the cell width for this */
270
271
272
  const float pos_padded[3] = {-2.f * (float)cell->width[0],
                               -2.f * (float)cell->width[1],
                               -2.f * (float)cell->width[2]};
273
  const float eps_padded = epsilon[0];
274

275
  /* Pad the caches */
276
  for (int i = gcount; i < gcount_padded; ++i) {
277
278
279
    x[i] = pos_padded[0];
    y[i] = pos_padded[1];
    z[i] = pos_padded[2];
280
    epsilon[i] = eps_padded;
281
    m[i] = 0.f;
282
283
    active[i] = 0;
    use_mpole[i] = 0;
284
  }
285
286
287

  /* Zero the output as well */
  gravity_cache_zero_output(c, gcount_padded);
288
289
}

290
/**
291
 * @brief Fills a #gravity_cache structure with some #gpart and shift them.
292
 *
293
 * @param max_active_bin The largest active bin in the current time-step.
294
295
296
297
298
 * @param c The #gravity_cache to fill.
 * @param gparts The #gpart array to read from.
 * @param gcount The number of particles to read.
 * @param gcount_padded The number of particle to read padded to the next
 * multiple of the vector length.
299
 * @param shift A shift to apply to all the particles.
300
 * @param cell The cell we play with (to get reasonable padding positions).
301
 * @param grav_props The global gravity properties.
302
 */
303
__attribute__((always_inline)) INLINE static void
304
gravity_cache_populate_no_mpole(const timebin_t max_active_bin,
305
                                struct gravity_cache *c,
306
307
308
                                const struct gpart *restrict gparts,
                                const int gcount, const int gcount_padded,
                                const double shift[3], const struct cell *cell,
309
                                const struct gravity_props *grav_props) {
310

311
312
313
314
315
316
317
318
#ifdef SWIFT_DEBUG_CHECKS
  if (gcount_padded < gcount) error("Invalid padded cache size. Too small.");
  if (gcount_padded % VEC_SIZE != 0)
    error("Padded gravity cache size invalid. Not a multiple of SIMD length.");
  if (c->count < gcount_padded)
    error("Size of the gravity cache is not large enough.");
#endif

319
  /* Make the compiler understand we are in happy vectorization land */
320
  swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
321
322
  swift_declare_aligned_ptr(float, y, c->y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT);
323
324
  swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT);
325
  swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
326
  swift_assume_size(gcount_padded, VEC_SIZE);
327
328

  /* Fill the input caches */
329
  for (int i = 0; i < gcount; ++i) {
330
331
332
    x[i] = (float)(gparts[i].x[0] - shift[0]);
    y[i] = (float)(gparts[i].x[1] - shift[1]);
    z[i] = (float)(gparts[i].x[2] - shift[2]);
333
    epsilon[i] = gravity_get_softening(&gparts[i], grav_props);
334
335
336
337
338
339
340
341
342

    /* Make a dummy particle out of the inhibted ones */
    if (gparts[i].time_bin == time_bin_inhibited) {
      m[i] = 0.f;
      active[i] = 0;
    } else {
      m[i] = gparts[i].mass;
      active[i] = (int)(gparts[i].time_bin <= max_active_bin);
    }
343
344
  }

345
346
347
348
#ifdef SWIFT_DEBUG_CHECKS
  if (gcount_padded < gcount) error("Padded counter smaller than counter");
#endif

349
350
  /* Particles used for padding should get impossible positions
   * that have a reasonable magnitude. We use the cell width for this */
351
352
353
  const float pos_padded[3] = {-2.f * (float)cell->width[0],
                               -2.f * (float)cell->width[1],
                               -2.f * (float)cell->width[2]};
354
355
  const float eps_padded = epsilon[0];

356
  /* Pad the caches */
357
  for (int i = gcount; i < gcount_padded; ++i) {
358
359
360
    x[i] = pos_padded[0];
    y[i] = pos_padded[1];
    z[i] = pos_padded[2];
361
    epsilon[i] = eps_padded;
362
    m[i] = 0.f;
363
    active[i] = 0;
364
  }
365
366
367

  /* Zero the output as well */
  gravity_cache_zero_output(c, gcount_padded);
368
369
}

370
371
372
373
374
/**
 * @brief Fills a #gravity_cache structure with some #gpart and make them use
 * the multi-pole.
 *
 * @param max_active_bin The largest active bin in the current time-step.
Matthieu Schaller's avatar
Matthieu Schaller committed
375
376
 * @param periodic Are we using periodic BCs ?
 * @param dim The size of the simulation volume along each dimension.
377
378
379
380
381
382
 * @param c The #gravity_cache to fill.
 * @param gparts The #gpart array to read from.
 * @param gcount The number of particles to read.
 * @param gcount_padded The number of particle to read padded to the next
 * multiple of the vector length.
 * @param cell The cell we play with (to get reasonable padding positions).
Matthieu Schaller's avatar
Matthieu Schaller committed
383
384
 * @param CoM The position of the multipole.
 * @param r_max2 The square of the multipole radius.
385
386
387
388
 * @param grav_props The global gravity properties.
 */
__attribute__((always_inline)) INLINE static void
gravity_cache_populate_all_mpole(const timebin_t max_active_bin,
389
                                 const int periodic, const float dim[3],
390
391
392
                                 struct gravity_cache *c,
                                 const struct gpart *restrict gparts,
                                 const int gcount, const int gcount_padded,
393
394
                                 const struct cell *cell, const float CoM[3],
                                 const float r_max2,
395
396
                                 const struct gravity_props *grav_props) {

397
#ifdef SWIFT_DEBUG_CHECKS
398
399
400
401
402
403
  if (gcount_padded < gcount) error("Invalid padded cache size. Too small.");
  if (gcount_padded % VEC_SIZE != 0)
    error("Padded gravity cache size invalid. Not a multiple of SIMD length.");
  if (c->count < gcount_padded)
    error("Size of the gravity cache is not large enough.");

404
  const float theta_crit2 = grav_props->theta_crit2;
405
#endif
406

407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
  /* Make the compiler understand we are in happy vectorization land */
  swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, y, c->y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(int, use_mpole, c->use_mpole,
                            SWIFT_CACHE_ALIGNMENT);
  swift_assume_size(gcount_padded, VEC_SIZE);

  /* Fill the input caches */
  for (int i = 0; i < gcount; ++i) {
    x[i] = (float)(gparts[i].x[0]);
    y[i] = (float)(gparts[i].x[1]);
    z[i] = (float)(gparts[i].x[2]);
    epsilon[i] = gravity_get_softening(&gparts[i], grav_props);
    m[i] = gparts[i].mass;
    active[i] = (int)(gparts[i].time_bin <= max_active_bin);
    use_mpole[i] = 1;
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441

#ifdef SWIFT_DEBUG_CHECKS
    /* Distance to the CoM of the other cell. */
    float dx = x[i] - CoM[0];
    float dy = y[i] - CoM[1];
    float dz = z[i] - CoM[2];

    /* Apply periodic BC */
    if (periodic) {
      dx = nearestf(dx, dim[0]);
      dy = nearestf(dy, dim[1]);
      dz = nearestf(dz, dim[2]);
    }
    const float r2 = dx * dx + dy * dy + dz * dz;

442
    if (!gravity_M2P_accept(r_max2, theta_crit2, r2, epsilon[i]))
443
444
      error("Using m-pole where the test fails");
#endif
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
  }

#ifdef SWIFT_DEBUG_CHECKS
  if (gcount_padded < gcount) error("Padded counter smaller than counter");
#endif

  /* Particles used for padding should get impossible positions
   * that have a reasonable magnitude. We use the cell width for this */
  const float pos_padded[3] = {-2.f * (float)cell->width[0],
                               -2.f * (float)cell->width[1],
                               -2.f * (float)cell->width[2]};
  const float eps_padded = epsilon[0];

  /* Pad the caches */
  for (int i = gcount; i < gcount_padded; ++i) {
    x[i] = pos_padded[0];
    y[i] = pos_padded[1];
    z[i] = pos_padded[2];
    epsilon[i] = eps_padded;
    m[i] = 0.f;
    active[i] = 0;
    use_mpole[i] = 0;
  }
468
469
470

  /* Zero the output as well */
  gravity_cache_zero_output(c, gcount_padded);
471
472
}

473
/**
474
475
476
 * @brief Write the output cache values back to the active #gpart.
 *
 * This function obviously omits the padded values in the cache.
477
478
479
480
481
 *
 * @param c The #gravity_cache to read from.
 * @param gparts The #gpart array to write to.
 * @param gcount The number of particles to write.
 */
482
__attribute__((always_inline)) INLINE static void gravity_cache_write_back(
483
484
    const struct gravity_cache *c, struct gpart *restrict gparts,
    const int gcount) {
485
486

  /* Make the compiler understand we are in happy vectorization land */
487
488
489
  swift_declare_aligned_ptr(float, a_x, c->a_x, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, a_y, c->a_y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, a_z, c->a_z, SWIFT_CACHE_ALIGNMENT);
490
  swift_declare_aligned_ptr(float, pot, c->pot, SWIFT_CACHE_ALIGNMENT);
491
  swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
492
493

  /* Write stuff back to the particles */
494
  for (int i = 0; i < gcount; ++i) {
495
    if (active[i]) {
496
497
498
      accumulate_add_f(&gparts[i].a_grav[0], a_x[i]);
      accumulate_add_f(&gparts[i].a_grav[1], a_y[i]);
      accumulate_add_f(&gparts[i].a_grav[2], a_z[i]);
499
      gravity_add_comoving_potential(&gparts[i], pot[i]);
500
    }
501
502
503
504
  }
}

#endif /* SWIFT_GRAVITY_CACHE_H */