gravity_cache.h 8.96 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
#ifndef SWIFT_GRAVITY_CACHE_H
#define SWIFT_GRAVITY_CACHE_H

/* Config parameters. */
#include "../config.h"

/* Local headers */
#include "align.h"
#include "error.h"
#include "gravity.h"
#include "vector.h"

31
32
33
34
35
/**
 * @brief A SoA object for the #gpart of a cell.
 *
 * This is used to help vectorize the leaf-leaf gravity interactions.
 */
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
struct gravity_cache {

  /*! #gpart x position. */
  float *restrict x SWIFT_CACHE_ALIGN;

  /*! #gpart y position. */
  float *restrict y SWIFT_CACHE_ALIGN;

  /*! #gpart z position. */
  float *restrict z SWIFT_CACHE_ALIGN;

  /*! #gpart softening length. */
  float *restrict epsilon SWIFT_CACHE_ALIGN;

  /*! #gpart mass. */
  float *restrict m SWIFT_CACHE_ALIGN;

  /*! #gpart x acceleration. */
  float *restrict a_x SWIFT_CACHE_ALIGN;

  /*! #gpart y acceleration. */
  float *restrict a_y SWIFT_CACHE_ALIGN;

  /*! #gpart z acceleration. */
  float *restrict a_z SWIFT_CACHE_ALIGN;

62
63
64
65
66
67
  /*! Is this #gpart active ? */
  int *restrict active SWIFT_CACHE_ALIGN;

  /*! Can this #gpart use a M2P interaction ? */
  int *restrict use_mpole SWIFT_CACHE_ALIGN;

68
69
70
71
72
73
  /*! Cache size */
  int count;
};

/**
 * @brief Frees the memory allocated in a #gravity_cache
74
75
 *
 * @param c The #gravity_cache to free.
76
77
78
 */
static INLINE void gravity_cache_clean(struct gravity_cache *c) {

79
  if (c->count > 0) {
80
81
82
83
84
85
86
87
    free(c->x);
    free(c->y);
    free(c->z);
    free(c->epsilon);
    free(c->m);
    free(c->a_x);
    free(c->a_y);
    free(c->a_z);
88
89
    free(c->active);
    free(c->use_mpole);
90
91
92
93
94
  }
  c->count = 0;
}

/**
95
96
 * @brief Allocates memory for the #gpart caches used in the leaf-leaf
 * interactions.
97
98
99
100
 *
 * The cache is padded for the vector size and aligned properly
 *
 * @param c The #gravity_cache to allocate.
101
102
 * @param count The number of #gpart to allocated for (space_splitsize is a good
 * choice).
103
104
105
106
107
 */
static INLINE void gravity_cache_init(struct gravity_cache *c, int count) {

  /* Size of the gravity cache */
  const int padded_count = count - (count % VEC_SIZE) + VEC_SIZE;
108
109
  const size_t sizeBytesF = padded_count * sizeof(float);
  const size_t sizeBytesI = padded_count * sizeof(int);
110

111
112
113
  /* Delete old stuff if any */
  gravity_cache_clean(c);

114
115
116
117
118
119
120
121
122
123
124
125
126
127
  int e = 0;
  e += posix_memalign((void **)&c->x, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
  e += posix_memalign((void **)&c->y, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
  e += posix_memalign((void **)&c->z, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
  e += posix_memalign((void **)&c->epsilon, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
  e += posix_memalign((void **)&c->m, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
  e += posix_memalign((void **)&c->a_x, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
  e += posix_memalign((void **)&c->a_y, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
  e += posix_memalign((void **)&c->a_z, SWIFT_CACHE_ALIGNMENT, sizeBytesF);
  e += posix_memalign((void **)&c->active, SWIFT_CACHE_ALIGNMENT, sizeBytesI);
  e +=
      posix_memalign((void **)&c->use_mpole, SWIFT_CACHE_ALIGNMENT, sizeBytesI);

  if (e != 0) error("Couldn't allocate gravity cache, size: %d", padded_count);
128
129
130
131

  c->count = padded_count;
}

132
133
134
135
136
137
138
139
140
141
/**
 * @brief Fills a #gravity_cache structure with some #gpart and shift them.
 *
 * @param c The #gravity_cache to fill.
 * @param gparts The #gpart array to read from.
 * @param gcount The number of particles to read.
 * @param gcount_padded The number of particle to read padded to the next
 * multiple of the vector length.
 * @param shift A shift to apply to all the particles.
 */
142
143
144
145
146
__attribute__((always_inline)) INLINE static void gravity_cache_populate(
    timebin_t max_active_bin, struct gravity_cache *c,
    const struct gpart *restrict gparts, int gcount, int gcount_padded,
    const double shift[3], const float CoM[3], float r_max2,
    float theta_crit2) {
147
148

  /* Make the compiler understand we are in happy vectorization land */
149
  swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
150
151
  swift_declare_aligned_ptr(float, y, c->y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT);
152
153
  swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT);
154
155
156
  swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(int, use_mpole, c->use_mpole,
                            SWIFT_CACHE_ALIGNMENT);
157
158
159
  swift_assume_size(gcount_padded, VEC_SIZE);

  /* Fill the input caches */
160
  for (int i = 0; i < gcount; ++i) {
161
162
163
164
165
    x[i] = (float)(gparts[i].x[0] - shift[0]);
    y[i] = (float)(gparts[i].x[1] - shift[1]);
    z[i] = (float)(gparts[i].x[2] - shift[2]);
    epsilon[i] = gparts[i].epsilon;
    m[i] = gparts[i].mass;
166
167
168
169
170
171
172
173
    active[i] = (int)(gparts[i].time_bin <= max_active_bin);

    /* Check whether we can use the multipole instead of P-P */
    const float dx = x[i] - CoM[0];
    const float dy = y[i] - CoM[1];
    const float dz = z[i] - CoM[2];
    const float r2 = dx * dx + dy * dy + dz * dz;
    use_mpole[i] = gravity_M2P_accept(r_max2, theta_crit2, r2);
174
175
  }

176
177
178
179
#ifdef SWIFT_DEBUG_CHECKS
  if (gcount_padded < gcount) error("Padded counter smaller than counter");
#endif

180
  /* Pad the caches */
181
  for (int i = gcount; i < gcount_padded; ++i) {
182
    x[i] = 0.f;
183
    y[i] = 0.f;
184
185
186
    z[i] = 0.f;
    epsilon[i] = 0.f;
    m[i] = 0.f;
187
188
    active[i] = 0;
    use_mpole[i] = 0;
189
190
191
  }
}

192
/**
193
 * @brief Fills a #gravity_cache structure with some #gpart and shift them.
194
195
196
197
198
199
 *
 * @param c The #gravity_cache to fill.
 * @param gparts The #gpart array to read from.
 * @param gcount The number of particles to read.
 * @param gcount_padded The number of particle to read padded to the next
 * multiple of the vector length.
200
 * @param shift A shift to apply to all the particles.
201
 */
202
203
204
205
206
__attribute__((always_inline)) INLINE static void
gravity_cache_populate_no_mpole(timebin_t max_active_bin,
                                struct gravity_cache *c,
                                const struct gpart *restrict gparts, int gcount,
                                int gcount_padded, const double shift[3]) {
207
208

  /* Make the compiler understand we are in happy vectorization land */
209
  swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT);
210
211
  swift_declare_aligned_ptr(float, y, c->y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT);
212
213
  swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT);
214
  swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
215
  swift_assume_size(gcount_padded, VEC_SIZE);
216
217

  /* Fill the input caches */
218
  for (int i = 0; i < gcount; ++i) {
219
220
221
    x[i] = (float)(gparts[i].x[0] - shift[0]);
    y[i] = (float)(gparts[i].x[1] - shift[1]);
    z[i] = (float)(gparts[i].x[2] - shift[2]);
222
223
    epsilon[i] = gparts[i].epsilon;
    m[i] = gparts[i].mass;
224
    active[i] = (int)(gparts[i].time_bin <= max_active_bin);
225
226
  }

227
228
229
230
#ifdef SWIFT_DEBUG_CHECKS
  if (gcount_padded < gcount) error("Padded counter smaller than counter");
#endif

231
  /* Pad the caches */
232
  for (int i = gcount; i < gcount_padded; ++i) {
233
    x[i] = 0.f;
234
    y[i] = 0.f;
235
236
237
    z[i] = 0.f;
    epsilon[i] = 0.f;
    m[i] = 0.f;
238
    active[i] = 0;
239
240
241
  }
}

242
243
244
245
246
247
248
249
250
/**
 * @brief Write the output cache values back to the #gpart.
 *
 * @param c The #gravity_cache to read from.
 * @param gparts The #gpart array to write to.
 * @param gcount The number of particles to write.
 */
__attribute__((always_inline)) INLINE void gravity_cache_write_back(
    const struct gravity_cache *c, struct gpart *restrict gparts, int gcount) {
251
252

  /* Make the compiler understand we are in happy vectorization land */
253
254
255
256
  swift_declare_aligned_ptr(float, a_x, c->a_x, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, a_y, c->a_y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, a_z, c->a_z, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT);
257
258

  /* Write stuff back to the particles */
259
  for (int i = 0; i < gcount; ++i) {
260
261
262
263
264
    if (active[i]) {
      gparts[i].a_grav[0] += a_x[i];
      gparts[i].a_grav[1] += a_y[i];
      gparts[i].a_grav[2] += a_z[i];
    }
265
266
267
268
  }
}

#endif /* SWIFT_GRAVITY_CACHE_H */