Commit 5b0080ee authored by James Willis
Browse files

Refactoring.

parent a0fe87bb
...@@ -198,11 +198,8 @@ __attribute__((always_inline)) INLINE void cache_read_particles( ...@@ -198,11 +198,8 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
const struct part *restrict parts = ci->parts; const struct part *restrict parts = ci->parts;
double loc[3]; const double loc[3] = {ci->loc[0], ci->loc[1],ci->loc[2]};
loc[0] = ci->loc[0];
loc[1] = ci->loc[1];
loc[2] = ci->loc[2];
/* Shift the particles positions to a local frame so single precision can be /* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */ * used instead of double precision. */
for (int i = 0; i < ci->count; i++) { for (int i = 0; i < ci->count; i++) {
...@@ -210,7 +207,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles( ...@@ -210,7 +207,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
y[i] = (float)(parts[i].x[1] - loc[1]); y[i] = (float)(parts[i].x[1] - loc[1]);
z[i] = (float)(parts[i].x[2] - loc[2]); z[i] = (float)(parts[i].x[2] - loc[2]);
h[i] = parts[i].h; h[i] = parts[i].h;
m[i] = parts[i].mass; m[i] = parts[i].mass;
vx[i] = parts[i].v[0]; vx[i] = parts[i].v[0];
vy[i] = parts[i].v[1]; vy[i] = parts[i].v[1];
...@@ -254,10 +250,7 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles( ...@@ -254,10 +250,7 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
SWIFT_CACHE_ALIGNMENT); SWIFT_CACHE_ALIGNMENT);
const struct part *restrict parts = ci->parts; const struct part *restrict parts = ci->parts;
double loc[3]; const double loc[3] = {ci->loc[0], ci->loc[1],ci->loc[2]};
loc[0] = ci->loc[0];
loc[1] = ci->loc[1];
loc[2] = ci->loc[2];
/* Shift the particles positions to a local frame so single precision can be /* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */ * used instead of double precision. */
...@@ -266,12 +259,10 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles( ...@@ -266,12 +259,10 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
y[i] = (float)(parts[i].x[1] - loc[1]); y[i] = (float)(parts[i].x[1] - loc[1]);
z[i] = (float)(parts[i].x[2] - loc[2]); z[i] = (float)(parts[i].x[2] - loc[2]);
h[i] = parts[i].h; h[i] = parts[i].h;
m[i] = parts[i].mass; m[i] = parts[i].mass;
vx[i] = parts[i].v[0]; vx[i] = parts[i].v[0];
vy[i] = parts[i].v[1]; vy[i] = parts[i].v[1];
vz[i] = parts[i].v[2]; vz[i] = parts[i].v[2];
rho[i] = parts[i].rho; rho[i] = parts[i].rho;
grad_h[i] = parts[i].force.f; grad_h[i] = parts[i].force.f;
pOrho2[i] = parts[i].force.P_over_rho2; pOrho2[i] = parts[i].force.P_over_rho2;
...@@ -325,17 +316,13 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( ...@@ -325,17 +316,13 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
const int last_pj_align = *last_pj; const int last_pj_align = *last_pj;
const struct part *restrict parts_i = ci->parts; const struct part *restrict parts_i = ci->parts;
const struct part *restrict parts_j = cj->parts; const struct part *restrict parts_j = cj->parts;
double loc[3];
loc[0] = cj->loc[0];
loc[1] = cj->loc[1];
loc[2] = cj->loc[2];
/* Shift ci particles for boundary conditions and location of cell.*/
double total_ci_shift[3];
total_ci_shift[0] = loc[0] + shift[0];
total_ci_shift[1] = loc[1] + shift[1];
total_ci_shift[2] = loc[2] + shift[2];
/* Shift particles to the local frame and account for boundary conditions.*/
const double total_ci_shift[3] = {cj->loc[0] + shift[0], cj->loc[1] + shift[1],
cj->loc[2] + shift[2]};
const double total_cj_shift[3] = {cj->loc[0], cj->loc[1],
cj->loc[2]};
/* Let the compiler know that the data is aligned and create pointers to the /* Let the compiler know that the data is aligned and create pointers to the
* arrays inside the cache. */ * arrays inside the cache. */
swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
...@@ -348,6 +335,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( ...@@ -348,6 +335,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
int ci_cache_count = ci->count - first_pi_align; int ci_cache_count = ci->count - first_pi_align;
/* Shift the particles positions to a local frame (ci frame) so single /* Shift the particles positions to a local frame (ci frame) so single
* precision * precision
* can be * can be
...@@ -355,12 +343,12 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( ...@@ -355,12 +343,12 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
* positions * positions
* due to BCs but leave cell cj. */ * due to BCs but leave cell cj. */
for (int i = 0; i < ci_cache_count; i++) { for (int i = 0; i < ci_cache_count; i++) {
/* Make sure ci_cache is filled from the first element. */
idx = sort_i[i + first_pi_align].i; idx = sort_i[i + first_pi_align].i;
x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]); x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]); y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]); z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
h[i] = parts_i[idx].h; h[i] = parts_i[idx].h;
m[i] = parts_i[idx].mass; m[i] = parts_i[idx].mass;
vx[i] = parts_i[idx].v[0]; vx[i] = parts_i[idx].v[0];
vy[i] = parts_i[idx].v[1]; vy[i] = parts_i[idx].v[1];
...@@ -437,11 +425,10 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( ...@@ -437,11 +425,10 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
for (int i = 0; i <= last_pj_align; i++) { for (int i = 0; i <= last_pj_align; i++) {
idx = sort_j[i].i; idx = sort_j[i].i;
xj[i] = (float)(parts_j[idx].x[0] - loc[0]); xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]);
yj[i] = (float)(parts_j[idx].x[1] - loc[1]); yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
zj[i] = (float)(parts_j[idx].x[2] - loc[2]); zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
hj[i] = parts_j[idx].h; hj[i] = parts_j[idx].h;
mj[i] = parts_j[idx].mass; mj[i] = parts_j[idx].mass;
vxj[i] = parts_j[idx].v[0]; vxj[i] = parts_j[idx].v[0];
vyj[i] = parts_j[idx].v[1]; vyj[i] = parts_j[idx].v[1];
...@@ -488,7 +475,6 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( ...@@ -488,7 +475,6 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
yj[i] = pos_padded_j[1]; yj[i] = pos_padded_j[1];
zj[i] = pos_padded_j[2]; zj[i] = pos_padded_j[2];
hj[i] = 1.f; hj[i] = 1.f;
mj[i] = 1.f; mj[i] = 1.f;
vxj[i] = 1.f; vxj[i] = 1.f;
vyj[i] = 1.f; vyj[i] = 1.f;
...@@ -539,10 +525,12 @@ cache_read_two_partial_cells_sorted_force( ...@@ -539,10 +525,12 @@ cache_read_two_partial_cells_sorted_force(
const int last_pj_align = *last_pj; const int last_pj_align = *last_pj;
const struct part *restrict parts_i = ci->parts; const struct part *restrict parts_i = ci->parts;
const struct part *restrict parts_j = cj->parts; const struct part *restrict parts_j = cj->parts;
double loc[3];
loc[0] = ci->loc[0]; /* Shift particles to the local frame and account for boundary conditions.*/
loc[1] = ci->loc[1]; const double total_ci_shift[3] = {cj->loc[0] + shift[0], cj->loc[1] + shift[1],
loc[2] = ci->loc[2]; cj->loc[2] + shift[2]};
const double total_cj_shift[3] = {cj->loc[0], cj->loc[1],
cj->loc[2]};
/* Let the compiler know that the data is aligned and create pointers to the /* Let the compiler know that the data is aligned and create pointers to the
* arrays inside the cache. */ * arrays inside the cache. */
...@@ -573,18 +561,15 @@ cache_read_two_partial_cells_sorted_force( ...@@ -573,18 +561,15 @@ cache_read_two_partial_cells_sorted_force(
* due to BCs but leave cell cj. */ * due to BCs but leave cell cj. */
for (int i = 0; i < ci_cache_count; i++) { for (int i = 0; i < ci_cache_count; i++) {
/* Make sure ci_cache is filled from the first element. */ /* Make sure ci_cache is filled from the first element. */
idx = sort_i[i + first_pi_align].i; idx = sort_i[i + first_pi_align].i;
x[i] = (float)(parts_i[idx].x[0] - loc[0] - shift[0]); x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
y[i] = (float)(parts_i[idx].x[1] - loc[1] - shift[1]); y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
z[i] = (float)(parts_i[idx].x[2] - loc[2] - shift[2]); z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
h[i] = parts_i[idx].h; h[i] = parts_i[idx].h;
m[i] = parts_i[idx].mass; m[i] = parts_i[idx].mass;
vx[i] = parts_i[idx].v[0]; vx[i] = parts_i[idx].v[0];
vy[i] = parts_i[idx].v[1]; vy[i] = parts_i[idx].v[1];
vz[i] = parts_i[idx].v[2]; vz[i] = parts_i[idx].v[2];
rho[i] = parts_i[idx].rho; rho[i] = parts_i[idx].rho;
grad_h[i] = parts_i[idx].force.f; grad_h[i] = parts_i[idx].force.f;
pOrho2[i] = parts_i[idx].force.P_over_rho2; pOrho2[i] = parts_i[idx].force.P_over_rho2;
...@@ -606,12 +591,10 @@ cache_read_two_partial_cells_sorted_force( ...@@ -606,12 +591,10 @@ cache_read_two_partial_cells_sorted_force(
y[i] = pos_padded[1]; y[i] = pos_padded[1];
z[i] = pos_padded[2]; z[i] = pos_padded[2];
h[i] = h_padded; h[i] = h_padded;
m[i] = 1.f; m[i] = 1.f;
vx[i] = 1.f; vx[i] = 1.f;
vy[i] = 1.f; vy[i] = 1.f;
vz[i] = 1.f; vz[i] = 1.f;
rho[i] = 1.f; rho[i] = 1.f;
grad_h[i] = 1.f; grad_h[i] = 1.f;
pOrho2[i] = 1.f; pOrho2[i] = 1.f;
...@@ -641,16 +624,14 @@ cache_read_two_partial_cells_sorted_force( ...@@ -641,16 +624,14 @@ cache_read_two_partial_cells_sorted_force(
for (int i = 0; i <= last_pj_align; i++) { for (int i = 0; i <= last_pj_align; i++) {
idx = sort_j[i].i; idx = sort_j[i].i;
xj[i] = (float)(parts_j[idx].x[0] - loc[0]); xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]);
yj[i] = (float)(parts_j[idx].x[1] - loc[1]); yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
zj[i] = (float)(parts_j[idx].x[2] - loc[2]); zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
hj[i] = parts_j[idx].h; hj[i] = parts_j[idx].h;
mj[i] = parts_j[idx].mass; mj[i] = parts_j[idx].mass;
vxj[i] = parts_j[idx].v[0]; vxj[i] = parts_j[idx].v[0];
vyj[i] = parts_j[idx].v[1]; vyj[i] = parts_j[idx].v[1];
vzj[i] = parts_j[idx].v[2]; vzj[i] = parts_j[idx].v[2];
rhoj[i] = parts_j[idx].rho; rhoj[i] = parts_j[idx].rho;
grad_hj[i] = parts_j[idx].force.f; grad_hj[i] = parts_j[idx].force.f;
pOrho2j[i] = parts_j[idx].force.P_over_rho2; pOrho2j[i] = parts_j[idx].force.P_over_rho2;
...@@ -670,12 +651,10 @@ cache_read_two_partial_cells_sorted_force( ...@@ -670,12 +651,10 @@ cache_read_two_partial_cells_sorted_force(
yj[i] = pos_padded_j[1]; yj[i] = pos_padded_j[1];
zj[i] = pos_padded_j[2]; zj[i] = pos_padded_j[2];
hj[i] = h_padded_j; hj[i] = h_padded_j;
mj[i] = 1.f; mj[i] = 1.f;
vxj[i] = 1.f; vxj[i] = 1.f;
vyj[i] = 1.f; vyj[i] = 1.f;
vzj[i] = 1.f; vzj[i] = 1.f;
rhoj[i] = 1.f; rhoj[i] = 1.f;
grad_hj[i] = 1.f; grad_hj[i] = 1.f;
pOrho2j[i] = 1.f; pOrho2j[i] = 1.f;
...@@ -699,6 +678,11 @@ static INLINE void cache_clean(struct cache *c) { ...@@ -699,6 +678,11 @@ static INLINE void cache_clean(struct cache *c) {
free(c->vz); free(c->vz);
free(c->h); free(c->h);
free(c->max_index); free(c->max_index);
free(c->rho);
free(c->grad_h);
free(c->pOrho2);
free(c->balsara);
free(c->soundspeed);
} }
} }
......
...@@ -1072,37 +1072,6 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, ...@@ -1072,37 +1072,6 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
struct entry *restrict sort_j = cj->sort[sid]; struct entry *restrict sort_j = cj->sort[sid];
#ifdef SWIFT_DEBUG_CHECKS #ifdef SWIFT_DEBUG_CHECKS
/* Check that the dx_max_sort values in the cell are indeed an upper
bound on particle movement. */
for (int pid = 0; pid < ci->count; pid++) {
const struct part *p = &ci->parts[sort_i[pid].i];
const float d = p->x[0] * runner_shift[sid][0] +
p->x[1] * runner_shift[sid][1] +
p->x[2] * runner_shift[sid][2];
if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort >
1.0e-4 * max(fabsf(d), ci->dx_max_sort_old))
error(
"particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d "
"cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e "
"ci->dx_max_sort_old=%e",
ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort,
ci->dx_max_sort_old);
}
for (int pjd = 0; pjd < cj->count; pjd++) {
const struct part *p = &cj->parts[sort_j[pjd].i];
const float d = p->x[0] * runner_shift[sid][0] +
p->x[1] * runner_shift[sid][1] +
p->x[2] * runner_shift[sid][2];
if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort >
1.0e-4 * max(fabsf(d), cj->dx_max_sort_old))
error(
"particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d "
"ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e "
"cj->dx_max_sort_old=%e",
cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort,
cj->dx_max_sort_old);
}
/* Some constants used to check that the parts are in the right frame */ /* Some constants used to check that the parts are in the right frame */
const float shift_threshold_x = const float shift_threshold_x =
2. * ci->width[0] + 2. * max(ci->dx_max_part, cj->dx_max_part); 2. * ci->width[0] + 2. * max(ci->dx_max_part, cj->dx_max_part);
...@@ -1110,7 +1079,6 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, ...@@ -1110,7 +1079,6 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
2. * ci->width[1] + 2. * max(ci->dx_max_part, cj->dx_max_part); 2. * ci->width[1] + 2. * max(ci->dx_max_part, cj->dx_max_part);
const float shift_threshold_z = const float shift_threshold_z =
2. * ci->width[2] + 2. * max(ci->dx_max_part, cj->dx_max_part); 2. * ci->width[2] + 2. * max(ci->dx_max_part, cj->dx_max_part);
#endif /* SWIFT_DEBUG_CHECKS */ #endif /* SWIFT_DEBUG_CHECKS */
/* Get some other useful values. */ /* Get some other useful values. */
...@@ -1514,7 +1482,6 @@ void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { ...@@ -1514,7 +1482,6 @@ void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) {
error("Interacting unsorted cells."); error("Interacting unsorted cells.");
#ifdef SWIFT_DEBUG_CHECKS #ifdef SWIFT_DEBUG_CHECKS
/* Pick-out the sorted lists. */ /* Pick-out the sorted lists. */
const struct entry *restrict sort_i = ci->sort[sid]; const struct entry *restrict sort_i = ci->sort[sid];
const struct entry *restrict sort_j = cj->sort[sid]; const struct entry *restrict sort_j = cj->sort[sid];
......
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment