diff --git a/src/cache.h b/src/cache.h index bb72149607f496dd378a848a4270fe74d3f75ad9..5f2be08ddb0dce3f055b187a0dd4e43cb713ee67 100644 --- a/src/cache.h +++ b/src/cache.h @@ -327,9 +327,15 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( const struct part *restrict parts_i = ci->parts; const struct part *restrict parts_j = cj->parts; double loc[3]; - loc[0] = ci->loc[0]; - loc[1] = ci->loc[1]; - loc[2] = ci->loc[2]; + loc[0] = cj->loc[0]; + loc[1] = cj->loc[1]; + loc[2] = cj->loc[2]; + + /* Shift ci particles for boundary conditions and location of cell.*/ + double total_ci_shift[3]; + total_ci_shift[0] = loc[0] + shift[0]; + total_ci_shift[1] = loc[1] + shift[1]; + total_ci_shift[2] = loc[2] + shift[2]; /* Let the compiler know that the data is aligned and create pointers to the * arrays inside the cache. */ @@ -351,9 +357,9 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( * due to BCs but leave cell cj. */ for (int i = 0; i < ci_cache_count; i++) { idx = sort_i[i + first_pi_align].i; - x[i] = (float)(parts_i[idx].x[0] - loc[0] - shift[0]); - y[i] = (float)(parts_i[idx].x[1] - loc[1] - shift[1]); - z[i] = (float)(parts_i[idx].x[2] - loc[2] - shift[2]); + x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]); + y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]); + z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]); h[i] = parts_i[idx].h; m[i] = parts_i[idx].mass; @@ -362,6 +368,31 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( vz[i] = parts_i[idx].v[2]; } +#ifdef SWIFT_DEBUG_CHECKS + const float shift_threshold_x = 4. * ci->width[0] * (1. + 2.*space_maxreldx); + const float shift_threshold_y = 4. * ci->width[1] * (1. + 2.*space_maxreldx); + const float shift_threshold_z = 4. * ci->width[2] * (1. + 2.*space_maxreldx); + + /* Make sure that particle positions have been shifted correctly. */ + for (int i = 0; i < ci_cache_count; i++) { + if (x[i] > shift_threshold_x || x[i] < -shift_threshold_x) + error( + "Error: ci->loc[%lf,%lf,%lf],cj->loc[%lf,%lf,%lf] Particle %d x pos is not within " + "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + 2*space_maxreldx)]. x=%f, ci->width[0]=%f", ci->loc[0], ci->loc[1], ci->loc[2], + loc[0], loc[1], loc[2], i, x[i], ci->width[0]); + if (y[i] > shift_threshold_y || y[i] < -shift_threshold_y) + error( + "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d y pos is not within " + "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + 2*space_maxreldx)]. y=%f, ci->width[1]=%f", ci->loc[0], ci->loc[1], ci->loc[2], + loc[0], loc[1], loc[2], i, y[i], ci->width[1]); + if (z[i] > shift_threshold_z || z[i] < -shift_threshold_z) + error( + "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d z pos is not within " + "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + 2*space_maxreldx)]. z=%f, ci->width[2]=%f", ci->loc[0], ci->loc[1], ci->loc[2], + loc[0], loc[1], loc[2], i, z[i], ci->width[2]); + } +#endif + /* Pad cache with fake particles that exist outside the cell so will not * interact.*/ float fake_pix = 2.0f * parts_i[sort_i[ci->count - 1].i].x[0]; @@ -402,6 +433,27 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( vzj[i] = parts_j[idx].v[2]; } +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure that particle positions have been shifted correctly. */ + for (int i = 0; i <= last_pj_align; i++) { + if (xj[i] > shift_threshold_x || xj[i] < -shift_threshold_x) + error( + "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d xj pos is not within " + "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + 2*space_maxreldx)]. xj=%f, ci->width[0]=%f", ci->loc[0], ci->loc[1], ci->loc[2], + loc[0], loc[1], loc[2], i, xj[i], ci->width[0]); + if (yj[i] > shift_threshold_y || yj[i] < -shift_threshold_y) + error( + "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d yj pos is not within " + "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + 2*space_maxreldx)]. yj=%f, ci->width[1]=%f", ci->loc[0], ci->loc[1], ci->loc[2], + loc[0], loc[1], loc[2], i, yj[i], ci->width[1]); + if (zj[i] > shift_threshold_z || zj[i] < -shift_threshold_z) + error( + "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d zj pos is not within " + "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + 2*space_maxreldx)]. zj=%f, ci->width[2]=%f", ci->loc[0], ci->loc[1], ci->loc[2], + loc[0], loc[1], loc[2], i, zj[i], ci->width[2]); + } +#endif + /* Pad cache with fake particles that exist outside the cell so will not * interact.*/ float fake_pjx = 2.0f * cj->parts[sort_j[cj->count - 1].i].x[0]; diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index 421108d382c7b8108b6c44e6b7d268df041bd558..9da43dba1f72b67a2afeee3dc6620b57d5e5b01d 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -285,11 +285,10 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache( temp = 0; const struct part *pi = &parts_i[sort_i[first_pi].i]; + const float first_di = sort_i[first_pi].d + pi->h * kernel_gamma + dx_max - rshift; /* Loop through particles in cell j until they are not in range of pi. */ - while (temp <= cj->count && - (sort_i[first_pi].d + (pi->h * kernel_gamma + dx_max - rshift) > - sort_j[temp].d)) + while (temp < cj->count && first_di > sort_j[temp].d) temp++; max_index_i[first_pi] = temp; @@ -298,10 +297,10 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache( for (int i = first_pi + 1; i < ci->count; i++) { temp = max_index_i[i - 1]; pi = &parts_i[sort_i[i].i]; + + const float di = sort_i[i].d + pi->h * kernel_gamma + dx_max - rshift; - while (temp <= cj->count && - (sort_i[i].d + (pi->h * kernel_gamma + dx_max - rshift) > - sort_j[temp].d)) + while (temp < cj->count && di > sort_j[temp].d) temp++; max_index_i[i] = temp; @@ -333,11 +332,10 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache( temp = ci->count - 1; const struct part *pj = &parts_j[sort_j[last_pj].i]; + const float last_dj = sort_j[last_pj].d - dx_max - pj->h * kernel_gamma + rshift; /* Loop through particles in cell i until they are not in range of pj. */ - while (temp > 0 && - sort_j[last_pj].d - dx_max - (pj->h * kernel_gamma) < - sort_i[temp].d - rshift) + while (temp > 0 && last_dj < sort_i[temp].d) temp--; max_index_j[last_pj] = temp; @@ -346,10 +344,9 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache( for (int i = last_pj - 1; i >= 0; i--) { temp = max_index_j[i + 1]; pj = &parts_j[sort_j[i].i]; + const float dj = sort_j[i].d - dx_max - (pj->h * kernel_gamma) + rshift; - while (temp > 0 && - sort_j[i].d - dx_max - (pj->h * kernel_gamma) < - sort_i[temp].d - rshift) + while (temp > 0 && dj < sort_i[temp].d) temp--; max_index_j[i] = temp; @@ -855,19 +852,24 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, const double di_max = sort_i[count_i - 1].d - rshift; const double dj_min = sort_j[0].d; const float dx_max = (ci->dx_max_sort + cj->dx_max_sort); + const int active_ci = cell_is_active(ci, e); + const int active_cj = cell_is_active(cj, e); /* Check if any particles are active and return if there are not. */ int numActive = 0; - for (int pid = count_i - 1; - pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { - struct part *restrict pi = &parts_i[sort_i[pid].i]; - if (part_is_active_no_debug(pi, max_active_bin)) { - numActive++; - break; + + if (active_ci) { + for (int pid = count_i - 1; + pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { + struct part *restrict pi = &parts_i[sort_i[pid].i]; + if (part_is_active(pi, e)) { + numActive++; + break; + } } } - if (!numActive) { + if (!numActive && active_cj) { for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max; pjd++) { struct part *restrict pj = &parts_j[sort_j[pjd].i]; @@ -926,7 +928,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, /* Get the number of particles read into the ci cache. */ int ci_cache_count = count_i - first_pi_align; - if (cell_is_active(ci, e)) { + if (active_ci) { /* Loop over the parts in ci until nothing is within range in cj. */ for (int pid = count_i - 1; pid >= first_pi_loop; pid--) { @@ -1056,7 +1058,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, } /* loop over the parts in ci. */ } - if (cell_is_active(cj, e)) { + if (active_cj) { /* Loop over the parts in cj until nothing is within range in ci. */ for (int pjd = 0; pjd <= last_pj_loop; pjd++) { @@ -1068,11 +1070,9 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, /* Set the cache index. */ int cj_cache_idx = pjd; - /*TODO: rshift term. */ /* Skip this particle if no particle in ci is within range of it. */ const float hj = cj_cache->h[cj_cache_idx]; - const double dj_test = - sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift; + const double dj_test = sort_j[pjd].d - hj * kernel_gamma - dx_max; if (dj_test > di_max) continue; /* Determine the exit iteration of the interaction loop. */ @@ -1188,8 +1188,9 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, } /* loop over the parts in cj. */ - TIMER_TOC(timer_dopair_density); } + + TIMER_TOC(timer_dopair_density); #endif /* WITH_VECTORIZATION */ } diff --git a/tests/test125cells.c b/tests/test125cells.c index 9e19d8889c4aab7c3ac5b02da9409901dc8a825a..dbb3a7c3f5b052a3585aaf04b2cd8b24ded9adba 100644 --- a/tests/test125cells.c +++ b/tests/test125cells.c @@ -272,6 +272,8 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, bzero(cell->parts, count * sizeof(struct part)); bzero(cell->xparts, count * sizeof(struct xpart)); + float h_max = 0.f; + /* Construct the parts */ struct part *part = cell->parts; struct xpart *xpart = cell->xparts; @@ -288,6 +290,7 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, offset[2] + size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; part->h = size * h / (float)n; + h_max = fmax(h_max, part->h); #if defined(GIZMO_SPH) || defined(SHADOWFAX_SPH) part->conserved.mass = density * volume / count; @@ -333,7 +336,7 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, /* Cell properties */ cell->split = 0; - cell->h_max = h; + cell->h_max = h_max; cell->count = count; cell->gcount = 0; cell->dx_max_part = 0.; diff --git a/tests/testPeriodicBC.c b/tests/testPeriodicBC.c index 6fa2dc607b996b9e8508338a9806633c5a4d1a89..1afe829ce70c5d02f565e8fd002ba414abcb7388 100644 --- a/tests/testPeriodicBC.c +++ b/tests/testPeriodicBC.c @@ -290,7 +290,7 @@ void test_boundary_conditions(struct cell **cells, struct runner runner, struct cell *main_cell = cells[loc_i * (dim * dim) + loc_j * dim + loc_k]; /* Zero the fields */ - for (int j = 0; j < 512; ++j) zero_particle_fields(cells[j]); + for (int j = 0; j < dim * dim * dim; ++j) zero_particle_fields(cells[j]); /* Run all the pairs */ #if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION)) @@ -337,7 +337,7 @@ void test_boundary_conditions(struct cell **cells, struct runner runner, /* Now perform a brute-force version for accuracy tests */ /* Zero the fields */ - for (int i = 0; i < 512; ++i) zero_particle_fields(cells[i]); + for (int i = 0; i < dim * dim * dim; ++i) zero_particle_fields(cells[i]); #if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION)) @@ -467,11 +467,12 @@ int main(int argc, char *argv[]) { printf("\n"); /* Build the infrastructure */ + const int dim = 8; struct space space; space.periodic = 1; - space.dim[0] = 8.; - space.dim[1] = 8.; - space.dim[2] = 8.; + space.dim[0] = dim; + space.dim[1] = dim; + space.dim[2] = dim; struct hydro_props hp; hp.h_max = FLT_MAX; @@ -487,8 +488,7 @@ int main(int argc, char *argv[]) { runner.e = &engine; /* Construct some cells */ - struct cell *cells[512]; - const int dim = 8; + struct cell *cells[dim * dim * dim]; static long long partId = 0; for (int i = 0; i < dim; ++i) { for (int j = 0; j < dim; ++j) { @@ -581,7 +581,7 @@ int main(int argc, char *argv[]) { swiftOutputFileName, bruteForceOutputFileName); /* Clean things to make the sanitizer happy ... */ - for (int i = 0; i < 512; ++i) clean_up(cells[i]); + for (int i = 0; i < dim * dim * dim; ++i) clean_up(cells[i]); return 0; } diff --git a/tests/tolerance_27_perturbed_h.dat b/tests/tolerance_27_perturbed_h.dat index 5142c2a2090e15381a19b2bc71e253a482973b11..e25c3373afe6f2a8e7788dfc2d9eba0b74efdbee 100644 --- a/tests/tolerance_27_perturbed_h.dat +++ b/tests/tolerance_27_perturbed_h.dat @@ -1,4 +1,4 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2.4e-6 1e-4 5e-4 1.2e-2 1e-5 3e-6 3e-6 8e-6 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2.4e-6 1e-4 5e-4 1.2e-2 1.1e-5 3e-6 3e-6 8e-6 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-6 1.4e-2 1e-5 2e-3 2.5e-4 3e-3 3e-3 3e-3 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e0 1e-6 4e-6 4e-6 4e-6 diff --git a/tests/tolerance_27_perturbed_h2.dat b/tests/tolerance_27_perturbed_h2.dat index 23f6a5006124f6233aebd111005760a5dcc5b6a3..d83f36a887d92b55c90b21adae65ffe734a2f188 100644 --- a/tests/tolerance_27_perturbed_h2.dat +++ b/tests/tolerance_27_perturbed_h2.dat @@ -1,4 +1,4 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 3e-6 1e-4 5e-4 1.5e-2 1.4e-5 3e-6 3e-6 9e-6 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.5e-6 1.57e-2 1e-5 4.74e-3 3.89e-4 3e-3 3e-3 3e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 3e-6 1e-4 5e-4 1.5e-2 1.4e-5 3e-6 3e-6 1e-5 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.5e-6 1.57e-2 1e-5 5.86e-3 4.96e-4 3e-3 3e-3 3e-3 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e0 1e-6 4e-6 4e-6 4e-6