Commit d7a357f9 authored by Matthieu Schaller
Browse files

Merge branch 'master' into gravity_multi_dt

parents d3956b70 e03a3a62
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#define C2_CACHE_SIZE (NUM_VEC_PROC * VEC_SIZE * 6) + (NUM_VEC_PROC * VEC_SIZE) #define C2_CACHE_SIZE (NUM_VEC_PROC * VEC_SIZE * 6) + (NUM_VEC_PROC * VEC_SIZE)
#define C2_CACHE_ALIGN sizeof(float) * VEC_SIZE #define C2_CACHE_ALIGN sizeof(float) * VEC_SIZE
#ifdef WITH_VECTORIZATION
/* Cache struct to hold a local copy of a cell's particle /* Cache struct to hold a local copy of a cell's particle
* properties required for density/force calculations.*/ * properties required for density/force calculations.*/
struct cache { struct cache {
...@@ -433,4 +434,6 @@ static INLINE void cache_clean(struct cache *c) { ...@@ -433,4 +434,6 @@ static INLINE void cache_clean(struct cache *c) {
} }
} }
#endif /* WITH_VECTORIZATION */
#endif /* SWIFT_CACHE_H */ #endif /* SWIFT_CACHE_H */
...@@ -4343,11 +4343,13 @@ void engine_init(struct engine *e, struct space *s, ...@@ -4343,11 +4343,13 @@ void engine_init(struct engine *e, struct space *s,
e->runners[k].qid = k * nr_queues / e->nr_threads; e->runners[k].qid = k * nr_queues / e->nr_threads;
} }
#ifdef WITH_VECTORIZATION
/* Allocate particle caches. */ /* Allocate particle caches. */
e->runners[k].ci_cache.count = 0; e->runners[k].ci_cache.count = 0;
e->runners[k].cj_cache.count = 0; e->runners[k].cj_cache.count = 0;
cache_init(&e->runners[k].ci_cache, CACHE_SIZE); cache_init(&e->runners[k].ci_cache, CACHE_SIZE);
cache_init(&e->runners[k].cj_cache, CACHE_SIZE); cache_init(&e->runners[k].cj_cache, CACHE_SIZE);
#endif
if (verbose) { if (verbose) {
if (with_aff) if (with_aff)
...@@ -4434,8 +4436,10 @@ void engine_compute_next_snapshot_time(struct engine *e) { ...@@ -4434,8 +4436,10 @@ void engine_compute_next_snapshot_time(struct engine *e) {
*/ */
void engine_clean(struct engine *e) { void engine_clean(struct engine *e) {
#ifdef WITH_VECTORIZATION
for (int i = 0; i < e->nr_threads; ++i) cache_clean(&e->runners[i].ci_cache); for (int i = 0; i < e->nr_threads; ++i) cache_clean(&e->runners[i].ci_cache);
for (int i = 0; i < e->nr_threads; ++i) cache_clean(&e->runners[i].cj_cache); for (int i = 0; i < e->nr_threads; ++i) cache_clean(&e->runners[i].cj_cache);
#endif
free(e->runners); free(e->runners);
free(e->snapshotUnits); free(e->snapshotUnits);
free(e->links); free(e->links);
......
...@@ -49,11 +49,13 @@ struct runner { ...@@ -49,11 +49,13 @@ struct runner {
/*! The engine owning this runner. */ /*! The engine owning this runner. */
struct engine *e; struct engine *e;
#ifdef WITH_VECTORIZATION
/*! The particle cache of cell ci. */ /*! The particle cache of cell ci. */
struct cache ci_cache; struct cache ci_cache;
/*! The particle cache of cell cj. */ /*! The particle cache of cell cj. */
struct cache cj_cache; struct cache cj_cache;
#endif
}; };
/* Function prototypes. */ /* Function prototypes. */
......
...@@ -35,13 +35,18 @@ file1 = sys.argv[1] ...@@ -35,13 +35,18 @@ file1 = sys.argv[1]
file2 = sys.argv[2] file2 = sys.argv[2]
number_to_check = -1 number_to_check = -1
if len(sys.argv) == 5:
number_to_check = int(sys.argv[4])
fileTol = "" fileTol = ""
if len(sys.argv) >= 4: if len(sys.argv) >= 4:
fileTol = sys.argv[3] fileTol = sys.argv[3]
if len(sys.argv) >= 5:
number_to_check = int(sys.argv[4])
if len(sys.argv) == 6:
ignoreSmallRhoDh = int(sys.argv[5])
else:
ignoreSmallRhoDh = 0
data1 = loadtxt(file1) data1 = loadtxt(file1)
data2 = loadtxt(file2) data2 = loadtxt(file2)
if fileTol != "": if fileTol != "":
...@@ -102,8 +107,11 @@ for i in range(n_lines_to_check): ...@@ -102,8 +107,11 @@ for i in range(n_lines_to_check):
print "" print ""
error = True error = True
if abs(data1[i,j]) < 1e-6 and + abs(data2[i,j]) < 1e-6 : continue if abs(data1[i,j]) + abs(data2[i,j]) < 1e-6 : continue
# Ignore pathological cases with rho_dh
if ignoreSmallRhoDh and j == 8 and abs(data1[i,j]) < 2e-4: continue
if( rel_diff > 1.1*relTol[j]): if( rel_diff > 1.1*relTol[j]):
print "Relative difference larger than tolerance (%e) for particle %d, column %d:"%(relTol[j], i,j) print "Relative difference larger than tolerance (%e) for particle %d, column %d:"%(relTol[j], i,j)
print "%10s: a = %e"%("File 1", data1[i,j]) print "%10s: a = %e"%("File 1", data1[i,j])
......
...@@ -10,7 +10,7 @@ do ...@@ -10,7 +10,7 @@ do
if [ -e brute_force_27_perturbed.dat ] if [ -e brute_force_27_perturbed.dat ]
then then
python @srcdir@/difffloat.py brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat @srcdir@/tolerance_27_perturbed.dat 6 python @srcdir@/difffloat.py brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat @srcdir@/tolerance_27_perturbed.dat 6 1
else else
exit 1 exit 1
fi fi
......
...@@ -39,7 +39,7 @@ int main() { ...@@ -39,7 +39,7 @@ int main() {
const float numPoints_inv = 1. / numPoints; const float numPoints_inv = 1. / numPoints;
for (int i = 0; i < numPoints; ++i) { for (int i = 0; i < numPoints; ++i) {
u[i] = i * 2.5f * numPoints_inv / h; u[i] = i * 2.25f * numPoints_inv / h;
} }
for (int i = 0; i < numPoints; ++i) { for (int i = 0; i < numPoints; ++i) {
...@@ -55,18 +55,22 @@ int main() { ...@@ -55,18 +55,22 @@ int main() {
#ifdef WITH_VECTORIZATION #ifdef WITH_VECTORIZATION
printf("\nVector Output for kernel_deval_1_vec\n");
printf("-------------\n");
/* Test vectorised kernel that uses one vector. */
for (int i = 0; i < numPoints; i += VEC_SIZE) { for (int i = 0; i < numPoints; i += VEC_SIZE) {
vector vx, vx_h; vector vx, vx_h;
vector W_vec, dW_vec; vector W_vec, dW_vec;
for (int j = 0; j < VEC_SIZE; j++) { for (int j = 0; j < VEC_SIZE; j++) {
vx.f[j] = (i + j) * 2.5f / numPoints; vx.f[j] = (i + j) * 2.25f / numPoints;
} }
vx_h.v = vx.v / vec_set1(h); vx_h.v = vx.v / vec_set1(h);
kernel_deval_vec(&vx_h, &W_vec, &dW_vec); kernel_deval_1_vec(&vx_h, &W_vec, &dW_vec);
for (int j = 0; j < VEC_SIZE; j++) { for (int j = 0; j < VEC_SIZE; j++) {
printf("%2d: h= %f H= %f x=%f W(x,h)=%f dW(x,h)=%f\n", i + j, h, printf("%2d: h= %f H= %f x=%f W(x,h)=%f dW(x,h)=%f\n", i + j, h,
...@@ -85,6 +89,63 @@ int main() { ...@@ -85,6 +89,63 @@ int main() {
} }
} }
printf("\nVector Output for kernel_deval_2_vec\n");
printf("-------------\n");
/* Test vectorised kernel that uses two vectors. */
for (int i = 0; i < numPoints; i += VEC_SIZE) {
vector vx, vx_h;
vector W_vec, dW_vec;
vector vx_2, vx_h_2;
vector W_vec_2, dW_vec_2;
for (int j = 0; j < VEC_SIZE; j++) {
vx.f[j] = (i + j) * 2.25f / numPoints;
vx_2.f[j] = (i + j) * 2.25f / numPoints;
}
vx_h.v = vx.v / vec_set1(h);
vx_h_2.v = vx_2.v / vec_set1(h);
kernel_deval_2_vec(&vx_h, &W_vec, &dW_vec, &vx_h_2, &W_vec_2, &dW_vec_2);
/* Check first vector results. */
for (int j = 0; j < VEC_SIZE; j++) {
printf("%2d: h= %f H= %f x=%f W(x,h)=%f dW(x,h)=%f\n", i + j, h,
h * kernel_gamma, vx.f[j], W_vec.f[j], dW_vec.f[j]);
if (fabsf(W_vec.f[j] - W[i + j]) > 2e-7) {
printf("Invalid value ! scalar= %e, vector= %e\n", W[i + j],
W_vec.f[j]);
return 1;
}
if (fabsf(dW_vec.f[j] - dW[i + j]) > 2e-7) {
printf("Invalid value ! scalar= %e, vector= %e\n", dW[i + j],
dW_vec.f[j]);
return 1;
}
}
/* Check second vector results. */
for (int j = 0; j < VEC_SIZE; j++) {
printf("%2d: h= %f H= %f x=%f W(x,h)=%f dW(x,h)=%f\n", i + j, h,
h * kernel_gamma, vx_2.f[j], W_vec_2.f[j], dW_vec_2.f[j]);
if (fabsf(W_vec_2.f[j] - W[i + j]) > 2e-7) {
printf("Invalid value ! scalar= %e, vector= %e\n", W[i + j],
W_vec_2.f[j]);
return 1;
}
if (fabsf(dW_vec_2.f[j] - dW[i + j]) > 2e-7) {
printf("Invalid value ! scalar= %e, vector= %e\n", dW[i + j],
dW_vec_2.f[j]);
return 1;
}
}
}
printf("\nAll values are consistent\n"); printf("\nAll values are consistent\n");
#endif #endif
......
# ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-6 1e-4 5e-5 2e-3 3.1e-6 3e-6 3e-6 3e-6 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-6 1e-4 5e-5 2e-3 3.1e-6 3e-6 3e-6 3e-6
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-2 1e-5 1e-4 2e-5 2e-3 2e-3 2e-3 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-3 1e-5 1e-4 2e-5 2e-3 2e-3 2e-3
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment