diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 0df1f91194b6d1e7e98cb1b75be7d3eaaca7fc32..0193760d3114aecab91f0c2ad27a9c1dd77dec9a 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -1988,6 +1988,9 @@ INCLUDE_FILE_PATTERNS = # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. PREDEFINED = "__attribute__(x)= " +PREDEFINED += HAVE_HDF5 +PREDEFINED += WITH_MPI +PREDEFINED += WITH_VECTORIZATION # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The diff --git a/src/collectgroup.c b/src/collectgroup.c index 0b4ddc405772a45a1e444ef48b65fcb7d37a248f..b7e5486b59a2ec5e47b7b864071a2bb1e5ce1850 100644 --- a/src/collectgroup.c +++ b/src/collectgroup.c @@ -170,7 +170,7 @@ static void doreduce1(struct mpicollectgroup1 *mpigrp11, } /** - * @brief MPI reduce operator for #mpicollectgroup structures. + * @brief MPI reduce operator for #mpicollectgroup1 structures. */ static void mpicollectgroup1_reduce(void *in, void *inout, int *len, MPI_Datatype *datatype) { diff --git a/src/runner.c b/src/runner.c index 9be0a9ee2ce23888d04346679ccb36fdc6f13a02..36074144e14ec31741ee5964d8e2b98c9673192b 100644 --- a/src/runner.c +++ b/src/runner.c @@ -1818,13 +1818,8 @@ void *runner_main(void *data) { break; case task_type_pair: - if (t->subtype == task_subtype_density) { -#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) - runner_dopair1_density_vec(r, ci, cj); -#else - runner_dopair1_density(r, ci, cj); -#endif - } + if (t->subtype == task_subtype_density) + runner_dopair1_branch_density(r, ci, cj); #ifdef EXTRA_HYDRO_LOOP else if (t->subtype == task_subtype_gradient) runner_dopair1_gradient(r, ci, cj); diff --git a/src/runner_doiact.h b/src/runner_doiact.h index f4513ca75b994c51a74011f0da4ee6863a625968..a2abd9458f8b490497fe0b9b33c2179b283d639d 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -884,8 +884,11 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, * @param r The #runner. * @param ci The first #cell. * @param cj The second #cell. + * @param sid The direction of the pair + * @param shift The shift vector to apply to the particles in ci. */ -void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { +void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, + const double *shift) { const struct engine *restrict e = r->e; @@ -900,22 +903,6 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { TIMER_TIC; - /* Anything to do here? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - - if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) - error("Interacting undrifted cells."); - - /* Get the sort ID. */ - double shift[3] = {0.0, 0.0, 0.0}; - const int sid = space_getsid(e->s, &ci, &cj, shift); - - /* Have the cells been sorted? */ - if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin) - runner_do_sort(r, ci, (1 << sid), 1); - if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin) - runner_do_sort(r, cj, (1 << sid), 1); - /* Get the cutoff shift. */ double rshift = 0.0; for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; @@ -1116,6 +1103,51 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { TIMER_TOC(TIMER_DOPAIR); } +/** + * @brief Determine which version of DOPAIR1 needs to be called depending on the + * orientation of the cells or whether DOPAIR1 needs to be called at all. + * + * @param r #runner + * @param ci #cell ci + * @param cj #cell cj + * + */ +void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { + + const struct engine *restrict e = r->e; + + /* Anything to do here? */ + if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + + /* Check that cells are drifted. */ + if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) + error("Interacting undrifted cells."); + + /* Get the sort ID. */ + double shift[3] = {0.0, 0.0, 0.0}; + const int sid = space_getsid(e->s, &ci, &cj, shift); + + /* Have the cells been sorted? */ + if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin) + runner_do_sort(r, ci, (1 << sid), 1); + if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin) + runner_do_sort(r, cj, (1 << sid), 1); + + /* Have the cells been sorted? */ + if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid))) + error("Trying to interact unsorted cells."); + +#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && \ + (DOPAIR1_BRANCH == runner_dopair1_density_branch) + if (!sort_is_corner(sid)) + runner_dopair1_density_vec(r, ci, cj, sid, shift); + else + DOPAIR1(r, ci, cj, sid, shift); +#else + DOPAIR1(r, ci, cj, sid, shift); +#endif +} + /** * @brief Compute the interactions between a cell pair (symmetric) * @@ -2290,13 +2322,8 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, cj->dx_max_sort > cj->dmin * space_maxreldx) runner_do_sort(r, cj, (1 << sid), 1); -/* Compute the interactions. */ -#if (DOPAIR1 == runner_dopair1_density) && defined(WITH_VECTORIZATION) && \ - defined(GADGET2_SPH) - runner_dopair1_density_vec(r, ci, cj); -#else - DOPAIR1(r, ci, cj); -#endif + /* Compute the interactions. */ + DOPAIR1_BRANCH(r, ci, cj); } if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR); diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index a302817f7fec4089f1046eec5cf7ff09aadd25a6..b05a150047d7b15de97e0b1f5ea9308bfb7f60f7 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -20,13 +20,12 @@ /* Config parameters. */ #include "../config.h" -#include "swift.h" - -#include "active.h" - /* This object's header. */ #include "runner_doiact_vec.h" +/* Local headers. */ +#include "active.h" + #ifdef WITH_VECTORIZATION /** * @brief Compute the vector remainder interactions from the secondary cache. @@ -262,40 +261,40 @@ __attribute__((always_inline)) INLINE static void storeInteractions( } } -/* @brief Populates the arrays max_di and max_dj with the maximum distances of +/** + * @brief Populates the arrays max_di and max_dj with the maximum distances of * particles into their neighbouring cells. Also finds the first pi that * interacts with any particle in cj and the last pj that interacts with any * particle in ci. + * * @param ci #cell pointer to ci * @param cj #cell pointer to cj * @param sort_i #entry array for particle distance in ci * @param sort_j #entry array for particle distance in cj - * @param ci_cache #cache for cell ci - * @param cj_cache #cache for cell cj * @param dx_max maximum particle movement allowed in cell * @param rshift cutoff shift + * @param hi_max Maximal smoothing length in cell ci + * @param hj_max Maximal smoothing length in cell cj + * @param di_max Maximal position on the axis that can interact in cell ci + * @param dj_min Minimal position on the axis that can interact in cell ci * @param max_di array to hold the maximum distances of pi particles into cell * cj * @param max_dj array to hold the maximum distances of pj particles into cell * cj * @param init_pi first pi to interact with a pj particle * @param init_pj last pj to interact with a pi particle + * @param e The #engine. */ __attribute__((always_inline)) INLINE static void populate_max_d_no_cache( const struct cell *ci, const struct cell *cj, const struct entry *restrict sort_i, const struct entry *restrict sort_j, - const float dx_max, const float rshift, float *max_di, float *max_dj, - int *init_pi, int *init_pj, const struct engine *e) { + const float dx_max, const float rshift, const double hi_max, + const double hj_max, const double di_max, const double dj_min, + float *max_di, float *max_dj, int *init_pi, int *init_pj, + const struct engine *e) { - struct part *restrict parts_i = ci->parts; - struct part *restrict parts_j = cj->parts; - struct part *p = &parts_i[sort_i[0].i]; - - float h, d; - - /* Get the distance of the last pi and the first pj on the sorted axis.*/ - const float di_max = sort_i[ci->count - 1].d - rshift; - const float dj_min = sort_j[0].d; + const struct part *restrict parts_i = ci->parts; + const struct part *restrict parts_j = cj->parts; int first_pi = 0, last_pj = cj->count - 1; @@ -304,19 +303,19 @@ __attribute__((always_inline)) INLINE static void populate_max_d_no_cache( /* Populate max_di with distances. */ int active_id = ci->count - 1; for (int k = ci->count - 1; k >= 0; k--) { - p = &parts_i[sort_i[k].i]; - h = p->h; - d = sort_i[k].d + h * kernel_gamma + dx_max - rshift; + const struct part *pi = &parts_i[sort_i[k].i]; + const float d = sort_i[k].d + dx_max; - max_di[k] = d; + // max_di[k] = d + h * kernel_gamma - rshift; + max_di[k] = d + hi_max; /* If the particle is out of range set the index to * the last active particle within range. */ - if (d < dj_min) { + if (d + hi_max < dj_min) { first_pi = active_id; break; } else { - if (part_is_active(p, e)) active_id = k; + if (part_is_active(pi, e)) active_id = k; } } @@ -329,19 +328,20 @@ __attribute__((always_inline)) INLINE static void populate_max_d_no_cache( /* Populate max_dj with distances. */ active_id = 0; for (int k = 0; k < cj->count; k++) { - p = &parts_j[sort_j[k].i]; - h = p->h; - d = sort_j[k].d - h * kernel_gamma - dx_max - rshift; + const struct part *pj = &parts_j[sort_j[k].i]; + const float d = sort_j[k].d - dx_max; - max_dj[k] = d; + /*TODO: don't think rshift should be taken off here, waiting on Pedro. */ + // max_dj[k] = d - h * kernel_gamma - rshift; + max_dj[k] = d - hj_max; /* If the particle is out of range set the index to * the last active particle within range. */ - if (d > di_max) { + if (d - hj_max > di_max) { last_pj = active_id; break; } else { - if (part_is_active(p, e)) active_id = k; + if (part_is_active(pj, e)) active_id = k; } } @@ -611,9 +611,12 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( * @param r The #runner. * @param ci The first #cell. * @param cj The second #cell. + * @param sid The direction of the pair + * @param shift The shift vector to apply to the particles in ci. */ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, - struct cell *cj) { + struct cell *cj, const int sid, + const double *shift) { #ifdef WITH_VECTORIZATION const struct engine *restrict e = r->e; @@ -622,22 +625,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, TIMER_TIC; - /* Anything to do here? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - - if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) - error("Interacting undrifted cells."); - - /* Get the sort ID. */ - double shift[3] = {0.0, 0.0, 0.0}; - const int sid = space_getsid(e->s, &ci, &cj, shift); - - /* Have the cells been sorted? */ - if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin) - runner_do_sort(r, ci, (1 << sid), 1); - if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin) - runner_do_sort(r, cj, (1 << sid), 1); - /* Get the cutoff shift. */ double rshift = 0.0; for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; @@ -726,8 +713,9 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, /* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */ /* Also find the first pi that interacts with any particle in cj and the last * pj that interacts with any particle in ci. */ - populate_max_d_no_cache(ci, cj, sort_i, sort_j, dx_max, rshift, max_di, - max_dj, &first_pi, &last_pj, e); + populate_max_d_no_cache(ci, cj, sort_i, sort_j, dx_max, rshift, hi_max, + hj_max, di_max, dj_min, max_di, max_dj, &first_pi, + &last_pj, e); /* Find the maximum index into cj that is required by a particle in ci. */ /* Find the maximum index into ci that is required by a particle in cj. */ @@ -770,13 +758,22 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, if (cell_is_active(ci, e)) { - /* Loop over the parts in ci. */ + /* Loop over the parts in ci until nothing is within range in cj. */ for (int pid = count_i - 1; pid >= first_pi_loop && max_ind_j >= 0; pid--) { /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts_i[sort_i[pid].i]; if (!part_is_active(pi, e)) continue; + /* Set the cache index. */ + int ci_cache_idx = pid - first_pi_align; + + /* Skip this particle if no particle in cj is within range of it. */ + const float hi = ci_cache->h[ci_cache_idx]; + const double di_test = + sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; + if (di_test < dj_min) continue; + /* Determine the exit iteration of the interaction loop. */ dj = sort_j[max_ind_j].d; while (max_ind_j > 0 && max_di[pid] < dj) { @@ -786,10 +783,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, } int exit_iteration = max_ind_j + 1; - /* Set the cache index. */ - int ci_cache_idx = pid - first_pi_align; - - const float hi = ci_cache->h[ci_cache_idx]; const float hig2 = hi * hi * kernel_gamma2; vector pix, piy, piz; @@ -903,13 +896,24 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, } if (cell_is_active(cj, e)) { - /* Loop over the parts in cj. */ + + /* Loop over the parts in cj until nothing is within range in ci. */ for (int pjd = 0; pjd <= last_pj_loop && max_ind_i < count_i; pjd++) { /* Get a hold of the jth part in cj. */ struct part *restrict pj = &parts_j[sort_j[pjd].i]; if (!part_is_active(pj, e)) continue; + /* Set the cache index. */ + int cj_cache_idx = pjd; + + /*TODO: rshift term. */ + /* Skip this particle if no particle in ci is within range of it. */ + const float hj = cj_cache->h[cj_cache_idx]; + const double dj_test = + sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift; + if (dj_test > di_max) continue; + /* Determine the exit iteration of the interaction loop. */ di = sort_i[max_ind_i].d; while (max_ind_i < count_i - 1 && max_dj[pjd] > di) { @@ -919,10 +923,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, } int exit_iteration = max_ind_i; - /* Set the cache index. */ - int cj_cache_idx = pjd; - - const float hj = cj_cache->h[cj_cache_idx]; const float hjg2 = hj * hj * kernel_gamma2; vector pjx, pjy, pjz; diff --git a/src/runner_doiact_vec.h b/src/runner_doiact_vec.h index e252083ae743248a5f23d1772c2e770c5e1c6c14..13c88cebaa47c5051b4119033a055e49a9a1079c 100644 --- a/src/runner_doiact_vec.h +++ b/src/runner_doiact_vec.h @@ -35,8 +35,8 @@ /* Function prototypes. */ void runner_doself1_density_vec(struct runner *r, struct cell *restrict c); -void runner_doself1_density_vec_2(struct runner *r, struct cell *restrict c); void runner_dopair1_density_vec(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj); + struct cell *restrict cj, const int sid, + const double *shift); #endif /* SWIFT_RUNNER_VEC_H */ diff --git a/src/serial_io.c b/src/serial_io.c index a7e342f0a90fcf4c57f334526ff91b1923de4585..58237b49c331c34639eb5ae7ce79fd5272f73057 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -59,19 +59,15 @@ * @brief Reads a data array from a given HDF5 group. * * @param grp The group from which to read. - * @param name The name of the array to read. - * @param type The #DATA_TYPE of the attribute. - * @param N The number of particles. - * @param dim The dimension of the data (1 for scalar, 3 for vector) - * @param part_c A (char*) pointer on the first occurrence of the field of - *interest in the parts array - * @param partSize The size in bytes of the particle structure. - * @param importance If COMPULSORY, the data must be present in the IC file. If - *OPTIONAL, the array will be zeroed when the data is not present. + * @param props The #io_props of the field to read + * @param N The number of particles to read on this rank. + * @param N_total The total number of particles on all ranks. + * @param offset The offset position where this rank starts reading. + * @param internal_units The #unit_system used internally + * @param ic_units The #unit_system used in the ICs * * @todo A better version using HDF5 hyper-slabs to read the file directly into - *the part array - * will be written once the structures have been stabilized. + * the part array will be written once the structures have been stabilized. */ void readArray(hid_t grp, const struct io_props props, size_t N, long long N_total, long long offset, @@ -274,16 +270,17 @@ void prepareArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile, * @param fileName The name of the file in which the data is written * @param xmfFile The FILE used to write the XMF description * @param partTypeGroupName The name of the group containing the particles in - *the HDF5 file. - * @param name The name of the array to write. - * @param type The #DATA_TYPE of the array. + * the HDF5 file. + * @param props The #io_props of the field to read * @param N The number of particles to write. - * @param dim The dimension of the data (1 for scalar, 3 for vector) - * @param part_c A (char*) pointer on the first occurrence of the field of - *interest in the parts array - * @param partSize The size in bytes of the particle structure. - * @param us The unit_system currently in use - * @param convFactor The UnitConversionFactor for this arrayo + * @param N_total The total number of particles on all ranks. + * @param offset The offset position where this rank starts writing. + * @param mpi_rank The MPI rank of this node + * @param internal_units The #unit_system used internally + * @param snapshot_units The #unit_system used in the snapshots + * + * @todo A better version using HDF5 hyper-slabs to write the file directly from + * the part array will be written once the structures have been stabilized. */ void writeArray(struct engine* e, hid_t grp, char* fileName, FILE* xmfFile, char* partTypeGroupName, const struct io_props props, size_t N, diff --git a/src/single_io.c b/src/single_io.c index 0b091a5997504e5f5a4cc3b8af7ca06c994e993c..2de42f8a170620c983f7992771f2bd03da80b5a7 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -64,8 +64,7 @@ * @param ic_units The #unit_system used in the ICs * * @todo A better version using HDF5 hyper-slabs to read the file directly into - *the part array - * will be written once the structures have been stabilized. + * the part array will be written once the structures have been stabilized. */ void readArray(hid_t h_grp, const struct io_props prop, size_t N, const struct unit_system* internal_units, diff --git a/tests/Makefile.am b/tests/Makefile.am index 3b3da04dcf17f10a3bd60451813467f7961315bc..b356086b54b6f4f07c8bedafe1fccb543121871e 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -97,6 +97,6 @@ EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \ test27cells.sh test27cellsPerturbed.sh testParser.sh testPeriodicBC.sh \ testPeriodicBCPerturbed.sh test125cells.sh test125cellsPerturbed.sh testParserInput.yaml \ difffloat.py tolerance_125_normal.dat tolerance_125_perturbed.dat \ - tolerance_27_normal.dat tolerance_27_perturbed.dat \ + tolerance_27_normal.dat tolerance_27_perturbed.dat tolerance_27_perturbed_h.dat \ tolerance_pair_normal.dat tolerance_pair_perturbed.dat \ fft_params.yml tolerance_periodic_BC_normal.dat tolerance_periodic_BC_perturbed.dat diff --git a/tests/difffloat.py b/tests/difffloat.py index 0bdc706a1c44ee6c42c54ad37e93f634742e06bc..8c8713a8d2968005a86a9c1e7fdce34bf3e624f7 100644 --- a/tests/difffloat.py +++ b/tests/difffloat.py @@ -46,7 +46,13 @@ if len(sys.argv) == 6: ignoreSmallRhoDh = int(sys.argv[5]) else: ignoreSmallRhoDh = 0 - + +# Get the particle properties being compared from the header. +with open(file1, 'r') as f: + line = f.readline() + if 'ID' in line: + part_props = line.split()[1:] + data1 = loadtxt(file1) data2 = loadtxt(file2) if fileTol != "": @@ -100,7 +106,7 @@ for i in range(n_lines_to_check): rel_diff = 0. if( abs_diff > 1.1*absTol[j]): - print "Absolute difference larger than tolerance (%e) for particle %d, column %d:"%(absTol[j], i,j) + print "Absolute difference larger than tolerance (%e) for particle %d, column %s:"%(absTol[j], data1[i,0], part_props[j]) print "%10s: a = %e"%("File 1", data1[i,j]) print "%10s: b = %e"%("File 2", data2[i,j]) print "%10s: |a-b| = %e"%("Difference", abs_diff) @@ -113,7 +119,7 @@ for i in range(n_lines_to_check): if ignoreSmallRhoDh and j == 8 and abs(data1[i,j]) < 2e-4: continue if( rel_diff > 1.1*relTol[j]): - print "Relative difference larger than tolerance (%e) for particle %d, column %d:"%(relTol[j], i,j) + print "Relative difference larger than tolerance (%e) for particle %d, column %s:"%(relTol[j], data1[i,0], part_props[j]) print "%10s: a = %e"%("File 1", data1[i,j]) print "%10s: b = %e"%("File 2", data2[i,j]) print "%10s: |a-b|/|a+b| = %e"%("Difference", rel_diff) diff --git a/tests/test125cells.c b/tests/test125cells.c index e4c73b5e75df56436d277d719b3b83a179924a6f..9c113f98c89e6213d8c01fcd4515da076787e93e 100644 --- a/tests/test125cells.c +++ b/tests/test125cells.c @@ -432,6 +432,8 @@ void dump_particle_fields(char *fileName, struct cell *main_cell, /* Just a forward declaration... */ void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj); +void runner_dopair1_branch_density(struct runner *r, struct cell *ci, + struct cell *cj); void runner_doself1_density(struct runner *r, struct cell *ci); void runner_dopair2_force(struct runner *r, struct cell *ci, struct cell *cj); void runner_doself2_force(struct runner *r, struct cell *ci); @@ -621,6 +623,14 @@ int main(int argc, char *argv[]) { /* Do the density calculation */ #if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION)) + /* Initialise the particle cache. */ +#ifdef WITH_VECTORIZATION + runner.ci_cache.count = 0; + cache_init(&runner.ci_cache, 512); + runner.cj_cache.count = 0; + cache_init(&runner.cj_cache, 512); +#endif + /* Run all the pairs (only once !)*/ for (int i = 0; i < 5; i++) { for (int j = 0; j < 5; j++) { @@ -643,7 +653,7 @@ int main(int argc, char *argv[]) { struct cell *cj = cells[iii * 25 + jjj * 5 + kkk]; - if (cj > ci) runner_dopair1_density(&runner, ci, cj); + if (cj > ci) runner_dopair1_branch_density(&runner, ci, cj); } } } diff --git a/tests/test27cells.c b/tests/test27cells.c index a0f541d17100a13079580aabbef065fa5adbd5e1..fd9d62756ad0e8461f60b2e452c6ca86135a8e89 100644 --- a/tests/test27cells.c +++ b/tests/test27cells.c @@ -30,11 +30,9 @@ /* Local headers. */ #include "swift.h" -#define ACC_THRESHOLD 1e-5 - #if defined(WITH_VECTORIZATION) #define DOSELF1 runner_doself1_density_vec -#define DOPAIR1 runner_dopair1_density_vec +#define DOPAIR1 runner_dopair1_branch_density #define DOSELF1_NAME "runner_doself1_density_vec" #define DOPAIR1_NAME "runner_dopair1_density_vec" #endif @@ -45,7 +43,7 @@ #endif #ifndef DOPAIR1 -#define DOPAIR1 runner_dopair1_density +#define DOPAIR1 runner_dopair1_branch_density #define DOPAIR1_NAME "runner_dopair1_density" #endif @@ -64,18 +62,20 @@ enum velocity_types { * @param offset The position of the cell offset from (0,0,0). * @param size The cell size. * @param h The smoothing length of the particles in units of the inter-particle - *separation. + * separation. * @param density The density of the fluid. * @param partId The running counter of IDs. * @param pert The perturbation to apply to the particles in the cell in units - *of the inter-particle separation. + * of the inter-particle separation. * @param vel The type of velocity field (0, random, divergent, rotating) + * @param h_pert The perturbation to apply to the smoothing length. */ struct cell *make_cell(size_t n, double *offset, double size, double h, double density, long long *partId, double pert, - enum velocity_types vel) { + enum velocity_types vel, double h_pert) { const size_t count = n * n * n; const double volume = size * size * size; + float h_max = 0.f; struct cell *cell = malloc(sizeof(struct cell)); bzero(cell, sizeof(struct cell)); @@ -121,7 +121,11 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, part->v[2] = 0.f; break; } - part->h = size * h / (float)n; + if (h_pert) + part->h = size * h * random_uniform(1.f, 1.1f) / (float)n; + else + part->h = size * h / (float)n; + h_max = fmaxf(h_max, part->h); part->id = ++(*partId); #if defined(GIZMO_SPH) || defined(SHADOWFAX_SPH) @@ -156,7 +160,7 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, /* Cell properties */ cell->split = 0; - cell->h_max = h; + cell->h_max = h_max; cell->count = count; cell->dx_max_part = 0.; cell->dx_max_sort = 0.; @@ -288,33 +292,11 @@ void dump_particle_fields(char *fileName, struct cell *main_cell, fclose(file); } -/** - * @brief Compares the vectorised result against - * the serial result of the interaction. - * - * @param serial_parts Particle array that has been interacted serially - * @param vec_parts Particle array to be interacted using vectors - * @param count No. of particles that have been interacted - * @param threshold Level of accuracy needed - * - * @return Non-zero value if difference found, 0 otherwise - */ -int check_results(struct part *serial_parts, struct part *vec_parts, int count, - double threshold) { - int result = 0; - - for (int i = 0; i < count; i++) - result += compare_particles(serial_parts[i], vec_parts[i], threshold); - - return result; -} - /* Just a forward declaration... */ void runner_doself1_density(struct runner *r, struct cell *ci); void runner_doself1_density_vec(struct runner *r, struct cell *ci); -void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj); -void runner_dopair1_density_vec(struct runner *r, struct cell *ci, - struct cell *cj); +void runner_dopair1_branch_density(struct runner *r, struct cell *ci, + struct cell *cj); /* And go... */ int main(int argc, char *argv[]) { @@ -322,8 +304,7 @@ int main(int argc, char *argv[]) { engine_pin(); size_t runs = 0, particles = 0; double h = 1.23485, size = 1., rho = 1.; - double perturbation = 0.; - double threshold = ACC_THRESHOLD; + double perturbation = 0., h_pert = 0.; char outputFileNameExtension[200] = ""; char outputFileName[200] = ""; enum velocity_types vel = velocity_zero; @@ -339,11 +320,14 @@ int main(int argc, char *argv[]) { srand(0); char c; - while ((c = getopt(argc, argv, "m:s:h:n:r:t:d:f:v:a:")) != -1) { + while ((c = getopt(argc, argv, "m:s:h:p:n:r:t:d:f:v:")) != -1) { switch (c) { case 'h': sscanf(optarg, "%lf", &h); break; + case 'p': + sscanf(optarg, "%lf", &h_pert); + break; case 's': sscanf(optarg, "%lf", &size); break; @@ -365,9 +349,6 @@ int main(int argc, char *argv[]) { case 'v': sscanf(optarg, "%d", (int *)&vel); break; - case 'a': - sscanf(optarg, "%lf", &threshold); - break; case '?': error("Unknown option."); break; @@ -382,6 +363,7 @@ int main(int argc, char *argv[]) { "runner_doself1_density()." "\n\nOptions:" "\n-h DISTANCE=1.2348 - Smoothing length in units of <x>" + "\n-p - Random fractional change in h, h=h*random(1,p)" "\n-m rho - Physical density in the cell" "\n-s size - Physical size of the cell" "\n-d pert - Perturbation to apply to the particles [0,1[" @@ -435,8 +417,9 @@ int main(int argc, char *argv[]) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 3; ++k) { double offset[3] = {i * size, j * size, k * size}; - cells[i * 9 + j * 3 + k] = make_cell(particles, offset, size, h, rho, - &partId, perturbation, vel); + cells[i * 9 + j * 3 + k] = + make_cell(particles, offset, size, h, rho, &partId, perturbation, + vel, h_pert); runner_do_drift_part(&runner, cells[i * 9 + j * 3 + k], 0); @@ -504,10 +487,6 @@ int main(int argc, char *argv[]) { } } - /* Store the vectorised particle results. */ - struct part vec_parts[main_cell->count]; - for (int i = 0; i < main_cell->count; i++) vec_parts[i] = main_cell->parts[i]; - /* Output timing */ ticks corner_time = timings[0] + timings[2] + timings[6] + timings[8] + timings[18] + timings[20] + timings[24] + timings[26]; @@ -552,10 +531,6 @@ int main(int argc, char *argv[]) { sprintf(outputFileName, "brute_force_27_%s.dat", outputFileNameExtension); dump_particle_fields(outputFileName, main_cell, cells); - /* Check serial results against the vectorised results. */ - if (check_results(main_cell->parts, vec_parts, main_cell->count, threshold)) - message("Differences found..."); - /* Output timing */ message("Brute force calculation took : %15lli ticks.", toc - tic); diff --git a/tests/test27cells.sh.in b/tests/test27cells.sh.in index 4312ce55e13097d4ae40c289b9c5caa885ff37cc..a313a594c10c80598852a92e7920ac846e15ba2d 100755 --- a/tests/test27cells.sh.in +++ b/tests/test27cells.sh.in @@ -1,13 +1,14 @@ #!/bin/bash +# Test for particles with the same smoothing length for v in {0..3} do echo "" rm -f brute_force_27_standard.dat swift_dopair_27_standard.dat - echo "Running ./test27cells -n 6 -r 1 -d 0 -f standard -v $v -a 1e-4" - ./test27cells -n 6 -r 1 -d 0 -f standard -v $v -a 1e-4 + echo "Running ./test27cells -n 6 -r 1 -d 0 -f standard -v $v" + ./test27cells -n 6 -r 1 -d 0 -f standard -v $v if [ -e brute_force_27_standard.dat ] then @@ -27,4 +28,32 @@ do done +# Test for particles with random smoothing lengths +for v in {0..3} +do + echo "" + + rm -f brute_force_27_standard.dat swift_dopair_27_standard.dat + + echo "Running ./test27cells -n 6 -r 1 -d 0 -f standard -v $v -p 1.1" + ./test27cells -n 6 -r 1 -d 0 -f standard -v $v -p 1.1 + + if [ -e brute_force_27_standard.dat ] + then + if python @srcdir@/difffloat.py brute_force_27_standard.dat swift_dopair_27_standard.dat @srcdir@/tolerance_27_perturbed_h.dat 6 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi + else + echo "Error Missing test output file" + exit 1 + fi + + echo "------------" + +done + exit $? diff --git a/tests/test27cellsPerturbed.sh.in b/tests/test27cellsPerturbed.sh.in index 2f2e1db76346ca8f0ea4c2365ee349e232a1ce53..bbee027bda407c25f40cbf797e2bfcdc6a4827e4 100755 --- a/tests/test27cellsPerturbed.sh.in +++ b/tests/test27cellsPerturbed.sh.in @@ -1,13 +1,14 @@ #!/bin/bash +# Test for particles with the same smoothing length for v in {0..3} do echo "" rm -f brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat - echo "Running ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v -a 5e-4" - ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v -a 5e-4 + echo "Running ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v" + ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v if [ -e brute_force_27_perturbed.dat ] then @@ -27,4 +28,32 @@ do done +# Test for particles with random smoothing lengths +for v in {0..3} +do + echo "" + + rm -f brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat + + echo "Running ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v -p 1.1" + ./test27cells -n 6 -r 1 -d 0.1 -f perturbed -v $v -p 1.1 + + if [ -e brute_force_27_perturbed.dat ] + then + if python @srcdir@/difffloat.py brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat @srcdir@/tolerance_27_perturbed_h.dat 6 1 + then + echo "Accuracy test passed" + else + echo "Accuracy test failed" + exit 1 + fi + else + echo "Error Missing test output file" + exit 1 + fi + + echo "------------" + +done + exit $? diff --git a/tests/testPair.c b/tests/testPair.c index 92987d2fdb625fec6e186a280837f145787f599b..2ca2092cf1bb41c4910ee2639c22677e2fb1dbd2 100644 --- a/tests/testPair.c +++ b/tests/testPair.c @@ -16,14 +16,36 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ +#include "../config.h" +/* Some standard headers. */ #include <fenv.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> + +/* Local headers. */ #include "swift.h" +#if defined(WITH_VECTORIZATION) +#define DOSELF1 runner_doself1_density_vec +#define DOPAIR1 runner_dopair1_branch_density +#define DOSELF1_NAME "runner_doself1_density_vec" +#define DOPAIR1_NAME "runner_dopair1_density_vec" +#endif + +#ifndef DOSELF1 +#define DOSELF1 runner_doself1_density +#define DOSELF1_NAME "runner_doself1_density" +#endif + +#ifndef DOPAIR1 +#define DOPAIR1 runner_dopair1_branch_density +#define DOPAIR1_NAME "runner_dopair1_density" +#endif + + /* n is both particles per axis and box size: * particles are generated on a mesh with unit spacing */ @@ -187,6 +209,9 @@ void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) { /* Just a forward declaration... */ void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj); +void runner_doself1_density_vec(struct runner *r, struct cell *ci); +void runner_dopair1_branch_density(struct runner *r, struct cell *ci, + struct cell *cj); int main(int argc, char *argv[]) { size_t particles = 0, runs = 0, volume, type = 0; @@ -206,6 +231,9 @@ int main(int argc, char *argv[]) { unsigned long long cpufreq = 0; clocks_set_cpufreq(cpufreq); + /* Choke on FP-exceptions */ + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); + srand(0); while ((c = getopt(argc, argv, "h:p:r:t:d:f:")) != -1) { @@ -276,7 +304,7 @@ int main(int argc, char *argv[]) { #if defined(DEFAULT_SPH) || !defined(WITH_VECTORIZATION) /* Run the test */ - runner_dopair1_density(&runner, ci, cj); + DOPAIR1(&runner, ci, cj); #endif toc = getticks(); diff --git a/tests/tolerance_125_perturbed.dat b/tests/tolerance_125_perturbed.dat index 04e642b28cb3729cb81f8183c3e69595ac651876..a45a419c1b484be226591ddf5ce16c8ef328e9a5 100644 --- a/tests/tolerance_125_perturbed.dat +++ b/tests/tolerance_125_perturbed.dat @@ -1,3 +1,3 @@ # ID pos_x pos_y pos_z v_x v_y v_z h rho div_v S u P c a_x a_y a_z h_dt v_sig dS/dt du/dt 0 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 - 0 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 5e-3 5e-3 5e-3 1e-4 1e-4 1e-4 1e-4 + 0 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-2 1e-2 1e-2 1e-4 1e-4 1e-4 1e-4 diff --git a/tests/tolerance_27_perturbed.dat b/tests/tolerance_27_perturbed.dat index 9c6ee8c77cc6d53e67db9dbb86be197d49149b10..cd31b4c5aa25059e5abc012ba3d84a1c88a2c746 100644 --- a/tests/tolerance_27_perturbed.dat +++ b/tests/tolerance_27_perturbed.dat @@ -1,3 +1,3 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-6 1e-4 5e-5 2e-3 4e-6 3e-6 3e-6 3e-6 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-3 1e-5 1e-4 4e-5 2e-3 2e-3 2e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 1e-4 2e-4 3e-3 1e-5 3e-6 3e-6 7e-6 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-3 1e-5 1e-4 6e-5 2e-3 2e-3 2e-3 diff --git a/tests/tolerance_27_perturbed_h.dat b/tests/tolerance_27_perturbed_h.dat new file mode 100644 index 0000000000000000000000000000000000000000..43521a94c4339adbcf43f2e46ae578a101c23bc6 --- /dev/null +++ b/tests/tolerance_27_perturbed_h.dat @@ -0,0 +1,3 @@ +# ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 1e-4 2e-4 4e-3 1e-5 3e-6 3e-6 7e-6 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.4e-2 1e-5 1e-4 2.5e-4 3e-3 3e-3 3e-3