diff --git a/examples/Makefile.am b/examples/Makefile.am index 15fceb236057d21ba878cb09da39aca5bb70b692..735817c24cc52786e4c562e46e3619fb4a9a2e34 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -1,4 +1,3 @@ - # This file is part of SWIFT. # Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk), # Matthieu Schaller (matthieu.schaller@durham.ac.uk). diff --git a/examples/main.c b/examples/main.c index 6422ccceef0a5f5c5f3b5acce9ba60b957986aac..fa0ac2567ac253568903ce8c47b2c9c38fee3704 100644 --- a/examples/main.c +++ b/examples/main.c @@ -93,7 +93,6 @@ void print_help_message() { * @brief Main routine that loads a few particles and generates some output. * */ - int main(int argc, char *argv[]) { struct clocks_time tic, toc; @@ -482,10 +481,8 @@ int main(int argc, char *argv[]) { if (j % 100 == 2) e.forcerepart = reparttype; #endif + /* Reset timers */ timers_reset(timers_mask_all); -#ifdef COUNTER - for (k = 0; k < runner_counter_count; k++) runner_counter[k] = 0; -#endif /* Take a step. */ engine_step(&e); diff --git a/src/approx_math.h b/src/approx_math.h index ef93ea63c383c74caa3eaff65446962872389a35..cbca602b3fafcc5044b0939b2207b8f9d50a7446 100644 --- a/src/approx_math.h +++ b/src/approx_math.h @@ -19,6 +19,8 @@ #ifndef SWIFT_APPROX_MATH_H #define SWIFT_APPROX_MATH_H +#include "inline.h" + /** * @brief Approximate version of expf(x) using a 4th order Taylor expansion * diff --git a/src/cell.h b/src/cell.h index 65665c9dcfcd652cc688f652008044d618b10790..ad39354c6ced334c8c6dfa8420dc6c7f649009a3 100644 --- a/src/cell.h +++ b/src/cell.h @@ -27,8 +27,9 @@ #include "lock.h" #include "multipole.h" #include "part.h" +#include "task.h" -/* Forward declaration of space, needed for cell_unpack. */ +/* Avoid cyclic inclusions */ struct space; /* Max tag size set to 2^29 to take into account some MPI implementations diff --git a/src/common_io.c b/src/common_io.c index 4399c83f7ebd2bb5355b7df9cc1fdecd301e36aa..03d5618cd0cafe5ff6067cf72107baacffb5d85a 100644 --- a/src/common_io.c +++ b/src/common_io.c @@ -43,6 +43,8 @@ #include "const.h" #include "error.h" #include "kernel_hydro.h" +#include "part.h" +#include "units.h" #include "version.h" const char* particle_type_names[NUM_PARTICLE_TYPES] = { diff --git a/src/common_io.h b/src/common_io.h index ed1c96801c904d9952c7b3ca995b847afdc3fb43..fa6811a26671a2ed85c12ada7bb380094f55795d 100644 --- a/src/common_io.h +++ b/src/common_io.h @@ -23,13 +23,11 @@ /* Config parameters. */ #include "../config.h" -/* Includes. */ -#include "kernel_hydro.h" +#if defined(HAVE_HDF5) + #include "part.h" #include "units.h" -#if defined(HAVE_HDF5) - /** * @brief The different types of data used in the GADGET IC files. * @@ -107,6 +105,6 @@ void writeXMFline(FILE* xmfFile, char* fileName, char* partTypeGroupName, void writeCodeDescription(hid_t h_file); void writeUnitSystem(hid_t h_file, struct UnitSystem* us); -#endif +#endif /* defined HDF5 */ #endif /* SWIFT_COMMON_IO_H */ diff --git a/src/debug.c b/src/debug.c index a3647915d96a9456cb6d8177d144dab56fd0b97e..77238ab135e3f27b8c2c43b0ec18e7745493b138 100644 --- a/src/debug.c +++ b/src/debug.c @@ -20,11 +20,19 @@ * ******************************************************************************/ +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ #include <stdio.h> +/* This object's header. */ +#include "debug.h" + +/* Local includes. */ #include "config.h" #include "const.h" -#include "debug.h" +#include "inline.h" #include "part.h" /* Import the right hydro definition */ @@ -51,7 +59,6 @@ * * (Should be used for debugging only as it runs in O(N).) */ - void printParticle(struct part *parts, struct xpart *xparts, long long int id, size_t N) { @@ -69,6 +76,17 @@ void printParticle(struct part *parts, struct xpart *xparts, long long int id, if (!found) printf("## Particles[???] id=%lld not found\n", id); } +/** + * @brief Looks for the g-particle with the given id and prints its information + * to + * the standard output. + * + * @param gparts The array of g-particles. + * @param id The id too look for. + * @param N The size of the array of g-particles. + * + * (Should be used for debugging only as it runs in O(N).) + */ void printgParticle(struct gpart *gparts, long long int id, size_t N) { int found = 0; @@ -95,9 +113,7 @@ void printgParticle(struct gpart *gparts, long long int id, size_t N) { * * @param p The particle to print * @param xp The extended data ot the particle to print - * */ - void printParticle_single(struct part *p, struct xpart *xp) { printf("## Particle: id=%lld ", p->id); @@ -105,6 +121,18 @@ void printParticle_single(struct part *p, struct xpart *xp) { printf("\n"); } +/** + * @brief Prints the details of a given particle to stdout + * + * @param gp The g-particle to print + */ +void printgParticle_single(struct gpart *gp) { + + printf("## g-Particle: id=%lld ", gp->id); + gravity_debug_particle(gp); + printf("\n"); +} + #ifdef HAVE_METIS /** diff --git a/src/debug.h b/src/debug.h index fba5cb68680472715025f9fee39a5bf06abacf81..13be15adb867e8bafe95e2900f68caaa36192510 100644 --- a/src/debug.h +++ b/src/debug.h @@ -19,14 +19,15 @@ #ifndef SWIFT_DEBUG_H #define SWIFT_DEBUG_H -/* Includes. */ -#include "cell.h" -#include "part.h" +struct part; +struct gpart; +struct xpart; void printParticle(struct part *parts, struct xpart *xparts, long long int id, size_t N); void printgParticle(struct gpart *parts, long long int id, size_t N); void printParticle_single(struct part *p, struct xpart *xp); +void printgParticle_single(struct gpart *gp); #ifdef HAVE_METIS #include "metis.h" diff --git a/src/engine.c b/src/engine.c index aa65f45678fae1e21276422b69f1c76cd7e9eaef..bc2bf839c9a8108bbedaa7de1f9b138348374ffa 100644 --- a/src/engine.c +++ b/src/engine.c @@ -59,9 +59,12 @@ #include "parallel_io.h" #include "part.h" #include "partition.h" +#include "proxy.h" +#include "runner.h" #include "serial_io.h" #include "single_io.h" #include "timers.h" +#include "units.h" const char *engine_policy_names[13] = {"none", "rand", diff --git a/src/engine.h b/src/engine.h index 6c17de54ba62a95c4a5feb3c126390a5b5877d05..d1bfecc568355a9cf5aa57590ccba1df81b05b8a 100644 --- a/src/engine.h +++ b/src/engine.h @@ -37,13 +37,10 @@ #include <stdio.h> /* Includes. */ -#include "hydro_properties.h" -#include "lock.h" +#include "clocks.h" #include "parser.h" #include "partition.h" -#include "physical_constants.h" #include "potentials.h" -#include "proxy.h" #include "runner.h" #include "scheduler.h" #include "space.h" diff --git a/src/hydro/Default/hydro_iact.h b/src/hydro/Default/hydro_iact.h index 4d651b61bd934388414d54fa813820111d26e682..0a577931b5e0ca67ab07dfe414a548da66e82cdd 100644 --- a/src/hydro/Default/hydro_iact.h +++ b/src/hydro/Default/hydro_iact.h @@ -20,12 +20,6 @@ #ifndef SWIFT_RUNNER_IACT_H #define SWIFT_RUNNER_IACT_H -/* Includes. */ -#include "const.h" -#include "kernel_hydro.h" -#include "part.h" -#include "vector.h" - /** * @brief SPH interaction functions following the Gadget-2 version of SPH. * @@ -44,7 +38,6 @@ /** * @brief Density loop */ - __attribute__((always_inline)) INLINE static void runner_iact_density( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { @@ -218,7 +211,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_density( /** * @brief Density loop (non-symmetric version) */ - __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { @@ -267,7 +259,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( /** * @brief Density loop (non-symmetric vectorized version) */ - __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, struct part **pj) { @@ -360,7 +351,6 @@ runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, /** * @brief Force loop */ - __attribute__((always_inline)) INLINE static void runner_iact_force( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { @@ -456,7 +446,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /** * @brief Force loop (Vectorized version) */ - __attribute__((always_inline)) INLINE static void runner_iact_vec_force( float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, struct part **pj) { @@ -675,7 +664,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( /** * @brief Force loop (non-symmetric version) */ - __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { @@ -766,7 +754,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /** * @brief Force loop (Vectorized non-symmetric version) */ - __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, struct part **pj) { diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index 8fcae293280e55bb838edf3c243f4e322914216f..8738b4be09931df4c938f1dff3adeed11468dcfc 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -20,12 +20,6 @@ #ifndef SWIFT_RUNNER_IACT_LEGACY_H #define SWIFT_RUNNER_IACT_LEGACY_H -/* Includes. */ -#include "const.h" -#include "kernel_hydro.h" -#include "part.h" -#include "vector.h" - /** * @brief SPH interaction functions following the Gadget-2 version of SPH. * @@ -42,7 +36,6 @@ /** * @brief Density loop */ - __attribute__((always_inline)) INLINE static void runner_iact_density( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { @@ -113,7 +106,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( /** * @brief Density loop (non-symmetric version) */ - __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { @@ -162,7 +154,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( /** * @brief Force loop */ - __attribute__((always_inline)) INLINE static void runner_iact_force( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { @@ -260,7 +251,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /** * @brief Force loop (non-symmetric version) */ - __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h index f453d8fa1c495472af27ccf98356a3dab0894e98..c9da185b8a29eafe2a58420ae5de3a05ff043225 100644 --- a/src/hydro/Minimal/hydro_iact.h +++ b/src/hydro/Minimal/hydro_iact.h @@ -19,12 +19,6 @@ #ifndef SWIFT_RUNNER_IACT_MINIMAL_H #define SWIFT_RUNNER_IACT_MINIMAL_H -/* Includes. */ -#include "const.h" -#include "kernel_hydro.h" -#include "part.h" -#include "vector.h" - /** * @brief Minimal conservative implementation of SPH * @@ -70,7 +64,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( /** * @brief Density loop (non-symmetric version) */ - __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { @@ -95,7 +88,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( /** * @brief Force loop */ - __attribute__((always_inline)) INLINE static void runner_iact_force( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { @@ -168,7 +160,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /** * @brief Force loop (non-symmetric version) */ - __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { diff --git a/src/lock.h b/src/lock.h index 5eb97de15f81d17bad45e85226cb0881d24482ba..ca7f01ee029cd1c57ed8fd0f3237ea54cb43e9a7 100644 --- a/src/lock.h +++ b/src/lock.h @@ -24,7 +24,6 @@ /* Includes. */ #include "atomic.h" -#include "inline.h" #ifdef PTHREAD_SPINLOCK #include <pthread.h> diff --git a/src/parallel_io.c b/src/parallel_io.c index e779b56a85da3db72ea860fb336fa42037fbcd0e..c5cac1cb5efc6e533e599867e39cdd7c7b2c87fa 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -37,7 +37,11 @@ /* Local includes. */ #include "common_io.h" +#include "engine.h" #include "error.h" +#include "kernel_hydro.h" +#include "part.h" +#include "units.h" /** * @brief Reads a data array from a given HDF5 group. diff --git a/src/parallel_io.h b/src/parallel_io.h index 970ad8c41dcbc2df3a85b178f836e16926147788..26757cb679e475d6acd2ce3c408135dfe5e49081 100644 --- a/src/parallel_io.h +++ b/src/parallel_io.h @@ -19,6 +19,9 @@ #ifndef SWIFT_PARALLEL_IO_H #define SWIFT_PARALLEL_IO_H +/* Config parameters. */ +#include "../config.h" + /* MPI headers. */ #ifdef WITH_MPI #include <mpi.h> diff --git a/src/part.h b/src/part.h index 5d4c9c88a1acadea3d23a3df618c04da389fb61d..e99be6e51a9bd74dd9eec8f590e80989e83ec2e1 100644 --- a/src/part.h +++ b/src/part.h @@ -22,9 +22,6 @@ /* Config parameters. */ #include "../config.h" -/* Some standard headers. */ -#include <stdlib.h> - /* MPI headers. */ #ifdef WITH_MPI #include <mpi.h> @@ -38,7 +35,7 @@ #define xpart_align 32 #define gpart_align 32 -/* Import the right particle definition */ +/* Import the right hydro particle definition */ #if defined(MINIMAL_SPH) #include "./hydro/Minimal/hydro_part.h" #elif defined(GADGET2_SPH) @@ -49,6 +46,7 @@ #error "Invalid choice of SPH variant" #endif +/* Import the right gravity particle definition */ #include "./gravity/Default/gravity_part.h" #ifdef WITH_MPI diff --git a/src/runner.c b/src/runner.c index addeebda93a4d3f4b031f6fdc1f9eb856adc0a32..d38b52ce38f6fc16d97fc642b3f4d6890d9387d2 100644 --- a/src/runner.c +++ b/src/runner.c @@ -40,6 +40,7 @@ /* Local headers. */ #include "approx_math.h" #include "atomic.h" +#include "cell.h" #include "const.h" #include "debug.h" #include "drift.h" @@ -135,7 +136,6 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) { * @param sort The entries * @param N The number of entries. */ - void runner_do_sort_ascending(struct entry *sort, int N) { struct { @@ -217,7 +217,6 @@ void runner_do_sort_ascending(struct entry *sort, int N) { * @param clock Flag indicating whether to record the timing or not, needed * for recursive calls. */ - void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { struct entry *finger; @@ -423,7 +422,6 @@ void runner_do_init(struct runner *r, struct cell *c, int timer) { * @param r The runner thread. * @param c The cell. */ - void runner_do_ghost(struct runner *r, struct cell *c) { struct part *p, *parts = c->parts; diff --git a/src/runner.h b/src/runner.h index 758b6cf57dbc1a46b6fc068a23615f3b28c8707e..6838b959955c4e54e208b8d2d16339e7fdb1740f 100644 --- a/src/runner.h +++ b/src/runner.h @@ -23,13 +23,12 @@ #ifndef SWIFT_RUNNER_H #define SWIFT_RUNNER_H -/* Includes. */ -#include "cell.h" -#include "inline.h" - extern const double runner_shift[13][3]; extern const char runner_flip[27]; +struct cell; +struct engine; + /* A struct representing a runner's thread and its data. */ struct runner { diff --git a/src/runner_doiact.h b/src/runner_doiact.h index bf16cb8f82f0d1d9a0d9b9d955831ada152f4307..4da83b940d55460c4bf8d2f650534aedf94dbd5d 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -1,6 +1,7 @@ /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -17,10 +18,6 @@ * ******************************************************************************/ -/* Includes. */ -#include "cell.h" -#include "part.h" - /* Before including this file, define FUNCTION, which is the name of the interaction function. This creates the interaction functions runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION, @@ -77,6 +74,12 @@ #define _IACT(f) PASTE(runner_iact, f) #define IACT _IACT(FUNCTION) +#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f) +#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION) + +#define _IACT_VEC(f) PASTE(runner_iact_vec, f) +#define IACT_VEC _IACT_VEC(FUNCTION) + #define _TIMER_DOSELF(f) PASTE(timer_doself, f) #define TIMER_DOSELF _TIMER_DOSELF(FUNCTION) @@ -95,12 +98,6 @@ #define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f) #define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION) -#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f) -#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION) - -#define _IACT_VEC(f) PASTE(runner_iact_vec, f) -#define IACT_VEC _IACT_VEC(FUNCTION) - /** * @brief Compute the interactions between a cell pair. * @@ -108,18 +105,14 @@ * @param ci The first #cell. * @param cj The second #cell. */ - void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, struct cell *restrict cj) { - struct engine *e = r->e; - int pid, pjd, k, count_i = ci->count, count_j = cj->count; - double shift[3] = {0.0, 0.0, 0.0}; - struct part *restrict parts_i = ci->parts, *restrict parts_j = cj->parts; - struct part *restrict pi, *restrict pj; - double pix[3]; - float dx[3], hi, hig2, r2; + const struct engine *e = r->e; const int ti_current = e->ti_current; + + error("Don't use in actual runs ! Slow code !"); + #ifdef VECTORIZE int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); @@ -128,44 +121,47 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; #endif + TIMER_TIC /* Anything to do here? */ if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return; + const int count_i = ci->count; + const int count_j = cj->count; + struct part *restrict parts_i = ci->parts; + struct part *restrict parts_j = cj->parts; + /* Get the relative distance between the pairs, wrapping. */ - for (k = 0; k < 3; k++) { + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) shift[k] = e->s->dim[k]; else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) shift[k] = -e->s->dim[k]; } - /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with - %i/%i parts and shift = [ %g %g %g ].\n" , - ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , - cj->loc[2] , - ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout); - tic = getticks(); */ - /* Loop over the parts in ci. */ - for (pid = 0; pid < count_i; pid++) { + for (int pid = 0; pid < count_i; pid++) { /* Get a hold of the ith part in ci. */ - pi = &parts_i[pid]; - for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; - hi = pi->h; - hig2 = hi * hi * kernel_gamma2; + struct part *restrict pi = &parts_i[pid]; + const float hi = pi->h; + + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hig2 = hi * hi * kernel_gamma2; /* Loop over the parts in cj. */ - for (pjd = 0; pjd < count_j; pjd++) { + for (int pjd = 0; pjd < count_j; pjd++) { /* Get a pointer to the jth particle. */ - pj = &parts_j[pjd]; + struct part *restrict pj = &parts_j[pjd]; /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = pix[k] - pj->x[k]; r2 += dx[k] * dx[k]; } @@ -206,7 +202,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, #ifdef VECTORIZE /* Pick up any leftovers. */ if (icount > 0) - for (k = 0; k < icount; k++) + for (int k = 0; k < icount; k++) IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); #endif @@ -215,12 +211,10 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { - int pid, pjd, k, count = c->count; - struct part *restrict parts = c->parts; - struct part *restrict pi, *restrict pj; - double pix[3] = {0.0, 0.0, 0.0}; - float dx[3], hi, hig2, r2; const int ti_current = r->e->ti_current; + + error("Don't use in actual runs ! Slow code !"); + #ifdef VECTORIZE int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); @@ -229,38 +223,34 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; #endif + TIMER_TIC /* Anything to do here? */ if (c->ti_end_min > ti_current) return; - /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with - %i/%i parts and shift = [ %g %g %g ].\n" , - ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , - cj->loc[2] , - ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout); - tic = getticks(); */ + const int count = c->count; + struct part *restrict parts = c->parts; /* Loop over the parts in ci. */ - for (pid = 0; pid < count; pid++) { + for (int pid = 0; pid < count; pid++) { /* Get a hold of the ith part in ci. */ - pi = &parts[pid]; - pix[0] = pi->x[0]; - pix[1] = pi->x[1]; - pix[2] = pi->x[2]; - hi = pi->h; - hig2 = hi * hi * kernel_gamma2; + struct part *restrict pi = &parts[pid]; + const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]}; + const float hi = pi->h; + const float hig2 = hi * hi * kernel_gamma2; /* Loop over the parts in cj. */ - for (pjd = pid + 1; pjd < count; pjd++) { + for (int pjd = pid + 1; pjd < count; pjd++) { /* Get a pointer to the jth particle. */ - pj = &parts[pjd]; + struct part *restrict pj = &parts[pjd]; /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = pix[k] - pj->x[k]; r2 += dx[k] * dx[k]; } @@ -301,7 +291,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { #ifdef VECTORIZE /* Pick up any leftovers. */ if (icount > 0) - for (k = 0; k < icount; k++) + for (int k = 0; k < icount; k++) IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); #endif @@ -319,18 +309,121 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { * @param count The number of particles in @c ind. * @param cj The second #cell. */ +void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, + struct part *restrict parts_i, int *restrict ind, + int count, struct cell *restrict cj) { + + struct engine *e = r->e; + + error("Don't use in actual runs ! Slow code !"); + +#ifdef VECTORIZE + int icount = 0; + float r2q[VEC_SIZE] __attribute__((aligned(16))); + float hiq[VEC_SIZE] __attribute__((aligned(16))); + float hjq[VEC_SIZE] __attribute__((aligned(16))); + float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); + struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; +#endif + + TIMER_TIC + + const int count_j = cj->count; + struct part *restrict parts_j = cj->parts; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + /* Loop over the parts_i. */ + for (int pid = 0; pid < count; pid++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pi = &parts_i[ind[pid]]; + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hi = pi->h; + const float hig2 = hi * hi * kernel_gamma2; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k] * dx[k]; + } + + /* Hit or miss? */ + if (r2 < hig2) { + +#ifndef VECTORIZE + + IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); + +#else + + /* Add this interaction to the queue. */ + r2q[icount] = r2; + dxq[3 * icount + 0] = dx[0]; + dxq[3 * icount + 1] = dx[1]; + dxq[3 * icount + 2] = dx[2]; + hiq[icount] = hi; + hjq[icount] = pj->h; + piq[icount] = pi; + pjq[icount] = pj; + icount += 1; + + /* Flush? */ + if (icount == VEC_SIZE) { + IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); + icount = 0; + } + +#endif + } + + } /* loop over the parts in cj. */ + + } /* loop over the parts in ci. */ +#ifdef VECTORIZE + /* Pick up any leftovers. */ + if (icount > 0) + for (int k = 0; k < icount; k++) + IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); +#endif + + TIMER_TOC(timer_dopair_subset); +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * @param r The #runner. + * @param ci The first #cell. + * @param parts_i The #part to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param count The number of particles in @c ind. + * @param cj The second #cell. + */ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, struct part *restrict parts_i, int *restrict ind, int count, struct cell *restrict cj) { struct engine *e = r->e; - int pid, pjd, sid, k, count_j = cj->count, flipped; - double shift[3] = {0.0, 0.0, 0.0}; - struct part *restrict pi, *restrict pj, *restrict parts_j = cj->parts; - double pix[3]; - float dx[3], hi, hig2, r2, di, dxj; - struct entry *sort_j; + #ifdef VECTORIZE int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); @@ -339,10 +432,15 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; #endif + TIMER_TIC + const int count_j = cj->count; + struct part *restrict parts_j = cj->parts; + /* Get the relative distance between the pairs, wrapping. */ - for (k = 0; k < 3; k++) { + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) shift[k] = e->s->dim[k]; else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) @@ -350,52 +448,50 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, } /* Get the sorting index. */ - for (sid = 0, k = 0; k < 3; k++) + int sid = 0; + for (int k = 0; k < 3; k++) sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1); /* Switch the cells around? */ - flipped = runner_flip[sid]; + const int flipped = runner_flip[sid]; sid = sortlistID[sid]; /* Have the cells been sorted? */ if (!(cj->sorted & (1 << sid))) error("Trying to interact unsorted cells."); - /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with - %i/%i parts and shift = [ %g %g %g ].\n" , - ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , - cj->loc[2] , - ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout); - tic = getticks(); */ - /* Pick-out the sorted lists. */ - sort_j = &cj->sort[sid * (cj->count + 1)]; - dxj = cj->dx_max; + const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)]; + const float dxj = cj->dx_max; /* Parts are on the left? */ if (!flipped) { /* Loop over the parts_i. */ - for (pid = 0; pid < count; pid++) { + for (int pid = 0; pid < count; pid++) { /* Get a hold of the ith part in ci. */ - pi = &parts_i[ind[pid]]; - for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; - hi = pi->h; - hig2 = hi * hi * kernel_gamma2; - di = hi * kernel_gamma + dxj + pix[0] * runner_shift[sid][0] + - pix[1] * runner_shift[sid][1] + pix[2] * runner_shift[sid][2]; + struct part *restrict pi = &parts_i[ind[pid]]; + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + + const float hi = pi->h; + const float hig2 = hi * hi * kernel_gamma2; + const float di = hi * kernel_gamma + dxj + pix[0] * runner_shift[sid][0] + + pix[1] * runner_shift[sid][1] + + pix[2] * runner_shift[sid][2]; /* Loop over the parts in cj. */ - for (pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { + for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { /* Get a pointer to the jth particle. */ - pj = &parts_j[sort_j[pjd].i]; + struct part *restrict pj = &parts_j[sort_j[pjd].i]; /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = pix[k] - pj->x[k]; r2 += dx[k] * dx[k]; } @@ -439,25 +535,28 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, else { /* Loop over the parts_i. */ - for (pid = 0; pid < count; pid++) { + for (int pid = 0; pid < count; pid++) { /* Get a hold of the ith part in ci. */ - pi = &parts_i[ind[pid]]; - for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; - hi = pi->h; - hig2 = hi * hi * kernel_gamma2; - di = -hi * kernel_gamma - dxj + pix[0] * runner_shift[sid][0] + - pix[1] * runner_shift[sid][1] + pix[2] * runner_shift[sid][2]; + struct part *restrict pi = &parts_i[ind[pid]]; + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hi = pi->h; + const float hig2 = hi * hi * kernel_gamma2; + const float di = + -hi * kernel_gamma - dxj + pix[0] * runner_shift[sid][0] + + pix[1] * runner_shift[sid][1] + pix[2] * runner_shift[sid][2]; /* Loop over the parts in cj. */ - for (pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) { + for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) { /* Get a pointer to the jth particle. */ - pj = &parts_j[sort_j[pjd].i]; + struct part *restrict pj = &parts_j[sort_j[pjd].i]; /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = pix[k] - pj->x[k]; r2 += dx[k] * dx[k]; } @@ -499,119 +598,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, #ifdef VECTORIZE /* Pick up any leftovers. */ if (icount > 0) - for (k = 0; k < icount; k++) - IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); -#endif - - TIMER_TOC(timer_dopair_subset); -} - -/** - * @brief Compute the interactions between a cell pair, but only for the - * given indices in ci. - * - * @param r The #runner. - * @param ci The first #cell. - * @param parts_i The #part to interact with @c cj. - * @param ind The list of indices of particles in @c ci to interact with. - * @param count The number of particles in @c ind. - * @param cj The second #cell. - */ - -void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, - struct part *restrict parts_i, int *restrict ind, - int count, struct cell *restrict cj) { - - struct engine *e = r->e; - int pid, pjd, k, count_j = cj->count; - double shift[3] = {0.0, 0.0, 0.0}; - struct part *restrict pi, *restrict pj, *restrict parts_j = cj->parts; - double pix[3]; - float dx[3], hi, hig2, r2; -#ifdef VECTORIZE - int icount = 0; - float r2q[VEC_SIZE] __attribute__((aligned(16))); - float hiq[VEC_SIZE] __attribute__((aligned(16))); - float hjq[VEC_SIZE] __attribute__((aligned(16))); - float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); - struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; -#endif - TIMER_TIC - - /* Get the relative distance between the pairs, wrapping. */ - for (k = 0; k < 3; k++) { - if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) - shift[k] = e->s->dim[k]; - else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) - shift[k] = -e->s->dim[k]; - } - - /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with - %i/%i parts and shift = [ %g %g %g ].\n" , - ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , - cj->loc[2] , - ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout); - tic = getticks(); */ - - /* Loop over the parts_i. */ - for (pid = 0; pid < count; pid++) { - - /* Get a hold of the ith part in ci. */ - pi = &parts_i[ind[pid]]; - for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; - hi = pi->h; - hig2 = hi * hi * kernel_gamma2; - - /* Loop over the parts in cj. */ - for (pjd = 0; pjd < count_j; pjd++) { - - /* Get a pointer to the jth particle. */ - pj = &parts_j[pjd]; - - /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; - } - - /* Hit or miss? */ - if (r2 < hig2) { - -#ifndef VECTORIZE - - IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); - -#else - - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = dx[0]; - dxq[3 * icount + 1] = dx[1]; - dxq[3 * icount + 2] = dx[2]; - hiq[icount] = hi; - hjq[icount] = pj->h; - piq[icount] = pi; - pjq[icount] = pj; - icount += 1; - - /* Flush? */ - if (icount == VEC_SIZE) { - IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); - icount = 0; - } - -#endif - } - - } /* loop over the parts in cj. */ - - } /* loop over the parts in ci. */ - -#ifdef VECTORIZE - /* Pick up any leftovers. */ - if (icount > 0) - for (k = 0; k < icount; k++) + for (int k = 0; k < icount; k++) IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); #endif @@ -628,15 +615,9 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, * @param ind The list of indices of particles in @c ci to interact with. * @param count The number of particles in @c ind. */ - void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, struct part *restrict parts, int *restrict ind, int count) { - int pid, pjd, k, count_i = ci->count; - struct part *restrict parts_j = ci->parts; - struct part *restrict pi, *restrict pj; - double pix[3] = {0.0, 0.0, 0.0}; - float dx[3], hi, hig2, r2; #ifdef VECTORIZE int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); @@ -645,35 +626,31 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; #endif + TIMER_TIC - /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with - %i/%i parts and shift = [ %g %g %g ].\n" , - ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , - cj->loc[2] , - ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout); - tic = getticks(); */ + const int count_i = ci->count; + struct part *restrict parts_j = ci->parts; /* Loop over the parts in ci. */ - for (pid = 0; pid < count; pid++) { + for (int pid = 0; pid < count; pid++) { /* Get a hold of the ith part in ci. */ - pi = &parts[ind[pid]]; - pix[0] = pi->x[0]; - pix[1] = pi->x[1]; - pix[2] = pi->x[2]; - hi = pi->h; - hig2 = hi * hi * kernel_gamma2; + struct part *restrict pi = &parts[ind[pid]]; + const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]}; + const float hi = pi->h; + const float hig2 = hi * hi * kernel_gamma2; /* Loop over the parts in cj. */ - for (pjd = 0; pjd < count_i; pjd++) { + for (int pjd = 0; pjd < count_i; pjd++) { /* Get a pointer to the jth particle. */ - pj = &parts_j[pjd]; + struct part *restrict pj = &parts_j[pjd]; /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = pix[k] - pj->x[k]; r2 += dx[k] * dx[k]; } @@ -714,7 +691,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, #ifdef VECTORIZE /* Pick up any leftovers. */ if (icount > 0) - for (k = 0; k < icount; k++) + for (int k = 0; k < icount; k++) IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); #endif @@ -722,26 +699,17 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, } /** - * @brief Compute the interactions between a cell pair. + * @brief Compute the interactions between a cell pair (non-symmetric). * * @param r The #runner. * @param ci The first #cell. * @param cj The second #cell. */ - void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { struct engine *restrict e = r->e; - int pid, pjd, k, sid; - double rshift, shift[3] = {0.0, 0.0, 0.0}; - struct entry *restrict sort_i, *restrict sort_j; - struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j; - double pix[3], pjx[3], di, dj; - float dx[3], hi, hig2, hj, hjg2, r2, dx_max; - double hi_max, hj_max; - double di_max, dj_min; - int count_i, count_j; const int ti_current = e->ti_current; + #ifdef VECTORIZE int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); @@ -750,60 +718,64 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; #endif + TIMER_TIC /* Anything to do here? */ if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return; /* Get the sort ID. */ - sid = space_getsid(e->s, &ci, &cj, shift); + double shift[3] = {0.0, 0.0, 0.0}; + const int sid = space_getsid(e->s, &ci, &cj, shift); /* Have the cells been sorted? */ if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid))) error("Trying to interact unsorted cells."); /* Get the cutoff shift. */ - for (rshift = 0.0, k = 0; k < 3; k++) - rshift += shift[k] * runner_shift[sid][k]; + double rshift = 0.0; + for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; /* Pick-out the sorted lists. */ - sort_i = &ci->sort[sid * (ci->count + 1)]; - sort_j = &cj->sort[sid * (cj->count + 1)]; + const struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)]; + const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)]; /* Get some other useful values. */ - hi_max = ci->h_max * kernel_gamma - rshift; - hj_max = cj->h_max * kernel_gamma; - count_i = ci->count; - count_j = cj->count; - parts_i = ci->parts; - parts_j = cj->parts; - di_max = sort_i[count_i - 1].d - rshift; - dj_min = sort_j[0].d; - dx_max = (ci->dx_max + cj->dx_max); + const double hi_max = ci->h_max * kernel_gamma - rshift; + const double hj_max = cj->h_max * kernel_gamma; + const int count_i = ci->count; + const int count_j = cj->count; + struct part *restrict parts_i = ci->parts; + struct part *restrict parts_j = cj->parts; + const double di_max = sort_i[count_i - 1].d - rshift; + const double dj_min = sort_j[0].d; + const float dx_max = (ci->dx_max + cj->dx_max); /* Loop over the parts in ci. */ - for (pid = count_i - 1; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; - pid--) { + for (int pid = count_i - 1; + pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { /* Get a hold of the ith part in ci. */ - pi = &parts_i[sort_i[pid].i]; + struct part *restrict pi = &parts_i[sort_i[pid].i]; if (pi->ti_end > ti_current) continue; - hi = pi->h; - di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; + const float hi = pi->h; + const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; if (di < dj_min) continue; - hig2 = hi * hi * kernel_gamma2; - for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hig2 = hi * hi * kernel_gamma2; /* Loop over the parts in cj. */ - for (pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { + for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { /* Get a pointer to the jth particle. */ - pj = &parts_j[sort_j[pjd].i]; + struct part *restrict pj = &parts_j[sort_j[pjd].i]; /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = pix[k] - pj->x[k]; r2 += dx[k] * dx[k]; } @@ -841,33 +813,31 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { } /* loop over the parts in ci. */ - /* printf( "runner_dopair: first half took %.3f %s...\n" , - clocks_from_ticks(getticks() - tic), clocks_getunit()); - tic = getticks(); */ - /* Loop over the parts in cj. */ - for (pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max; + for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max; pjd++) { /* Get a hold of the jth part in cj. */ - pj = &parts_j[sort_j[pjd].i]; + struct part *restrict pj = &parts_j[sort_j[pjd].i]; if (pj->ti_end > ti_current) continue; - hj = pj->h; - dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift; + const float hj = pj->h; + const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift; if (dj > di_max) continue; - for (k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k]; - hjg2 = hj * hj * kernel_gamma2; + double pjx[3]; + for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k]; + const float hjg2 = hj * hj * kernel_gamma2; /* Loop over the parts in ci. */ - for (pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) { + for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) { /* Get a pointer to the jth particle. */ - pi = &parts_i[sort_i[pid].i]; + struct part *restrict pi = &parts_i[sort_i[pid].i]; /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = pjx[k] - pi->x[k]; r2 += dx[k] * dx[k]; } @@ -908,28 +878,25 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { #ifdef VECTORIZE /* Pick up any leftovers. */ if (icount > 0) - for (k = 0; k < icount; k++) + for (int k = 0; k < icount; k++) IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); #endif TIMER_TOC(TIMER_DOPAIR); } +/** + * @brief Compute the interactions between a cell pair (symmetric) + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + */ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { struct engine *restrict e = r->e; - int pid, pjd, k, sid; - double rshift, shift[3] = {0.0, 0.0, 0.0}; - struct entry *sort_i, *sort_j; - struct entry *sortdt_i = NULL, *sortdt_j = NULL; - int countdt_i = 0, countdt_j = 0; - struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j; - double pix[3], pjx[3], di, dj; - float dx[3], hi, hig2, hj, hjg2, r2, dx_max; - double hi_max, hj_max; - double di_max, dj_min; - int count_i, count_j; const int ti_current = e->ti_current; + #ifdef VECTORIZE int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); @@ -944,38 +911,42 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { float dxq2[3 * VEC_SIZE] __attribute__((aligned(16))); struct part *piq2[VEC_SIZE], *pjq2[VEC_SIZE]; #endif + TIMER_TIC /* Anything to do here? */ if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return; /* Get the shift ID. */ - sid = space_getsid(e->s, &ci, &cj, shift); + double shift[3] = {0.0, 0.0, 0.0}; + const int sid = space_getsid(e->s, &ci, &cj, shift); /* Have the cells been sorted? */ if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid))) error("Trying to interact unsorted cells."); /* Get the cutoff shift. */ - for (rshift = 0.0, k = 0; k < 3; k++) - rshift += shift[k] * runner_shift[sid][k]; + double rshift = 0.0; + for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; /* Pick-out the sorted lists. */ - sort_i = &ci->sort[sid * (ci->count + 1)]; - sort_j = &cj->sort[sid * (cj->count + 1)]; + struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)]; + struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)]; /* Get some other useful values. */ - hi_max = ci->h_max * kernel_gamma - rshift; - hj_max = cj->h_max * kernel_gamma; - count_i = ci->count; - count_j = cj->count; - parts_i = ci->parts; - parts_j = cj->parts; - di_max = sort_i[count_i - 1].d - rshift; - dj_min = sort_j[0].d; - dx_max = (ci->dx_max + cj->dx_max); + const double hi_max = ci->h_max * kernel_gamma - rshift; + const double hj_max = cj->h_max * kernel_gamma; + const int count_i = ci->count; + const int count_j = cj->count; + struct part *restrict parts_i = ci->parts; + struct part *restrict parts_j = cj->parts; + const double di_max = sort_i[count_i - 1].d - rshift; + const double dj_min = sort_j[0].d; + const double dx_max = (ci->dx_max + cj->dx_max); /* Collect the number of parts left and right below dt. */ + int countdt_i = 0, countdt_j = 0; + struct entry *restrict sortdt_i = NULL, *restrict sortdt_j = NULL; if (ci->ti_end_max <= ti_current) { sortdt_i = sort_i; countdt_i = count_i; @@ -983,7 +954,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { if ((sortdt_i = (struct entry *)alloca(sizeof(struct entry) * count_i)) == NULL) error("Failed to allocate dt sortlists."); - for (k = 0; k < count_i; k++) + for (int k = 0; k < count_i; k++) if (parts_i[sort_i[k].i].ti_end <= ti_current) { sortdt_i[countdt_i] = sort_i[k]; countdt_i += 1; @@ -996,7 +967,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { if ((sortdt_j = (struct entry *)alloca(sizeof(struct entry) * count_j)) == NULL) error("Failed to allocate dt sortlists."); - for (k = 0; k < count_j; k++) + for (int k = 0; k < count_j; k++) if (parts_j[sort_j[k].i].ti_end <= ti_current) { sortdt_j[countdt_j] = sort_j[k]; countdt_j += 1; @@ -1004,31 +975,33 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { } /* Loop over the parts in ci. */ - for (pid = count_i - 1; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; - pid--) { + for (int pid = count_i - 1; + pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { /* Get a hold of the ith part in ci. */ - pi = &parts_i[sort_i[pid].i]; - hi = pi->h; - di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; + struct part *restrict pi = &parts_i[sort_i[pid].i]; + const float hi = pi->h; + const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; if (di < dj_min) continue; - hig2 = hi * hi * kernel_gamma2; - for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hig2 = hi * hi * kernel_gamma2; /* Look at valid dt parts only? */ if (pi->ti_end > ti_current) { /* Loop over the parts in cj within dt. */ - for (pjd = 0; pjd < countdt_j && sortdt_j[pjd].d < di; pjd++) { + for (int pjd = 0; pjd < countdt_j && sortdt_j[pjd].d < di; pjd++) { /* Get a pointer to the jth particle. */ - pj = &parts_j[sortdt_j[pjd].i]; - hj = pj->h; + struct part *restrict pj = &parts_j[sortdt_j[pjd].i]; + const float hj = pj->h; /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = pj->x[k] - pix[k]; r2 += dx[k] * dx[k]; } @@ -1070,15 +1043,16 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { else { /* Loop over the parts in cj. */ - for (pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { + for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { /* Get a pointer to the jth particle. */ - pj = &parts_j[sort_j[pjd].i]; - hj = pj->h; + struct part *restrict pj = &parts_j[sort_j[pjd].i]; + const float hj = pj->h; /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = pix[k] - pj->x[k]; r2 += dx[k] * dx[k]; } @@ -1144,36 +1118,34 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { } /* loop over the parts in ci. */ - /* printf( "runner_dopair: first half took %.3f %s...\n" , - clocks_from_ticks(getticks() - tic), clocks_getunit()); - tic = getticks(); */ - /* Loop over the parts in cj. */ - for (pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max; + for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max; pjd++) { /* Get a hold of the jth part in cj. */ - pj = &parts_j[sort_j[pjd].i]; - hj = pj->h; - dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift; + struct part *restrict pj = &parts_j[sort_j[pjd].i]; + const float hj = pj->h; + const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift; if (dj > di_max) continue; - for (k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k]; - hjg2 = hj * hj * kernel_gamma2; + double pjx[3]; + for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k]; + const float hjg2 = hj * hj * kernel_gamma2; /* Is this particle outside the dt? */ if (pj->ti_end > ti_current) { /* Loop over the parts in ci. */ - for (pid = countdt_i - 1; pid >= 0 && sortdt_i[pid].d > dj; pid--) { + for (int pid = countdt_i - 1; pid >= 0 && sortdt_i[pid].d > dj; pid--) { /* Get a pointer to the jth particle. */ - pi = &parts_i[sortdt_i[pid].i]; - hi = pi->h; + struct part *restrict pi = &parts_i[sortdt_i[pid].i]; + const float hi = pi->h; /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = pi->x[k] - pjx[k]; r2 += dx[k] * dx[k]; } @@ -1214,15 +1186,16 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { else { /* Loop over the parts in ci. */ - for (pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) { + for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) { /* Get a pointer to the jth particle. */ - pi = &parts_i[sort_i[pid].i]; - hi = pi->h; + struct part *restrict pi = &parts_i[sort_i[pid].i]; + const float hi = pi->h; /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = pjx[k] - pi->x[k]; r2 += dx[k] * dx[k]; } @@ -1291,10 +1264,10 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { #ifdef VECTORIZE /* Pick up any leftovers. */ if (icount1 > 0) - for (k = 0; k < icount1; k++) + for (int k = 0; k < icount1; k++) IACT_NONSYM(r2q1[k], &dxq1[3 * k], hiq1[k], hjq1[k], piq1[k], pjq1[k]); if (icount2 > 0) - for (k = 0; k < icount2; k++) + for (int k = 0; k < icount2; k++) IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]); #endif @@ -1302,20 +1275,15 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { } /** - * @brief Compute the cell self-interaction. + * @brief Compute the cell self-interaction (non-symmetric). * * @param r The #runner. * @param c The #cell. */ - void DOSELF1(struct runner *r, struct cell *restrict c) { - int k, pid, pjd, count = c->count; - double pix[3]; - float dx[3], hi, hj, hig2, r2; - struct part *restrict parts = c->parts, *restrict pi, *restrict pj; const int ti_current = r->e->ti_current; - int firstdt = 0, countdt = 0, *indt = NULL, doj; + #ifdef VECTORIZE int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); @@ -1332,43 +1300,49 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { #endif TIMER_TIC - /* Set up indt if needed. */ - if (c->ti_end_min > ti_current) - return; - else if (c->ti_end_max > ti_current) { - if ((indt = (int *)alloca(sizeof(int) * count)) == NULL) - error("Failed to allocate indt."); - for (k = 0; k < count; k++) - if (parts[k].ti_end <= ti_current) { - indt[countdt] = k; - countdt += 1; - } - } + if (c->ti_end_min > ti_current) return; + if (c->ti_end_max < ti_current) error("Cell in an impossible time-zone"); + + struct part *restrict parts = c->parts; + const int count = c->count; + + /* Set up indt. */ + int *indt = NULL; + int countdt = 0, firstdt = 0; + if ((indt = (int *)alloca(sizeof(int) * count)) == NULL) + error("Failed to allocate indt."); + for (int k = 0; k < count; k++) + if (parts[k].ti_end <= ti_current) { + indt[countdt] = k; + countdt += 1; + } /* Loop over the particles in the cell. */ - for (pid = 0; pid < count; pid++) { + for (int pid = 0; pid < count; pid++) { /* Get a pointer to the ith particle. */ - pi = &parts[pid]; + struct part *restrict pi = &parts[pid]; /* Get the particle position and radius. */ - for (k = 0; k < 3; k++) pix[k] = pi->x[k]; - hi = pi->h; - hig2 = hi * hi * kernel_gamma2; + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k]; + const float hi = pi->h; + const float hig2 = hi * hi * kernel_gamma2; /* Is the ith particle inactive? */ if (pi->ti_end > ti_current) { /* Loop over the other particles .*/ - for (pjd = firstdt; pjd < countdt; pjd++) { + for (int pjd = firstdt; pjd < countdt; pjd++) { /* Get a pointer to the jth particle. */ - pj = &parts[indt[pjd]]; - hj = pj->h; + struct part *restrict pj = &parts[indt[pjd]]; + const float hj = pj->h; /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = pj->x[k] - pix[k]; r2 += dx[k] * dx[k]; } @@ -1413,19 +1387,21 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { firstdt += 1; /* Loop over the other particles .*/ - for (pjd = pid + 1; pjd < count; pjd++) { + for (int pjd = pid + 1; pjd < count; pjd++) { /* Get a pointer to the jth particle. */ - pj = &parts[pjd]; - hj = pj->h; + struct part *restrict pj = &parts[pjd]; + const float hj = pj->h; /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = pix[k] - pj->x[k]; r2 += dx[k] * dx[k]; } - doj = (pj->ti_end <= ti_current) && (r2 < hj * hj * kernel_gamma2); + const int doj = + (pj->ti_end <= ti_current) && (r2 < hj * hj * kernel_gamma2); /* Hit or miss? */ if (r2 < hig2 || doj) { @@ -1516,24 +1492,26 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { #ifdef VECTORIZE /* Pick up any leftovers. */ if (icount1 > 0) - for (k = 0; k < icount1; k++) + for (int k = 0; k < icount1; k++) IACT_NONSYM(r2q1[k], &dxq1[3 * k], hiq1[k], hjq1[k], piq1[k], pjq1[k]); if (icount2 > 0) - for (k = 0; k < icount2; k++) + for (int k = 0; k < icount2; k++) IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]); #endif TIMER_TOC(TIMER_DOSELF); } +/** + * @brief Compute the cell self-interaction (symmetric). + * + * @param r The #runner. + * @param c The #cell. + */ void DOSELF2(struct runner *r, struct cell *restrict c) { - int k, pid, pjd, count = c->count; - double pix[3]; - float dx[3], hi, hj, hig2, r2; - struct part *restrict parts = c->parts, *restrict pi, *restrict pj; const int ti_current = r->e->ti_current; - int firstdt = 0, countdt = 0, *indt = NULL; + #ifdef VECTORIZE int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); @@ -1550,43 +1528,49 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { #endif TIMER_TIC - /* Set up indt if needed. */ - if (c->ti_end_min > ti_current) - return; - else if (c->ti_end_max > ti_current) { - if ((indt = (int *)alloca(sizeof(int) * count)) == NULL) - error("Failed to allocate indt."); - for (k = 0; k < count; k++) - if (parts[k].ti_end <= ti_current) { - indt[countdt] = k; - countdt += 1; - } - } + if (c->ti_end_min > ti_current) return; + if (c->ti_end_max < ti_current) error("Cell in an impossible time-zone"); + + struct part *restrict parts = c->parts; + const int count = c->count; + + /* Set up indt. */ + int *indt = NULL; + int countdt = 0, firstdt = 0; + if ((indt = (int *)alloca(sizeof(int) * count)) == NULL) + error("Failed to allocate indt."); + for (int k = 0; k < count; k++) + if (parts[k].ti_end <= ti_current) { + indt[countdt] = k; + countdt += 1; + } /* Loop over the particles in the cell. */ - for (pid = 0; pid < count; pid++) { + for (int pid = 0; pid < count; pid++) { /* Get a pointer to the ith particle. */ - pi = &parts[pid]; + struct part *restrict pi = &parts[pid]; /* Get the particle position and radius. */ - for (k = 0; k < 3; k++) pix[k] = pi->x[k]; - hi = pi->h; - hig2 = hi * hi * kernel_gamma2; + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k]; + const float hi = pi->h; + const float hig2 = hi * hi * kernel_gamma2; /* Is the ith particle not active? */ if (pi->ti_end > ti_current) { /* Loop over the other particles .*/ - for (pjd = firstdt; pjd < countdt; pjd++) { + for (int pjd = firstdt; pjd < countdt; pjd++) { /* Get a pointer to the jth particle. */ - pj = &parts[indt[pjd]]; - hj = pj->h; + struct part *restrict pj = &parts[indt[pjd]]; + const float hj = pj->h; /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = pj->x[k] - pix[k]; r2 += dx[k] * dx[k]; } @@ -1631,15 +1615,16 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { firstdt += 1; /* Loop over the other particles .*/ - for (pjd = pid + 1; pjd < count; pjd++) { + for (int pjd = pid + 1; pjd < count; pjd++) { /* Get a pointer to the jth particle. */ - pj = &parts[pjd]; - hj = pj->h; + struct part *restrict pj = &parts[pjd]; + const float hj = pj->h; /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = pix[k] - pj->x[k]; r2 += dx[k] * dx[k]; } @@ -1708,10 +1693,10 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { #ifdef VECTORIZE /* Pick up any leftovers. */ if (icount1 > 0) - for (k = 0; k < icount1; k++) + for (int k = 0; k < icount1; k++) IACT_NONSYM(r2q1[k], &dxq1[3 * k], hiq1[k], hjq1[k], piq1[k], pjq1[k]); if (icount2 > 0) - for (k = 0; k < icount2; k++) + for (int k = 0; k < icount2; k++) IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]); #endif @@ -2290,17 +2275,14 @@ void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer) { void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, int *ind, int count, struct cell *cj, int sid, int gettimer) { - int j, k; - double shift[3]; - float h; struct space *s = r->e->s; - struct cell *sub = NULL; const int ti_current = r->e->ti_current; TIMER_TIC /* Find out in which sub-cell of ci the parts are. */ - for (k = 0; k < 8; k++) + struct cell *sub = NULL; + for (int k = 0; k < 8; k++) if (ci->progeny[k] != NULL) { // if ( parts[ ind[ 0 ] ].x[0] >= ci->progeny[k]->loc[0] && // parts[ ind[ 0 ] ].x[0] <= ci->progeny[k]->loc[0] + @@ -2326,7 +2308,7 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, /* Loop over all progeny. */ DOSUB_SUBSET(r, sub, parts, ind, count, NULL, -1, 0); - for (j = 0; j < 8; j++) + for (int j = 0; j < 8; j++) if (ci->progeny[j] != sub && ci->progeny[j] != NULL) DOSUB_SUBSET(r, sub, parts, ind, count, ci->progeny[j], -1, 0); @@ -2342,7 +2324,7 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, else { /* Get the cell dimensions. */ - h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2])); + const float h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2])); /* Recurse? */ if (ci->split && cj->split && @@ -2350,6 +2332,7 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, h / 2) { /* Get the type of pair if not specified explicitly. */ + double shift[3] = {0.0, 0.0, 0.0}; sid = space_getsid(s, &ci, &cj, shift); /* Different types of flags. */ @@ -2858,7 +2841,8 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) { /* Get the relative distance between the pairs, wrapping. */ - for (k = 0; k < 3; k++) { + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { if (cj->loc[k] - ci->loc[k] < -s->dim[k] / 2) shift[k] = s->dim[k]; else if (cj->loc[k] - ci->loc[k] > s->dim[k] / 2) @@ -2866,7 +2850,8 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, } /* Get the sorting index. */ - for (sid = 0, k = 0; k < 3; k++) + int sid = 0; + for (int k = 0; k < 3; k++) sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 13e6fe68ca223fa95a307fd063208e1f5d5efa6c..e3788dfa1123584c913bca6baa6fc2db6698e6d0 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -19,11 +19,6 @@ #ifndef SWIFT_RUNNER_DOIACT_GRAV_H #define SWIFT_RUNNER_DOIACT_GRAV_H -/* Includes. */ -#include "cell.h" -#include "clocks.h" -#include "part.h" - /** * @brief Compute the sorted gravity interactions between a cell pair. * diff --git a/src/scheduler.c b/src/scheduler.c index 496df93adf5a656460f7b39904a3cc58ac3e5caa..8f833dad5085c675f129cb1061fa354f236ec9bc 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -44,6 +44,9 @@ #include "error.h" #include "intrinsics.h" #include "kernel_hydro.h" +#include "queue.h" +#include "space.h" +#include "task.h" #include "timers.h" /** diff --git a/src/scheduler.h b/src/scheduler.h index a867f7bc36cc8865d28d751565663641d92aa7fb..62af23152e7e2d2bc68e0cc4d7122f4dd0483aa1 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -35,7 +35,6 @@ #include "cell.h" #include "lock.h" #include "queue.h" -#include "space.h" #include "task.h" /* Some constants. */ diff --git a/src/serial_io.c b/src/serial_io.c index 5abd3ebc28672d68c4135efe5753dc4713c2d3c6..7e78276dc83430655b4ea4de2fb7425e71e07966 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -26,22 +26,22 @@ /* Some standard headers. */ #include <hdf5.h> #include <math.h> +#include <mpi.h> #include <stddef.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -/* MPI headers. */ -#ifdef WITH_MPI -#include <mpi.h> -#endif - /* This object's header. */ #include "serial_io.h" /* Local includes. */ #include "common_io.h" +#include "engine.h" #include "error.h" +#include "kernel_hydro.h" +#include "part.h" +#include "units.h" /*----------------------------------------------------------------------------- * Routines reading an IC file diff --git a/src/serial_io.h b/src/serial_io.h index b7ed6eb62d823829a473f828696c291e552effa3..6b64624772214494a43316d3a8aa3910c7238dc8 100644 --- a/src/serial_io.h +++ b/src/serial_io.h @@ -19,6 +19,9 @@ #ifndef SWIFT_SERIAL_IO_H #define SWIFT_SERIAL_IO_H +/* Config parameters. */ +#include "../config.h" + /* MPI headers. */ #ifdef WITH_MPI #include <mpi.h> diff --git a/src/single_io.c b/src/single_io.c index fb3bf4368feed9892b098228d85c99f0bc3e724b..3f65aae0b5d495670f2b4862e466ec849f997d63 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -36,8 +36,11 @@ /* Local includes. */ #include "common_io.h" -#include "const.h" +#include "engine.h" #include "error.h" +#include "kernel_hydro.h" +#include "part.h" +#include "units.h" /*----------------------------------------------------------------------------- * Routines reading an IC file diff --git a/src/single_io.h b/src/single_io.h index adfc5b43941b2c0d69d9ce0924164aff56864a23..d2c87655e1c91b92e8ccf2aa50d2e0bf98f13482 100644 --- a/src/single_io.h +++ b/src/single_io.h @@ -19,6 +19,9 @@ #ifndef SWIFT_SINGLE_IO_H #define SWIFT_SINGLE_IO_H +/* Config parameters. */ +#include "../config.h" + /* Includes. */ #include "engine.h" #include "part.h" diff --git a/src/space.h b/src/space.h index d53c0f2a5784ef25654309741b4455e3dbcc3e0c..5cfb2cb8368ed60586aad62e0f753aab17bf55d1 100644 --- a/src/space.h +++ b/src/space.h @@ -23,16 +23,18 @@ #ifndef SWIFT_SPACE_H #define SWIFT_SPACE_H -/* Includes. */ +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ #include <stddef.h> -/* Local includes. */ +/* Includes. */ #include "cell.h" +#include "lock.h" #include "parser.h" #include "part.h" - -/* Forward-declare the engine to avoid cyclic includes. */ -struct engine; +#include "space.h" /* Some constants. */ #define space_maxdepth 10 @@ -158,4 +160,5 @@ void space_do_split(struct space *s, struct cell *c); void space_do_parts_sort(); void space_do_gparts_sort(); void space_link_cleanup(struct space *s); + #endif /* SWIFT_SPACE_H */ diff --git a/src/tools.c b/src/tools.c index 0363100331ad5b298279e9ebadcf87eab5f9e896..1a2b794f688047183827e5c2ed6ba80ba1339080 100644 --- a/src/tools.c +++ b/src/tools.c @@ -19,16 +19,25 @@ * ******************************************************************************/ +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ #include <math.h> #include <stddef.h> #include <stdio.h> #include <stdlib.h> +/* This object's header. */ +#include "tools.h" + +/* Local includes. */ #include "cell.h" #include "error.h" +#include "gravity.h" +#include "hydro.h" #include "part.h" -#include "swift.h" -#include "tools.h" +#include "runner.h" /** * Factorize a given integer, attempts to keep larger pair of factors. @@ -56,9 +65,7 @@ void factor(int value, int *f1, int *f2) { * @param N The number of parts. * @param periodic Periodic boundary conditions flag. */ - -void pairs_n2(double *dim, struct part *__restrict__ parts, int N, - int periodic) { +void pairs_n2(double *dim, struct part *restrict parts, int N, int periodic) { int i, j, k, count = 0; // int mj, mk; // double maxratio = 1.0; @@ -121,8 +128,7 @@ void pairs_n2(double *dim, struct part *__restrict__ parts, int N, } void pairs_single_density(double *dim, long long int pid, - struct part *__restrict__ parts, int N, - int periodic) { + struct part *restrict parts, int N, int periodic) { int i, k; // int mj, mk; // double maxratio = 1.0; @@ -271,7 +277,7 @@ void self_all_density(struct runner *r, struct cell *ci) { } void pairs_single_grav(double *dim, long long int pid, - struct gpart *__restrict__ parts, int N, int periodic) { + struct gpart *restrict parts, int N, int periodic) { int i, k; // int mj, mk; // double maxratio = 1.0; @@ -328,7 +334,6 @@ void pairs_single_grav(double *dim, long long int pid, * * @param N number of intervals in [0,1]. */ - void density_dump(int N) { int k; float r2[4] = {0.0f, 0.0f, 0.0f, 0.0f}, hi[4], hj[4]; @@ -363,10 +368,8 @@ void density_dump(int N) { /** * @brief Compute the force on a single particle brute-force. */ - void engine_single_density(double *dim, long long int pid, - struct part *__restrict__ parts, int N, - int periodic) { + struct part *restrict parts, int N, int periodic) { int i, k; double r2, dx[3]; float fdx[3], ih; @@ -412,7 +415,7 @@ void engine_single_density(double *dim, long long int pid, } void engine_single_force(double *dim, long long int pid, - struct part *__restrict__ parts, int N, int periodic) { + struct part *restrict parts, int N, int periodic) { int i, k; double r2, dx[3]; float fdx[3]; diff --git a/src/tools.h b/src/tools.h index 5f9f41d033ab03983bde3bb37e87f8a39d2deecd..2a4462f274d8e9acd1f2d8e79996ad92c630d404 100644 --- a/src/tools.h +++ b/src/tools.h @@ -22,21 +22,25 @@ #ifndef SWIFT_TOOL_H #define SWIFT_TOOL_H +/* Config parameters. */ +#include "../config.h" + +/* Includes. */ #include "cell.h" +#include "part.h" #include "runner.h" void factor(int value, int *f1, int *f2); void density_dump(int N); void pairs_single_grav(double *dim, long long int pid, - struct gpart *__restrict__ parts, int N, int periodic); + struct gpart *restrict parts, int N, int periodic); void pairs_single_density(double *dim, long long int pid, - struct part *__restrict__ parts, int N, int periodic); + struct part *restrict parts, int N, int periodic); void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj); void self_all_density(struct runner *r, struct cell *ci); -void pairs_n2(double *dim, struct part *__restrict__ parts, int N, - int periodic); +void pairs_n2(double *dim, struct part *restrict parts, int N, int periodic); double random_uniform(double a, double b); void shuffle_particles(struct part *parts, const int count);