diff --git a/src/engine.c b/src/engine.c index 317b64fc6d28a91dbb76568f8524b3babbd582bd..93481aca3d25fd9755b7c7f69ef25ddb4d9d9d06 100644 --- a/src/engine.c +++ b/src/engine.c @@ -4505,8 +4505,8 @@ void engine_init(struct engine *e, struct space *s, /* Init the scheduler with enough tasks for the initial sorting tasks. */ const int nr_tasks = 2 * s->tot_cells + 2 * e->nr_threads; - scheduler_init(&e->sched, e->s, nr_tasks, nr_queues, (policy & scheduler_flag_steal), - e->nodeID, &e->threadpool); + scheduler_init(&e->sched, e->s, nr_tasks, nr_queues, + (policy & scheduler_flag_steal), e->nodeID, &e->threadpool); /* Allocate and init the threads. */ if ((e->runners = (struct runner *)malloc(sizeof(struct runner) * diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index 7de0d0220c749aa920bcbcd6605f8b34827e89af..9e06fec92f9667d21ce7dc196ba26f9870cb24f4 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -438,7 +438,8 @@ runner_iact_nonsym_1_vec_density(vector *r2, vector *dx, vector *dy, vector *dz, /* Mask updates to intermediate vector sums for particle pi. */ rhoSum->v = vec_mask_add(rhoSum->v, vec_mul(mj.v, wi.v), mask); - rho_dhSum->v = vec_mask_sub(rho_dhSum->v, vec_mul(mj.v, wcount_dh_update.v), mask); + rho_dhSum->v = + vec_mask_sub(rho_dhSum->v, vec_mul(mj.v, wcount_dh_update.v), mask); wcountSum->v = vec_mask_add(wcountSum->v, wi.v, mask); wcount_dhSum->v = vec_mask_sub(wcount_dhSum->v, wcount_dh_update.v, mask); div_vSum->v = diff --git a/src/kernel_hydro.h b/src/kernel_hydro.h index eb6cdf0270b8944d6274890d7ed1949d304aa831..5355c15f8f1a0d0c3d811c7039da04caf0522cc9 100644 --- a/src/kernel_hydro.h +++ b/src/kernel_hydro.h @@ -475,7 +475,8 @@ __attribute__((always_inline)) INLINE static void kernel_deval_1_vec( mask_t mask_reg; /* Form a mask for one part of the kernel. */ - /* Only need the mask for one region as the vec_blend defaults to the vector when the mask is 0.*/ + /* Only need the mask for one region as the vec_blend defaults to the vector + * when the mask is 0.*/ vec_create_mask(mask_reg, vec_cmp_gte(x.v, cond.v)); /* 0.5 < x < 1 */ /* Work out w for both regions of the kernel and combine the results together @@ -499,7 +500,8 @@ __attribute__((always_inline)) INLINE static void kernel_deval_1_vec( w2.v = vec_fma(x.v, w2.v, cubic_2_const_c3.v); /* Blend both kernel regions into one vector (mask out unneeded values). */ - /* Only need the mask for one region as the vec_blend defaults to the vector when the mask is 0.*/ + /* Only need the mask for one region as the vec_blend defaults to the vector + * when the mask is 0.*/ w->v = vec_blend(mask_reg, w->v, w2.v); dw_dx->v = vec_blend(mask_reg, dw_dx->v, dw_dx2.v); @@ -582,7 +584,8 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec( mask_t mask_reg, mask_reg_v2; /* Form a mask for one part of the kernel for each vector. */ - /* Only need the mask for one region as the vec_blend defaults to the vector when the mask is 0.*/ + /* Only need the mask for one region as the vec_blend defaults to the vector + * when the mask is 0.*/ vec_create_mask(mask_reg, vec_cmp_gte(x.v, cond.v)); /* 0.5 < x < 1 */ vec_create_mask(mask_reg_v2, vec_cmp_gte(x2.v, cond.v)); /* 0.5 < x < 1 */ @@ -619,7 +622,8 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec( w2_2.v = vec_fma(x2.v, w2_2.v, cubic_2_const_c3.v); /* Blend both kernel regions into one vector (mask out unneeded values). */ - /* Only need the mask for one region as the vec_blend defaults to the vector when the mask is 0.*/ + /* Only need the mask for one region as the vec_blend defaults to the vector + * when the mask is 0.*/ w->v = vec_blend(mask_reg, w->v, w_2.v); w2->v = vec_blend(mask_reg_v2, w2->v, w2_2.v); dw_dx->v = vec_blend(mask_reg, dw_dx->v, dw_dx_2.v); @@ -668,7 +672,8 @@ __attribute__((always_inline)) INLINE static void kernel_eval_W_vec(vector *u, mask_t mask_reg; /* Form a mask for each part of the kernel. */ - /* Only need the mask for one region as the vec_blend defaults to the vector when the mask is 0.*/ + /* Only need the mask for one region as the vec_blend defaults to the vector + * when the mask is 0.*/ vec_create_mask(mask_reg, vec_cmp_gte(x.v, cond.v)); /* 0.5 < x < 1 */ /* Work out w for both regions of the kernel and combine the results together @@ -686,7 +691,8 @@ __attribute__((always_inline)) INLINE static void kernel_eval_W_vec(vector *u, w2.v = vec_fma(x.v, w2.v, cubic_2_const_c3.v); /* Mask out unneeded values. */ - /* Only need the mask for one region as the vec_blend defaults to the vector when the mask is 0.*/ + /* Only need the mask for one region as the vec_blend defaults to the vector + * when the mask is 0.*/ w->v = vec_blend(mask_reg, w->v, w2.v); #else @@ -730,7 +736,8 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_vec( mask_t mask_reg; /* Form a mask for each part of the kernel. */ - /* Only need the mask for one region as the vec_blend defaults to the vector when the mask is 0.*/ + /* Only need the mask for one region as the vec_blend defaults to the vector + * when the mask is 0.*/ vec_create_mask(mask_reg, vec_cmp_gte(x.v, cond.v)); /* 0.5 < x < 1 */ /* Work out w for both regions of the kernel and combine the results together @@ -745,7 +752,8 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_vec( dw_dx2.v = vec_fma(dw_dx2.v, x.v, cubic_2_dwdx_const_c2.v); /* Mask out unneeded values. */ - /* Only need the mask for one region as the vec_blend defaults to the vector when the mask is 0.*/ + /* Only need the mask for one region as the vec_blend defaults to the vector + * when the mask is 0.*/ dw_dx->v = vec_blend(mask_reg, dw_dx->v, dw_dx2.v); #else @@ -771,8 +779,10 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_vec( * * @param u The ratio of the distance to the smoothing length $u = x/h$. * @param dw_dx (return) The norm of the gradient of $|\\nabla W(x,h)|$. - * @param u_2 The ratio of the distance to the smoothing length $u = x/h$ for second particle. - * @param dw_dx_2 (return) The norm of the gradient of $|\\nabla W(x,h)|$ for second particle. + * @param u_2 The ratio of the distance to the smoothing length $u = x/h$ for + * second particle. + * @param dw_dx_2 (return) The norm of the gradient of $|\\nabla W(x,h)|$ for + * second particle. */ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_force_2_vec( vector *u, vector *dw_dx, vector *u_2, vector *dw_dx_2) { @@ -804,7 +814,8 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_force_2_vec( mask_t mask_reg_v2; /* Form a mask for one part of the kernel. */ - /* Only need the mask for one region as the vec_blend defaults to the vector when the mask is 0.*/ + /* Only need the mask for one region as the vec_blend defaults to the vector + * when the mask is 0.*/ vec_create_mask(mask_reg, vec_cmp_gte(x.v, cond.v)); /* 0.5 < x < 1 */ vec_create_mask(mask_reg_v2, vec_cmp_gte(x_2.v, cond.v)); /* 0.5 < x < 1 */ @@ -824,7 +835,8 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_force_2_vec( dw_dx2_2.v = vec_fma(dw_dx2_2.v, x_2.v, cubic_2_dwdx_const_c2.v); /* Mask out unneeded values. */ - /* Only need the mask for one region as the vec_blend defaults to the vector when the mask is 0.*/ + /* Only need the mask for one region as the vec_blend defaults to the vector + * when the mask is 0.*/ dw_dx->v = vec_blend(mask_reg, dw_dx->v, dw_dx2.v); dw_dx_2->v = vec_blend(mask_reg_v2, dw_dx_2->v, dw_dx2_2.v); diff --git a/src/parser.c b/src/parser.c index 1981bcb111b62cff140a8a3942968b71a4656226..0b608b29263342240af68fd99d2fdd3241e2a1e6 100644 --- a/src/parser.c +++ b/src/parser.c @@ -133,8 +133,8 @@ void parser_set_param(struct swift_params *params, const char *namevalue) { int updated = 0; for (int i = 0; i < params->paramCount; i++) { if (strcmp(name, params->data[i].name) == 0) { - message("Value of '%s' changed from '%s' to '%s'", - params->data[i].name, params->data[i].value, value); + message("Value of '%s' changed from '%s' to '%s'", params->data[i].name, + params->data[i].value, value); strcpy(params->data[i].value, value); updated = 1; } diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index 7a02bbe69026efb46bec3a7d324b978bd10b3b6a..1557c1ec84863c3518f8671cdf0e9ca44732e94e 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -224,7 +224,8 @@ __attribute__((always_inline)) INLINE static void storeInteractions( } /** - * @brief Populates the arrays max_index_i and max_index_j with the maximum indices of + * @brief Populates the arrays max_index_i and max_index_j with the maximum + * indices of * particles into their neighbouring cells. Also finds the first pi that * interacts with any particle in cj and the last pj that interacts with any * particle in ci. @@ -239,9 +240,11 @@ __attribute__((always_inline)) INLINE static void storeInteractions( * @param hj_max Maximal smoothing length in cell cj * @param di_max Maximal position on the axis that can interact in cell ci * @param dj_min Minimal position on the axis that can interact in cell ci - * @param max_index_i array to hold the maximum distances of pi particles into cell + * @param max_index_i array to hold the maximum distances of pi particles into + * cell * cj - * @param max_index_j array to hold the maximum distances of pj particles into cell + * @param max_index_j array to hold the maximum distances of pj particles into + * cell * cj * @param init_pi first pi to interact with a pj particle * @param init_pj last pj to interact with a pi particle @@ -265,7 +268,7 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache( * particle in cell j. */ first_pi = ci->count; int active_id = first_pi - 1; - while(first_pi > 0 && sort_i[first_pi - 1].d + dx_max + hi_max > dj_min) { + while (first_pi > 0 && sort_i[first_pi - 1].d + dx_max + hi_max > dj_min) { first_pi--; /* Store the index of the particle if it is active. */ if (part_is_active(&parts_i[sort_i[first_pi].i], e)) active_id = first_pi; @@ -322,7 +325,7 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache( last_pj = active_id; /* Find the maximum index into cell i for each particle in range in cell j. */ - if(last_pj > 0) { + if (last_pj > 0) { /* Start from the last particle in cell i. */ temp = ci->count - 1; @@ -341,7 +344,7 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache( for (int i = last_pj - 1; i >= 0; i--) { temp = max_index_j[i + 1]; pj = &parts_j[sort_j[i].i]; - + while (temp > 0 && sort_j[i].d - dx_max - (pj->h * kernel_gamma) < sort_i[temp].d - rshift) @@ -696,12 +699,13 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, max_index_i = r->ci_cache.max_index; max_index_j = r->cj_cache.max_index; - /* Find particles maximum index into cj, max_index_i[] and ci, max_index_j[]. */ + /* Find particles maximum index into cj, max_index_i[] and ci, max_index_j[]. + */ /* Also find the first pi that interacts with any particle in cj and the last * pj that interacts with any particle in ci. */ populate_max_index_no_cache(ci, cj, sort_i, sort_j, dx_max, rshift, hi_max, - hj_max, di_max, dj_min, max_index_i, max_index_j, - &first_pi, &last_pj, e); + hj_max, di_max, dj_min, max_index_i, max_index_j, + &first_pi, &last_pj, e); /* Limits of the outer loops. */ int first_pi_loop = first_pi; diff --git a/src/vector.h b/src/vector.h index fc9d96e73693e10ee2271023ae44f092dc93c3bf..6a7c6837989025785c1f9134004f2ebcc226a205 100644 --- a/src/vector.h +++ b/src/vector.h @@ -83,15 +83,15 @@ #define vec_cmp_lt(a, b) _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ) #define vec_cmp_lte(a, b) _mm512_cmp_ps_mask(a, b, _CMP_LE_OQ) #define vec_cmp_gte(a, b) _mm512_cmp_ps_mask(a, b, _CMP_GE_OQ) -#define vec_cmp_result(a) ({a;}) -#define vec_form_int_mask(a) ({a;}) +#define vec_cmp_result(a) ({ a; }) +#define vec_form_int_mask(a) ({ a; }) #define vec_and(a, b) _mm512_and_ps(a, b) #define vec_mask_and(a, b) _mm512_kand(a, b) #define vec_and_mask(a, mask) _mm512_maskz_mov_ps(mask, a) -#define vec_init_mask_true(mask) ({mask = 0xFFFF;}) -#define vec_zero_mask(mask) ({mask = 0;}) -#define vec_create_mask(mask, cond) ({mask = cond;}) -#define vec_pad_mask(mask, pad) ({mask = mask >> (pad);}) +#define vec_init_mask_true(mask) ({ mask = 0xFFFF; }) +#define vec_zero_mask(mask) ({ mask = 0; }) +#define vec_create_mask(mask, cond) ({ mask = cond; }) +#define vec_pad_mask(mask, pad) ({ mask = mask >> (pad); }) #define vec_blend(mask, a, b) _mm512_mask_blend_ps(mask, a, b) #define vec_todbl_lo(a) _mm512_cvtps_pd(_mm512_extract128_ps(a, 0)) #define vec_todbl_hi(a) _mm512_cvtps_pd(_mm512_extract128_ps(a, 1)) diff --git a/tests/testActivePair.c b/tests/testActivePair.c index 3d4320d595019abfb6f9873972f8cd8d2677e4b8..1e0111b4f0e480d0f66463b4c2264cdd89bd28c8 100644 --- a/tests/testActivePair.c +++ b/tests/testActivePair.c @@ -43,7 +43,8 @@ * @param pert The perturbation to apply to the particles in the cell in units * of the inter-particle separation. * @param h_pert The perturbation to apply to the smoothing length. - * @param fraction_active The fraction of particles that should be active in the cell. + * @param fraction_active The fraction of particles that should be active in the + * cell. */ struct cell *make_cell(size_t n, double *offset, double size, double h, double density, long long *partId, double pert, @@ -460,8 +461,8 @@ int main(int argc, char *argv[]) { engine.ti_current = 8; engine.max_active_bin = num_time_bins; - if (posix_memalign((void **)&runner, SWIFT_STRUCT_ALIGNMENT, sizeof(struct runner)) != - 0) { + if (posix_memalign((void **)&runner, SWIFT_STRUCT_ALIGNMENT, + sizeof(struct runner)) != 0) { error("couldn't allocate runner"); }