diff --git a/src/cell.c b/src/cell.c index 044f64d57f5983765e9a5f5f24b6821a040aa621..ef258bf23af217024ae1cf25a16dc3e63370c5b5 100644 --- a/src/cell.c +++ b/src/cell.c @@ -564,12 +564,27 @@ void cell_init_parts(struct cell *c, void *data) { xp[i].v_full[0] = p[i].v[0]; xp[i].v_full[1] = p[i].v[1]; xp[i].v_full[2] = p[i].v[2]; - hydro_init_part(p); - hydro_reset_acceleration(p); + hydro_init_part(&p[i]); + hydro_reset_acceleration(&p[i]); } c->t_end_min = 0.; } +/** + * @brief Converts hydro quantities to a valid state after the initial density calculation + * + * @param c Cell to act upon + * @param data Unused parameter + */ +void cell_convert_hydro(struct cell *c, void *data) { + + struct part *p = c->parts; + + for(int i=0; i<c->count; ++i) { + hydro_convert_quantities(&p[i]); + } +} + /** * @brief Cleans the links in a given cell. diff --git a/src/cell.h b/src/cell.h index a837e782db8dd3ff4acf418e133a235e77e7a207..0341278a9e83cd88a1b3b35d1128e78b4b2883ae 100644 --- a/src/cell.h +++ b/src/cell.h @@ -178,6 +178,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s); int cell_getsize(struct cell *c); int cell_link(struct cell *c, struct part *parts); void cell_init_parts(struct cell *c, void *data); +void cell_convert_hydro(struct cell *c, void *data); void cell_clean_links(struct cell * c, void * data); #endif /* SWIFT_CELL_H */ diff --git a/src/debug.c b/src/debug.c index a48a41925cc9e502bd5cf3fe9cf415da87280ce0..db63c8df3ab9256140bd472af02f658caa153bc5 100644 --- a/src/debug.c +++ b/src/debug.c @@ -46,17 +46,24 @@ void printParticle(struct part *parts, long long int id, int N) { for (i = 0; i < N; i++) if (parts[i].id == id) { printf( - "## Particle[%d]: id=%lld, x=[%.16e,%.16e,%.16e], " - "v=[%.3e,%.3e,%.3e], a=[%.3e,%.3e,%.3e], h=%.3e, h_dt=%.3e, " - "wcount=%.3e, m=%.3e, rho=%.3e, rho_dh=%.3e, div_v=%.3e, u=%.3e, " - "dudt=%.3e, bals=%.3e, POrho2=%.3e, v_sig=%.3e, t_begin=%.3e, " - "t_end=%.3e\n", + "## Particle[%d]:\n id=%lld, x=[%.3e,%.3e,%.3e], " + "v=[%.3e,%.3e,%.3e], a=[%.3e,%.3e,%.3e],\n h=%.3e, " + "wcount=%d, m=%.3e, dh_drho=%.3e, rho=%.3e, P=%3.e, S=%.3e,\n divV=%.3e, " + " curlV=%.3e rotV=[%.3e,%.3e,%.3e] \n " + "t_begin=%.3e, t_end=%.3e\n", i, parts[i].id, parts[i].x[0], parts[i].x[1], parts[i].x[2], parts[i].v[0], parts[i].v[1], parts[i].v[2], parts[i].a[0], - parts[i].a[1], parts[i].a[2], parts[i].h, parts[i].force.h_dt, - parts[i].density.wcount, parts[i].mass, parts[i].rho, parts[i].rho_dh, - parts[i].density.div_v, parts[i].u, parts[i].force.u_dt, - parts[i].force.balsara, parts[i].force.POrho2, parts[i].force.v_sig, + parts[i].a[1], parts[i].a[2], 2.*parts[i].h, + (int)parts[i].density.wcount, parts[i].mass, + parts[i].rho_dh, + parts[i].rho, + parts[i].pressure, + parts[i].entropy, + parts[i].div_v, + parts[i].curl_v, + parts[i].rot_v[0], + parts[i].rot_v[1], + parts[i].rot_v[2], parts[i].t_begin, parts[i].t_end); found = 1; } @@ -94,15 +101,17 @@ void printgParticle(struct gpart *parts, long long int id, int N) { void printParticle_single(struct part *p) { - printf( - "## Particle: id=%lld, x=[%e,%e,%e], v=[%.3e,%.3e,%.3e], " - "a=[%.3e,%.3e,%.3e], h=%.3e, h_dt=%.3e, wcount=%.3e, m=%.3e, rho=%.3e, " - "rho_dh=%.3e, div_v=%.3e, u=%.3e, dudt=%.3e, bals=%.3e, POrho2=%.3e, " - "v_sig=%.3e, t_begin=%.3e, t_end=%.3e\n", - p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->a[0], - p->a[1], p->a[2], p->h, p->force.h_dt, p->density.wcount, p->mass, p->rho, - p->rho_dh, p->density.div_v, p->u, p->force.u_dt, p->force.balsara, - p->force.POrho2, p->force.v_sig, p->t_begin, p->t_end); + /* printf( */ + /* "## Particle: id=%lld, x=[%e,%e,%e], v=[%.3e,%.3e,%.3e], " */ + /* "a=[%.3e,%.3e,%.3e], h=%.3e, h_dt=%.3e, wcount=%.3e, m=%.3e, rho=%.3e, " */ + /* "rho_dh=%.3e, div_v=%.3e, u=%.3e, dudt=%.3e, bals=%.3e, POrho2=%.3e, " */ + /* "v_sig=%.3e, t_begin=%.3e, t_end=%.3e\n", */ + /* p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->a[0], */ + /* p->a[1], p->a[2], p->h, p->force.h_dt, p->density.wcount, p->mass, p->rho, */ + /* p->rho_dh, p->density.div_v, p->u, p->force.u_dt, p->force.balsara, */ + /* p->force.POrho2, p->force.v_sig, p->t_begin, p->t_end); */ + + // MATTHIEU } #ifdef HAVE_METIS diff --git a/src/engine.c b/src/engine.c index 2a24502e6a971d87dc76e5398e2dc6ccb6585031..6ac37ff1c0555beccbf8712f77f55b1d75a09d12 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1714,8 +1714,8 @@ void engine_init_particles(struct engine *e) { message("Initialising particles"); space_map_cells_pre(s, 1, cell_init_parts, NULL); - printParticle(e->s->parts, 1000, e->s->nr_parts); - printParticle(e->s->parts, 515050, e->s->nr_parts); + //printParticle(e->s->parts, 1000, e->s->nr_parts); + //printParticle(e->s->parts, 515050, e->s->nr_parts); message("\n0th DENSITY CALC\n"); @@ -1729,11 +1729,15 @@ void engine_init_particles(struct engine *e) { 1 << task_subtype_density); TIMER_TOC(timer_runners); - + + message("\n0th ENTROPY CONVERSION\n"); + + space_map_cells_pre(s, 1, cell_convert_hydro, NULL); + printParticle(e->s->parts, 1000, e->s->nr_parts); printParticle(e->s->parts, 515050, e->s->nr_parts); - - abort(); + + exit(0); /* Ready to go */ e->step = -1; @@ -1788,13 +1792,13 @@ void engine_step(struct engine *e) { MPI_SUCCESS) error("Failed to aggregate t_end_max."); t_end_max = in[0]; - out[0] = count; + out[0] = updates; out[1] = ekin; out[2] = epot; if (MPI_Allreduce(out, in, 3, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) error("Failed to aggregate energies."); - count = in[0]; + updates = in[0]; ekin = in[1]; epot = in[2]; /* int nr_parts; @@ -1819,8 +1823,6 @@ if ( e->nodeID == 0 ) printParticle(e->s->parts, 1000, e->s->nr_parts); printParticle(e->s->parts, 515050, e->s->nr_parts); - abort(); - /* Move forward in time */ e->timeOld = e->time; e->time = t_end_min; @@ -1830,6 +1832,7 @@ if ( e->nodeID == 0 ) printf("%d %f %f %d\n", e->step, e->time, e->timeStep, updates); fflush(stdout); + message("\nACCELERATION AND KICK\n"); /* Re-distribute the particles amongst the nodes? */ if (e->forcerepart) engine_repartition(e); @@ -1845,11 +1848,17 @@ if ( e->nodeID == 0 ) (1 << task_type_init) | (1 << task_type_ghost) | (1 << task_type_kick) | (1 << task_type_send) | (1 << task_type_recv), - 0); + (1 << task_subtype_density) | (1 << task_subtype_force)); TIMER_TOC(timer_runners); TIMER_TOC2(timer_step); + + + printParticle(e->s->parts, 1000, e->s->nr_parts); + printParticle(e->s->parts, 515050, e->s->nr_parts); + exit(0); + } /** diff --git a/src/hydro/Default/hydro_part.h b/src/hydro/Default/hydro_part.h index 6e06061707d71bf6340f3ae73a6d6dfe3472c21c..e8fb93516f493b3d5e037aab2b3d04b68e62ab4e 100644 --- a/src/hydro/Default/hydro_part.h +++ b/src/hydro/Default/hydro_part.h @@ -73,7 +73,7 @@ struct part { float alpha; /* Store density/force specific stuff. */ - union { + //union { struct { @@ -112,7 +112,7 @@ struct part { float c; } force; - }; + //}; /* Particle mass. */ float mass; diff --git a/src/hydro/Gadget2/hydro.h b/src/hydro/Gadget2/hydro.h index 6459c9511f7b9b37d4f536b3d9d62829e78db96f..a974a92645e2dced387340ddaabd1076a98ebe0d 100644 --- a/src/hydro/Gadget2/hydro.h +++ b/src/hydro/Gadget2/hydro.h @@ -28,19 +28,20 @@ __attribute__((always_inline)) INLINE static float hydro_compute_timestep( struct part* p, struct xpart* xp) { /* CFL condition */ - float dt_cfl = const_cfl * p->h / p->force.v_sig; + const float dt_cfl = const_cfl * p->h / p->v_sig; - /* Limit change in h */ - float dt_h_change = (p->force.h_dt != 0.0f) - ? fabsf(const_ln_max_h_change * p->h / p->force.h_dt) - : FLT_MAX; + /* /\* Limit change in h *\/ */ + /* float dt_h_change = (p->h_dt != 0.0f) */ + /* ? fabsf(const_ln_max_h_change * p->h / p->h_dt) */ + /* : FLT_MAX; */ - /* Limit change in u */ - float dt_u_change = (p->force.u_dt != 0.0f) - ? fabsf(const_max_u_change * p->u / p->force.u_dt) - : FLT_MAX; + /* /\* Limit change in u *\/ */ + /* float dt_u_change = (p->force.u_dt != 0.0f) */ + /* ? fabsf(const_max_u_change * p->u / p->force.u_dt) */ + /* : FLT_MAX; */ - return fminf(dt_cfl, fminf(dt_h_change, dt_u_change)); + // return fminf(dt_cfl, fminf(dt_h_change, dt_u_change)); + return dt_cfl; } @@ -58,10 +59,11 @@ __attribute__((always_inline)) INLINE static void hydro_init_part(struct part* p p->density.wcount_dh = 0.f; p->rho = 0.f; p->rho_dh = 0.f; - p->density.div_v = 0.f; - p->density.curl_v[0] = 0.f; - p->density.curl_v[1] = 0.f; - p->density.curl_v[2] = 0.f; + p->div_v = 0.f; + p->curl_v = 0.f; + p->rot_v[0] = 0.f; + p->rot_v[1] = 0.f; + p->rot_v[2] = 0.f; } /** @@ -71,8 +73,9 @@ __attribute__((always_inline)) INLINE static void hydro_init_part(struct part* p * and add the self-contribution term. * * @param p The particle to act upon + * @param time The current time */ -__attribute__((always_inline)) INLINE static void hydro_end_density(struct part* p) { +__attribute__((always_inline)) INLINE static void hydro_end_density(struct part* p, float time) { /* Some smoothing length multiples. */ const float h = p->h; @@ -80,12 +83,31 @@ __attribute__((always_inline)) INLINE static void hydro_end_density(struct part* const float ih2 = ih * ih; const float ih4 = ih2 * ih2; - /* Final operation on the density. */ - p->rho = ih * ih2 * (p->rho + p->mass * kernel_root); - p->rho_dh = (p->rho_dh - 3.0f * p->mass * kernel_root) * ih4; - p->density.wcount = (p->density.wcount + kernel_root) * - (4.0f / 3.0 * M_PI * kernel_gamma3); - p->density.wcount_dh = p->density.wcount_dh * ih * (4.0f / 3.0 * M_PI * kernel_gamma3); + /* Final operation on the density (add self-contribution). */ + p->rho += p->mass * kernel_root; + p->rho_dh -= 3.0f * p->mass * kernel_root * kernel_igamma; + p->density.wcount += kernel_root; + + /* Finish the calculation by inserting the missing h-factors */ + p->rho *= ih * ih2; + p->rho_dh *= ih4; + p->density.wcount *= (4.0f / 3.0 * M_PI * kernel_gamma3); + p->density.wcount_dh *= ih * (4.0f / 3.0 * M_PI * kernel_gamma3); + + /* Compute the derivative term */ + p->rho_dh = 1.f / (1.f + 0.33333333f * p->h * p->rho_dh / p->rho); + + /* Finish calculation of the velocity curl */ + p->curl_v = sqrtf(p->rot_v[0] * p->rot_v[0] + + p->rot_v[1] * p->rot_v[1] + + p->rot_v[2] * p->rot_v[2]) / p->rho; + + /* Finish calculation of the velocity divergence */ + p->div_v /= p->rho; + + /* Compute the pressure */ + const float dt = time - 0.5f*(p->t_begin + p->t_end); + p->pressure = (p->entropy + p->entropy_dt * dt) * pow(p->rho, const_hydro_gamma); } @@ -102,31 +124,31 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force(struct par struct xpart* xp) { /* Some smoothing length multiples. */ - const float h = p->h; - const float ih = 1.0f / h; - const float ih2 = ih * ih; - const float ih4 = ih2 * ih2; + /* const float h = p->h; */ + /* const float ih = 1.0f / h; */ + /* const float ih2 = ih * ih; */ + /* const float ih4 = ih2 * ih2; */ - /* Pre-compute some stuff for the balsara switch. */ - const float normDiv_v = fabs(p->density.div_v / p->rho * ih4); - const float normCurl_v = sqrtf(p->density.curl_v[0] * p->density.curl_v[0] + - p->density.curl_v[1] * p->density.curl_v[1] + - p->density.curl_v[2] * p->density.curl_v[2]) / - p->rho * ih4; - - /* Compute this particle's sound speed. */ - const float u = p->u; - const float fc = p->force.c = - sqrtf(const_hydro_gamma * (const_hydro_gamma - 1.0f) * u); + /* /\* Pre-compute some stuff for the balsara switch. *\/ */ + /* const float normDiv_v = fabs(p->density.div_v / p->rho * ih4); */ + /* const float normCurl_v = sqrtf(p->density.curl_v[0] * p->density.curl_v[0] + */ + /* p->density.curl_v[1] * p->density.curl_v[1] + */ + /* p->density.curl_v[2] * p->density.curl_v[2]) / */ + /* p->rho * ih4; */ + + /* /\* Compute this particle's sound speed. *\/ */ + /* const float u = p->u; */ + /* const float fc = p->force.c = */ + /* sqrtf(const_hydro_gamma * (const_hydro_gamma - 1.0f) * u); */ - /* Compute the P/Omega/rho2. */ - xp->omega = 1.0f + 0.3333333333f * h * p->rho_dh / p->rho; - p->force.POrho2 = u * (const_hydro_gamma - 1.0f) / (p->rho * xp->omega); + /* /\* Compute the P/Omega/rho2. *\/ */ + /* xp->omega = 1.0f + 0.3333333333f * h * p->rho_dh / p->rho; */ + /* p->force.POrho2 = u * (const_hydro_gamma - 1.0f) / (p->rho * xp->omega); */ - /* Balsara switch */ - p->force.balsara = - normDiv_v / (normDiv_v + normCurl_v + 0.0001f * fc * ih); + /* /\* Balsara switch *\/ */ + /* p->force.balsara = */ + /* normDiv_v / (normDiv_v + normCurl_v + 0.0001f * fc * ih); */ } @@ -147,9 +169,7 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration(struc p->a[2] = 0.0f; /* Reset the time derivatives. */ - p->force.u_dt = 0.0f; - p->force.h_dt = 0.0f; - p->force.v_sig = 0.0f; + p->v_sig = 0.0f; } @@ -163,19 +183,6 @@ __attribute__((always_inline)) INLINE static void hydro_reset_acceleration(struc __attribute__((always_inline)) INLINE static void hydro_predict_extra(struct part* p, struct xpart* xp, float dt) { - float u, w; - - /* Predict internal energy */ - w = p->force.u_dt / p->u * dt; - if (fabsf(w) < 0.01f) /* 1st order expansion of exp(w) */ - u = p->u *= - 1.0f + - w * (1.0f + w * (0.5f + w * (1.0f / 6.0f + 1.0f / 24.0f * w))); - else - u = p->u *= expf(w); - - /* Predict gradient term */ - p->force.POrho2 = u * (const_hydro_gamma - 1.0f) / (p->rho * xp->omega); } @@ -190,6 +197,17 @@ __attribute__((always_inline)) INLINE static void hydro_predict_extra(struct par */ __attribute__((always_inline)) INLINE static void hydro_end_force(struct part* p) { - p->force.h_dt *= p->h * 0.333333333f; - +} + +/** + * @brief Converts hydro quantity of a particle + * + * Requires the density to be known + * + * @param p The particle to act upon + */ +__attribute__((always_inline)) INLINE static void hydro_convert_quantities(struct part* p) { + + p->entropy = (const_hydro_gamma - 1.f) * p->entropy * pow(p->rho, -(const_hydro_gamma - 1.f)); + } diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index 0664e78586d3e17336a98393933768449ca11666..461d2f99ad76eb566f33ad84f0eef629f40b6cc6 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -49,172 +49,79 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { - float r = sqrtf(r2), ri = 1.0f / r; - float xi, xj; - float h_inv; - float wi, wj, wi_dx, wj_dx; - float mi, mj; - float dvdr; + float wi, wi_dx; + float wj, wj_dx; float dv[3], curlvr[3]; - int k; - + /* Get the masses. */ - mi = pi->mass; - mj = pj->mass; + const float mi = pj->mass; + const float mj = pj->mass; + + /* Get r and r inverse. */ + const float r = sqrtf(r2); + const float r_inv = 1.0f / r; + + /* Compute the kernel function for pi */ + const float hi_inv = 1.f / hi; + const float ui = r * hi_inv; + kernel_deval(ui, &wi, &wi_dx); + + /* Compute contribution to the density */ + pi->rho += mj * wi; + pi->rho_dh -= mj * kernel_igamma * (3.f * wi + ui * wi_dx); + + /* Compute contribution to the number of neighbours */ + pi->density.wcount += wi; + pi->density.wcount_dh -= ui * wi_dx; + + + /* Compute the kernel function for pj */ + const float hj_inv = 1.f / hj; + const float uj = r * hj_inv; + kernel_deval(uj, &wj, &wj_dx); + + /* Compute contribution to the density */ + pj->rho += mi * wj; + pj->rho_dh -= mi * kernel_igamma * (3.f * wj + uj * wj_dx); + + /* Compute contribution to the number of neighbours */ + pj->density.wcount += wj; + pj->density.wcount_dh -= uj * wj_dx; + + const float faci = mj * wi_dx * r_inv; + const float facj = mi * wj_dx * r_inv; + /* Compute dv dot r */ dv[0] = pi->v[0] - pj->v[0]; dv[1] = pi->v[1] - pj->v[1]; dv[2] = pi->v[2] - pj->v[2]; - dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2]; - dvdr *= ri; + const float dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2]; + + pi->div_v -= faci * dvdr; + pj->div_v -= facj * dvdr; /* Compute dv cross r */ curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1]; curlvr[1] = dv[2] * dx[0] - dv[0] * dx[2]; curlvr[2] = dv[0] * dx[1] - dv[1] * dx[0]; - for (k = 0; k < 3; k++) curlvr[k] *= ri; - /* Compute density of pi. */ - h_inv = 1.0 / hi; - xi = r * h_inv; - kernel_deval(xi, &wi, &wi_dx); + pi->rot_v[0] += faci * curlvr[0]; + pi->rot_v[1] += faci * curlvr[1]; + pi->rot_v[2] += faci * curlvr[2]; - pi->rho += mj * wi; - pi->rho_dh -= mj * (3.0 * wi + xi * wi_dx); - pi->density.wcount += wi; - pi->density.wcount_dh -= xi * wi_dx; + pj->rot_v[0] += facj * curlvr[0]; + pj->rot_v[1] += facj * curlvr[1]; + pj->rot_v[2] += facj * curlvr[2]; - pi->density.div_v += mj * dvdr * wi_dx; - for (k = 0; k < 3; k++) pi->density.curl_v[k] += mj * curlvr[k] * wi_dx; + /* if(pi->id == 1000) */ + /* message("Interacting with %lld. r=%f\n", pj->id, r); */ - /* Compute density of pj. */ - h_inv = 1.0 / hj; - xj = r * h_inv; - kernel_deval(xj, &wj, &wj_dx); + /* if(pj->id == 1000) */ + /* message("Interacting with %lld. r=%f\n", pi->id, r); */ - pj->rho += mi * wj; - pj->rho_dh -= mi * (3.0 * wj + xj * wj_dx); - pj->density.wcount += wj; - pj->density.wcount_dh -= xj * wj_dx; - - pj->density.div_v += mi * dvdr * wj_dx; - for (k = 0; k < 3; k++) pj->density.curl_v[k] += mi * curlvr[k] * wj_dx; } -/** - * @brief Density loop (Vectorized version) - */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_density( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef VECTORIZE - - vector r, r2, ri, xi, xj, hi, hj, hi_inv, hj_inv, wi, wj, wi_dx, wj_dx; - vector rhoi, rhoj, rhoi_dh, rhoj_dh, wcounti, wcountj, wcounti_dh, wcountj_dh; - vector mi, mj; - vector dx[3], dv[3]; - vector vi[3], vj[3]; - vector dvdr, div_vi, div_vj; - vector curlvr[3], curl_vi[3], curl_vj[3]; - int k, j; - -#if VEC_SIZE == 8 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, - pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); -#elif VEC_SIZE == 4 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); - r.v = r2.v * ri.v; - - hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f)); - xi.v = r.v * hi_inv.v; - - hj.v = vec_load(Hj); - hj_inv.v = vec_rcp(hj.v); - hj_inv.v = hj_inv.v - hj_inv.v * (hj_inv.v * hj.v - vec_set1(1.0f)); - xj.v = r.v * hj_inv.v; - - kernel_deval_vec(&xi, &wi, &wi_dx); - kernel_deval_vec(&xj, &wj, &wj_dx); - - /* Compute dv. */ - dv[0].v = vi[0].v - vj[0].v; - dv[1].v = vi[1].v - vj[1].v; - dv[2].v = vi[2].v - vj[2].v; - - /* Compute dv dot r */ - dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Compute dv cross r */ - curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v; - curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v; - curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v; - for (k = 0; k < 3; k++) curlvr[k].v *= ri.v; - - rhoi.v = mj.v * wi.v; - rhoi_dh.v = mj.v * (vec_set1(3.0f) * wi.v + xi.v * wi_dx.v); - wcounti.v = wi.v; - wcounti_dh.v = xi.v * wi_dx.v; - div_vi.v = mj.v * dvdr.v * wi_dx.v; - for (k = 0; k < 3; k++) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; - - rhoj.v = mi.v * wj.v; - rhoj_dh.v = mi.v * (vec_set1(3.0f) * wj.v + xj.v * wj_dx.v); - wcountj.v = wj.v; - wcountj_dh.v = xj.v * wj_dx.v; - div_vj.v = mi.v * dvdr.v * wj_dx.v; - for (k = 0; k < 3; k++) curl_vj[k].v = mi.v * curlvr[k].v * wj_dx.v; - - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->rho += rhoi.f[k]; - pi[k]->rho_dh -= rhoi_dh.f[k]; - pi[k]->density.wcount += wcounti.f[k]; - pi[k]->density.wcount_dh -= wcounti_dh.f[k]; - pi[k]->density.div_v += div_vi.f[k]; - for (j = 0; j < 3; j++) pi[k]->density.curl_v[j] += curl_vi[j].f[k]; - pj[k]->rho += rhoj.f[k]; - pj[k]->rho_dh -= rhoj_dh.f[k]; - pj[k]->density.wcount += wcountj.f[k]; - pj[k]->density.wcount_dh -= wcountj_dh.f[k]; - pj[k]->density.div_v += div_vj.f[k]; - for (j = 0; j < 3; j++) pj[k]->density.curl_v[j] += curl_vj[j].f[k]; - } - -#else - - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_density(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]); - -#endif -} /** * @brief Density loop (non-symmetric version) @@ -223,142 +130,66 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_density( __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { - float r, ri; - float xi; - float h_inv; float wi, wi_dx; - float mj; - float dvdr; float dv[3], curlvr[3]; - int k; /* Get the masses. */ - mj = pj->mass; + const float mj = pj->mass; /* Get r and r inverse. */ - r = sqrtf(r2); - ri = 1.0f / r; + const float r = sqrtf(r2); + const float ri = 1.0f / r; + + if(pi->id == 1000 && pj->id == 1103) hi = 0.2234976 / 2.f; + + /* Compute the kernel function */ + const float h_inv = 1.0f / hi; + const float u = r * h_inv; + kernel_deval(u, &wi, &wi_dx); + + /* Compute contribution to the density */ + pi->rho += mj * wi; + pi->rho_dh -= mj * kernel_igamma * (3.f * wi + u * wi_dx); + + /* Compute contribution to the number of neighbours */ + pi->density.wcount += wi; + pi->density.wcount_dh -= u * wi_dx; + const float ih3 = h_inv * h_inv * h_inv; + const float ih4 = h_inv * h_inv * h_inv * h_inv; + + if(pi->id == 1000 && pj->id == 1103) + message("Interacting with %lld. r=%e hi=%e u=%e W=%e dW/dx=%e dh_drho1=%e dh_drho2=%e %e\n", + pj->id, + r, + hi, + u, + wi * ih3, + wi_dx * ih4, + -mj * (3.f * kernel_igamma * wi) * ih4, + -mj * u * wi_dx * kernel_igamma * ih4, + kernel_igamma + ); + + const float fac = mj * wi_dx * ri; + /* Compute dv dot r */ dv[0] = pi->v[0] - pj->v[0]; dv[1] = pi->v[1] - pj->v[1]; dv[2] = pi->v[2] - pj->v[2]; - dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2]; - dvdr *= ri; + const float dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2]; + pi->div_v -= fac * dvdr; /* Compute dv cross r */ curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1]; curlvr[1] = dv[2] * dx[0] - dv[0] * dx[2]; curlvr[2] = dv[0] * dx[1] - dv[1] * dx[0]; - for (k = 0; k < 3; k++) curlvr[k] *= ri; - - h_inv = 1.0 / hi; - xi = r * h_inv; - kernel_deval(xi, &wi, &wi_dx); - pi->rho += mj * wi; - pi->rho_dh -= mj * (3.0 * wi + xi * wi_dx); - pi->density.wcount += wi; - pi->density.wcount_dh -= xi * wi_dx; - - pi->density.div_v += mj * dvdr * wi_dx; - for (k = 0; k < 3; k++) pi->density.curl_v[k] += mj * curlvr[k] * wi_dx; + pi->rot_v[0] += fac * curlvr[0]; + pi->rot_v[1] += fac * curlvr[1]; + pi->rot_v[2] += fac * curlvr[2]; } -/** - * @brief Density loop (non-symmetric vectorized version) - */ - -__attribute__((always_inline)) - INLINE static void runner_iact_nonsym_vec_density(float *R2, float *Dx, - float *Hi, float *Hj, - struct part **pi, - struct part **pj) { - -#ifdef VECTORIZE - - vector r, r2, ri, xi, hi, hi_inv, wi, wi_dx; - vector rhoi, rhoi_dh, wcounti, wcounti_dh, div_vi; - vector mj; - vector dx[3], dv[3]; - vector vi[3], vj[3]; - vector dvdr; - vector curlvr[3], curl_vi[3]; - int k, j; - -#if VEC_SIZE == 8 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); -#elif VEC_SIZE == 4 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); - r.v = r2.v * ri.v; - - hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi_inv.v * hi.v - vec_set1(1.0f)); - xi.v = r.v * hi_inv.v; - - kernel_deval_vec(&xi, &wi, &wi_dx); - - /* Compute dv. */ - dv[0].v = vi[0].v - vj[0].v; - dv[1].v = vi[1].v - vj[1].v; - dv[2].v = vi[2].v - vj[2].v; - - /* Compute dv dot r */ - dvdr.v = (dv[0].v * dx[0].v) + (dv[1].v * dx[1].v) + (dv[2].v * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Compute dv cross r */ - curlvr[0].v = dv[1].v * dx[2].v - dv[2].v * dx[1].v; - curlvr[1].v = dv[2].v * dx[0].v - dv[0].v * dx[2].v; - curlvr[2].v = dv[0].v * dx[1].v - dv[1].v * dx[0].v; - for (k = 0; k < 3; k++) curlvr[k].v *= ri.v; - - rhoi.v = mj.v * wi.v; - rhoi_dh.v = mj.v * (vec_set1(3.0f) * wi.v + xi.v * wi_dx.v); - wcounti.v = wi.v; - wcounti_dh.v = xi.v * wi_dx.v; - div_vi.v = mj.v * dvdr.v * wi_dx.v; - for (k = 0; k < 3; k++) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; - - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->rho += rhoi.f[k]; - pi[k]->rho_dh -= rhoi_dh.f[k]; - pi[k]->density.wcount += wcounti.f[k]; - pi[k]->density.wcount_dh -= wcounti_dh.f[k]; - pi[k]->density.div_v += div_vi.f[k]; - for (j = 0; j < 3; j++) pi[k]->density.curl_v[j] += curl_vi[j].f[k]; - } - -#else - - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_nonsym_density(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]); - -#endif -} /** * @brief Force loop @@ -367,274 +198,82 @@ __attribute__((always_inline)) __attribute__((always_inline)) INLINE static void runner_iact_force( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { - float r = sqrtf(r2), ri = 1.0f / r; - float xi, xj; - float hi_inv, hi2_inv; - float hj_inv, hj2_inv; - float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr; - float mi, mj, POrho2i, POrho2j, rhoi, rhoj; - float v_sig, omega_ij, Pi_ij; - float f; - int k; - - /* Get some values in local variables. */ - mi = pi->mass; - mj = pj->mass; - rhoi = pi->rho; - rhoj = pj->rho; - POrho2i = pi->force.POrho2; - POrho2j = pj->force.POrho2; - - /* Get the kernel for hi. */ - hi_inv = 1.0f / hi; - hi2_inv = hi_inv * hi_inv; - xi = r * hi_inv; - kernel_deval(xi, &wi, &wi_dx); - wi_dr = hi2_inv * hi2_inv * wi_dx; - - /* Get the kernel for hj. */ - hj_inv = 1.0f / hj; - hj2_inv = hj_inv * hj_inv; - xj = r * hj_inv; - kernel_deval(xj, &wj, &wj_dx); - wj_dr = hj2_inv * hj2_inv * wj_dx; - - /* Compute dv dot r. */ - dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] + - (pi->v[2] - pj->v[2]) * dx[2]; - dvdr *= ri; - - /* Compute the relative velocity. (This is 0 if the particles move away from - * each other and negative otherwise) */ - omega_ij = fminf(dvdr, 0.f); - - /* Compute signal velocity */ - v_sig = pi->force.c + pj->force.c - 3.0f * omega_ij; - - /* Compute viscosity tensor */ - Pi_ij = -const_viscosity_alpha * v_sig * omega_ij / (rhoi + rhoj); - - /* Apply balsara switch */ - Pi_ij *= (pi->force.balsara + pj->force.balsara); - - /* Get the common factor out. */ - w = ri * - ((POrho2i * wi_dr + POrho2j * wj_dr) + 0.25f * Pi_ij * (wi_dr + wj_dr)); - - /* Use the force, Luke! */ - for (k = 0; k < 3; k++) { - f = dx[k] * w; - pi->a[k] -= mj * f; - pj->a[k] += mi * f; - } - - /* Get the time derivative for u. */ - pi->force.u_dt += - mj * dvdr * (POrho2i * wi_dr + 0.125f * Pi_ij * (wi_dr + wj_dr)); - pj->force.u_dt += - mi * dvdr * (POrho2j * wj_dr + 0.125f * Pi_ij * (wi_dr + wj_dr)); - - /* Get the time derivative for h. */ - pi->force.h_dt -= mj * dvdr / rhoj * wi_dr; - pj->force.h_dt -= mi * dvdr / rhoi * wj_dr; - - /* Update the signal velocity. */ - pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); - pj->force.v_sig = fmaxf(pj->force.v_sig, v_sig); + /* float r = sqrtf(r2), ri = 1.0f / r; */ + /* float xi, xj; */ + /* float hi_inv, hi2_inv; */ + /* float hj_inv, hj2_inv; */ + /* float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr; */ + /* float mi, mj, POrho2i, POrho2j, rhoi, rhoj; */ + /* float v_sig, omega_ij, Pi_ij; */ + /* float f; */ + /* int k; */ + + /* /\* Get some values in local variables. *\/ */ + /* mi = pi->mass; */ + /* mj = pj->mass; */ + /* rhoi = pi->rho; */ + /* rhoj = pj->rho; */ + /* //POrho2i = pi->force.POrho2; */ + /* //POrho2j = pj->force.POrho2; */ + + /* /\* Get the kernel for hi. *\/ */ + /* hi_inv = 1.0f / hi; */ + /* hi2_inv = hi_inv * hi_inv; */ + /* xi = r * hi_inv; */ + /* kernel_deval(xi, &wi, &wi_dx); */ + /* wi_dr = hi2_inv * hi2_inv * wi_dx; */ + + /* /\* Get the kernel for hj. *\/ */ + /* hj_inv = 1.0f / hj; */ + /* hj2_inv = hj_inv * hj_inv; */ + /* xj = r * hj_inv; */ + /* kernel_deval(xj, &wj, &wj_dx); */ + /* wj_dr = hj2_inv * hj2_inv * wj_dx; */ + + /* /\* Compute dv dot r. *\/ */ + /* dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] + */ + /* (pi->v[2] - pj->v[2]) * dx[2]; */ + /* dvdr *= ri; */ + + /* /\* Compute the relative velocity. (This is 0 if the particles move away from */ + /* * each other and negative otherwise) *\/ */ + /* omega_ij = fminf(dvdr, 0.f); */ + + /* /\* Compute signal velocity *\/ */ + /* v_sig = pi->force.c + pj->force.c - 3.0f * omega_ij; */ + + /* /\* Compute viscosity tensor *\/ */ + /* Pi_ij = -const_viscosity_alpha * v_sig * omega_ij / (rhoi + rhoj); */ + + /* /\* Apply balsara switch *\/ */ + /* Pi_ij *= (pi->force.balsara + pj->force.balsara); */ + + /* /\* Get the common factor out. *\/ */ + /* w = ri * */ + /* ((POrho2i * wi_dr + POrho2j * wj_dr) + 0.25f * Pi_ij * (wi_dr + wj_dr)); */ + + /* /\* Use the force, Luke! *\/ */ + /* for (k = 0; k < 3; k++) { */ + /* f = dx[k] * w; */ + /* pi->a[k] -= mj * f; */ + /* pj->a[k] += mi * f; */ + /* } */ + + /* /\* Get the time derivative for u. *\/ */ + /* pi->force.u_dt += */ + /* mj * dvdr * (POrho2i * wi_dr + 0.125f * Pi_ij * (wi_dr + wj_dr)); */ + /* pj->force.u_dt += */ + /* mi * dvdr * (POrho2j * wj_dr + 0.125f * Pi_ij * (wi_dr + wj_dr)); */ + + /* /\* Get the time derivative for h. *\/ */ + /* pi->force.h_dt -= mj * dvdr / rhoj * wi_dr; */ + /* pj->force.h_dt -= mi * dvdr / rhoi * wj_dr; */ + + /* /\* Update the signal velocity. *\/ */ + /* pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); */ + /* pj->force.v_sig = fmaxf(pj->force.v_sig, v_sig); */ } -/** - * @brief Force loop (Vectorized version) - */ - -__attribute__((always_inline)) INLINE static void runner_iact_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef VECTORIZE - - vector r, r2, ri; - vector xi, xj; - vector hi, hj, hi_inv, hj_inv; - vector hi2_inv, hj2_inv; - vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; - vector w; - vector piPOrho2, pjPOrho2, pirho, pjrho; - vector mi, mj; - vector f; - vector dx[3]; - vector vi[3], vj[3]; - vector pia[3], pja[3]; - vector piu_dt, pju_dt; - vector pih_dt, pjh_dt; - vector ci, cj, v_sig, vi_sig, vj_sig; - vector omega_ij, Pi_ij, balsara; - int j, k; - -/* Load stuff. */ -#if VEC_SIZE == 8 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, - pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho2.v = - vec_set(pi[0]->force.POrho2, pi[1]->force.POrho2, pi[2]->force.POrho2, - pi[3]->force.POrho2, pi[4]->force.POrho2, pi[5]->force.POrho2, - pi[6]->force.POrho2, pi[7]->force.POrho2); - pjPOrho2.v = - vec_set(pj[0]->force.POrho2, pj[1]->force.POrho2, pj[2]->force.POrho2, - pj[3]->force.POrho2, pj[4]->force.POrho2, pj[5]->force.POrho2, - pj[6]->force.POrho2, pj[7]->force.POrho2); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, - pi[5]->rho, pi[6]->rho, pi[7]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, - pj[5]->rho, pj[6]->rho, pj[7]->rho); - ci.v = - vec_set(pi[0]->force.c, pi[1]->force.c, pi[2]->force.c, pi[3]->force.c, - pi[4]->force.c, pi[5]->force.c, pi[6]->force.c, pi[7]->force.c); - cj.v = - vec_set(pj[0]->force.c, pj[1]->force.c, pj[2]->force.c, pj[3]->force.c, - pj[4]->force.c, pj[5]->force.c, pj[6]->force.c, pj[7]->force.c); - vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, - pi[3]->force.v_sig, pi[4]->force.v_sig, pi[5]->force.v_sig, - pi[6]->force.v_sig, pi[7]->force.v_sig); - vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig, - pj[3]->force.v_sig, pj[4]->force.v_sig, pj[5]->force.v_sig, - pj[6]->force.v_sig, pj[7]->force.v_sig); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); - balsara.v = - vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara, - pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara, - pi[6]->force.balsara, pi[7]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara, - pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara, - pj[6]->force.balsara, pj[7]->force.balsara); -#elif VEC_SIZE == 4 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - piPOrho2.v = vec_set(pi[0]->force.POrho2, pi[1]->force.POrho2, - pi[2]->force.POrho2, pi[3]->force.POrho2); - pjPOrho2.v = vec_set(pj[0]->force.POrho2, pj[1]->force.POrho2, - pj[2]->force.POrho2, pj[3]->force.POrho2); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); - ci.v = - vec_set(pi[0]->force.c, pi[1]->force.c, pi[2]->force.c, pi[3]->force.c); - cj.v = - vec_set(pj[0]->force.c, pj[1]->force.c, pj[2]->force.c, pj[3]->force.c); - vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, - pi[3]->force.v_sig); - vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig, - pj[3]->force.v_sig); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); - balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara, - pi[2]->force.balsara, pi[3]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, - pj[2]->force.balsara, pj[3]->force.balsara); -#else -#error -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); - r.v = r2.v * ri.v; - - /* Get the kernel for hi. */ - hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f)); - hi2_inv.v = hi_inv.v * hi_inv.v; - xi.v = r.v * hi_inv.v; - kernel_deval_vec(&xi, &wi, &wi_dx); - wi_dr.v = hi2_inv.v * hi2_inv.v * wi_dx.v; - - /* Get the kernel for hj. */ - hj.v = vec_load(Hj); - hj_inv.v = vec_rcp(hj.v); - hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f)); - hj2_inv.v = hj_inv.v * hj_inv.v; - xj.v = r.v * hj_inv.v; - kernel_deval_vec(&xj, &wj, &wj_dx); - wj_dr.v = hj2_inv.v * hj2_inv.v * wj_dx.v; - - /* Compute dv dot r. */ - dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + - ((vi[2].v - vj[2].v) * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Get the time derivative for h. */ - pih_dt.v = mj.v / pjrho.v * dvdr.v * wi_dr.v; - pjh_dt.v = mi.v / pirho.v * dvdr.v * wj_dr.v; - - /* Compute the relative velocity. (This is 0 if the particles move away from - * each other and negative otherwise) */ - omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); - - /* Compute signal velocity */ - v_sig.v = ci.v + cj.v - vec_set1(3.0f) * omega_ij.v; - - /* Compute viscosity tensor */ - Pi_ij.v = -balsara.v * vec_set1(const_viscosity_alpha) * v_sig.v * - omega_ij.v / (pirho.v + pjrho.v); - Pi_ij.v *= (wi_dr.v + wj_dr.v); - - /* Get the common factor out. */ - w.v = ri.v * ((piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v) + - vec_set1(0.25f) * Pi_ij.v); - - /* Use the force, Luke! */ - for (k = 0; k < 3; k++) { - f.v = dx[k].v * w.v; - pia[k].v = mj.v * f.v; - pja[k].v = mi.v * f.v; - } - - /* Get the time derivative for u. */ - piu_dt.v = - mj.v * dvdr.v * (piPOrho2.v * wi_dr.v + vec_set1(0.125f) * Pi_ij.v); - pju_dt.v = - mi.v * dvdr.v * (pjPOrho2.v * wj_dr.v + vec_set1(0.125f) * Pi_ij.v); - - /* compute the signal velocity (this is always symmetrical). */ - vi_sig.v = vec_fmax(vi_sig.v, v_sig.v); - vj_sig.v = vec_fmax(vj_sig.v, v_sig.v); - - /* Store the forces back on the particles. */ - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->force.u_dt += piu_dt.f[k]; - pj[k]->force.u_dt += pju_dt.f[k]; - pi[k]->force.h_dt -= pih_dt.f[k]; - pj[k]->force.h_dt -= pjh_dt.f[k]; - pi[k]->force.v_sig = vi_sig.f[k]; - pj[k]->force.v_sig = vj_sig.f[k]; - for (j = 0; j < 3; j++) { - pi[k]->a[j] -= pia[j].f[k]; - pj[k]->a[j] += pja[j].f[k]; - } - } - -#else - - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_force(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]); - -#endif -} /** * @brief Force loop (non-symmetric version) @@ -643,257 +282,77 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { - float r = sqrtf(r2), ri = 1.0f / r; - float xi, xj; - float hi_inv, hi2_inv; - float hj_inv, hj2_inv; - float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr; - float /*mi,*/ mj, POrho2i, POrho2j, rhoi, rhoj; - float v_sig, omega_ij, Pi_ij; - float f; - int k; - - /* Get some values in local variables. */ - // mi = pi->mass; - mj = pj->mass; - rhoi = pi->rho; - rhoj = pj->rho; - POrho2i = pi->force.POrho2; - POrho2j = pj->force.POrho2; - - /* Get the kernel for hi. */ - hi_inv = 1.0f / hi; - hi2_inv = hi_inv * hi_inv; - xi = r * hi_inv; - kernel_deval(xi, &wi, &wi_dx); - wi_dr = hi2_inv * hi2_inv * wi_dx; - - /* Get the kernel for hj. */ - hj_inv = 1.0f / hj; - hj2_inv = hj_inv * hj_inv; - xj = r * hj_inv; - kernel_deval(xj, &wj, &wj_dx); - wj_dr = hj2_inv * hj2_inv * wj_dx; - - /* Compute dv dot r. */ - dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] + - (pi->v[2] - pj->v[2]) * dx[2]; - dvdr *= ri; - - /* Compute the relative velocity. (This is 0 if the particles move away from - * each other and negative otherwise) */ - omega_ij = fminf(dvdr, 0.f); - - /* Compute signal velocity */ - v_sig = pi->force.c + pj->force.c - 3.0f * omega_ij; - - /* Compute viscosity tensor */ - Pi_ij = -const_viscosity_alpha * v_sig * omega_ij / (rhoi + rhoj); - - /* Apply balsara switch */ - Pi_ij *= (pi->force.balsara + pj->force.balsara); - - /* Get the common factor out. */ - w = ri * - ((POrho2i * wi_dr + POrho2j * wj_dr) + 0.25f * Pi_ij * (wi_dr + wj_dr)); - - /* Use the force, Luke! */ - for (k = 0; k < 3; k++) { - f = dx[k] * w; - pi->a[k] -= mj * f; - } - - /* Get the time derivative for u. */ - pi->force.u_dt += - mj * dvdr * (POrho2i * wi_dr + 0.125f * Pi_ij * (wi_dr + wj_dr)); - - /* Get the time derivative for h. */ - pi->force.h_dt -= mj * dvdr / rhoj * wi_dr; - - /* Update the signal velocity. */ - pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); - pj->force.v_sig = fmaxf(pj->force.v_sig, v_sig); + /* float r = sqrtf(r2), ri = 1.0f / r; */ + /* float xi, xj; */ + /* float hi_inv, hi2_inv; */ + /* float hj_inv, hj2_inv; */ + /* float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr; */ + /* float /\*mi,*\/ mj, POrho2i, POrho2j, rhoi, rhoj; */ + /* float v_sig, omega_ij, Pi_ij; */ + /* float f; */ + /* int k; */ + + /* /\* Get some values in local variables. *\/ */ + /* // mi = pi->mass; */ + /* mj = pj->mass; */ + /* rhoi = pi->rho; */ + /* rhoj = pj->rho; */ + /* POrho2i = pi->force.POrho2; */ + /* POrho2j = pj->force.POrho2; */ + + /* /\* Get the kernel for hi. *\/ */ + /* hi_inv = 1.0f / hi; */ + /* hi2_inv = hi_inv * hi_inv; */ + /* xi = r * hi_inv; */ + /* kernel_deval(xi, &wi, &wi_dx); */ + /* wi_dr = hi2_inv * hi2_inv * wi_dx; */ + + /* /\* Get the kernel for hj. *\/ */ + /* hj_inv = 1.0f / hj; */ + /* hj2_inv = hj_inv * hj_inv; */ + /* xj = r * hj_inv; */ + /* kernel_deval(xj, &wj, &wj_dx); */ + /* wj_dr = hj2_inv * hj2_inv * wj_dx; */ + + /* /\* Compute dv dot r. *\/ */ + /* dvdr = (pi->v[0] - pj->v[0]) * dx[0] + (pi->v[1] - pj->v[1]) * dx[1] + */ + /* (pi->v[2] - pj->v[2]) * dx[2]; */ + /* dvdr *= ri; */ + + /* /\* Compute the relative velocity. (This is 0 if the particles move away from */ + /* * each other and negative otherwise) *\/ */ + /* omega_ij = fminf(dvdr, 0.f); */ + + /* /\* Compute signal velocity *\/ */ + /* v_sig = pi->force.c + pj->force.c - 3.0f * omega_ij; */ + + /* /\* Compute viscosity tensor *\/ */ + /* Pi_ij = -const_viscosity_alpha * v_sig * omega_ij / (rhoi + rhoj); */ + + /* /\* Apply balsara switch *\/ */ + /* Pi_ij *= (pi->force.balsara + pj->force.balsara); */ + + /* /\* Get the common factor out. *\/ */ + /* w = ri * */ + /* ((POrho2i * wi_dr + POrho2j * wj_dr) + 0.25f * Pi_ij * (wi_dr + wj_dr)); */ + + /* /\* Use the force, Luke! *\/ */ + /* for (k = 0; k < 3; k++) { */ + /* f = dx[k] * w; */ + /* pi->a[k] -= mj * f; */ + /* } */ + + /* /\* Get the time derivative for u. *\/ */ + /* pi->force.u_dt += */ + /* mj * dvdr * (POrho2i * wi_dr + 0.125f * Pi_ij * (wi_dr + wj_dr)); */ + + /* /\* Get the time derivative for h. *\/ */ + /* pi->force.h_dt -= mj * dvdr / rhoj * wi_dr; */ + + /* /\* Update the signal velocity. *\/ */ + /* pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); */ + /* pj->force.v_sig = fmaxf(pj->force.v_sig, v_sig); */ } -/** - * @brief Force loop (Vectorized non-symmetric version) - */ - -__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( - float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, - struct part **pj) { - -#ifdef VECTORIZE - - vector r, r2, ri; - vector xi, xj; - vector hi, hj, hi_inv, hj_inv; - vector hi2_inv, hj2_inv; - vector wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, dvdr; - vector w; - vector piPOrho2, pjPOrho2, pirho, pjrho; - vector mj; - vector f; - vector dx[3]; - vector vi[3], vj[3]; - vector pia[3]; - vector piu_dt; - vector pih_dt; - vector ci, cj, v_sig, vi_sig, vj_sig; - vector omega_ij, Pi_ij, balsara; - int j, k; - -/* Load stuff. */ -#if VEC_SIZE == 8 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - piPOrho2.v = - vec_set(pi[0]->force.POrho2, pi[1]->force.POrho2, pi[2]->force.POrho2, - pi[3]->force.POrho2, pi[4]->force.POrho2, pi[5]->force.POrho2, - pi[6]->force.POrho2, pi[7]->force.POrho2); - pjPOrho2.v = - vec_set(pj[0]->force.POrho2, pj[1]->force.POrho2, pj[2]->force.POrho2, - pj[3]->force.POrho2, pj[4]->force.POrho2, pj[5]->force.POrho2, - pj[6]->force.POrho2, pj[7]->force.POrho2); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho, pi[4]->rho, - pi[5]->rho, pi[6]->rho, pi[7]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho, pj[4]->rho, - pj[5]->rho, pj[6]->rho, pj[7]->rho); - ci.v = - vec_set(pi[0]->force.c, pi[1]->force.c, pi[2]->force.c, pi[3]->force.c, - pi[4]->force.c, pi[5]->force.c, pi[6]->force.c, pi[7]->force.c); - cj.v = - vec_set(pj[0]->force.c, pj[1]->force.c, pj[2]->force.c, pj[3]->force.c, - pj[4]->force.c, pj[5]->force.c, pj[6]->force.c, pj[7]->force.c); - vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, - pi[3]->force.v_sig, pi[4]->force.v_sig, pi[5]->force.v_sig, - pi[6]->force.v_sig, pi[7]->force.v_sig); - vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig, - pj[3]->force.v_sig, pj[4]->force.v_sig, pj[5]->force.v_sig, - pj[6]->force.v_sig, pj[7]->force.v_sig); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k], - pi[4]->v[k], pi[5]->v[k], pi[6]->v[k], pi[7]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k], - pj[4]->v[k], pj[5]->v[k], pj[6]->v[k], pj[7]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); - balsara.v = - vec_set(pi[0]->force.balsara, pi[1]->force.balsara, pi[2]->force.balsara, - pi[3]->force.balsara, pi[4]->force.balsara, pi[5]->force.balsara, - pi[6]->force.balsara, pi[7]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara, - pj[3]->force.balsara, pj[4]->force.balsara, pj[5]->force.balsara, - pj[6]->force.balsara, pj[7]->force.balsara); -#elif VEC_SIZE == 4 - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - piPOrho2.v = vec_set(pi[0]->force.POrho2, pi[1]->force.POrho2, - pi[2]->force.POrho2, pi[3]->force.POrho2); - pjPOrho2.v = vec_set(pj[0]->force.POrho2, pj[1]->force.POrho2, - pj[2]->force.POrho2, pj[3]->force.POrho2); - pirho.v = vec_set(pi[0]->rho, pi[1]->rho, pi[2]->rho, pi[3]->rho); - pjrho.v = vec_set(pj[0]->rho, pj[1]->rho, pj[2]->rho, pj[3]->rho); - ci.v = - vec_set(pi[0]->force.c, pi[1]->force.c, pi[2]->force.c, pi[3]->force.c); - cj.v = - vec_set(pj[0]->force.c, pj[1]->force.c, pj[2]->force.c, pj[3]->force.c); - vi_sig.v = vec_set(pi[0]->force.v_sig, pi[1]->force.v_sig, pi[2]->force.v_sig, - pi[3]->force.v_sig); - vj_sig.v = vec_set(pj[0]->force.v_sig, pj[1]->force.v_sig, pj[2]->force.v_sig, - pj[3]->force.v_sig); - for (k = 0; k < 3; k++) { - vi[k].v = vec_set(pi[0]->v[k], pi[1]->v[k], pi[2]->v[k], pi[3]->v[k]); - vj[k].v = vec_set(pj[0]->v[k], pj[1]->v[k], pj[2]->v[k], pj[3]->v[k]); - } - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); - balsara.v = vec_set(pi[0]->force.balsara, pi[1]->force.balsara, - pi[2]->force.balsara, pi[3]->force.balsara) + - vec_set(pj[0]->force.balsara, pj[1]->force.balsara, - pj[2]->force.balsara, pj[3]->force.balsara); -#else -#error -#endif - - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ri.v = vec_rsqrt(r2.v); - ri.v = ri.v - vec_set1(0.5f) * ri.v * (r2.v * ri.v * ri.v - vec_set1(1.0f)); - r.v = r2.v * ri.v; - - /* Get the kernel for hi. */ - hi.v = vec_load(Hi); - hi_inv.v = vec_rcp(hi.v); - hi_inv.v = hi_inv.v - hi_inv.v * (hi.v * hi_inv.v - vec_set1(1.0f)); - hi2_inv.v = hi_inv.v * hi_inv.v; - xi.v = r.v * hi_inv.v; - kernel_deval_vec(&xi, &wi, &wi_dx); - wi_dr.v = hi2_inv.v * hi2_inv.v * wi_dx.v; - - /* Get the kernel for hj. */ - hj.v = vec_load(Hj); - hj_inv.v = vec_rcp(hj.v); - hj_inv.v = hj_inv.v - hj_inv.v * (hj.v * hj_inv.v - vec_set1(1.0f)); - hj2_inv.v = hj_inv.v * hj_inv.v; - xj.v = r.v * hj_inv.v; - kernel_deval_vec(&xj, &wj, &wj_dx); - wj_dr.v = hj2_inv.v * hj2_inv.v * wj_dx.v; - - /* Compute dv dot r. */ - dvdr.v = ((vi[0].v - vj[0].v) * dx[0].v) + ((vi[1].v - vj[1].v) * dx[1].v) + - ((vi[2].v - vj[2].v) * dx[2].v); - dvdr.v = dvdr.v * ri.v; - - /* Get the time derivative for h. */ - pih_dt.v = mj.v / pjrho.v * dvdr.v * wi_dr.v; - - /* Compute the relative velocity. (This is 0 if the particles move away from - * each other and negative otherwise) */ - omega_ij.v = vec_fmin(dvdr.v, vec_set1(0.0f)); - - /* Compute signal velocity */ - v_sig.v = ci.v + cj.v - vec_set1(3.0f) * omega_ij.v; - - /* Compute viscosity tensor */ - Pi_ij.v = -balsara.v * vec_set1(const_viscosity_alpha) * v_sig.v * - omega_ij.v / (pirho.v + pjrho.v); - Pi_ij.v *= (wi_dr.v + wj_dr.v); - - /* Get the common factor out. */ - w.v = ri.v * ((piPOrho2.v * wi_dr.v + pjPOrho2.v * wj_dr.v) + - vec_set1(0.25f) * Pi_ij.v); - - /* Use the force, Luke! */ - for (k = 0; k < 3; k++) { - f.v = dx[k].v * w.v; - pia[k].v = mj.v * f.v; - } - - /* Get the time derivative for u. */ - piu_dt.v = - mj.v * dvdr.v * (piPOrho2.v * wi_dr.v + vec_set1(0.125f) * Pi_ij.v); - - /* compute the signal velocity (this is always symmetrical). */ - vi_sig.v = vec_fmax(vi_sig.v, v_sig.v); - vj_sig.v = vec_fmax(vj_sig.v, v_sig.v); - - /* Store the forces back on the particles. */ - for (k = 0; k < VEC_SIZE; k++) { - pi[k]->force.u_dt += piu_dt.f[k]; - pi[k]->force.h_dt -= pih_dt.f[k]; - pi[k]->force.v_sig = vi_sig.f[k]; - pj[k]->force.v_sig = vj_sig.f[k]; - for (j = 0; j < 3; j++) pi[k]->a[j] -= pia[j].f[k]; - } - -#else - - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_nonsym_force(R2[k], &Dx[3 * k], Hi[k], Hj[k], pi[k], pj[k]); - -#endif -} #endif /* SWIFT_RUNNER_IACT_LEGACY_H */ diff --git a/src/hydro/Gadget2/hydro_part.h b/src/hydro/Gadget2/hydro_part.h index 846534e6f511d0b13516681d66a6d8bdbd043e4e..f0139b97492a060abb7edfc0014bdef55211ccbd 100644 --- a/src/hydro/Gadget2/hydro_part.h +++ b/src/hydro/Gadget2/hydro_part.h @@ -59,8 +59,18 @@ struct part { /* Particle time of end of time-step. */ float t_end; - /* Particle internal energy. */ - float u; + struct { + + /* Number of neighbours */ + float wcount; + + /* Number of neighbours spatial derivative */ + float wcount_dh; + + } density; + + /* Particle entropy. */ + float entropy; /* Particle density. */ float rho; @@ -69,52 +79,30 @@ struct part { */ float rho_dh; - /* Store density/force specific stuff. */ - union { - - struct { - - /* Particle velocity divergence. */ - float div_v; - - /* Derivative of particle number density. */ - float wcount_dh; - - /* Particle velocity curl. */ - float curl_v[3]; - - /* Particle number density. */ - float wcount; - - } density; - - struct { - - /* Balsara switch */ - float balsara; - - /* Aggregate quantities. */ - float POrho2; - - /* Change in particle energy over time. */ - float u_dt; - - /* Change in smoothing length over time. */ - float h_dt; - - /* Signal velocity */ - float v_sig; - - /* Sound speed */ - float c; - - } force; - }; - - /* Particle mass. */ float mass; + /* Particle pressure */ + float pressure; + + /* Entropy time derivative */ + float entropy_dt; + + /* Velocity divergence */ + float div_v; + + /* Velocity curl */ + float curl_v; + float rot_v[3]; + + /* Signal velocity */ + float v_sig; + + /* temp */ + struct { + float h_dt; //MATTHIEU + } force; + /* Particle ID. */ unsigned long long id; diff --git a/src/runner.c b/src/runner.c index 88675dcd776bd902e1a7e184ed16e95b00868672..e818a73fefd49f5f5e10bc27f992bfb7382f7ad7 100644 --- a/src/runner.c +++ b/src/runner.c @@ -36,6 +36,7 @@ /* Local headers. */ #include "atomic.h" #include "const.h" +#include "debug.h" #include "engine.h" #include "error.h" #include "scheduler.h" @@ -587,7 +588,7 @@ void runner_doghost(struct runner *r, struct cell *c) { if (p->t_end <= t_end) { /* Finish the density calculation */ - hydro_end_density(p); + hydro_end_density(p, t_end); /* If no derivative, double the smoothing length. */ if (p->density.wcount_dh == 0.0f) h_corr = p->h; @@ -620,7 +621,9 @@ void runner_doghost(struct runner *r, struct cell *c) { } /* We now have a particle whose smoothing length has converged */ - + //if(p->id == 1000) + // printParticle(parts, 1000, count); + /* As of here, particle force variables will be set. Do _NOT_ try to read any particle density variables! */ diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 12b54c88b6cddfc623a6f00c3108f7ab061af61b..f14cb521718ff481957e12134eb773368050eb1a 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -1400,6 +1400,9 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { /* Get a pointer to the ith particle. */ pi = &parts[pid]; + if(pi->id == 1000) message("oO 1000"); + if(pi->id == 515050) message("oO 515050"); + /* Get the particle position and radius. */ for (k = 0; k < 3; k++) pix[k] = pi->x[k]; hi = pi->h; @@ -1624,6 +1627,10 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { /* Get a pointer to the ith particle. */ pi = &parts[pid]; + if(pi->id == 1000) message("oO 1000"); + if(pi->id == 515050) message("oO 515050"); + + /* Get the particle position and radius. */ for (k = 0; k < 3; k++) pix[k] = pi->x[k]; hi = pi->h; diff --git a/src/single_io.c b/src/single_io.c index 10b84afeeeef77e72031536876ff9ebb4f37ca70..09a33ed96a77630fa72cf44668ff9c8a7a25fbc3 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -227,7 +227,7 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, int* N, readArray(h_grp, "Velocities", FLOAT, *N, 3, *parts, v, COMPULSORY); readArray(h_grp, "Masses", FLOAT, *N, 1, *parts, mass, COMPULSORY); readArray(h_grp, "SmoothingLength", FLOAT, *N, 1, *parts, h, COMPULSORY); - readArray(h_grp, "InternalEnergy", FLOAT, *N, 1, *parts, u, COMPULSORY); + readArray(h_grp, "InternalEnergy", FLOAT, *N, 1, *parts, entropy, COMPULSORY); //MATTHIEU readArray(h_grp, "ParticleIDs", ULONGLONG, *N, 1, *parts, id, COMPULSORY); // readArray(h_grp, "TimeStep", FLOAT, *N, 1, *parts, dt, OPTIONAL); readArray(h_grp, "Acceleration", FLOAT, *N, 3, *parts, a, OPTIONAL); @@ -469,8 +469,8 @@ void write_output_single(struct engine* e, struct UnitSystem* us) { UNIT_CONV_MASS); writeArray(h_grp, fileName, xmfFile, "SmoothingLength", FLOAT, N, 1, parts, h, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "InternalEnergy", FLOAT, N, 1, parts, u, - us, UNIT_CONV_ENERGY_PER_UNIT_MASS); + writeArray(h_grp, fileName, xmfFile, "InternalEnergy", FLOAT, N, 1, parts, entropy, + us, UNIT_CONV_ENERGY_PER_UNIT_MASS); //MATTHIEU writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, N, 1, parts, id, us, UNIT_CONV_NO_UNITS); /* writeArray(h_grp, fileName, xmfFile, "TimeStep", FLOAT, N, 1, parts, dt, diff --git a/src/space.c b/src/space.c index 0d077b65652d9cfc3cfc9d2dbf595c98e4f28ffd..51391cbdea184f7bcfdc35fbb43d7f61937334ee 100644 --- a/src/space.c +++ b/src/space.c @@ -1198,7 +1198,7 @@ void space_init(struct space *s, double dim[3], struct part *parts, int N, xp->v_full[0] = p->v[0]; xp->v_full[1] = p->v[1]; xp->v_full[2] = p->v[2]; - xp->u_hdt = p->u; + //xp->u_hdt = p->u; // MATTHIEU } /* For now, clone the parts to make gparts. */ diff --git a/src/tools.c b/src/tools.c index 70c9e1def1946de7aaf00a209fe0983edd54d5f1..27be360ad992cc74ad7525339466a1677e997464 100644 --- a/src/tools.c +++ b/src/tools.c @@ -300,20 +300,22 @@ void density_dump(int N) { int k; float r2[4] = {0.0f, 0.0f, 0.0f, 0.0f}, hi[4], hj[4]; - struct part *pi[4], *pj[4], Pi[4], Pj[4]; + struct part /**pi[4], *pj[4],*/ Pi[4], Pj[4]; /* Init the interaction parameters. */ for (k = 0; k < 4; k++) { Pi[k].mass = 1.0f; Pi[k].rho = 0.0f; Pi[k].density.wcount = 0.0f; + Pi[k].id = k; Pj[k].mass = 1.0f; Pj[k].rho = 0.0f; Pj[k].density.wcount = 0.0f; + Pj[k].id = k+4; hi[k] = 1.0; hj[k] = 1.0; - pi[k] = &Pi[k]; - pj[k] = &Pj[k]; + //pi[k] = &Pi[k]; + //pj[k] = &Pj[k]; } for (k = 0; k <= N; k++) { @@ -325,17 +327,19 @@ void density_dump(int N) { Pj[0].density.wcount = 0; runner_iact_density(r2[0], NULL, hi[0], hj[0], &Pi[0], &Pj[0]); printf(" %e %e %e", r2[0], Pi[0].density.wcount, Pj[0].density.wcount); - Pi[0].density.wcount = 0; - Pj[0].density.wcount = 0; - Pi[1].density.wcount = 0; - Pj[1].density.wcount = 0; - Pi[2].density.wcount = 0; - Pj[2].density.wcount = 0; - Pi[3].density.wcount = 0; - Pj[3].density.wcount = 0; - runner_iact_vec_density(r2, NULL, hi, hj, pi, pj); - printf(" %e %e %e %e\n", Pi[0].density.wcount, Pi[1].density.wcount, - Pi[2].density.wcount, Pi[3].density.wcount); + /* Pi[0].density.wcount = 0; */ + /* Pj[0].density.wcount = 0; */ + /* Pi[1].density.wcount = 0; */ + /* Pj[1].density.wcount = 0; */ + /* Pi[2].density.wcount = 0; */ + /* Pj[2].density.wcount = 0; */ + /* Pi[3].density.wcount = 0; */ + /* Pj[3].density.wcount = 0; */ + /* runner_iact_vec_density(r2, NULL, hi, hj, pi, pj); */ + /* printf(" %e %e %e %e\n", Pi[0].density.wcount, Pi[1].density.wcount, */ + /* Pi[2].density.wcount, Pi[3].density.wcount); */ + + // MATTHIEU } } @@ -360,13 +364,8 @@ void engine_single_density(double *dim, long long int pid, /* Clear accumulators. */ ih = 1.0f / p.h; - p.rho = 0.0f; - p.rho_dh = 0.0f; - p.density.wcount = 0.0f; - p.density.wcount_dh = 0.0f; - p.density.div_v = 0.0; - for (k = 0; k < 3; k++) p.density.curl_v[k] = 0.0; - + hydro_init_part(&p); + /* Loop over all particle pairs (force). */ for (k = 0; k < N; k++) { if (parts[k].id == p.id) continue; @@ -411,13 +410,8 @@ void engine_single_force(double *dim, long long int pid, p = parts[k]; /* Clear accumulators. */ - p.a[0] = 0.0f; - p.a[1] = 0.0f; - p.a[2] = 0.0f; - p.force.u_dt = 0.0f; - p.force.h_dt = 0.0f; - p.force.v_sig = 0.0f; - + hydro_reset_acceleration(&p); + /* Loop over all particle pairs (force). */ for (k = 0; k < N; k++) { // for ( k = N-1 ; k >= 0 ; k-- ) { @@ -435,18 +429,13 @@ void engine_single_force(double *dim, long long int pid, r2 = fdx[0] * fdx[0] + fdx[1] * fdx[1] + fdx[2] * fdx[2]; if (r2 < p.h * p.h * kernel_gamma2 || r2 < parts[k].h * parts[k].h * kernel_gamma2) { - p.a[0] = 0.0f; - p.a[1] = 0.0f; - p.a[2] = 0.0f; - p.force.u_dt = 0.0f; - p.force.h_dt = 0.0f; - p.force.v_sig = 0.0f; + hydro_reset_acceleration(&p); runner_iact_nonsym_force(r2, fdx, p.h, parts[k].h, &p, &parts[k]); } } /* Dump the result. */ - message("part %lli (h=%e) has a=[%.3e,%.3e,%.3e], udt=%e.", p.id, p.h, p.a[0], - p.a[1], p.a[2], p.force.u_dt); + message("part %lli (h=%e) has a=[%.3e,%.3e,%.3e]", p.id, p.h, p.a[0], + p.a[1], p.a[2]); fflush(stdout); }