diff --git a/.gitignore b/.gitignore index 79937010d21f73e20f1f60a80557a60235f0118e..c75a8f8fa2c16b8c092ab1a361b003d1e3384032 100644 --- a/.gitignore +++ b/.gitignore @@ -100,6 +100,7 @@ tests/testVoronoi3D tests/testDump tests/testLogger tests/benchmarkInteractions +tests/testGravityDerivatives theory/latex/swift.pdf theory/SPH/Kernels/kernels.pdf diff --git a/examples/Makefile.am b/examples/Makefile.am index 5501601f95bde15484142e994dbf3d6fa475da98..496d2c005004a1a68276fafb17e618c4b3ba9f8a 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -1,4 +1,4 @@ -# This file is part of SWIFT. +# tHIS FIle is part of SWIFT. # Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk), # Matthieu Schaller (matthieu.schaller@durham.ac.uk). # @@ -95,9 +95,7 @@ EXTRA_DIST = BigCosmoVolume/makeIC.py \ EXTRA_DIST += parameter_example.yml # Scripts to plot task graphs -EXTRA_DIST += plot_tasks_MPI.py plot_tasks.py \ - analyse_tasks_MPI.py analyse_tasks.py \ - process_plot_tasks_MPI process_plot_tasks +EXTRA_DIST += plot_tasks.py analyse_tasks.py process_plot_tasks_MPI process_plot_tasks # Scripts to plot threadpool 'task' graphs EXTRA_DIST += analyse_threadpool_tasks.py \ diff --git a/src/cache.h b/src/cache.h index 70e1736dbcda070cf62a5e84e51cef67c3ae5716..22b79b1e49230c434a852da16512493e242ba3f9 100644 --- a/src/cache.h +++ b/src/cache.h @@ -370,11 +370,11 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( #ifdef SWIFT_DEBUG_CHECKS const float shift_threshold_x = - 4. * ci->width[0] * (1. + 2. * space_maxreldx); + 2. * ci->width[0] + 2. * max(ci->dx_max_part, cj->dx_max_part); const float shift_threshold_y = - 4. * ci->width[1] * (1. + 2. * space_maxreldx); + 2. * ci->width[1] + 2. * max(ci->dx_max_part, cj->dx_max_part); const float shift_threshold_z = - 4. * ci->width[2] * (1. + 2. * space_maxreldx); + 2. * ci->width[2] + 2. * max(ci->dx_max_part, cj->dx_max_part); /* Make sure that particle positions have been shifted correctly. */ for (int i = 0; i < ci_cache_count; i++) { diff --git a/src/cell.c b/src/cell.c index 07d7f280f9165b2934d4bff38ff21fdac2cab92d..69b5c21b8e6d806408456da86e584461ac996399 100644 --- a/src/cell.c +++ b/src/cell.c @@ -82,73 +82,6 @@ int cell_getsize(struct cell *c) { return count; } -/** - * @brief Unpack the data of a given cell and its sub-cells. - * - * @param pc An array of packed #pcell. - * @param c The #cell in which to unpack the #pcell. - * @param s The #space in which the cells are created. - * - * @return The number of cells created. - */ -int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { - -#ifdef WITH_MPI - - /* Unpack the current pcell. */ - c->h_max = pc->h_max; - c->ti_end_min = pc->ti_end_min; - c->ti_end_max = pc->ti_end_max; - c->ti_old_part = pc->ti_old_part; - c->ti_old_gpart = pc->ti_old_gpart; - c->count = pc->count; - c->gcount = pc->gcount; - c->scount = pc->scount; - c->tag = pc->tag; - - /* Number of new cells created. */ - int count = 1; - - /* Fill the progeny recursively, depth-first. */ - for (int k = 0; k < 8; k++) - if (pc->progeny[k] >= 0) { - struct cell *temp; - space_getcells(s, 1, &temp); - temp->count = 0; - temp->gcount = 0; - temp->scount = 0; - temp->loc[0] = c->loc[0]; - temp->loc[1] = c->loc[1]; - temp->loc[2] = c->loc[2]; - temp->width[0] = c->width[0] / 2; - temp->width[1] = c->width[1] / 2; - temp->width[2] = c->width[2] / 2; - temp->dmin = c->dmin / 2; - if (k & 4) temp->loc[0] += temp->width[0]; - if (k & 2) temp->loc[1] += temp->width[1]; - if (k & 1) temp->loc[2] += temp->width[2]; - temp->depth = c->depth + 1; - temp->split = 0; - temp->dx_max_part = 0.f; - temp->dx_max_gpart = 0.f; - temp->dx_max_sort = 0.f; - temp->nodeID = c->nodeID; - temp->parent = c; - c->progeny[k] = temp; - c->split = 1; - count += cell_unpack(&pc[pc->progeny[k]], temp, s); - } - - /* Return the total number of unpacked cells. */ - c->pcell_size = count; - return count; - -#else - error("SWIFT was not compiled with MPI support."); - return 0; -#endif -} - /** * @brief Link the cells recursively to the given #part array. * @@ -233,7 +166,7 @@ int cell_link_sparts(struct cell *c, struct spart *sparts) { * * @return The number of packed cells. */ -int cell_pack(struct cell *c, struct pcell *pc) { +int cell_pack(struct cell *restrict c, struct pcell *restrict pc) { #ifdef WITH_MPI @@ -267,26 +200,97 @@ int cell_pack(struct cell *c, struct pcell *pc) { #endif } +/** + * @brief Unpack the data of a given cell and its sub-cells. + * + * @param pc An array of packed #pcell. + * @param c The #cell in which to unpack the #pcell. + * @param s The #space in which the cells are created. + * + * @return The number of cells created. + */ +int cell_unpack(struct pcell *restrict pc, struct cell *restrict c, + struct space *restrict s) { + +#ifdef WITH_MPI + + /* Unpack the current pcell. */ + c->h_max = pc->h_max; + c->ti_end_min = pc->ti_end_min; + c->ti_end_max = pc->ti_end_max; + c->ti_old_part = pc->ti_old_part; + c->ti_old_gpart = pc->ti_old_gpart; + c->count = pc->count; + c->gcount = pc->gcount; + c->scount = pc->scount; + c->tag = pc->tag; + + /* Number of new cells created. */ + int count = 1; + + /* Fill the progeny recursively, depth-first. */ + for (int k = 0; k < 8; k++) + if (pc->progeny[k] >= 0) { + struct cell *temp; + space_getcells(s, 1, &temp); + temp->count = 0; + temp->gcount = 0; + temp->scount = 0; + temp->loc[0] = c->loc[0]; + temp->loc[1] = c->loc[1]; + temp->loc[2] = c->loc[2]; + temp->width[0] = c->width[0] / 2; + temp->width[1] = c->width[1] / 2; + temp->width[2] = c->width[2] / 2; + temp->dmin = c->dmin / 2; + if (k & 4) temp->loc[0] += temp->width[0]; + if (k & 2) temp->loc[1] += temp->width[1]; + if (k & 1) temp->loc[2] += temp->width[2]; + temp->depth = c->depth + 1; + temp->split = 0; + temp->dx_max_part = 0.f; + temp->dx_max_gpart = 0.f; + temp->dx_max_sort = 0.f; + temp->nodeID = c->nodeID; + temp->parent = c; + c->progeny[k] = temp; + c->split = 1; + count += cell_unpack(&pc[pc->progeny[k]], temp, s); + } + + /* Return the total number of unpacked cells. */ + c->pcell_size = count; + return count; + +#else + error("SWIFT was not compiled with MPI support."); + return 0; +#endif +} + /** * @brief Pack the time information of the given cell and all it's sub-cells. * * @param c The #cell. - * @param ti_ends (output) The time information we pack into + * @param pcells (output) The end-of-timestep information we pack into * * @return The number of packed cells. */ -int cell_pack_ti_ends(struct cell *c, integertime_t *ti_ends) { +int cell_pack_end_step(struct cell *restrict c, + struct pcell_step *restrict pcells) { #ifdef WITH_MPI /* Pack this cell's data. */ - ti_ends[0] = c->ti_end_min; + pcells[0].ti_end_min = c->ti_end_min; + pcells[0].dx_max_part = c->dx_max_part; + pcells[0].dx_max_gpart = c->dx_max_gpart; /* Fill in the progeny, depth-first recursion. */ int count = 1; for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) { - count += cell_pack_ti_ends(c->progeny[k], &ti_ends[count]); + count += cell_pack_end_step(c->progeny[k], &pcells[count]); } /* Return the number of packed values. */ @@ -302,22 +306,25 @@ int cell_pack_ti_ends(struct cell *c, integertime_t *ti_ends) { * @brief Unpack the time information of a given cell and its sub-cells. * * @param c The #cell - * @param ti_ends The time information to unpack + * @param pcells The end-of-timestep information to unpack * * @return The number of cells created. */ -int cell_unpack_ti_ends(struct cell *c, integertime_t *ti_ends) { +int cell_unpack_end_step(struct cell *restrict c, + struct pcell_step *restrict pcells) { #ifdef WITH_MPI /* Unpack this cell's data. */ - c->ti_end_min = ti_ends[0]; + c->ti_end_min = pcells[0].ti_end_min; + c->dx_max_part = pcells[0].dx_max_part; + c->dx_max_gpart = pcells[0].dx_max_gpart; /* Fill in the progeny, depth-first recursion. */ int count = 1; for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) { - count += cell_unpack_ti_ends(c->progeny[k], &ti_ends[count]); + count += cell_unpack_end_step(c->progeny[k], &pcells[count]); } /* Return the number of packed values. */ diff --git a/src/cell.h b/src/cell.h index 9c6bfa3431bba5f84bbdd16c4e6a1842f924523e..efc90e90040a46b4b5b631dec190762056bed151 100644 --- a/src/cell.h +++ b/src/cell.h @@ -70,24 +70,63 @@ struct link { struct link *next; }; -/* Packed cell. */ +/** + * @brief Packed cell for information correct at rebuild time. + * + * Contains all the information for a tree walk in a non-local cell. + */ struct pcell { - /* Stats on this cell's particles. */ + /*! Maximal smoothing length. */ double h_max; - integertime_t ti_end_min, ti_end_max, ti_beg_max, ti_old_part, ti_old_gpart; - /* Number of particles in this cell. */ - int count, gcount, scount; + /*! Minimal integer end-of-timestep in this cell */ + integertime_t ti_end_min; + + /*! Maximal integer end-of-timestep in this cell */ + integertime_t ti_end_max; + + /*! Maximal integer beginning-of-timestep in this cell */ + integertime_t ti_beg_max; + + /*! Integer time of the last drift of the #part in this cell */ + integertime_t ti_old_part; + + /*! Integer time of the last drift of the #gpart in this cell */ + integertime_t ti_old_gpart; + + /*! Number of #part in this cell. */ + int count; + + /*! Number of #gpart in this cell. */ + int gcount; + + /*! Number of #spart in this cell. */ + int scount; - /* tag used for MPI communication. */ + /*! tag used for MPI communication. */ int tag; - /* Relative indices of the cell's progeny. */ + /*! Relative indices of the cell's progeny. */ int progeny[8]; } SWIFT_STRUCT_ALIGN; +/** + * @brief Cell information at the end of a time-step. + */ +struct pcell_step { + + /*! Minimal integer end-of-timestep in this cell */ + integertime_t ti_end_min; + + /*! Maximal distance any #part has travelled since last rebuild */ + float dx_max_part; + + /*! Maximal distance any #gpart has travelled since last rebuild */ + float dx_max_gpart; +}; + /** * @brief Cell within the tree structure. * @@ -389,8 +428,8 @@ int cell_slocktree(struct cell *c); void cell_sunlocktree(struct cell *c); int cell_pack(struct cell *c, struct pcell *pc); int cell_unpack(struct pcell *pc, struct cell *c, struct space *s); -int cell_pack_ti_ends(struct cell *c, integertime_t *ti_ends); -int cell_unpack_ti_ends(struct cell *c, integertime_t *ti_ends); +int cell_pack_end_step(struct cell *c, struct pcell_step *pcell); +int cell_unpack_end_step(struct cell *c, struct pcell_step *pcell); int cell_getsize(struct cell *c); int cell_link_parts(struct cell *c, struct part *parts); int cell_link_gparts(struct cell *c, struct gpart *gparts); diff --git a/src/runner.c b/src/runner.c index 7d00045218ba9beb476220c142f87ae7ae9b0e10..dab985f928c7d4f77ec4cab921216682211a3195 100644 --- a/src/runner.c +++ b/src/runner.c @@ -1961,7 +1961,7 @@ void *runner_main(void *data) { break; case task_type_recv: if (t->subtype == task_subtype_tend) { - cell_unpack_ti_ends(ci, t->buff); + cell_unpack_end_step(ci, t->buff); free(t->buff); } else if (t->subtype == task_subtype_xv) { runner_do_recv_part(r, ci, 1, 1); diff --git a/src/runner_doiact.h b/src/runner_doiact.h index bcbd0b7425747ea5e9cf0bcccb1d653d8b8d8126..3ea935b950a3989fbb79499a81a54f1bf8aca432 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -808,6 +808,15 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort, cj->dx_max_sort_old); } + + /* Some constants used to checks that the parts are in the right frame */ + const float shift_threshold_x = + 2. * ci->width[0] + 2. * max(ci->dx_max_part, cj->dx_max_part); + const float shift_threshold_y = + 2. * ci->width[1] + 2. * max(ci->dx_max_part, cj->dx_max_part); + const float shift_threshold_z = + 2. * ci->width[2] + 2. * max(ci->dx_max_part, cj->dx_max_part); + #endif /* SWIFT_DEBUG_CHECKS */ /* Get some other useful values. */ @@ -859,6 +868,32 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS + /* Check that particles are in the correct frame after the shifts */ + if (pix > shift_threshold_x || pix < -shift_threshold_x) + error( + "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)", + pix, ci->width[0]); + if (piy > shift_threshold_y || piy < -shift_threshold_y) + error( + "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)", + piy, ci->width[1]); + if (piz > shift_threshold_z || piz < -shift_threshold_z) + error( + "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)", + piz, ci->width[2]); + if (pjx > shift_threshold_x || pjx < -shift_threshold_x) + error( + "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)", + pjx, ci->width[0]); + if (pjy > shift_threshold_y || pjy < -shift_threshold_y) + error( + "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)", + pjy, ci->width[1]); + if (pjz > shift_threshold_z || pjz < -shift_threshold_z) + error( + "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)", + pjz, ci->width[2]); + /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); @@ -913,6 +948,32 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS + /* Check that particles are in the correct frame after the shifts */ + if (pix > shift_threshold_x || pix < -shift_threshold_x) + error( + "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)", + pix, ci->width[0]); + if (piy > shift_threshold_y || piy < -shift_threshold_y) + error( + "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)", + piy, ci->width[1]); + if (piz > shift_threshold_z || piz < -shift_threshold_z) + error( + "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)", + piz, ci->width[2]); + if (pjx > shift_threshold_x || pjx < -shift_threshold_x) + error( + "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)", + pjx, ci->width[0]); + if (pjy > shift_threshold_y || pjy < -shift_threshold_y) + error( + "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)", + pjy, ci->width[1]); + if (pjz > shift_threshold_z || pjz < -shift_threshold_z) + error( + "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)", + pjz, ci->width[2]); + /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); @@ -1050,6 +1111,15 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort, cj->dx_max_sort_old); } + + /* Some constants used to checks that the parts are in the right frame */ + const float shift_threshold_x = + 2. * ci->width[0] + 2. * max(ci->dx_max_part, cj->dx_max_part); + const float shift_threshold_y = + 2. * ci->width[1] + 2. * max(ci->dx_max_part, cj->dx_max_part); + const float shift_threshold_z = + 2. * ci->width[2] + 2. * max(ci->dx_max_part, cj->dx_max_part); + #endif /* SWIFT_DEBUG_CHECKS */ /* Get some other useful values. */ @@ -1154,6 +1224,32 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS + /* Check that particles are in the correct frame after the shifts */ + if (pix > shift_threshold_x || pix < -shift_threshold_x) + error( + "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)", + pix, ci->width[0]); + if (piy > shift_threshold_y || piy < -shift_threshold_y) + error( + "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)", + piy, ci->width[1]); + if (piz > shift_threshold_z || piz < -shift_threshold_z) + error( + "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)", + piz, ci->width[2]); + if (pjx > shift_threshold_x || pjx < -shift_threshold_x) + error( + "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)", + pjx, ci->width[0]); + if (pjy > shift_threshold_y || pjy < -shift_threshold_y) + error( + "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)", + pjy, ci->width[1]); + if (pjz > shift_threshold_z || pjz < -shift_threshold_z) + error( + "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)", + pjz, ci->width[2]); + /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); @@ -1188,6 +1284,32 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS + /* Check that particles are in the correct frame after the shifts */ + if (pix > shift_threshold_x || pix < -shift_threshold_x) + error( + "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)", + pix, ci->width[0]); + if (piy > shift_threshold_y || piy < -shift_threshold_y) + error( + "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)", + piy, ci->width[1]); + if (piz > shift_threshold_z || piz < -shift_threshold_z) + error( + "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)", + piz, ci->width[2]); + if (pjx > shift_threshold_x || pjx < -shift_threshold_x) + error( + "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)", + pjx, ci->width[0]); + if (pjy > shift_threshold_y || pjy < -shift_threshold_y) + error( + "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)", + pjy, ci->width[1]); + if (pjz > shift_threshold_z || pjz < -shift_threshold_z) + error( + "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)", + pjz, ci->width[2]); + /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); @@ -1252,6 +1374,32 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS + /* Check that particles are in the correct frame after the shifts */ + if (pix > shift_threshold_x || pix < -shift_threshold_x) + error( + "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)", + pix, ci->width[0]); + if (piy > shift_threshold_y || piy < -shift_threshold_y) + error( + "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)", + piy, ci->width[1]); + if (piz > shift_threshold_z || piz < -shift_threshold_z) + error( + "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)", + piz, ci->width[2]); + if (pjx > shift_threshold_x || pjx < -shift_threshold_x) + error( + "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)", + pjx, ci->width[0]); + if (pjy > shift_threshold_y || pjy < -shift_threshold_y) + error( + "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)", + pjy, ci->width[1]); + if (pjz > shift_threshold_z || pjz < -shift_threshold_z) + error( + "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)", + pjz, ci->width[2]); + /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); @@ -1288,6 +1436,32 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS + /* Check that particles are in the correct frame after the shifts */ + if (pix > shift_threshold_x || pix < -shift_threshold_x) + error( + "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)", + pix, ci->width[0]); + if (piy > shift_threshold_y || piy < -shift_threshold_y) + error( + "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)", + piy, ci->width[1]); + if (piz > shift_threshold_z || piz < -shift_threshold_z) + error( + "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)", + piz, ci->width[2]); + if (pjx > shift_threshold_x || pjx < -shift_threshold_x) + error( + "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)", + pjx, ci->width[0]); + if (pjy > shift_threshold_y || pjy < -shift_threshold_y) + error( + "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)", + pjy, ci->width[1]); + if (pjz > shift_threshold_z || pjz < -shift_threshold_z) + error( + "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)", + pjz, ci->width[2]); + /* Check that particles have been drifted to the current time */ if (pi->ti_drift != e->ti_current) error("Particle pi not drifted to current time"); diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index bd385cb0ad3b657b614d8755f41e48729f13a9f9..5e982830c609bcf2ce533c0423449a3e74e96bf4 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -325,7 +325,7 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache( last_pj = active_id; /* Find the maximum index into cell i for each particle in range in cell j. */ - if (last_pj > 0) { + if (last_pj >= 0) { /* Start from the last particle in cell i. */ temp = ci->count - 1; diff --git a/src/scheduler.c b/src/scheduler.c index b1cc1a572d3344e7b1e2338c7594da0edff58919..b2466d55104dbb68d7efed008af1290c4fd73212 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -1274,10 +1274,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { case task_type_recv: #ifdef WITH_MPI if (t->subtype == task_subtype_tend) { - t->buff = malloc(sizeof(integertime_t) * t->ci->pcell_size); - err = MPI_Irecv(t->buff, t->ci->pcell_size * sizeof(integertime_t), - MPI_BYTE, t->ci->nodeID, t->flags, MPI_COMM_WORLD, - &t->req); + t->buff = malloc(sizeof(struct pcell_step) * t->ci->pcell_size); + err = MPI_Irecv( + t->buff, t->ci->pcell_size * sizeof(struct pcell_step), MPI_BYTE, + t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); } else if (t->subtype == task_subtype_xv || t->subtype == task_subtype_rho || t->subtype == task_subtype_gradient) { @@ -1309,11 +1309,11 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { case task_type_send: #ifdef WITH_MPI if (t->subtype == task_subtype_tend) { - t->buff = malloc(sizeof(integertime_t) * t->ci->pcell_size); - cell_pack_ti_ends(t->ci, t->buff); - err = MPI_Isend(t->buff, t->ci->pcell_size * sizeof(integertime_t), - MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD, - &t->req); + t->buff = malloc(sizeof(struct pcell_step) * t->ci->pcell_size); + cell_pack_end_step(t->ci, t->buff); + err = MPI_Isend( + t->buff, t->ci->pcell_size * sizeof(struct pcell_step), MPI_BYTE, + t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); } else if (t->subtype == task_subtype_xv || t->subtype == task_subtype_rho || t->subtype == task_subtype_gradient) { diff --git a/tests/testActivePair.sh.in b/tests/testActivePair.sh.in index ff8d027a469bd9bc78286b843cf2dffd3ef27ad3..108da19c5c667a6a46a81707a7508a27f39f6b38 100755 --- a/tests/testActivePair.sh.in +++ b/tests/testActivePair.sh.in @@ -4,8 +4,19 @@ echo "" rm -f brute_force_pair_active.dat swift_dopair_active.dat +echo "Running ./testActivePair -n 6 -r 1 -d 0 -f active" + ./testActivePair -n 6 -r 1 -d 0 -f active python @srcdir@/difffloat.py brute_force_active.dat swift_dopair_active.dat @srcdir@/tolerance_pair_active.dat +rm -f brute_force_pair_active.dat swift_dopair_active.dat + +# Run the special case that triggered a bug. See merge request !435. +echo "Running ./testActivePair -n 6 -r 1 -d 0 -f active -s 1506434777" + +./testActivePair -n 6 -r 1 -d 0 -f active -s 1506434777 + +python @srcdir@/difffloat.py brute_force_active.dat swift_dopair_active.dat @srcdir@/tolerance_pair_active.dat + exit $? diff --git a/tests/testGravityDerivatives.c b/tests/testGravityDerivatives.c index eb7894a464a7e733c26bf3c8a9d40dd8a709f426..1e58dcc49a9fe277ddbc6982b71cfd741992e3b3 100644 --- a/tests/testGravityDerivatives.c +++ b/tests/testGravityDerivatives.c @@ -985,7 +985,7 @@ int main() { #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 - tol *= 2.; + tol *= 2.5; /* 3rd order terms */ test(pot.D_300, D_300(dx, dy, dz, r_inv), tol, min, "D_300"); @@ -1018,10 +1018,9 @@ int main() { test(pot.D_121, D_121(dx, dy, dz, r_inv), tol, min, "D_121"); test(pot.D_112, D_112(dx, dy, dz, r_inv), tol, min, "D_112"); #endif - #if SELF_GRAVITY_MULTIPOLE_ORDER > 4 - tol *= 2.; + tol *= 2.5; /* 5th order terms */ test(pot.D_500, D_500(dx, dy, dz, r_inv), tol, min, "D_500"); diff --git a/tests/tolerance_periodic_BC_perturbed.dat b/tests/tolerance_periodic_BC_perturbed.dat index df5ee6458ba05eed08006586514467fcdb715990..d24941308ff7f8a282bce5d551180ace37f3f9d0 100644 --- a/tests/tolerance_periodic_BC_perturbed.dat +++ b/tests/tolerance_periodic_BC_perturbed.dat @@ -1,4 +1,4 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 3e-6 4e-5 1e-3 1e-2 2e-4 1e-4 1e-4 1e-4 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 6e-3 1e-4 3e-3 1e-2 6e-3 6e-3 6e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-6 7e-3 1e-4 3e-3 1e-2 6e-3 6e-3 6e-3 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-3 1e-6 1e0 5e-4 3e-3 3e-3 3e-3 diff --git a/theory/Multipoles/bibliography.bib b/theory/Multipoles/bibliography.bib index 193db42ea4947e49930b79cbd663562d971ec2d4..c3d1289584cab55cd8e0d4d0765d70e22f0fcf2e 100644 --- a/theory/Multipoles/bibliography.bib +++ b/theory/Multipoles/bibliography.bib @@ -161,4 +161,34 @@ keywords = "adaptive algorithms" } +@ARTICLE{Springel2005, + author = {{Springel}, V.}, + title = "{The cosmological simulation code GADGET-2}", + journal = {\mnras}, + eprint = {astro-ph/0505010}, + keywords = {methods: numerical, galaxies: interactions, dark matter}, + year = 2005, + month = dec, + volume = 364, + pages = {1105-1134}, + doi = {10.1111/j.1365-2966.2005.09655.x}, + adsurl = {http://adsabs.harvard.edu/abs/2005MNRAS.364.1105S}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + +@ARTICLE{Bagla2003, + author = {{Bagla}, J.~S. and {Ray}, S.}, + title = "{Performance characteristics of TreePM codes}", + journal = {\na}, + eprint = {astro-ph/0212129}, + year = 2003, + month = sep, + volume = 8, + pages = {665-677}, + doi = {10.1016/S1384-1076(03)00056-3}, + adsurl = {http://adsabs.harvard.edu/abs/2003NewA....8..665B}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + diff --git a/theory/Multipoles/fmm_standalone.tex b/theory/Multipoles/fmm_standalone.tex index d3030dc52c53eca421521023649d09522b39b7bf..65d6b522f3a6a1f9ede41091a39f4f5145cf041c 100644 --- a/theory/Multipoles/fmm_standalone.tex +++ b/theory/Multipoles/fmm_standalone.tex @@ -4,6 +4,7 @@ \usepackage{times} \usepackage{comment} +\newcommand{\gadget}{{\sc Gadget}\xspace} \newcommand{\swift}{{\sc Swift}\xspace} \newcommand{\nbody}{$N$-body\xspace} @@ -32,7 +33,7 @@ Making gravity great again. \input{potential_softening} \input{fmm_summary} -\input{gravity_derivatives} +%\input{gravity_derivatives} \input{mesh_summary} \bibliographystyle{mnras} diff --git a/theory/Multipoles/fmm_summary.tex b/theory/Multipoles/fmm_summary.tex index 1ff9ab88ada6836d6118c7cfd74e39f4d1c504b3..051f01db866994b51e86d91059b8e3e06d317835 100644 --- a/theory/Multipoles/fmm_summary.tex +++ b/theory/Multipoles/fmm_summary.tex @@ -162,21 +162,21 @@ read: All the kernels (Eqs.~\ref{eq:fmm:P2M}-\ref{eq:fmm:L2L}) are rather straightforward to evaluate as they are only made of additions and -multiplications (provided $\mathsf{D}$ can be evaluated quickly, see -Sec.~\ref{ssec:grav_derivatives}), which are extremely efficient -instructions on modern architectures. However, the fully expanded sums -can lead to rather large and prone to typo expressions. To avoid any -mishaps, we use a \texttt{python} script to generate C code in which -all the sums are unrolled and correct by construction. In \swift, we -implemented the kernels up to order $p=5$, as it proved to be accurate -enough for our purpose, but this could be extended to higher order -easily. This implies storing $56$ numbers per cell for each -$\textsf{M}$ and $\textsf{F}$ plus three numbers for the location of -the centre of mass. For leaf-cells with large numbers of particles, as -in \swift, this is a small memory overhead. One further small -improvement consists in choosing $\mathbf{z}_A$ to be the centre of -mass of cell $A$ rather than its geometrical centre. The first order -multipoles ($\mathsf{M}_{100},\mathsf{M}_{010},\mathsf{M}_{001}$) then -vanish by construction. This allows us to simplify some of the -expressions and helps reduce, albeit by a small fraction, the memory -footprint of the tree structure. +multiplications (provided $\mathsf{D}$ can be evaluated quickly), +which are extremely efficient instructions on modern +architectures. However, the fully expanded sums can lead to rather +large and prone to typo expressions. To avoid any mishaps, we use a +\texttt{python} script to generate C code in which all the sums are +unrolled and correct by construction. In \swift, we implemented the +kernels up to order $p=5$, as it proved to be accurate enough for our +purpose, but this could be extended to higher order easily. This +implies storing $56$ numbers per cell for each $\textsf{M}$ and +$\textsf{F}$ plus three numbers for the location of the centre of +mass. For leaf-cells with large numbers of particles, as in \swift, +this is a small memory overhead. One further small improvement +consists in choosing $\mathbf{z}_A$ to be the centre of mass of cell +$A$ rather than its geometrical centre. The first order multipoles +($\mathsf{M}_{100},\mathsf{M}_{010},\mathsf{M}_{001}$) then vanish by +construction. This allows us to simplify some of the expressions and +helps reduce, albeit by a small fraction, the memory footprint of the +tree structure. diff --git a/theory/Multipoles/mesh_summary.tex b/theory/Multipoles/mesh_summary.tex index 3069257c8845804d9a307cc54fffec5e36e4ae8c..5e9a0a2fed2d95474a975aed39c17c117118970f 100644 --- a/theory/Multipoles/mesh_summary.tex +++ b/theory/Multipoles/mesh_summary.tex @@ -1,33 +1,82 @@ \subsection{Coupling the FMM to a mesh for periodic long-range forces} \label{ssec:mesh_summary} -\begin{equation} - S(x) = \frac{e^x}{1 + e^x} -\end{equation} +We truncate the potential and forces computed via the FMM using a +smooth function that drops quickly to zero at some scale $r_s$ set by +the top-level mesh. Traditionally, implementations have used +expressions which are cheap to evaluate in Fourier space +\citep[e.g.][]{Bagla2003, + Springel2005}. This, however, implies a large cost for each +interaction computed within the tree as the real-space truncation +function won't have a simple analytic form that can be evaluated +efficiently by computers. Since the FMM scheme involves to not only +evaluate the forces but higher-order derivatives, a more appropiate +choice is necessary. We use the sigmoid $S(x) \equiv \frac{e^x}{1 + e^x}$ +as the basis of our truncation function write for the potential: \begin{align} - \varphi_s(r) &= \frac{1}{r}\left[2 - 2S\left(\frac{2r}{r_s}\right)\right] \nonumber\\ - &= \frac{1}{r}\left[2 - \frac{2e^{\frac{2r}{r_s}}}{1+e^{\frac{2r}{r_s}}}\right] + \varphi_s(r) &= \frac{1}{r} \chi(r, r_s) = \frac{1}{r}\times\left[2 - 2S\left(\frac{2r}{r_s}\right)\right].% \nonumber\\ + %&= \frac{1}{r}\left[2 - \frac{2e^{\frac{2r}{r_s}}}{1+e^{\frac{2r}{r_s}}}.\right] \end{align} +This function alongside the trunctation function used in \gadget is +shown on Fig.~\ref{fig:fmm:potential_short}. This choice of $S(x)$ can +seem rather cumbersome at first but writing $\alpha(x) \equiv (1+e^x)^{-1}$, +one can expressed all derivatives of $S(x)$ as simple polynomials in +$\alpha(x)$, which are easy to evaluate. For instance, in the case of +the direct force evaluation between two particles, we obtain + + \begin{align} - |\mathbf{f}_s(r)| &= \frac{1}{r^2}\left[\frac{4r}{r_s}S'\left(\frac{2r}{r_s}\right) - 2S\left(\frac{2r}{r_s}\right) + 2\right] \nonumber \\ - &= \frac{1}{r^2}\left[\frac{4r}{r_s}\frac{e^{\frac{2r}{r_s}}}{(1+e^{\frac{2r}{r_s}})^2} - \frac{2e^{\frac{2r}{r_s}}}{1+e^{\frac{2r}{r_s}}} + 2\right] + |\mathbf{f}_s(r)| &= + \frac{1}{r^2}\times\left[\frac{4r}{r_s}S'\left(\frac{2r}{r_s}\right) - + 2S\left(\frac{2r}{r_s}\right) + 2\right] \nonumber \\ + %&= + %\frac{1}{r^2}\left[\frac{4r}{r_s}\frac{e^{\frac{2r}{r_s}}}{(1+e^{\frac{2r}{r_s}})^2} + %- \frac{2e^{\frac{2r}{r_s}}}{1+e^{\frac{2r}{r_s}}} + 2\right] + &= + \frac{1}{r^2}\times 2 \left[x\alpha(x) - x\alpha(x)^2 - e^x\alpha(x) + 1\right], \end{align} +with $x\equiv2r/r_s$. The truncated force is compared to the Newtonian +force on Fig.~\ref{fig:fmm:force_short}. At distance $r<r_s/10$, the +truncation term is negligibly close to one and the truncated forces +can be replaced by their Newtonian equivalent. We use this +optimization in \swift and only compute truncated forces between pairs +of particles that are in tree-leaves larger than $1/10$ of the mesh +size or between two tree-leaves distant by more than that amount. + +The truncation function in Fourier space reads \begin{equation} - \tilde\varphi_l(k) = \frac{1}{k^2}\left[\frac{\upi}{2}kr_s\textrm{csch}\left(\frac{\upi}{2}kr_s\right) \right] + \tilde\varphi_l(k) = + \frac{1}{k^2}\left[\frac{\upi}{2}kr_s\textrm{csch}\left(\frac{\upi}{2}kr_s\right) + \right] \end{equation} \begin{figure} \includegraphics[width=\columnwidth]{potential_short.pdf} -\caption{aa} +\caption{Truncated potential used in \swift (green line) and \gadget + (yellow line) alongside the full Newtonian potential (blue dasheed + line). The green dash-dotted line corresponds to the same + trunctation function where the exponential in the sigmoid is + replaced by a sixth order Taylor expansion. At $r>4r_s$, the + truncated potential becomes negligible.} \label{fig:fmm:potential_short} \end{figure} + \begin{figure} \includegraphics[width=\columnwidth]{force_short.pdf} -\caption{bb} +\caption{used in \swift (green line) and \gadget + (yellow line) alongside the full Newtonian force term (blue dasheed + line). The green dash-dotted line corresponds to the same + trunctation function where the exponential in the sigmoid is + replaced by a sixth order Taylor expansion. At $r<r_s/10$, the + truncated forces becomes almost equal to the Newtonian ones and can + safely be replaced by their simpler form. The deviation between the + exact expression and the one obtained from Taylor expansion at + $r>r_s$ has a small impact since no pairs of particles should + interact directly over distances of order the mesh size. } \label{fig:fmm:force_short} \end{figure} diff --git a/theory/Multipoles/plot_mesh.py b/theory/Multipoles/plot_mesh.py index 6706016f73b4b6251c6d517ec89eacbb7a469417..2468625d1408556f4f5fb00db17d5a331becdac4 100644 --- a/theory/Multipoles/plot_mesh.py +++ b/theory/Multipoles/plot_mesh.py @@ -52,7 +52,7 @@ colors=['#4477AA', '#CC6677', '#DDCC77', '#117733'] # Parameters r_s = 2. -r_min = 1e-2 +r_min = 3e-2 r_max = 1.5e2 # Radius @@ -68,35 +68,56 @@ phit_newton = 1. / k**2 force_newton = 1. / r**2 def my_exp(x): - return 1. + x + (x**2 / 2.) + (x**3 / 6.) + (x**4 / 24.) + (x**5 / 120.)# + (x**6 / 720.) + return 1. + x + (x**2 / 2.) + (x**3 / 6.) + (x**4 / 24.) + (x**5 / 120.) + (x**6 / 720.) #return exp(x) - + +def term(x): # 1 / (1 + e^x) + return 1. / (1. + exp(x)) + +def my_term(x): # 1 / (1 + e^x) + #return 0.5 - 0.25 * x + (x**3 / 48.) - (x**5 / 480) + return 1. / (1. + my_exp(x)) + def csch(x): # hyperbolic cosecant return 1. / sinh(x) def sigmoid(x): - return my_exp(x) / (my_exp(x) + 1.) + return exp(x) * term(x) def d_sigmoid(x): - return my_exp(x) / ((my_exp(x) + 1)**2) + return exp(x) * term(x)**2 + +def my_sigmoid(x): + #return my_exp(x) / (my_exp(x) + 1.) + return my_exp(x) * my_term(x) + +def my_d_sigmoid(x): + #return my_exp(x) / ((my_exp(x) + 1)**2) + return my_exp(x) * my_term(x)**2 def swift_corr(x): return 2 * sigmoid( 4 * x ) - 1 -#figure() -#x = linspace(-4, 4, 100) -#plot(x, special.erf(x), '-', color=colors[0]) -#plot(x, swift_corr(x), '-', color=colors[1]) -#plot(x, x, '-', color=colors[2]) -#ylim(-1.1, 1.1) -#xlim(-4.1, 4.1) -#savefig("temp.pdf") +def swift_corr2(x): + return 2 * my_sigmoid( 4 * x ) - 1 + +figure() +x = linspace(-4, 4, 100) +plot(x, special.erf(x), '-', color=colors[2]) +plot(x, swift_corr(x), '-', color=colors[3]) +plot(x, swift_corr2(x), '-.', color=colors[3]) +plot(x, x, '-', color=colors[0]) +ylim(-1.1, 1.1) +xlim(-4.1, 4.1) +savefig("temp.pdf") # Correction in real space corr_short_gadget2 = special.erf(r / (2.*r_s)) -corr_short_swift = swift_corr(r / (2.*r_s)) -eta_short_gadget2 = special.erfc(r / 2.*r_s) + (r / (r_s * math.sqrt(math.pi))) * exp(-r**2 / (4.*r_s**2)) +corr_short_swift = swift_corr(r / (2.*r_s)) +corr_short_swift2 = swift_corr2(r / (2.*r_s)) +eta_short_gadget2 = special.erfc(r / (2.*r_s)) + (r / (r_s * math.sqrt(math.pi))) * exp(-r**2 / (4.*r_s**2)) eta_short_swift = 4. * (r / r_s) * d_sigmoid(2. * r / r_s) - 2. * sigmoid(2 * r / r_s) + 2. +eta_short_swift2 = 4. * (r / r_s) * my_d_sigmoid(2. * r / r_s) - 2. * my_sigmoid(2 * r / r_s) + 2. # Corection in Fourier space corr_long_gadget2 = exp(-k**2*r_s**2) @@ -105,8 +126,10 @@ corr_long_swift = math.pi * k * r_s * csch(0.5 * math.pi * r_s * k) / 2. # Shortrange term phi_short_gadget2 = (1. / r ) * (1. - corr_short_gadget2) phi_short_swift = (1. / r ) * (1. - corr_short_swift) +phi_short_swift2 = (1. / r ) * (1. - corr_short_swift2) force_short_gadget2 = (1. / r**2) * eta_short_gadget2 force_short_swift = (1. / r**2) * eta_short_swift +force_short_swift2 = (1. / r**2) * eta_short_swift2 # Long-range term phi_long_gadget2 = (1. / r ) * corr_short_gadget2 @@ -125,6 +148,7 @@ subplot(311, xscale="log", yscale="log") plot(r_rs, phi_newton, '--', lw=1.4, label="${\\rm Newtonian}$", color=colors[0]) plot(r_rs, phi_short_gadget2, '-', lw=1.4, label="${\\rm Gadget}$", color=colors[2]) plot(r_rs, phi_short_swift, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) +plot(r_rs, phi_short_swift2, '-.', lw=1.4, color=colors[3]) plot([1., 1.], [1e-5, 1e5], 'k-', alpha=0.5, lw=0.5) xlim(1.1*r_min/ r_s, 0.9*r_max / r_s) @@ -138,8 +162,11 @@ subplot(312, xscale="log", yscale="log") plot(r_rs, np.ones(np.size(r)), '--', lw=1.4, color=colors[0]) plot(r_rs, 1. - corr_short_gadget2, '-', lw=1.4, color=colors[2]) plot(r_rs, 1. - corr_short_swift, '-', lw=1.4, color=colors[3]) +plot(r_rs, 1. - corr_short_swift2, '-.', lw=1.4, color=colors[3]) plot(r_rs, np.ones(np.size(r))*0.01, 'k:', alpha=0.5, lw=0.5) plot([1., 1.], [-1e5, 1e5], 'k-', alpha=0.5, lw=0.5) +plot([-1, -1], [-1, -1], 'k-', lw=1.2, label="${\\textrm{Exact}~e^x}$") +plot([-1, -1], [-1, -1], 'k-.', lw=1.2, label="${6^\\textrm{th}~\\textrm{order~series}~e^x}$") yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"]) xlim(1.1*r_min/r_s, 0.9*r_max/r_s) @@ -147,10 +174,13 @@ ylim(3e-3, 1.5) #ylabel("$\\chi_s(r)$", labelpad=-3) ylabel("$\\varphi_s(r) \\times r$", labelpad=-2) +legend(loc="center left", frameon=False, handletextpad=0.1, handlelength=2.2, fontsize=7) + # 1 - Correction subplot(313, xscale="log", yscale="log") plot(r_rs, corr_short_gadget2, '-', lw=1.4, color=colors[2]) plot(r_rs, corr_short_swift, '-', lw=1.4, color=colors[3]) +plot(r_rs, corr_short_swift2, '-.', lw=1.4, color=colors[3]) plot([1., 1.], [1e-5, 1e5], 'k-', alpha=0.5, lw=0.5) plot(r_rs, np.ones(np.size(r)), 'k:', alpha=0.5, lw=0.5) @@ -161,7 +191,7 @@ ylim(3e-3, 1.5) #ylabel("$1 - \\chi_s(r)$", labelpad=-2) ylabel("$1 - \\varphi_s(r) \\times r$", labelpad=-2) yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"]) -xlabel("$r / r_s$", labelpad=-3) +xlabel("$r / r_s$", labelpad=1) savefig("potential_short.pdf") @@ -175,6 +205,7 @@ subplot(311, xscale="log", yscale="log") plot(r_rs, force_newton, '--', lw=1.4, label="${\\rm Newtonian}$", color=colors[0]) plot(r_rs, force_short_gadget2, '-', lw=1.4, label="${\\rm Gadget}$", color=colors[2]) plot(r_rs, force_short_swift, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) +plot(r_rs, force_short_swift2, '-.', lw=1.4, color=colors[3]) plot([1., 1.], [1e-5, 1e5], 'k-', alpha=0.5, lw=0.5) xlim(1.1*r_min/ r_s, 0.9*r_max / r_s) @@ -189,8 +220,11 @@ subplot(312, xscale="log", yscale="log") plot(r_rs, np.ones(np.size(r)), '--', lw=1.4, color=colors[0]) plot(r_rs, eta_short_gadget2, '-', lw=1.4, color=colors[2]) plot(r_rs, eta_short_swift, '-', lw=1.4, color=colors[3]) +plot(r_rs, eta_short_swift2, '-.', lw=1.4, color=colors[3]) plot(r_rs, np.ones(np.size(r))*0.01, 'k:', alpha=0.5, lw=0.5) plot([1., 1.], [-1e5, 1e5], 'k-', alpha=0.5, lw=0.5) +plot([-1, -1], [-1, -1], 'k-', lw=1.2, label="${\\textrm{Exact}~e^x}$") +plot([-1, -1], [-1, -1], 'k-.', lw=1.2, label="${6^\\textrm{th}~\\textrm{order~series}~e^x}$") yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"]) xlim(1.1*r_min/r_s, 0.9*r_max/r_s) @@ -198,10 +232,13 @@ ylim(3e-3, 1.5) #ylabel("$\\eta_s(r)$", labelpad=-3) ylabel("$|\\mathbf{f}_s(r)|\\times r^2$", labelpad=-2) +legend(loc="center left", frameon=False, handletextpad=0.1, handlelength=2.2, fontsize=7) + # 1 - Correction subplot(313, xscale="log", yscale="log") plot(r_rs, 1. - eta_short_gadget2, '-', lw=1.4, color=colors[2]) plot(r_rs, 1. - eta_short_swift, '-', lw=1.4, color=colors[3]) +plot(r_rs, 1. - eta_short_swift2, '-.', lw=1.4, color=colors[3]) plot([1., 1.], [1e-5, 1e5], 'k-', alpha=0.5, lw=0.5) plot(r_rs, np.ones(np.size(r)), 'k:', alpha=0.5, lw=0.5) @@ -212,7 +249,7 @@ ylim(3e-3, 1.5) #ylabel("$1 - \\eta_s(r)$", labelpad=-2) ylabel("$1 - |\\mathbf{f}_s(r)|\\times r^2$", labelpad=-3) yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"]) -xlabel("$r / r_s$", labelpad=-3) +xlabel("$r / r_s$", labelpad=1) savefig("force_short.pdf") @@ -225,7 +262,6 @@ subplot(311, xscale="log", yscale="log") plot(k_rs, phit_newton, '--', lw=1.4, label="${\\rm Newtonian}$", color=colors[0]) plot(k_rs, phit_long_gadget2, '-', lw=1.4, label="${\\rm Gadget}$", color=colors[2]) plot(k_rs, phit_long_swift, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) -plot(k_rs, -phit_long_swift, ':', lw=1.4, color=colors[3]) plot([1., 1.], [1e-5, 1e5], 'k-', alpha=0.5, lw=0.5) legend(loc="lower left", frameon=True, handletextpad=0.1, handlelength=3.2, fontsize=8) @@ -262,6 +298,6 @@ ylim(3e-3, 1.5) ylabel("$1 - k^2 \\times \\tilde{\\varphi_l}(k)$", labelpad=-3) yticks([1e-2, 1e-1, 1], ["$0.01$", "$0.1$", "$1$"]) -xlabel("$k \\times r_s$", labelpad=0) +xlabel("$k \\times r_s$", labelpad=1) savefig("potential_long.pdf") diff --git a/theory/Multipoles/potential_derivatives.tex b/theory/Multipoles/potential_derivatives.tex index d1dba978663e966f2132a65133b3c2fec5e707b6..9dc9244ca56eaa56047f561aa221dd6a761e7a5a 100644 --- a/theory/Multipoles/potential_derivatives.tex +++ b/theory/Multipoles/potential_derivatives.tex @@ -3,137 +3,237 @@ For completeness, we give here the full expression for the first few derivatives of the potential that are used in our FMM scheme. We use -the notation $\mathbf{r}=(r_x, r_y, r_z)$, $r = |\mathbf{r}|$ and -$u=r/H$. We can construct the higher order derivatives by successively -applying the "chain rule". We show representative examples of the -first few relevant ones here split by order. We start by constructing -common quantities that appear in derivatives of multiple orders. +the notation $\mathbf{r}=(r_x, r_y, r_z)$, $r = |\mathbf{r}|$, $u=r/H$ +and $x=2r/r_s$. We also assume $H \ll r_s$. We can construct the higher order derivatives by +successively applying the "chain rule". We show representative +examples of the first few relevant ones here split by order. We start +by constructing derivatives of the truncated potentials: \begin{align} - \mathsf{\tilde{D}}_{1}(r, u, H) = + \alpha(x) &= \left(1 + e^x\right)^{-1} \nonumber \\ + \chi(r, r_s) &= 2\left(1 - e^{2r/r_s}\alpha(2r/r_s) \right) \nonumber \\ + \chi'(r, r_s) &= \frac{2}{r_s}\left(2\alpha(x)^2 - 2\alpha(x)\right) \nonumber \\ + \chi''(r, r_s) &= \frac{4}{r_s^2}\left(4\alpha(x)^3 - 6\alpha(x)^2 + 2\alpha(x)\right) \nonumber \\ + \chi^{(3)}(r, r_s) &= \frac{8}{r_s^3} \left(12\alpha(x)^4 - 24\alpha(x)^3 + 14\alpha(x)^2 -2 \alpha(x)\right) \nonumber \\ + \chi^{(4)}(r, r_s) &= \frac{16}{r_s^4} \left(48\alpha(x)^5 - 120\alpha(x)^4 + 100\alpha(x)^3 -30 \alpha(x)^2 + 2\alpha(x)\right) \nonumber \\ + \chi^{(5)}(r, r_s) &= \frac{32}{r_s^5} \left(240\alpha(x)^6 - 720\alpha(x)^5 + 780\alpha(x)^4 - 360\alpha(x)^3 + 62\alpha(x)^2 - 2\alpha(x) \right) \nonumber +\end{align} +We can now construct common quantities that appear in derivatives of +multiple orders: + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{align} + \mathsf{\tilde{D}}_{1}(r, r_s, H) = \left\lbrace\begin{array}{rcl} \left(-3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3\right)\times H^{-1} & \mbox{if} & u < 1,\\ - r^{-1} & \mbox{if} & u \geq 1, + %r^{-1} & \mbox{if} & u \geq 1, + \chi(r, r_s) \times r^{-1} & \mbox{if} & u \geq 1, \end{array} \right.\nonumber \end{align} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{align} - \mathsf{\tilde{D}}_{3}(r, u, H) = + \mathsf{\tilde{D}}_{3}(r, r_s, H) = \left\lbrace\begin{array}{rcl} -\left(21u^5 - 90u^4 + 140u^3 -84u^2 +14\right)\times H^{-3}& \mbox{if} & u < 1,\\ - -1 \times r^{-3} & \mbox{if} & u \geq 1, + %-1 \times r^{-3} & \mbox{if} & u \geq 1, + \left(r\chi'(r, r_s) - \chi(r, r_s)\right) \times r^{-3} & \mbox{if} & u \geq 1, \end{array} \right.\nonumber \end{align} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{align} - \mathsf{\tilde{D}}_{5}(r, u, H) = + \mathsf{\tilde{D}}_{5}(r, r_s, H) = \left\lbrace\begin{array}{rcl} \left(-105u^3 + 360u^2 - 420u + 168\right)\times H^{-5}& \mbox{if} & u < 1,\\ - 3\times r^{-5} & \mbox{if} & u \geq 1, + %3\times r^{-5} & \mbox{if} & u \geq 1, + \left(r^2\chi''(r, r_s) - 3r\chi'(r, r_s) + 3\chi(r, r_s) \right)\times r^{-5} & \mbox{if} & u \geq 1, \end{array} \right.\nonumber \end{align} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{align} - \mathsf{\tilde{D}}_{7}(r, u, H) = + \mathsf{\tilde{D}}_{7}(r, r_s, H) = \left\lbrace\begin{array}{rcl} -\left(315u - 720 + 420u^{-1}\right)\times H^{-7} & \mbox{if} & u < 1,\\ - -15\times r^{-7} & \mbox{if} & u \geq 1, + %-15\times r^{-7} & \mbox{if} & u \geq 1, + \left(r^3\chi^{(3)}(r, r_s) - 6r^2\chi''(r, r_s)+15r\chi'(r, r_s)-15\chi(r, r_s)\right) \times r^{-7} & \mbox{if} & u \geq 1, \end{array} \right.\nonumber \end{align} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{align} - \mathsf{\tilde{D}}_{9}(r, u, H) = + \mathsf{\tilde{D}}_{9}(r, r_s, H) = \left\lbrace\begin{array}{rcl} \left(-315u^{-1} + 420u^{-3}\right)\times H^{-9}& \mbox{if} & u < 1,\\ - 105\times r^{-9} & \mbox{if} & u \geq 1. + %105\times r^{-9} & \mbox{if} & u \geq 1. + \left(r^4\chi^{(4)}(r, r_s) - 10r^3\chi^{(3)} + 45r^2\chi''(r, r_s) - 105r\chi'(r, r_s) + 105\chi(r, r_s) \right) \times r^{-9} & \mbox{if} & u \geq 1 \end{array} \right.\nonumber \end{align} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{align} + \mathsf{\tilde{D}}_{11}(r, r_s, H) = + \left\lbrace\begin{array}{rcl} + -\left(315u^{-3} - 1260u^{-5}\right)\times H^{-11}& \mbox{if} & u < 1,\\ + %-945\times r^{-11} & \mbox{if} & u \geq 1. + \left(r^5\chi^{(5)}(r, r_s) - 15r^4\chi^{(4)}(r, r_s) + 105r^3\chi^{(3)}(r, r_s) - 420r^2\chi''(r, r_s) + 945r \chi'(r, r_s) - 945\chi(r, r_s)\right) \times r^{-11} & \mbox{if} & u \geq 1. + \end{array} + \right.\nonumber +\end{align} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Starting from the potential (Eq. \ref{eq:fmm:potential}, reproduced here for completeness), we can now build all the relevent derivatives \begin{align} - \mathsf{D}_{000}(\mathbf{r}) = \varphi (\mathbf{r},H) = - \mathsf{\tilde{D}}_{1}(r, u, H) \nonumber + \mathsf{D}_{000}(\mathbf{r}) = \varphi (\mathbf{r}, r_s, H) = + \mathsf{\tilde{D}}_{1}(r, r_s, H) \nonumber \end{align} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \noindent\rule{6cm}{0.4pt} \begin{align} - \mathsf{D}_{100}(\mathbf{r}) = \frac{\partial}{\partial r_x} \varphi (\mathbf{r},H) = - r_x \mathsf{\tilde{D}}_{3}(r, u, H) \nonumber + \mathsf{D}_{100}(\mathbf{r}) = \frac{\partial}{\partial r_x} \varphi (\mathbf{r}, r_s, H) = + r_x \mathsf{\tilde{D}}_{3}(r, r_s, H) \nonumber \end{align} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \noindent\rule{6cm}{0.4pt} \begin{align} -\mathsf{D}_{200}(\mathbf{r}) = \frac{\partial^2}{\partial r_x^2} \varphi (\mathbf{r},H) = -r_x^2 \mathsf{\tilde{D}}_{5}(r, u, H) + -\mathsf{\tilde{D}}_{3}(r, u, H)\nonumber +\mathsf{D}_{200}(\mathbf{r}) = \frac{\partial^2}{\partial r_x^2} \varphi (\mathbf{r}, r_s, H) = +r_x^2 \mathsf{\tilde{D}}_{5}(r, r_s, H) + +\mathsf{\tilde{D}}_{3}(r, r_s, H)\nonumber \end{align} \begin{align} -\mathsf{D}_{110}(\mathbf{r}) = \frac{\partial^2}{\partial r_x\partial r_y} \varphi (\mathbf{r},H) = - r_x r_y \mathsf{\tilde{D}}_{5}(r, u, H) \nonumber +\mathsf{D}_{110}(\mathbf{r}) = \frac{\partial^2}{\partial r_x\partial r_y} \varphi (\mathbf{r}, r_s, H) = + r_x r_y \mathsf{\tilde{D}}_{5}(r, r_s, H) \nonumber \end{align} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \noindent\rule{6cm}{0.4pt} \begin{align} -\mathsf{D}_{300}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \varphi (\mathbf{r},H) = - r_x^3 \mathsf{\tilde{D}}_{7}(r, u, H) - + 3 r_x \mathsf{\tilde{D}}_{5}(r, u, H) \nonumber +\mathsf{D}_{300}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \varphi (\mathbf{r}, r_s, H) = + r_x^3 \mathsf{\tilde{D}}_{7}(r, r_s, H) + + 3 r_x \mathsf{\tilde{D}}_{5}(r, r_s, H) \nonumber \end{align} \begin{align} -\mathsf{D}_{210}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^2 r_y} \varphi (\mathbf{r},H) = -r_x^2 r_y \mathsf{\tilde{D}}_{7}(r, u, H) + -r_y \mathsf{\tilde{D}}_{5}(r, u, H) \nonumber +\mathsf{D}_{210}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^2 r_y} \varphi (\mathbf{r}, r_s, H) = +r_x^2 r_y \mathsf{\tilde{D}}_{7}(r, r_s, H) + +r_y \mathsf{\tilde{D}}_{5}(r, r_s, H) \nonumber \end{align} \begin{align} -\mathsf{D}_{111}(\mathbf{r}) = \frac{\partial^3}{\partial r_x\partial r_y\partial r_z} \varphi (\mathbf{r},H) = - r_x r_y r_z \mathsf{\tilde{D}}_{7}(r, u, H) \nonumber +\mathsf{D}_{111}(\mathbf{r}) = \frac{\partial^3}{\partial r_x\partial r_y\partial r_z} \varphi (\mathbf{r}, r_s, H) = + r_x r_y r_z \mathsf{\tilde{D}}_{7}(r, r_s, H) \nonumber \end{align} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \noindent\rule{6cm}{0.4pt} \begin{align} \mathsf{D}_{400}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^4} - \varphi (\mathbf{r},H) = - r_x^4 \mathsf{\tilde{D}}_{9}(r, u, H)+ - 6r_x^2 \mathsf{\tilde{D}}_{7}(r, u, H) + - 3 \mathsf{\tilde{D}}_{5}(r, u, H) + \varphi (\mathbf{r}, r_s, H) = + r_x^4 \mathsf{\tilde{D}}_{9}(r, r_s, H)+ + 6r_x^2 \mathsf{\tilde{D}}_{7}(r, r_s, H) + + 3 \mathsf{\tilde{D}}_{5}(r, r_s, H) \nonumber \end{align} \begin{align} \mathsf{D}_{310}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^3 - \partial r_y} \varphi (\mathbf{r},H) = - r_x^3 r_y \mathsf{\tilde{D}}_{9}(r, u, H) + - 3 r_x r_y \mathsf{\tilde{D}}_{7}(r, u, H) + \partial r_y} \varphi (\mathbf{r}, r_s, H) = + r_x^3 r_y \mathsf{\tilde{D}}_{9}(r, r_s, H) + + 3 r_x r_y \mathsf{\tilde{D}}_{7}(r, r_s, H) \nonumber \end{align} \begin{align} \mathsf{D}_{220}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^2 - \partial r_y^2} \varphi (\mathbf{r},H) = - r_x^2 r_y^2 \mathsf{\tilde{D}}_{9}(r, u, H) + - r_x^2 \mathsf{\tilde{D}}_{7}(r, u, H) + - r_y^2 \mathsf{\tilde{D}}_{7}(r, u, H) + - \mathsf{\tilde{D}}_{5}(r, u, H) + \partial r_y^2} \varphi (\mathbf{r}, r_s, H) = + r_x^2 r_y^2 \mathsf{\tilde{D}}_{9}(r, r_s, H) + + r_x^2 \mathsf{\tilde{D}}_{7}(r, r_s, H) + + r_y^2 \mathsf{\tilde{D}}_{7}(r, r_s, H) + + \mathsf{\tilde{D}}_{5}(r, r_s, H) \nonumber \end{align} \begin{align} \mathsf{D}_{211}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^2 - \partial r_y \partial r_z} \varphi (\mathbf{r},H) = - r_x^2 r_y r_z \mathsf{\tilde{D}}_{9}(r, u, H) + - r_y r_z \mathsf{\tilde{D}}_{7}(r, u, H) + \partial r_y \partial r_z} \varphi (\mathbf{r}, r_s, H) = + r_x^2 r_y r_z \mathsf{\tilde{D}}_{9}(r, r_s, H) + + r_y r_z \mathsf{\tilde{D}}_{7}(r, r_s, H) \nonumber \end{align} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\noindent\rule{6cm}{0.4pt} +\begin{align} + \mathsf{D}_{500}(\mathbf{r}) &= \frac{\partial^5}{\partial r_x^5} + \varphi (\mathbf{r}, r_s, H) = + r_x^5 \mathsf{\tilde{D}}_{11}(r, r_s, H) + + 10r_x^3\mathsf{\tilde{D}}_{9}(r, r_s, H) + + 15r_x\mathsf{\tilde{D}}_{7}(r, r_s, H) + \nonumber +\end{align} + +\begin{align} + \mathsf{D}_{410}(\mathbf{r}) &= \frac{\partial^5}{\partial r_x^4 + \partial r_y} \varphi (\mathbf{r}, r_s, H) = + r_x^4 r_y \mathsf{\tilde{D}}_{11}(r, r_s, H) + + 6 r_x^2 r_y \mathsf{\tilde{D}}_{9}(r, r_s, H) + + 3 r_y \mathsf{\tilde{D}}_{7}(r, r_s, H) + \nonumber +\end{align} + +\begin{align} + \mathsf{D}_{320}(\mathbf{r}) &= \frac{\partial^5}{\partial r_x^3 + \partial r_y^2} \varphi (\mathbf{r}, r_s, H) = + r_x^3 r_y^2 \mathsf{\tilde{D}}_{11}(r, r_s, H) + + r_x^3 \mathsf{\tilde{D}}_{9}(r, r_s, H) + + 3 r_x r_y^2 \mathsf{\tilde{D}}_{9}(r, r_s, H) + + 3 r_x \mathsf{\tilde{D}}_{7}(r, r_s, H) + \nonumber +\end{align} + +\begin{align} + \mathsf{D}_{311}(\mathbf{r}) &= \frac{\partial^5}{\partial r_x^3 + \partial r_y \partial r_z} \varphi (\mathbf{r}, r_s, H) = + r_x^3 r_y r_z \mathsf{\tilde{D}}_{11}(r, r_s, H) + + 3 r_x r_y r_z \mathsf{\tilde{D}}_{9}(r, r_s, H) + \nonumber +\end{align} + +\begin{align} + \mathsf{D}_{221}(\mathbf{r}) &= \frac{\partial^5}{\partial r_x^2 + \partial r_y^2 \partial r_z} \varphi (\mathbf{r}, r_s, H) = + r_x^2 r_y^2 r_z \mathsf{\tilde{D}}_{11}(r, r_s, H) + + r_x^2 r_z \mathsf{\tilde{D}}_{9}(r, r_s, H) + + r_y^2 r_z \mathsf{\tilde{D}}_{9}(r, r_s, H) + + r_z \mathsf{\tilde{D}}_{y}(r, r_s, H) + \nonumber +\end{align} + + + + + + + + + + + + + + + + + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{comment} \noindent\rule{6cm}{0.4pt}