Commit aa97dbed authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge branch 'subset_sorting' into 'master'

Subset sorting

Massive change in the way we treat sub-cell tasks and sorts in general, i.e.:

 * Sorts and drifts now unlock *all* sorts and drifts above them, this ensures they are executed bottom-up even if intermediate tasks are not active,
 * Sub-cell task hierarchies are traversed consistently during density/ghost/force computations,
 * Sub-cell tasks no longer do the drifting and sorting themselves,
 * Sorts are only re-computed once the oldest sorted index goes stale.
 * Ghosts exist at a lower level in the tree.

See merge request !343
parents 2b692465 e5d24a2e
......@@ -14,7 +14,7 @@ TimeIntegration:
dt_max: 1e-4 # The maximal time-step size of the simulation (in internal units).
Scheduler:
cell_split_size: 50
cell_split_size: 400
# Parameters governing the snapshots
Snapshots:
......
......@@ -43,11 +43,11 @@ Statistics:
# Parameters for the hydrodynamics scheme
SPH:
resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
delta_neighbours: 0.1 # The tolerance for the targeted number of neighbours.
CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration.
max_ghost_iterations: 30 # (Optional) Maximal number of iterations allowed to converge towards the smoothing length.
max_volume_change: 2. # (Optional) Maximal allowed change of kernel volume over one time-step
h_tolerance: 1e-4 # (Optional) Relative accuracy of the Newton-Raphson scheme for the smoothing lengths.
h_max: 10. # (Optional) Maximal allowed smoothing length in internal units. Defaults to FLT_MAX if unspecified.
max_volume_change: 1.4 # (Optional) Maximal allowed change of kernel volume over one time-step.
max_ghost_iterations: 30 # (Optional) Maximal number of iterations allowed to converge towards the smoothing length.
# Parameters for the self-gravity scheme
Gravity:
......
This diff is collapsed.
......@@ -31,15 +31,16 @@
/* Local includes. */
#include "align.h"
#include "kernel_hydro.h"
#include "lock.h"
#include "multipole.h"
#include "part.h"
#include "space.h"
#include "task.h"
#include "timeline.h"
/* Avoid cyclic inclusions */
struct engine;
struct space;
struct scheduler;
/* Max tag size set to 2^29 to take into account some MPI implementations
......@@ -151,7 +152,9 @@ struct cell {
/*! The multipole initialisation task */
struct task *init_grav;
/*! The ghost task */
/*! The ghost tasks */
struct task *ghost_in;
struct task *ghost_out;
struct task *ghost;
/*! The extra ghost task for complex hydro schemes */
......@@ -269,9 +272,6 @@ struct cell {
/*! Nr of #spart in this cell. */
int scount;
/*! The size of the sort array */
int sortsize;
/*! Bit-mask indicating the sorted directions */
unsigned int sorted;
......@@ -326,6 +326,26 @@ struct cell {
/*! The maximal depth of this cell and its progenies */
char maxdepth;
/*! Values of dx_max and h_max before the drifts, used for sub-cell tasks. */
float dx_max_old;
float h_max_old;
float dx_max_sort_old;
/* Bit mask of sort directions that will be needed in the next timestep. */
unsigned int requires_sorts;
/*! Does this cell need to be drifted? */
char do_drift;
/*! Do any of this cell's sub-cells need to be drifted? */
char do_sub_drift;
/*! Bit mask of sorts that need to be computed for this cell. */
unsigned int do_sort;
/*! Do any of this cell's sub-cells need to be sorted? */
char do_sub_sort;
#ifdef SWIFT_DEBUG_CHECKS
/*! The list of tasks that have been executed on this cell */
char tasks_executed[64];
......@@ -373,10 +393,102 @@ void cell_reset_task_counters(struct cell *c);
int cell_is_drift_needed(struct cell *c, const struct engine *e);
int cell_unskip_tasks(struct cell *c, struct scheduler *s);
void cell_set_super(struct cell *c, struct cell *super);
void cell_drift_part(struct cell *c, const struct engine *e);
void cell_drift_part(struct cell *c, const struct engine *e, int force);
void cell_drift_gpart(struct cell *c, const struct engine *e);
void cell_drift_multipole(struct cell *c, const struct engine *e);
void cell_drift_all_multipoles(struct cell *c, const struct engine *e);
void cell_check_timesteps(struct cell *c);
void cell_store_pre_drift_values(struct cell *c);
void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj,
struct scheduler *s);
void cell_activate_drift_part(struct cell *c, struct scheduler *s);
void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s);
void cell_clear_drift_flags(struct cell *c, void *data);
/* Inlined functions (for speed). */
/**
 * @brief Can a sub-pair hydro task recurse to a lower level based
 * on the status of the particles in the cell.
 *
 * @param c The #cell.
 */
__attribute__((always_inline)) INLINE static int cell_can_recurse_in_pair_task(
    const struct cell *c) {

  /* Only a split cell can recurse. */
  if (!c->split) return 0;

  /* The interaction reach (cut-off radius plus the distance the parts have
     moved) must fit within half a sub-cell size. We use the _old snapshots
     here since the live values may have been updated by a drift. */
  const float reach = kernel_gamma * c->h_max_old + c->dx_max_old;
  return reach < 0.5f * c->dmin;
}
/**
 * @brief Can a sub-self hydro task recurse to a lower level based
 * on the status of the particles in the cell.
 *
 * @param c The #cell.
 */
__attribute__((always_inline)) INLINE static int cell_can_recurse_in_self_task(
    const struct cell *c) {

  /* Only the split status matters here: all the sub-pair and sub-self
     operations below will be executed anyway, so the particles need not be
     at exactly the right place. */
  const int is_split = c->split;
  return is_split;
}
/**
 * @brief Can a pair task associated with a cell be split into smaller
 * sub-tasks.
 *
 * @param c The #cell.
 */
__attribute__((always_inline)) INLINE static int cell_can_split_pair_task(
    const struct cell *c) {

  /* Is the cell split ? */
  /* If so, is the cut-off radius with some leeway smaller than */
  /* the sub-cell sizes ? */
  /* Note that since tasks are created right after a rebuild there is no */
  /* need to take any part motion into account (i.e. dx_max == 0 here). */
  return c->split && (space_stretch * kernel_gamma * c->h_max < 0.5f * c->dmin);
}
/**
 * @brief Can a self task associated with a cell be split into smaller
 * sub-tasks.
 *
 * @param c The #cell.
 */
__attribute__((always_inline)) INLINE static int cell_can_split_self_task(
    const struct cell *c) {

  /* Unsplit cells cannot host sub-tasks. */
  if (!c->split) return 0;

  /* The (stretched) cut-off radius must fit within half a sub-cell size.
     All the sub-pair and sub-self tasks will be created, so no further
     h_max check is needed. */
  return space_stretch * kernel_gamma * c->h_max < 0.5f * c->dmin;
}
/**
 * @brief Have particles in a pair of cells moved too much, requiring a
 * rebuild?
 *
 * @param ci The first #cell.
 * @param cj The second #cell.
 */
__attribute__((always_inline)) INLINE static int cell_need_rebuild_for_pair(
    const struct cell *ci, const struct cell *cj) {

  /* A rebuild is required when the cut-off radius plus the accumulated
     motion of the parts in both cells exceeds the cell size. */
  /* Note: ci->dmin == cj->dmin. */
  const float h_max = max(ci->h_max, cj->h_max);
  const float motion = ci->dx_max_part + cj->dx_max_part;
  return kernel_gamma * h_max + motion > cj->dmin;
}
#endif /* SWIFT_CELL_H */
......@@ -118,6 +118,34 @@ __attribute__((always_inline)) INLINE static float pow_dimension_plus_one(
#endif
}
/**
 * @brief Returns the argument to the power given by the dimension minus one
 *
 * Computes \f$x^{d-1}\f$.
 *
 * @param x The base.
 */
__attribute__((always_inline)) INLINE static float pow_dimension_minus_one(
    float x) {

#if defined(HYDRO_DIMENSION_3D)
  /* d - 1 == 2 */
  return x * x;
#elif defined(HYDRO_DIMENSION_2D)
  /* d - 1 == 1 */
  return x;
#elif defined(HYDRO_DIMENSION_1D)
  /* d - 1 == 0 */
  return 1.f;
#else
  error("The dimension is not defined !");
  return 0.f;
#endif
}
/**
* @brief Inverts the given dimension by dimension matrix (in place)
*
......
......@@ -120,6 +120,24 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) {
res->next = atomic_swap(l, res);
}
/**
 * @brief Recursively add non-implicit ghost tasks to a cell hierarchy.
 *
 * @param e The #engine.
 * @param c The #cell at which to start.
 * @param ghost_in The implicit task all ghosts in this hierarchy depend on.
 * @param ghost_out The implicit task depending on all ghosts here.
 */
void engine_add_ghosts(struct engine *e, struct cell *c, struct task *ghost_in,
                       struct task *ghost_out) {

  /* Large split cells recurse into their progeny... */
  if (c->split && c->count >= engine_max_parts_per_ghost) {
    for (int k = 0; k < 8; k++) {
      if (c->progeny[k] != NULL)
        engine_add_ghosts(e, c->progeny[k], ghost_in, ghost_out);
    }
  }

  /* ...otherwise, attach a concrete ghost task here, sandwiched between the
     implicit in/out tasks. */
  else {
    struct scheduler *sched = &e->sched;
    c->ghost = scheduler_addtask(sched, task_type_ghost, task_subtype_none, 0,
                                 0, c, NULL);
    scheduler_addunlock(sched, ghost_in, c->ghost);
    scheduler_addunlock(sched, c->ghost, ghost_out);
  }
}
/**
* @brief Generate the hydro hierarchical tasks for a hierarchy of cells -
* i.e. all the O(Npart) tasks.
......@@ -143,9 +161,17 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
/* Are we in a super-cell ? */
if (c->super == c) {
/* Add the sort task. */
c->sorts =
scheduler_addtask(s, task_type_sort, task_subtype_none, 0, 0, c, NULL);
/* Local tasks only... */
if (c->nodeID == e->nodeID) {
/* Add the drift task. */
c->drift_part = scheduler_addtask(s, task_type_drift_part,
task_subtype_none, 0, 0, c, NULL);
/* Add the two half kicks */
c->kick1 = scheduler_addtask(s, task_type_kick1, task_subtype_none, 0, 0,
c, NULL);
......@@ -180,10 +206,16 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
scheduler_addunlock(s, c->grav_down, c->kick2);
}
/* Generate the ghost task. */
if (is_hydro)
c->ghost = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0,
0, c, NULL);
/* Generate the ghost tasks. */
if (is_hydro) {
c->ghost_in =
scheduler_addtask(s, task_type_ghost, task_subtype_none, 0,
/* implicit = */ 1, c, NULL);
c->ghost_out =
scheduler_addtask(s, task_type_ghost, task_subtype_none, 0,
/* implicit = */ 1, c, NULL);
engine_add_ghosts(e, c, c->ghost_in, c->ghost_out);
}
#ifdef EXTRA_HYDRO_LOOP
/* Generate the extra ghost task. */
......@@ -1042,18 +1074,15 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj,
scheduler_addunlock(s, t_rho, ci->super->kick2);
/* The send_rho task depends on the cell's ghost task. */
scheduler_addunlock(s, ci->super->ghost, t_rho);
scheduler_addunlock(s, ci->super->ghost_out, t_rho);
/* The send_xv task should unlock the super-cell's ghost task. */
scheduler_addunlock(s, t_xv, ci->super->ghost);
scheduler_addunlock(s, t_xv, ci->super->ghost_in);
#endif
/* Drift before you send */
if (ci->drift_part == NULL)
ci->drift_part = scheduler_addtask(s, task_type_drift_part,
task_subtype_none, 0, 0, ci, NULL);
scheduler_addunlock(s, ci->drift_part, t_xv);
scheduler_addunlock(s, ci->super->drift_part, t_xv);
/* The super-cell's timestep task should unlock the send_ti task. */
scheduler_addunlock(s, ci->super->timestep, t_ti);
......@@ -1862,27 +1891,11 @@ void engine_count_and_link_tasks(struct engine *e) {
struct cell *const ci = t->ci;
struct cell *const cj = t->cj;
/* Link sort tasks to the next-higher sort task. */
/* Link sort tasks to all the higher sort task. */
if (t->type == task_type_sort) {
struct cell *finger = t->ci->parent;
while (finger != NULL && finger->sorts == NULL) finger = finger->parent;
if (finger != NULL) scheduler_addunlock(sched, t, finger->sorts);
}
/* Link drift tasks to the next-higher drift task. */
else if (t->type == task_type_drift_part) {
struct cell *finger = ci->parent;
while (finger != NULL && finger->drift_part == NULL)
finger = finger->parent;
if (finger != NULL) scheduler_addunlock(sched, t, finger->drift_part);
}
/* Link drift tasks to the next-higher drift task. */
else if (t->type == task_type_drift_gpart) {
struct cell *finger = ci->parent;
while (finger != NULL && finger->drift_gpart == NULL)
finger = finger->parent;
if (finger != NULL) scheduler_addunlock(sched, t, finger->drift_gpart);
for (struct cell *finger = t->ci->parent; finger != NULL;
finger = finger->parent)
if (finger->sorts != NULL) scheduler_addunlock(sched, t, finger->sorts);
}
/* Link self tasks to cells. */
......@@ -2108,8 +2121,8 @@ static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched,
struct cell *c,
int with_cooling) {
/* density loop --> ghost --> force loop */
scheduler_addunlock(sched, density, c->super->ghost);
scheduler_addunlock(sched, c->super->ghost, force);
scheduler_addunlock(sched, density, c->super->ghost_in);
scheduler_addunlock(sched, c->super->ghost_out, force);
if (with_cooling) {
/* force loop --> cooling (--> kick2) */
......@@ -2145,14 +2158,15 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
/* Sort tasks depend on the drift of the cell. */
if (t->type == task_type_sort && t->ci->nodeID == engine_rank) {
scheduler_addunlock(sched, t->ci->drift_part, t);
scheduler_addunlock(sched, t->ci->super->drift_part, t);
}
/* Self-interaction? */
else if (t->type == task_type_self && t->subtype == task_subtype_density) {
/* Make all density tasks depend on the drift. */
scheduler_addunlock(sched, t->ci->drift_part, t);
/* Make all density tasks depend on the drift and the sorts. */
scheduler_addunlock(sched, t->ci->super->drift_part, t);
scheduler_addunlock(sched, t->ci->super->sorts, t);
#ifdef EXTRA_HYDRO_LOOP
/* Start by constructing the task for the second and third hydro loop */
......@@ -2186,11 +2200,15 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
/* Otherwise, pair interaction? */
else if (t->type == task_type_pair && t->subtype == task_subtype_density) {
/* Make all density tasks depend on the drift. */
/* Make all density tasks depend on the drift and the sorts. */
if (t->ci->nodeID == engine_rank)
scheduler_addunlock(sched, t->ci->drift_part, t);
if (t->cj->nodeID == engine_rank)
scheduler_addunlock(sched, t->cj->drift_part, t);
scheduler_addunlock(sched, t->ci->super->drift_part, t);
scheduler_addunlock(sched, t->ci->super->sorts, t);
if (t->ci->super != t->cj->super) {
if (t->cj->nodeID == engine_rank)
scheduler_addunlock(sched, t->cj->super->drift_part, t);
scheduler_addunlock(sched, t->cj->super->sorts, t);
}
#ifdef EXTRA_HYDRO_LOOP
/* Start by constructing the task for the second and third hydro loop */
......@@ -2243,6 +2261,10 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
else if (t->type == task_type_sub_self &&
t->subtype == task_subtype_density) {
/* Make all density tasks depend on the drift and sorts. */
scheduler_addunlock(sched, t->ci->super->drift_part, t);
scheduler_addunlock(sched, t->ci->super->sorts, t);
#ifdef EXTRA_HYDRO_LOOP
/* Start by constructing the task for the second and third hydro loop */
......@@ -2285,6 +2307,16 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
else if (t->type == task_type_sub_pair &&
t->subtype == task_subtype_density) {
/* Make all density tasks depend on the drift. */
if (t->ci->nodeID == engine_rank)
scheduler_addunlock(sched, t->ci->super->drift_part, t);
scheduler_addunlock(sched, t->ci->super->sorts, t);
if (t->ci->super != t->cj->super) {
if (t->cj->nodeID == engine_rank)
scheduler_addunlock(sched, t->cj->super->drift_part, t);
scheduler_addunlock(sched, t->cj->super->sorts, t);
}
#ifdef EXTRA_HYDRO_LOOP
/* Start by constructing the task for the second and third hydro loop */
......@@ -2542,6 +2574,11 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
/* Set this task's skip. */
if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
/* Store current values of dx_max and h_max. */
if (t->type == task_type_sub_self && t->subtype == task_subtype_density) {
cell_activate_subcell_tasks(t->ci, NULL, s);
}
}
/* Pair? */
......@@ -2561,38 +2598,27 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
if (t->subtype != task_subtype_density) continue;
/* Too much particle movement? */
if (max(ci->h_max, cj->h_max) + ci->dx_max_part + cj->dx_max_part >
cj->dmin)
*rebuild_space = 1;
if (cell_need_rebuild_for_pair(ci, cj)) *rebuild_space = 1;
/* Set the correct sorting flags */
if (t->type == task_type_pair) {
if (ci->dx_max_sort > space_maxreldx * ci->dmin) {
for (struct cell *finger = ci; finger != NULL;
finger = finger->parent)
finger->sorted = 0;
}
if (cj->dx_max_sort > space_maxreldx * cj->dmin) {
for (struct cell *finger = cj; finger != NULL;
finger = finger->parent)
finger->sorted = 0;
}
if (!(ci->sorted & (1 << t->flags))) {
#ifdef SWIFT_DEBUG_CHECKS
if (!(ci->sorts->flags & (1 << t->flags)))
error("bad flags in sort task.");
#endif
scheduler_activate(s, ci->sorts);
if (ci->nodeID == engine_rank) scheduler_activate(s, ci->drift_part);
}
if (!(cj->sorted & (1 << t->flags))) {
#ifdef SWIFT_DEBUG_CHECKS
if (!(cj->sorts->flags & (1 << t->flags)))
error("bad flags in sort task.");
#endif
scheduler_activate(s, cj->sorts);
if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift_part);
}
/* Store some values. */
atomic_or(&ci->requires_sorts, 1 << t->flags);
atomic_or(&cj->requires_sorts, 1 << t->flags);
ci->dx_max_sort_old = ci->dx_max_sort;
cj->dx_max_sort_old = cj->dx_max_sort;
/* Activate the drift tasks. */
if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s);
if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s);
/* Activate the sorts where needed. */
cell_activate_sorts(ci, t->flags, s);
cell_activate_sorts(cj, t->flags, s);
}
/* Store current values of dx_max and h_max. */
else if (t->type == task_type_sub_pair) {
cell_activate_subcell_tasks(t->ci, t->cj, s);
}
#ifdef WITH_MPI
......@@ -2617,15 +2643,11 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
if (l == NULL) error("Missing link to send_xv task.");
scheduler_activate(s, l->t);
/* Drift both cells, the foreign one at the level which it is sent. */
if (l->t->ci->drift_part)
scheduler_activate(s, l->t->ci->drift_part);
else
error("Drift task missing !");
if (t->type == task_type_pair) scheduler_activate(s, cj->drift_part);
/* Drift the cell which will be sent at the level at which it is sent,
i.e. drift the cell specified in the send task (l->t) itself. */
cell_activate_drift_part(l->t->ci, s);
if (cell_is_active(cj, e)) {
for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID;
l = l->next)
;
......@@ -2667,12 +2689,9 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
if (l == NULL) error("Missing link to send_xv task.");
scheduler_activate(s, l->t);
/* Drift both cells, the foreign one at the level which it is sent. */
if (l->t->ci->drift_part)
scheduler_activate(s, l->t->ci->drift_part);
else
error("Drift task missing !");
if (t->type == task_type_pair) scheduler_activate(s, ci->drift_part);
/* Drift the cell which will be sent at the level at which it is sent,
i.e. drift the cell specified in the send task (l->t) itself. */
cell_activate_drift_part(l->t->ci, s);
if (cell_is_active(ci, e)) {
for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID;
......@@ -2695,24 +2714,14 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
if (l == NULL) error("Missing link to send_ti task.");
scheduler_activate(s, l->t);
}
} else if (t->type == task_type_pair) {
scheduler_activate(s, ci->drift_part);
scheduler_activate(s, cj->drift_part);
}
#else
if (t->type == task_type_pair) {
scheduler_activate(s, ci->drift_part);
scheduler_activate(s, cj->drift_part);
}
#endif
}
/* Kick/Drift/init ? */
else if (t->type == task_type_kick1 || t->type == task_type_kick2 ||
t->type == task_type_drift_part ||
t->type == task_type_drift_gpart ||
t->type == task_type_init_grav) {
if (t->type == task_type_kick1 || t->type == task_type_kick2 ||
t->type == task_type_drift_part || t->type == task_type_drift_gpart ||
t->type == task_type_init_grav) {
if (cell_is_active(t->ci, e)) scheduler_activate(s, t);
}
......@@ -3166,6 +3175,9 @@ void engine_skip_force_and_kick(struct engine *e) {
t->type == task_type_cooling || t->type == task_type_sourceterms)
t->skip = 1;
}
/* Run through the cells and clear some flags. */
space_map_cells_pre(e->s, 1, cell_clear_drift_flags, NULL);
}
/**
......@@ -3182,10 +3194,12 @@ void engine_skip_drift(struct engine *e) {
struct task *t = &tasks[i];
/* Skip everything that moves the particles */
if (t->type == task_type_drift_part || t->type == task_type_drift_gpart)
t->skip = 1;
/* Skip everything that updates the particles */
if (t->type == task_type_drift_part) t->skip = 1;
}
/* Run through the cells and clear some flags. */
space_map_cells_pre(e->s, 1, cell_clear_drift_flags, NULL);
}
/**
......@@ -3561,7 +3575,7 @@ void engine_do_drift_all_mapper(void *map_data, int num_elements,
struct cell *c = &cells[ind];
if (c != NULL && c->nodeID == e->nodeID) {
/* Drift all the particles */
cell_drift_part(c, e);
cell_drift_part(c, e, 1);
/* Drift all the g-particles */
cell_drift_gpart(c, e);
......@@ -4252,7 +4266,7 @@ void engine_init(struct engine *e, struct space *s,
"Version: %s \n# "
"Number of threads: %d\n# Number of MPI ranks: %d\n# Hydrodynamic "
"scheme: %s\n# Hydrodynamic kernel: %s\n# No. of neighbours: %.2f "
"+/- %.2f\n# Eta: %f\n",
"+/- %.4f\n# Eta: %f\n",
hostname(), git_branch(), git_revision(), compiler_name(),
compiler_version(), e->nr_threads, e->nr_nodes, SPH_IMPLEMENTATION,
kernel_name, e->hydro_properties->target_neighbours,
......
......@@ -82,6 +82,7 @@ extern const char *engine_policy_names[];
#define engine_redistribute_alloc_margin 1.2
#define engine_default_energy_file_name "energy"
#define engine_default_timesteps_file_name "timesteps"
#define engine_max_parts_per_ghost 1000
/* The rank of the engine as a global variable (for messages). */
extern int engine_rank;
......
......@@ -206,12 +206,13 @@ __attribute__((always_inline)) INLINE static void hydro_end_density(
p->rho += p->mass * kernel_root;
p->density.rho_dh -= hydro_dimension * p->mass * kernel_root;
p->density.wcount += kernel_root;
p->density.wcount_dh -= hydro_dimension * kernel_root;
/* Finish the calculation by inserting the missing h-factors */
p->rho *= h_inv_dim;
p->density.rho_dh *= h_inv_dim_plus_one;
p->density.wcount *= kernel_norm;
p->density.wcount_dh *= h_inv * kernel_gamma * kernel_norm;
p->density.wcount *= h_inv_dim;
p->density.wcount_dh *= h_inv_dim_plus_one;