/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
* Matthieu Schaller (schaller@strw.leidenuniv.nl)
* 2015 Peter W. Draper (p.w.draper@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see .
*
******************************************************************************/
/* Config parameters. */
#include
/* This object's header. */
#include "runner.h"
/* Local headers. */
#include "active.h"
#include "cell.h"
#include "engine.h"
#include "timers.h"
/*! The size of the sorting stack used at the leaf level */
const int sort_stack_size = 10;
/**
* @brief Sorts again all the stars in a given cell hierarchy.
*
* This is intended to be used after the star formation task has been run
* to get the cells back into a state where self/pair star tasks can be run.
*
* @param r The thread #runner.
* @param c The top-level cell to run on.
* @param timer Are we timing this?
*/
void runner_do_stars_resort(struct runner *r, struct cell *c, const int timer) {
#ifdef SWIFT_DEBUG_CHECKS
if (c->nodeID != r->e->nodeID) error("Task must be run locally!");
#endif
TIMER_TIC;
/* Did we demand a recalculation of the stars'sorts? */
if (cell_get_flag(c, cell_flag_do_stars_resort)) {
runner_do_all_stars_sort(r, c);
cell_clear_flag(c, cell_flag_do_stars_resort);
}
if (timer) TIMER_TOC(timer_do_stars_resort);
}
/**
* @brief Sort the entries in ascending order using QuickSort.
*
* @param sort The entries
* @param N The number of entries.
*/
void runner_do_sort_ascending(struct sort_entry *sort, int N) {
struct {
short int lo, hi;
} qstack[sort_stack_size];
int qpos, i, j, lo, hi, imin;
struct sort_entry temp;
float pivot;
if (N >= (1LL << sort_stack_size)) {
error(
"The stack size for sorting is too small."
"Either increase it or reduce the number of parts per cell.");
}
/* Sort parts in cell_i in decreasing order with quicksort */
qstack[0].lo = 0;
qstack[0].hi = N - 1;
qpos = 0;
while (qpos >= 0) {
lo = qstack[qpos].lo;
hi = qstack[qpos].hi;
qpos -= 1;
/* Do we have a low number of element to sort? */
if (hi - lo < 15) {
/* Sort the last elements. */
for (i = lo; i < hi; i++) {
imin = i;
/* Find the minimal value. */
for (j = i + 1; j <= hi; j++) {
if (sort[j].d < sort[imin].d) {
imin = j;
}
}
/* Swap the elements if a smaller element exists. */
if (imin != i) {
temp = sort[imin];
sort[imin] = sort[i];
sort[i] = temp;
}
}
} else {
/* Select a pivot */
pivot = sort[(lo + hi) / 2].d;
i = lo;
j = hi;
/* Ensure that the elements before/after the pivot
are smaller/larger than the pivot. */
while (i <= j) {
/* Find the first elements that do not respect
the order. */
while (sort[i].d < pivot) i++;
while (sort[j].d > pivot) j--;
/* Did we get two different elements */
if (i <= j) {
if (i < j) {
/* Swap the elements */
temp = sort[i];
sort[i] = sort[j];
sort[j] = temp;
}
i += 1;
j -= 1;
}
}
/* Add the next operations to the stack.
* The order is important in order to decrease the stack size.
*/
if (j > (lo + hi) / 2) {
if (lo < j) {
qpos += 1;
qstack[qpos].lo = lo;
qstack[qpos].hi = j;
}
if (i < hi) {
qpos += 1;
qstack[qpos].lo = i;
qstack[qpos].hi = hi;
}
} else {
if (i < hi) {
qpos += 1;
qstack[qpos].lo = i;
qstack[qpos].hi = hi;
}
if (lo < j) {
qpos += 1;
qstack[qpos].lo = lo;
qstack[qpos].hi = j;
}
}
}
}
}
#ifdef SWIFT_DEBUG_CHECKS
/**
* @brief Recursively checks that the flags are consistent in a cell hierarchy.
*
* Debugging function. Exists in two flavours: hydro & stars.
*/
#define RUNNER_CHECK_SORTS(TYPE) \
void runner_check_sorts_##TYPE(struct cell *c, int flags) { \
\
if (flags & ~c->TYPE.sorted) error("Inconsistent sort flags (downward)!"); \
if (c->split) \
for (int k = 0; k < 8; k++) \
if (c->progeny[k] != NULL && c->progeny[k]->TYPE.count > 0) \
runner_check_sorts_##TYPE(c->progeny[k], c->TYPE.sorted); \
}
#else
#define RUNNER_CHECK_SORTS(TYPE) \
void runner_check_sorts_##TYPE(struct cell *c, int flags) { \
error("Calling debugging code without debugging flag activated."); \
}
#endif
RUNNER_CHECK_SORTS(hydro)
RUNNER_CHECK_SORTS(stars)
/**
* @brief Sort the particles in the given cell along all cardinal directions.
*
* @param r The #runner.
* @param c The #cell.
* @param flags Cell flag.
* @param cleanup If true, re-build the sorts for the selected flags instead
* of just adding them.
* @param rt_requests_sort whether this sort was requested for RT. If true,
* this cell is allowed to be undrifted.
* @param clock Flag indicating whether to record the timing or not, needed
* for recursive calls.
*/
void runner_do_hydro_sort(struct runner *r, struct cell *c, int flags,
const int cleanup, const int lock,
const int rt_requests_sort, const int clock) {
struct sort_entry *fingers[8];
const int count = c->hydro.count;
const struct part *parts = c->hydro.parts;
struct xpart *xparts = c->hydro.xparts;
float buff[8];
TIMER_TIC;
#ifdef SWIFT_DEBUG_CHECKS
if (c->hydro.super == NULL) error("Task called above the super level!!!");
#endif
/* Should we lock the cell once more? */
if (lock) lock_lock(&c->hydro.extra_sort_lock);
/* We need to do the local sorts plus whatever was requested further up. */
flags |= c->hydro.do_sort;
if (cleanup) {
c->hydro.sorted = 0;
} else {
flags &= ~c->hydro.sorted;
}
if (flags == 0 && !cell_get_flag(c, cell_flag_do_hydro_sub_sort) &&
!cell_get_flag(c, cell_flag_do_rt_sub_sort)) {
/* Unlock before returning */
if (lock && lock_unlock(&c->hydro.extra_sort_lock) != 0)
error("Impossible to unlock the cell!");
return;
}
/* Check that the particles have been moved to the current time */
if (flags && !cell_are_part_drifted(c, r->e)) {
/* If the sort was requested by RT, cell may be intentionally
* undrifted. */
if (!rt_requests_sort) error("Sorting un-drifted cell");
}
#ifdef SWIFT_DEBUG_CHECKS
/* Make sure the sort flags are consistent (downward). */
runner_check_sorts_hydro(c, c->hydro.sorted);
/* Make sure the sort flags are consistent (upward). */
for (struct cell *finger = c->parent; finger != NULL;
finger = finger->parent) {
if (finger->hydro.sorted & ~c->hydro.sorted)
error("Inconsistent sort flags (upward).");
}
/* Update the sort timer which represents the last time the sorts
were re-set. */
if (c->hydro.sorted == 0) c->hydro.ti_sort = r->e->ti_current;
#endif
/* Allocate memory for sorting. */
cell_malloc_hydro_sorts(c, flags);
/* Does this cell have any progeny? */
if (c->split) {
/* Fill in the gaps within the progeny. */
float dx_max_sort = 0.0f;
float dx_max_sort_old = 0.0f;
for (int k = 0; k < 8; k++) {
if (c->progeny[k] != NULL) {
if (c->progeny[k]->hydro.count > 0) {
/* Only propagate cleanup if the progeny is stale. */
runner_do_hydro_sort(
r, c->progeny[k], flags,
cleanup && (c->progeny[k]->hydro.dx_max_sort_old >
space_maxreldx * c->progeny[k]->dmin),
lock, rt_requests_sort, /*clock=*/0);
dx_max_sort = max(dx_max_sort, c->progeny[k]->hydro.dx_max_sort);
dx_max_sort_old =
max(dx_max_sort_old, c->progeny[k]->hydro.dx_max_sort_old);
} else {
/* We need to clean up the unused flags that were in case the
number of particles in the cell would change */
cell_clear_hydro_sort_flags(c->progeny[k], /*clear_unused_flags=*/1);
}
}
}
c->hydro.dx_max_sort = dx_max_sort;
c->hydro.dx_max_sort_old = dx_max_sort_old;
/* Loop over the 13 different sort arrays. */
for (int j = 0; j < 13; j++) {
/* Has this sort array been flagged? */
if (!(flags & (1 << j))) continue;
/* Init the particle index offsets. */
int off[8];
off[0] = 0;
for (int k = 1; k < 8; k++)
if (c->progeny[k - 1] != NULL)
off[k] = off[k - 1] + c->progeny[k - 1]->hydro.count;
else
off[k] = off[k - 1];
/* Init the entries and indices. */
int inds[8];
for (int k = 0; k < 8; k++) {
inds[k] = k;
if (c->progeny[k] != NULL && c->progeny[k]->hydro.count > 0) {
fingers[k] = cell_get_hydro_sorts(c->progeny[k], j);
buff[k] = fingers[k]->d;
off[k] = off[k];
} else
buff[k] = FLT_MAX;
}
/* Sort the buffer. */
for (int i = 0; i < 7; i++)
for (int k = i + 1; k < 8; k++)
if (buff[inds[k]] < buff[inds[i]]) {
int temp_i = inds[i];
inds[i] = inds[k];
inds[k] = temp_i;
}
/* For each entry in the new sort list. */
struct sort_entry *finger = cell_get_hydro_sorts(c, j);
for (int ind = 0; ind < count; ind++) {
/* Copy the minimum into the new sort array. */
finger[ind].d = buff[inds[0]];
finger[ind].i = fingers[inds[0]]->i + off[inds[0]];
/* Update the buffer. */
fingers[inds[0]] += 1;
buff[inds[0]] = fingers[inds[0]]->d;
/* Find the smallest entry. */
for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
int temp_i = inds[k - 1];
inds[k - 1] = inds[k];
inds[k] = temp_i;
}
} /* Merge. */
/* Add a sentinel. */
struct sort_entry *entries = cell_get_hydro_sorts(c, j);
entries[count].d = FLT_MAX;
entries[count].i = 0;
/* Mark as sorted. */
atomic_or(&c->hydro.sorted, 1 << j);
} /* loop over sort arrays. */
} /* progeny? */
/* Otherwise, just sort. */
else {
/* Reset the sort distance */
if (c->hydro.sorted == 0) {
#ifdef SWIFT_DEBUG_CHECKS
if (xparts != NULL && c->nodeID != engine_rank)
error("Have non-NULL xparts in foreign cell");
#endif
/* And the individual sort distances if we are a local cell */
if (xparts != NULL) {
for (int k = 0; k < count; k++) {
xparts[k].x_diff_sort[0] = 0.0f;
xparts[k].x_diff_sort[1] = 0.0f;
xparts[k].x_diff_sort[2] = 0.0f;
}
}
c->hydro.dx_max_sort_old = 0.f;
c->hydro.dx_max_sort = 0.f;
}
/* Fill the sort array. */
for (int k = 0; k < count; k++) {
const double px[3] = {parts[k].x[0], parts[k].x[1], parts[k].x[2]};
for (int j = 0; j < 13; j++)
if (flags & (1 << j)) {
struct sort_entry *entries = cell_get_hydro_sorts(c, j);
entries[k].i = k;
entries[k].d = px[0] * runner_shift[j][0] +
px[1] * runner_shift[j][1] +
px[2] * runner_shift[j][2];
}
}
/* Add the sentinel and sort. */
for (int j = 0; j < 13; j++)
if (flags & (1 << j)) {
struct sort_entry *entries = cell_get_hydro_sorts(c, j);
entries[count].d = FLT_MAX;
entries[count].i = 0;
runner_do_sort_ascending(entries, count);
atomic_or(&c->hydro.sorted, 1 << j);
}
}
#ifdef SWIFT_DEBUG_CHECKS
/* Verify the sorting. */
for (int j = 0; j < 13; j++) {
if (!(flags & (1 << j))) continue;
struct sort_entry *finger = cell_get_hydro_sorts(c, j);
for (int k = 1; k < count; k++) {
if (finger[k].d < finger[k - 1].d)
error("Sorting failed, ascending array.");
if (finger[k].i >= count) error("Sorting failed, indices borked.");
}
}
/* Make sure the sort flags are consistent (downward). */
runner_check_sorts_hydro(c, flags);
/* Make sure the sort flags are consistent (upward). */
for (struct cell *finger = c->parent; finger != NULL;
finger = finger->parent) {
if (finger->hydro.sorted & ~c->hydro.sorted)
error("Inconsistent sort flags.");
}
#endif
/* Clear the cell's sort flags. */
c->hydro.do_sort = 0;
cell_clear_flag(c, cell_flag_do_hydro_sub_sort);
cell_clear_flag(c, cell_flag_do_rt_sub_sort);
cell_clear_flag(c, cell_flag_rt_requests_sort);
c->hydro.requires_sorts = 0;
/* Make sure we unlock if necessary */
if (lock && lock_unlock(&c->hydro.extra_sort_lock) != 0)
error("Impossible to unlock the cell!");
if (clock) TIMER_TOC(timer_dosort);
}
/**
* @brief Sort the stars particles in the given cell along all cardinal
* directions.
*
* @param r The #runner.
* @param c The #cell.
* @param flags Cell flag.
* @param cleanup If true, re-build the sorts for the selected flags instead
* of just adding them.
* @param clock Flag indicating whether to record the timing or not, needed
* for recursive calls.
*/
void runner_do_stars_sort(struct runner *r, struct cell *c, int flags,
int cleanup, int clock) {
struct sort_entry *fingers[8];
const int count = c->stars.count;
struct spart *sparts = c->stars.parts;
float buff[8];
TIMER_TIC;
#ifdef SWIFT_DEBUG_CHECKS
if (c->hydro.super == NULL) error("Task called above the super level!!!");
#endif
/* We need to do the local sorts plus whatever was requested further up. */
flags |= c->stars.do_sort;
if (cleanup) {
c->stars.sorted = 0;
} else {
flags &= ~c->stars.sorted;
}
if (flags == 0 && !cell_get_flag(c, cell_flag_do_stars_sub_sort)) return;
/* Check that the particles have been moved to the current time */
if (flags && !cell_are_spart_drifted(c, r->e)) {
error("Sorting un-drifted cell c->nodeID=%d", c->nodeID);
}
#ifdef SWIFT_DEBUG_CHECKS
/* Make sure the sort flags are consistent (downward). */
runner_check_sorts_stars(c, c->stars.sorted);
/* Make sure the sort flags are consistent (upward). */
for (struct cell *finger = c->parent; finger != NULL;
finger = finger->parent) {
if (finger->stars.sorted & ~c->stars.sorted)
error("Inconsistent sort flags (upward).");
}
/* Update the sort timer which represents the last time the sorts
were re-set. */
if (c->stars.sorted == 0) c->stars.ti_sort = r->e->ti_current;
#endif
/* start by allocating the entry arrays in the requested dimensions. */
cell_malloc_stars_sorts(c, flags);
/* Does this cell have any progeny? */
if (c->split) {
/* Fill in the gaps within the progeny. */
float dx_max_sort = 0.0f;
float dx_max_sort_old = 0.0f;
for (int k = 0; k < 8; k++) {
if (c->progeny[k] != NULL) {
if (c->progeny[k]->stars.count > 0) {
/* Only propagate cleanup if the progeny is stale. */
const int cleanup_prog =
cleanup && (c->progeny[k]->stars.dx_max_sort_old >
space_maxreldx * c->progeny[k]->dmin);
runner_do_stars_sort(r, c->progeny[k], flags, cleanup_prog, 0);
dx_max_sort = max(dx_max_sort, c->progeny[k]->stars.dx_max_sort);
dx_max_sort_old =
max(dx_max_sort_old, c->progeny[k]->stars.dx_max_sort_old);
} else {
/* We need to clean up the unused flags that were in case the
number of particles in the cell would change */
cell_clear_stars_sort_flags(c->progeny[k], /*clear_unused_flags=*/1);
}
}
}
c->stars.dx_max_sort = dx_max_sort;
c->stars.dx_max_sort_old = dx_max_sort_old;
/* Loop over the 13 different sort arrays. */
for (int j = 0; j < 13; j++) {
/* Has this sort array been flagged? */
if (!(flags & (1 << j))) continue;
/* Init the particle index offsets. */
int off[8];
off[0] = 0;
for (int k = 1; k < 8; k++)
if (c->progeny[k - 1] != NULL)
off[k] = off[k - 1] + c->progeny[k - 1]->stars.count;
else
off[k] = off[k - 1];
/* Init the entries and indices. */
int inds[8];
for (int k = 0; k < 8; k++) {
inds[k] = k;
if (c->progeny[k] != NULL && c->progeny[k]->stars.count > 0) {
fingers[k] = cell_get_stars_sorts(c->progeny[k], j);
buff[k] = fingers[k]->d;
off[k] = off[k];
} else
buff[k] = FLT_MAX;
}
/* Sort the buffer. */
for (int i = 0; i < 7; i++)
for (int k = i + 1; k < 8; k++)
if (buff[inds[k]] < buff[inds[i]]) {
int temp_i = inds[i];
inds[i] = inds[k];
inds[k] = temp_i;
}
/* For each entry in the new sort list. */
struct sort_entry *finger = cell_get_stars_sorts(c, j);
for (int ind = 0; ind < count; ind++) {
/* Copy the minimum into the new sort array. */
finger[ind].d = buff[inds[0]];
finger[ind].i = fingers[inds[0]]->i + off[inds[0]];
/* Update the buffer. */
fingers[inds[0]] += 1;
buff[inds[0]] = fingers[inds[0]]->d;
/* Find the smallest entry. */
for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
int temp_i = inds[k - 1];
inds[k - 1] = inds[k];
inds[k] = temp_i;
}
} /* Merge. */
/* Add a sentinel. */
struct sort_entry *entries = cell_get_stars_sorts(c, j);
entries[count].d = FLT_MAX;
entries[count].i = 0;
/* Mark as sorted. */
atomic_or(&c->stars.sorted, 1 << j);
} /* loop over sort arrays. */
} /* progeny? */
/* Otherwise, just sort. */
else {
/* Reset the sort distance */
if (c->stars.sorted == 0) {
/* And the individual sort distances if we are a local cell */
for (int k = 0; k < count; k++) {
sparts[k].x_diff_sort[0] = 0.0f;
sparts[k].x_diff_sort[1] = 0.0f;
sparts[k].x_diff_sort[2] = 0.0f;
}
c->stars.dx_max_sort_old = 0.f;
c->stars.dx_max_sort = 0.f;
}
/* Fill the sort array. */
for (int k = 0; k < count; k++) {
const double px[3] = {sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]};
for (int j = 0; j < 13; j++)
if (flags & (1 << j)) {
struct sort_entry *entries = cell_get_stars_sorts(c, j);
entries[k].i = k;
entries[k].d = px[0] * runner_shift[j][0] +
px[1] * runner_shift[j][1] +
px[2] * runner_shift[j][2];
}
}
/* Add the sentinel and sort. */
for (int j = 0; j < 13; j++)
if (flags & (1 << j)) {
struct sort_entry *entries = cell_get_stars_sorts(c, j);
entries[count].d = FLT_MAX;
entries[count].i = 0;
runner_do_sort_ascending(entries, count);
atomic_or(&c->stars.sorted, 1 << j);
}
}
#ifdef SWIFT_DEBUG_CHECKS
/* Verify the sorting. */
for (int j = 0; j < 13; j++) {
if (!(flags & (1 << j))) continue;
struct sort_entry *finger = cell_get_stars_sorts(c, j);
for (int k = 1; k < count; k++) {
if (finger[k].d < finger[k - 1].d)
error("Sorting failed, ascending array.");
if (finger[k].i >= count) error("Sorting failed, indices borked.");
}
}
/* Make sure the sort flags are consistent (downward). */
runner_check_sorts_stars(c, flags);
/* Make sure the sort flags are consistent (upward). */
for (struct cell *finger = c->parent; finger != NULL;
finger = finger->parent) {
if (finger->stars.sorted & ~c->stars.sorted)
error("Inconsistent sort flags.");
}
#endif
/* Clear the cell's sort flags. */
c->stars.do_sort = 0;
cell_clear_flag(c, cell_flag_do_stars_sub_sort);
c->stars.requires_sorts = 0;
if (clock) TIMER_TOC(timer_do_stars_sort);
}
/**
* @brief Recurse into a cell until reaching the super level and call
* the hydro sorting function there.
*
* This function must be called at or above the super level!
*
* This function will sort the particles in all 13 directions.
*
* @param r the #runner.
* @param c the #cell.
*/
void runner_do_all_hydro_sort(struct runner *r, struct cell *c) {
#ifdef SWIFT_DEBUG_CHECKS
if (c->nodeID != engine_rank) error("Function called on a foreign cell!");
#endif
if (!cell_is_active_hydro(c, r->e)) return;
/* Shall we sort at this level? */
if (c->hydro.super == c) {
/* Sort everything */
runner_do_hydro_sort(r, c, 0x1FFF, /*cleanup=*/0, /* lock=*/0,
/*rt_requests_sort=*/0, /*timer=*/0);
} else {
#ifdef SWIFT_DEBUG_CHECKS
if (c->hydro.super != NULL) error("Function called below the super level!");
#endif
/* Ok, then, let's try lower */
if (c->split) {
for (int k = 0; k < 8; ++k) {
if (c->progeny[k] != NULL) runner_do_all_hydro_sort(r, c->progeny[k]);
}
} else {
#ifdef SWIFT_DEBUG_CHECKS
error("Reached a leaf without encountering a hydro super cell!");
#endif
}
}
}
/**
* @brief Recurse into a cell until reaching the super level and call
* the star sorting function there.
*
* This function must be called at or above the super level!
*
* This function will sort the particles in all 13 directions.
*
* @param r the #runner.
* @param c the #cell.
*/
void runner_do_all_stars_sort(struct runner *r, struct cell *c) {
#ifdef SWIFT_DEBUG_CHECKS
if (c->nodeID != engine_rank) error("Function called on a foreign cell!");
#endif
if (!cell_is_active_stars(c, r->e) && !cell_is_active_hydro(c, r->e)) return;
/* Shall we sort at this level? */
if (c->hydro.super == c) {
/* Sort everything */
runner_do_stars_sort(r, c, 0x1FFF, /*cleanup=*/0, /*timer=*/0);
} else {
#ifdef SWIFT_DEBUG_CHECKS
if (c->hydro.super != NULL) error("Function called below the super level!");
#endif
/* Ok, then, let's try lower */
if (c->split) {
for (int k = 0; k < 8; ++k) {
if (c->progeny[k] != NULL) runner_do_all_stars_sort(r, c->progeny[k]);
}
} else {
#ifdef SWIFT_DEBUG_CHECKS
error("Reached a leaf without encountering a hydro super cell!");
#endif
}
}
}